Index: head/sbin/fsck/dir.c =================================================================== --- head/sbin/fsck/dir.c (revision 34265) +++ head/sbin/fsck/dir.c (revision 34266) @@ -1,734 +1,737 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)dir.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" char *lfname = "lost+found"; int lfmode = 01777; struct dirtemplate emptydir = { 0, DIRBLKSIZ }; struct dirtemplate dirhead = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; struct odirtemplate odirhead = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static int chgino __P((struct inodesc *)); static int dircheck __P((struct inodesc *, struct direct *)); static int expanddir __P((struct dinode *dp, char *name)); static void freedir __P((ino_t ino, ino_t parent)); static struct direct *fsck_readdir __P((struct inodesc *)); static struct bufarea *getdirblk __P((ufs_daddr_t blkno, long size)); static int lftempname __P((char *bufp, ino_t ino)); static int mkentry __P((struct inodesc *)); /* * Propagate connected state through the tree. */ void propagate() { register struct inoinfo **inpp, *inp; struct inoinfo **inpend; long change; inpend = &inpsort[inplast]; do { change = 0; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) { statemap[inp->i_number] = DFOUND; change++; } } } while (change > 0); } /* * Scan each entry in a directory block. 
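 *
 * Usage sketch (editorial addition, not part of the original source):
 * dirscan() is not called directly; it is reached through ckinode()
 * with a DATA-type descriptor whose id_func is applied to each entry,
 * the way pass2() drives it:
 *
 *	struct inodesc curino;
 *
 *	memset(&curino, 0, sizeof(struct inodesc));
 *	curino.id_type = DATA;
 *	curino.id_func = pass2check;
 *	curino.id_number = inp->i_number;
 *	(void)ckinode(dp, &curino);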
*/ int dirscan(idesc) register struct inodesc *idesc; { register struct direct *dp; register struct bufarea *bp; int dsize, n; long blksiz; char dbuf[DIRBLKSIZ]; if (idesc->id_type != DATA) errx(EEXIT, "wrong type to dirscan %d", idesc->id_type); if (idesc->id_entryno == 0 && (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); blksiz = idesc->id_numfrags * sblock.fs_fsize; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { idesc->id_filesize -= blksiz; return (SKIP); } idesc->id_loc = 0; for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { dsize = dp->d_reclen; memmove(dbuf, dp, (size_t)dsize); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { struct direct *tdp = (struct direct *)dbuf; u_char tmp; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif idesc->id_dirp = (struct direct *)dbuf; if ((n = (*idesc->id_func)(idesc)) & ALTERED) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt && !doinglevel2) { struct direct *tdp; u_char tmp; tdp = (struct direct *)dbuf; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif bp = getdirblk(idesc->id_blkno, blksiz); memmove(bp->b_un.b_buf + idesc->id_loc - dsize, dbuf, (size_t)dsize); dirty(bp); sbdirty(); } if (n & STOP) return (n); } return (idesc->id_filesize > 0 ? KEEPON : STOP); } /* * get next entry in a directory. */ static struct direct * fsck_readdir(idesc) register struct inodesc *idesc; { register struct direct *dp, *ndp; register struct bufarea *bp; long size, blksiz, fix, dploc; blksiz = idesc->id_numfrags * sblock.fs_fsize; bp = getdirblk(idesc->id_blkno, blksiz); if (idesc->id_loc % DIRBLKSIZ == 0 && idesc->id_filesize > 0 && idesc->id_loc < blksiz) { dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (dircheck(idesc, dp)) goto dpok; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); dp->d_reclen = DIRBLKSIZ; dp->d_ino = 0; dp->d_type = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; if (fix) dirty(bp); idesc->id_loc += DIRBLKSIZ; idesc->id_filesize -= DIRBLKSIZ; return (dp); } dpok: if (idesc->id_filesize <= 0 || idesc->id_loc >= blksiz) return NULL; dploc = idesc->id_loc; dp = (struct direct *)(bp->b_un.b_buf + dploc); idesc->id_loc += dp->d_reclen; idesc->id_filesize -= dp->d_reclen; if ((idesc->id_loc % DIRBLKSIZ) == 0) return (dp); ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (idesc->id_loc < blksiz && idesc->id_filesize > 0 && dircheck(idesc, ndp) == 0) { size = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); idesc->id_loc += size; idesc->id_filesize -= size; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + dploc); dp->d_reclen += size; if (fix) dirty(bp); } return (dp); } /* * Verify that a directory entry is valid. * This is a superset of the checks made in the kernel. 
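 *
 * Worked example (editorial addition): a new-format entry carries an
 * 8-byte header (d_ino, d_reclen, d_type, d_namlen), so a 5-character
 * name needs 8 + 5 + 1 = 14 bytes, which DIRSIZ() rounds up to a
 * 4-byte boundary: the smallest legal d_reclen for it is 16.  The
 * checks below also require each record to fit in what is left of its
 * DIRBLKSIZ chunk and to be 4-byte aligned.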
*/ static int dircheck(idesc, dp) struct inodesc *idesc; register struct direct *dp; { register int size; register char *cp; u_char namlen, type; int spaceleft; spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); if (dp->d_ino >= maxino || dp->d_reclen == 0 || dp->d_reclen > spaceleft || (dp->d_reclen & 0x3) != 0) return (0); if (dp->d_ino == 0) return (1); size = DIRSIZ(!newinofmt, dp); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { type = dp->d_namlen; namlen = dp->d_type; } else { namlen = dp->d_namlen; type = dp->d_type; } # else namlen = dp->d_namlen; type = dp->d_type; # endif if (dp->d_reclen < size || idesc->id_filesize < size || namlen > MAXNAMLEN || type > 15) return (0); for (cp = dp->d_name, size = 0; size < namlen; size++) if (*cp == '\0' || (*cp++ == '/')) return (0); if (*cp != '\0') return (0); return (1); } void direrror(ino, errmesg) ino_t ino; char *errmesg; { fileerror(ino, ino, errmesg); } void fileerror(cwd, ino, errmesg) ino_t cwd, ino; char *errmesg; { register struct dinode *dp; char pathbuf[MAXPATHLEN + 1]; pwarn("%s ", errmesg); pinode(ino); printf("\n"); getpathname(pathbuf, cwd, ino); if (ino < ROOTINO || ino > maxino) { pfatal("NAME=%s\n", pathbuf); return; } dp = ginode(ino); if (ftypeok(dp)) pfatal("%s=%s\n", (dp->di_mode & IFMT) == IFDIR ? "DIR" : "FILE", pathbuf); else pfatal("NAME=%s\n", pathbuf); } void adjust(idesc, lcnt) register struct inodesc *idesc; int lcnt; { register struct dinode *dp; dp = ginode(idesc->id_number); if (dp->di_nlink == lcnt) { if (linkup(idesc->id_number, (ino_t)0) == 0) clri(idesc, "UNREF", 0); } else { pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname : ((dp->di_mode & IFMT) == IFDIR ? "DIR" : "FILE")); pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; inodirty(); } } } static int mkentry(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; struct direct newent; int newlen, oldlen; newent.d_namlen = strlen(idesc->id_name); newlen = DIRSIZ(0, &newent); if (dirp->d_ino != 0) oldlen = DIRSIZ(0, dirp); else oldlen = 0; if (dirp->d_reclen - oldlen < newlen) return (KEEPON); newent.d_reclen = dirp->d_reclen - oldlen; dirp->d_reclen = oldlen; dirp = (struct direct *)(((char *)dirp) + oldlen); dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ dirp->d_reclen = newent.d_reclen; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; dirp->d_namlen = newent.d_namlen; memmove(dirp->d_name, idesc->id_name, (size_t)newent.d_namlen + 1); # if (BYTE_ORDER == LITTLE_ENDIAN) /* * If the entry was split, dirscan() will only reverse the byte * order of the original entry, and not the new one, before * writing it back out. So, we reverse the byte order here if * necessary. 
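 *
 * Editorial illustration: in the old format these two bytes were one
 * 16-bit d_namlen, so on a little-endian disk the low byte (the real
 * name length) sits where the new format keeps d_type.  An old-format
 * entry with a 5-character name therefore reads as d_type = 5,
 * d_namlen = 0 until the two bytes are exchanged.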
*/ if (oldlen != 0 && !newinofmt && !doinglevel2) { u_char tmp; tmp = dirp->d_namlen; dirp->d_namlen = dirp->d_type; dirp->d_type = tmp; } # endif return (ALTERED|STOP); } static int chgino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (memcmp(dirp->d_name, idesc->id_name, (int)dirp->d_namlen + 1)) return (KEEPON); dirp->d_ino = idesc->id_parent; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; return (ALTERED|STOP); } int linkup(orphan, parentdir) ino_t orphan; ino_t parentdir; { register struct dinode *dp; int lostdir; ino_t oldlfdir; struct inodesc idesc; char tempname[BUFSIZ]; memset(&idesc, 0, sizeof(struct inodesc)); dp = ginode(orphan); lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = ROOTINO; if ((ckinode(dp, &idesc) & FOUND) != 0) { lfdir = idesc.id_parent; } else { pwarn("NO lost+found DIRECTORY"); if (preen || reply("CREATE")) { lfdir = allocdir(ROOTINO, (ino_t)0, lfmode); if (lfdir != 0) { if (makeentry(ROOTINO, lfdir, lfname) != 0) { if (preen) printf(" (CREATED)\n"); } else { freedir(lfdir, ROOTINO); lfdir = 0; if (preen) printf("\n"); } } } } if (lfdir == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY"); printf("\n\n"); return (0); } } dp = ginode(lfdir); if ((dp->di_mode & IFMT) != IFDIR) { pfatal("lost+found IS NOT A DIRECTORY"); if (reply("REALLOCATE") == 0) return (0); oldlfdir = lfdir; if ((lfdir = allocdir(ROOTINO, (ino_t)0, lfmode)) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } if ((changeino(ROOTINO, lfname, lfdir) & ALTERED) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } inodirty(); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = oldlfdir; adjust(&idesc, lncntp[oldlfdir] + 1); lncntp[oldlfdir] = 0; dp = ginode(lfdir); } if (statemap[lfdir] != DFOUND) { pfatal("SORRY. NO lost+found DIRECTORY\n\n"); return (0); } (void)lftempname(tempname, orphan); if (makeentry(lfdir, orphan, tempname) == 0) { pfatal("SORRY. NO SPACE IN lost+found DIRECTORY"); printf("\n\n"); return (0); } lncntp[orphan]--; if (lostdir) { if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && parentdir != (ino_t)-1) (void)makeentry(orphan, lfdir, ".."); dp = ginode(lfdir); dp->di_nlink++; inodirty(); lncntp[lfdir]++; pwarn("DIR I=%lu CONNECTED. ", orphan); if (parentdir != (ino_t)-1) { printf("PARENT WAS I=%lu\n", parentdir); /* * The parent directory, because of the ordering * guarantees, has had the link count incremented * for the child, but no entry was made. This * fixes the parent link count so that fsck does * not need to be rerun. */ lncntp[parentdir]++; } if (preen == 0) printf("\n"); } return (1); } /* * fix an entry in a directory. 
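 *
 * Usage sketch (editorial addition): linkup() above repoints a
 * reconnected directory's `..' with exactly this call:
 *
 *	if ((changeino(orphan, "..", lfdir) & ALTERED) == 0)
 *		(void)makeentry(orphan, lfdir, "..");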
*/ int changeino(dir, name, newnum) ino_t dir; char *name; ino_t newnum; { struct inodesc idesc; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = chgino; idesc.id_number = dir; idesc.id_fix = DONTKNOW; idesc.id_name = name; idesc.id_parent = newnum; /* new value for name */ return (ckinode(ginode(dir), &idesc)); } /* * make an entry in a directory */ int makeentry(parent, ino, name) ino_t parent, ino; char *name; { struct dinode *dp; struct inodesc idesc; char pathbuf[MAXPATHLEN + 1]; if (parent < ROOTINO || parent >= maxino || ino < ROOTINO || ino >= maxino) return (0); memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = mkentry; idesc.id_number = parent; idesc.id_parent = ino; /* this is the inode to enter */ idesc.id_fix = DONTKNOW; idesc.id_name = name; dp = ginode(parent); if (dp->di_size % DIRBLKSIZ) { dp->di_size = roundup(dp->di_size, DIRBLKSIZ); inodirty(); } if ((ckinode(dp, &idesc) & ALTERED) != 0) return (1); getpathname(pathbuf, parent, parent); dp = ginode(parent); if (expanddir(dp, pathbuf) == 0) return (0); return (ckinode(dp, &idesc) & ALTERED); } /* * Attempt to expand the size of a directory */ static int expanddir(dp, name) register struct dinode *dp; char *name; { ufs_daddr_t lastbn, newblk; register struct bufarea *bp; char *cp, firstblk[DIRBLKSIZ]; lastbn = lblkno(&sblock, dp->di_size); if (lastbn >= NDADDR - 1 || dp->di_db[lastbn] == 0 || dp->di_size == 0) return (0); if ((newblk = allocblk(sblock.fs_frag)) == 0) return (0); dp->di_db[lastbn + 1] = dp->di_db[lastbn]; dp->di_db[lastbn] = newblk; dp->di_size += sblock.fs_bsize; dp->di_blocks += btodb(sblock.fs_bsize); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(firstblk, bp->b_un.b_buf, DIRBLKSIZ); bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, firstblk, DIRBLKSIZ); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_bsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, &emptydir, sizeof emptydir); pwarn("NO SPACE LEFT IN %s", name); if (preen) printf(" (EXPANDED)\n"); else if (reply("EXPAND") == 0) goto bad; dirty(bp); inodirty(); return (1); bad: dp->di_db[lastbn] = dp->di_db[lastbn + 1]; dp->di_db[lastbn + 1] = 0; dp->di_size -= sblock.fs_bsize; dp->di_blocks -= btodb(sblock.fs_bsize); freeblk(newblk, sblock.fs_frag); return (0); } /* * allocate a new directory */ ino_t allocdir(parent, request, mode) ino_t parent, request; int mode; { ino_t ino; char *cp; struct dinode *dp; register struct bufarea *bp; struct dirtemplate *dirp; ino = allocino(request, IFDIR|mode); if (newinofmt) dirp = &dirhead; else dirp = (struct dirtemplate *)&odirhead; dirp->dot_ino = ino; dirp->dotdot_ino = parent; dp = ginode(ino); bp = getdirblk(dp->di_db[0], sblock.fs_fsize); if (bp->b_errs) { freeino(ino); return (0); } memmove(bp->b_un.b_buf, dirp, sizeof(struct dirtemplate)); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_fsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); dp->di_nlink = 2; inodirty(); if (ino == ROOTINO) { lncntp[ino] = dp->di_nlink; cacheino(dp, ino); return(ino); } if (statemap[parent] != DSTATE && statemap[parent] != DFOUND) { freeino(ino); return (0); } cacheino(dp, ino); statemap[ino] = statemap[parent]; if (statemap[ino] == 
DSTATE) { lncntp[ino] = dp->di_nlink; lncntp[parent]++; } dp = ginode(parent); dp->di_nlink++; inodirty(); return (ino); } /* * free a directory inode */ static void freedir(ino, parent) ino_t ino, parent; { struct dinode *dp; if (ino != parent) { dp = ginode(parent); dp->di_nlink--; inodirty(); } freeino(ino); } /* * generate a temporary name for the lost+found directory. */ static int lftempname(bufp, ino) char *bufp; ino_t ino; { register ino_t in; register char *cp; int namlen; cp = bufp + 2; for (in = maxino; in > 0; in /= 10) cp++; *--cp = 0; namlen = cp - bufp; in = ino; while (cp > bufp) { *--cp = (in % 10) + '0'; in /= 10; } *cp = '#'; return (namlen); } /* * Get a directory block. * Insure that it is held until another is requested. */ static struct bufarea * getdirblk(blkno, size) ufs_daddr_t blkno; long size; { if (pdirbp != 0) pdirbp->b_flags &= ~B_INUSE; pdirbp = getdatablk(blkno, size); return (pdirbp); } Index: head/sbin/fsck/fsck.h =================================================================== --- head/sbin/fsck/fsck.h (revision 34265) +++ head/sbin/fsck/fsck.h (revision 34266) @@ -1,281 +1,283 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)fsck.h 8.4 (Berkeley) 5/9/95 */ #include #include #include #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MAXBUFSPACE 40*1024 /* maximum space to allocate to buffers */ #define INOBUFSIZE 56*1024 /* size of buffer to read inodes in pass1 */ #ifndef BUFSIZ #define BUFSIZ 1024 #endif #define USTATE 01 /* inode not allocated */ #define FSTATE 02 /* inode is file */ #define DSTATE 03 /* inode is directory */ #define DFOUND 04 /* directory found during descent */ #define DCLEAR 05 /* directory is to be cleared */ #define FCLEAR 06 /* file is to be cleared */ /* * buffer cache structure. */ struct bufarea { struct bufarea *b_next; /* free list queue */ struct bufarea *b_prev; /* free list queue */ ufs_daddr_t b_bno; int b_size; int b_errs; int b_flags; union { char *b_buf; /* buffer space */ ufs_daddr_t *b_indir; /* indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct dinode *b_dinode; /* inode block */ } b_un; char b_dirty; }; #define B_INUSE 1 #define MINBUFS 5 /* minimum number of buffers required */ struct bufarea bufhead; /* head of list of other blks in filesys */ struct bufarea sblk; /* file system superblock */ struct bufarea cgblk; /* cylinder group blocks */ struct bufarea *pdirbp; /* current directory contents */ struct bufarea *pbp; /* current inode block */ #define dirty(bp) (bp)->b_dirty = 1 #define initbarea(bp) \ (bp)->b_dirty = 0; \ (bp)->b_bno = (ufs_daddr_t)-1; \ (bp)->b_flags = 0; #define sbdirty() sblk.b_dirty = 1 #define cgdirty() cgblk.b_dirty = 1 #define sblock (*sblk.b_un.b_fs) #define cgrp (*cgblk.b_un.b_cg) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(); /* function to be applied to blocks of inode */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_daddr_t id_blkno; /* current block number being examined */ int id_numfrags; /* number of frags contained in block */ quad_t id_filesize; /* for DATA nodes, the size of the directory */ int id_loc; /* for DATA nodes, current location in dir */ int id_entryno; /* for DATA nodes, current entry number */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA or ADDR */ }; /* file types */ #define DATA 1 #define ADDR 2 /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup. To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs_daddr_t dup; }; struct dups *duplist; /* head of dup list */ struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Linked list of inodes with zero link counts. 
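 *
 * Editorial sketch: the list is consumed by plain traversal, as in the
 * debug report in checkfilesys():
 *
 *	for (zlnp = zlnhead; zlnp; zlnp = zlnp->next)
 *		printf(" %u,", zlnp->zlncnt);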
 */
struct zlncnt {
	struct zlncnt *next;
	ino_t zlncnt;
};

struct zlncnt *zlnhead;		/* head of zero link count list */

/*
 * Inode cache data structures.
 */
struct inoinfo {
	struct inoinfo	*i_nexthash;	/* next entry in hash chain */
	ino_t	i_number;		/* inode number of this entry */
	ino_t	i_parent;		/* inode number of parent */
	ino_t	i_dotdot;		/* inode number of `..' */
	size_t	i_isize;		/* size of inode */
	u_int	i_numblks;		/* size of block array in bytes */
	ufs_daddr_t i_blks[1];		/* actually longer */
} **inphead, **inpsort;
long numdirs, listmax, inplast;

char	*cdevname;		/* name of device being checked */
long	dev_bsize;		/* computed value of DEV_BSIZE */
long	secsize;		/* actual disk sector size */
char	fflag;			/* force fs check (ignore clean flag) */
char	nflag;			/* assume a no response */
char	yflag;			/* assume a yes response */
int	bflag;			/* location of alternate super block */
int	debug;			/* output debugging info */
int	cvtlevel;		/* convert to newer file system format */
int	doinglevel1;		/* converting to new cylinder group format */
int	doinglevel2;		/* converting to new inode format */
int	newinofmt;		/* filesystem has new inode format */
+char	usedsoftdep;		/* just fix soft dependency inconsistencies */
+char	resolved;		/* cleared if unresolved changes => not clean */
char	preen;			/* just fix normal inconsistencies */
char	hotroot;		/* checking root device */
char	havesb;			/* superblock has been read */
int	fsmodified;		/* 1 => write done to file system */
int	fsreadfd;		/* file descriptor for reading file system */
int	fswritefd;		/* file descriptor for writing file system */
int	returntosingle;		/* return to single user mode */
int	rerun;			/* rerun fsck.  Only used in non-preen mode */

ufs_daddr_t maxfsblock;		/* number of blocks in the file system */
char	*blockmap;		/* ptr to primary blk allocation map */
ino_t	maxino;			/* number of inodes in file system */
ino_t	lastino;		/* last inode in use */
char	*statemap;		/* ptr to inode state table */
u_char	*typemap;		/* ptr to inode type table */
short	*lncntp;		/* ptr to link count table */

ino_t	lfdir;			/* lost & found directory inode number */
char	*lfname;		/* lost & found directory name */
int	lfmode;			/* lost & found directory creation mode */

ufs_daddr_t n_blks;		/* number of blocks in use */
ufs_daddr_t n_files;		/* number of files in use */

#define	clearinode(dp)	(*(dp) = zino)
struct	dinode zino;

#define	setbmap(blkno)	setbit(blockmap, blkno)
#define	testbmap(blkno)	isset(blockmap, blkno)
#define	clrbmap(blkno)	clrbit(blockmap, blkno)

#define	STOP	0x01
#define	SKIP	0x02
#define	KEEPON	0x04
#define	ALTERED	0x08
#define	FOUND	0x10

#define	EEXIT	8		/* Standard error exit.
*/ struct fstab; void adjust __P((struct inodesc *, int lcnt)); ufs_daddr_t allocblk __P((long frags)); ino_t allocdir __P((ino_t parent, ino_t request, int mode)); ino_t allocino __P((ino_t request, int type)); void blkerror __P((ino_t ino, char *type, ufs_daddr_t blk)); char *blockcheck __P((char *name)); int bread __P((int fd, char *buf, ufs_daddr_t blk, long size)); void bufinit __P((void)); void bwrite __P((int fd, char *buf, ufs_daddr_t blk, long size)); void cacheino __P((struct dinode *dp, ino_t inumber)); void catch __P((int)); void catchquit __P((int)); int changeino __P((ino_t dir, char *name, ino_t newnum)); int checkfstab __P((int preen, int maxrun, int (*docheck)(struct fstab *), int (*chkit)(char *, char *, long, int))); int chkrange __P((ufs_daddr_t blk, int cnt)); void ckfini __P((int markclean)); int ckinode __P((struct dinode *dp, struct inodesc *)); void clri __P((struct inodesc *, char *type, int flag)); void direrror __P((ino_t ino, char *errmesg)); int dirscan __P((struct inodesc *)); int dofix __P((struct inodesc *, char *msg)); void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); void fileerror __P((ino_t cwd, ino_t ino, char *errmesg)); int findino __P((struct inodesc *)); int findname __P((struct inodesc *)); void flush __P((int fd, struct bufarea *bp)); void freeblk __P((ufs_daddr_t blkno, long frags)); void freeino __P((ino_t ino)); void freeinodebuf __P((void)); int ftypeok __P((struct dinode *dp)); void getblk __P((struct bufarea *bp, ufs_daddr_t blk, long size)); struct bufarea *getdatablk __P((ufs_daddr_t blkno, long size)); struct inoinfo *getinoinfo __P((ino_t inumber)); struct dinode *getnextinode __P((ino_t inumber)); void getpathname __P((char *namebuf, ino_t curdir, ino_t ino)); struct dinode *ginode __P((ino_t inumber)); void inocleanup __P((void)); void inodirty __P((void)); int linkup __P((ino_t orphan, ino_t parentdir)); int makeentry __P((ino_t parent, ino_t ino, char *name)); void panic __P((const char *fmt, ...)); void pass1 __P((void)); void pass1b __P((void)); int pass1check __P((struct inodesc *)); void pass2 __P((void)); void pass3 __P((void)); void pass4 __P((void)); int pass4check __P((struct inodesc *)); void pass5 __P((void)); void pfatal __P((const char *fmt, ...)); void pinode __P((ino_t ino)); void propagate __P((void)); void pwarn __P((const char *fmt, ...)); int reply __P((char *question)); void resetinodebuf __P((void)); int setup __P((char *dev)); void voidquit __P((int)); Index: head/sbin/fsck/inode.c =================================================================== --- head/sbin/fsck/inode.c (revision 34265) +++ head/sbin/fsck/inode.c (revision 34266) @@ -1,621 +1,632 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" static ino_t startinum; static int iblock __P((struct inodesc *, long ilevel, quad_t isize)); int ckinode(dp, idesc) struct dinode *dp; register struct inodesc *idesc; { ufs_daddr_t *ap; long ret, n, ndb, offset; struct dinode dino; quad_t remsize, sizepb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_entryno = 0; idesc->id_filesize = dp->di_size; mode = dp->di_mode & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0))) return (KEEPON); dino = *dp; ndb = howmany(dino.di_size, sblock.fs_bsize); for (ap = &dino.di_db[0]; ap < &dino.di_db[NDADDR]; ap++) { if (--ndb == 0 && (offset = blkoff(&sblock, dino.di_size)) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (*ap == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size = (ap - &dino.di_db[0]) * sblock.fs_bsize; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); } } continue; } idesc->id_blkno = *ap; if (idesc->id_type == ADDR) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = dino.di_size - sblock.fs_bsize * NDADDR; sizepb = sblock.fs_bsize; for (ap = &dino.di_ib[0], n = 1; n <= NIADDR; ap++, n++) { if (*ap) { idesc->id_blkno = *ap; ret = iblock(idesc, n, remsize); if (ret & STOP) return (ret); } else { if (idesc->id_type == DATA && remsize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= remsize; remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); break; } } } sizepb *= NINDIR(&sblock); remsize -= sizepb; } return (KEEPON); } static int 
iblock(idesc, ilevel, isize) struct inodesc *idesc; long ilevel; quad_t isize; { ufs_daddr_t *ap; ufs_daddr_t *aplim; struct bufarea *bp; int i, n, (*func)(), nif; quad_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; struct dinode *dp; if (idesc->id_type == ADDR) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) return (SKIP); bp = getdatablk(idesc->id_blkno, sblock.fs_bsize); ilevel--; for (sizepb = sblock.fs_bsize, i = 0; i < ilevel; i++) sizepb *= NINDIR(&sblock); nif = howmany(isize , sizepb); if (nif > NINDIR(&sblock)) nif = NINDIR(&sblock); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { aplim = &bp->b_un.b_indir[NINDIR(&sblock)]; for (ap = &bp->b_un.b_indir[nif]; ap < aplim; ap++) { if (*ap == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", idesc->id_number); if (dofix(idesc, buf)) { *ap = 0; dirty(bp); } } flush(fswritefd, bp); } aplim = &bp->b_un.b_indir[nif]; for (ap = bp->b_un.b_indir; ap < aplim; ap++) { if (*ap) { idesc->id_blkno = *ap; if (ilevel == 0) n = (*func)(idesc); else n = iblock(idesc, ilevel, isize); if (n & STOP) { bp->b_flags &= ~B_INUSE; return (n); } } else { if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= isize; isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); bp->b_flags &= ~B_INUSE; return(STOP); } } } isize -= sizepb; } bp->b_flags &= ~B_INUSE; return (KEEPON); } /* * Check that a block in a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(blk, cnt) ufs_daddr_t blk; int cnt; { register int c; if (blk < 0 || blk >= maxfsblock || cnt < 0 || cnt > maxfsblock - blk) return (1); c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", blk + cnt, cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", blk+cnt, sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. */ struct dinode * ginode(inumber) ino_t inumber; { ufs_daddr_t iblk; if (inumber < ROOTINO || inumber > maxino) errx(EEXIT, "bad inode number %d to ginode", inumber); if (startinum == 0 || inumber < startinum || inumber >= startinum + INOPB(&sblock)) { iblk = ino_to_fsba(&sblock, inumber); if (pbp != 0) pbp->b_flags &= ~B_INUSE; pbp = getdatablk(iblk, sblock.fs_bsize); startinum = (inumber / INOPB(&sblock)) * INOPB(&sblock); } return (&pbp->b_un.b_dinode[inumber % INOPB(&sblock)]); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. 
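 *
 * Usage sketch (editorial addition): a scan must be bracketed by
 * resetinodebuf() and freeinodebuf(), and inode numbers must arrive
 * strictly in sequence, as in pass1():
 *
 *	resetinodebuf();	(also consumes inodes below ROOTINO)
 *	for (inumber = ROOTINO; inumber < maxino; inumber++)
 *		dp = getnextinode(inumber);
 *	freeinodebuf();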
*/ ino_t nextino, lastinum; long readcnt, readpercg, fullcnt, inobufsize, partialcnt, partialsize; struct dinode *inodebuf; struct dinode * getnextinode(inumber) ino_t inumber; { long size; ufs_daddr_t dblk; static struct dinode *dp; if (inumber != nextino++ || inumber > maxino) errx(EEXIT, "bad inode number %d to nextinode", inumber); if (inumber >= lastinum) { readcnt++; dblk = fsbtodb(&sblock, ino_to_fsba(&sblock, lastinum)); if (readcnt % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } (void)bread(fsreadfd, (char *)inodebuf, dblk, size); /* ??? */ dp = inodebuf; } return (dp++); } void resetinodebuf() { startinum = 0; nextino = 0; lastinum = 0; readcnt = 0; inobufsize = blkroundup(&sblock, INOBUFSIZE); fullcnt = inobufsize / sizeof(struct dinode); readpercg = sblock.fs_ipg / fullcnt; partialcnt = sblock.fs_ipg % fullcnt; partialsize = partialcnt * sizeof(struct dinode); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } if (inodebuf == NULL && (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "Cannot allocate space for inode buffer"); while (nextino < ROOTINO) (void)getnextinode(nextino); } void freeinodebuf() { if (inodebuf != NULL) free((char *)inodebuf); inodebuf = NULL; } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ void cacheino(dp, inumber) register struct dinode *dp; ino_t inumber; { register struct inoinfo *inp; struct inoinfo **inpp; unsigned int blks; blks = howmany(dp->di_size, sblock.fs_bsize); if (blks > NDADDR) blks = NDADDR + NIADDR; inp = (struct inoinfo *) malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs_daddr_t)); if (inp == NULL) return; inpp = &inphead[inumber % numdirs]; inp->i_nexthash = *inpp; *inpp = inp; if (inumber == ROOTINO) inp->i_parent = ROOTINO; else inp->i_parent = (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = dp->di_size; inp->i_numblks = blks * sizeof(ufs_daddr_t); memmove(&inp->i_blks[0], &dp->di_db[0], (size_t)inp->i_numblks); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)realloc((char *)inpsort, (unsigned)listmax * sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(inumber) ino_t inumber; { register struct inoinfo *inp; for (inp = inphead[inumber % numdirs]; inp; inp = inp->i_nexthash) { if (inp->i_number != inumber) continue; return (inp); } errx(EEXIT, "cannot find inode %d", inumber); return ((struct inoinfo *)0); } /* * Clean up all the inode cache structure. */ void inocleanup() { register struct inoinfo **inpp; if (inphead == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphead); free((char *)inpsort); inphead = inpsort = NULL; } void inodirty() { dirty(pbp); } void clri(idesc, type, flag) register struct inodesc *idesc; char *type; int flag; { register struct dinode *dp; dp = ginode(idesc->id_number); if (flag == 1) { pwarn("%s %s", type, (dp->di_mode & IFMT) == IFDIR ? 
"DIR" : "FILE"); pinode(idesc->id_number); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; (void)ckinode(dp, idesc); clearinode(dp); statemap[idesc->id_number] = USTATE; inodirty(); } } int findname(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent) return (KEEPON); memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= ROOTINO && dirp->d_ino <= maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } void pinode(ino) ino_t ino; { register struct dinode *dp; register char *p; struct passwd *pw; time_t t; printf(" I=%lu ", ino); if (ino < ROOTINO || ino > maxino) return; dp = ginode(ino); printf(" OWNER="); if ((pw = getpwuid((int)dp->di_uid)) != 0) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)dp->di_uid); printf("MODE=%o\n", dp->di_mode); if (preen) printf("%s: ", cdevname); printf("SIZE=%qu ", dp->di_size); t = dp->di_mtime; p = ctime(&t); printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void blkerror(ino, type, blk) ino_t ino; char *type; ufs_daddr_t blk; { pfatal("%ld %s I=%lu", blk, type, ino); printf("\n"); switch (statemap[ino]) { case FSTATE: statemap[ino] = FCLEAR; return; case DSTATE: statemap[ino] = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", statemap[ino]); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(request, type) ino_t request; int type; { register ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; else if (statemap[request] != USTATE) return (0); for (ino = request; ino < maxino; ino++) if (statemap[ino] == USTATE) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: statemap[ino] = FSTATE; break; default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = dp->di_atime; dp->di_size = sblock.fs_fsize; dp->di_blocks = btodb(sblock.fs_fsize); n_files++; inodirty(); if (newinofmt) typemap[ino] = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino) ino_t ino; { struct inodesc idesc; struct dinode *dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = ino; dp = ginode(ino); (void)ckinode(dp, &idesc); clearinode(dp); inodirty(); statemap[ino] = USTATE; n_files--; } Index: head/sbin/fsck/main.c =================================================================== --- head/sbin/fsck/main.c (revision 34265) +++ head/sbin/fsck/main.c (revision 34266) @@ -1,353 +1,359 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include "fsck.h" int returntosingle; static int argtoi __P((int flag, char *req, char *str, int base)); static int docheck __P((struct fstab *fsp)); static int checkfilesys __P((char *filesys, char *mntpt, long auxdata, int child)); int main __P((int argc, char *argv[])); int main(argc, argv) int argc; char *argv[]; { int ch; int ret, maxrun = 0; sync(); while ((ch = getopt(argc, argv, "dfpnNyYb:c:l:m:")) != -1) { switch (ch) { case 'p': preen++; break; case 'b': bflag = argtoi('b', "number", optarg, 10); printf("Alternate super block location: %d\n", bflag); break; case 'c': cvtlevel = argtoi('c', "conversion level", optarg, 10); break; case 'd': debug++; break; case 'f': fflag++; break; case 'l': maxrun = argtoi('l', "number", optarg, 10); break; case 'm': lfmode = argtoi('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': case 'N': nflag++; yflag = 0; break; case 'y': case 'Y': yflag++; nflag = 0; break; default: errx(EEXIT, "%c option?", ch); } } argc -= optind; argv += optind; if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (preen) (void)signal(SIGQUIT, catchquit); if (argc) { while (argc-- > 0) (void)checkfilesys(blockcheck(*argv++), 0, 0L, 0); exit(0); } ret = checkfstab(preen, maxrun, docheck, checkfilesys); if (returntosingle) exit(2); exit(ret); } static int argtoi(flag, req, str, base) int flag; char *req, *str; int base; { char *cp; int ret; ret = (int)strtol(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Determine whether a filesystem should be checked. */ static int docheck(fsp) register struct fstab *fsp; { if (strcmp(fsp->fs_vfstype, "ufs") || (strcmp(fsp->fs_type, FSTAB_RW) && strcmp(fsp->fs_type, FSTAB_RO)) || fsp->fs_passno == 0) return (0); return (1); } /* * Check the specified filesystem. */ /* ARGSUSED */ static int checkfilesys(filesys, mntpt, auxdata, child) char *filesys, *mntpt; long auxdata; int child; { ufs_daddr_t n_ffree, n_bfree; struct dups *dp; struct zlncnt *zlnp; int cylno, flags; if (preen && child) (void)signal(SIGQUIT, voidquit); cdevname = filesys; if (debug && preen) pwarn("starting\n"); switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (0); case -1: pwarn("clean, %ld free ", sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", sblock.fs_cstotal.cs_nffree, sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
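 *
 * (Editorial note, not part of this commit's text: `resolved' starts
 * at 1 here and is cleared below whenever a rerun is needed or the
 * root is mounted read-write, so that the later ckfini(resolved)
 * only marks the superblock clean when everything was fixed,
 * replacing the old unconditional ckfini(1).)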
+ */
+	resolved = 1;
	/*
	 * 1: scan inodes tallying blocks used
	 */
	if (preen == 0) {
		printf("** Last Mounted on %s\n", sblock.fs_fsmnt);
		if (hotroot)
			printf("** Root file system\n");
		printf("** Phase 1 - Check Blocks and Sizes\n");
	}
	pass1();

	/*
	 * 1b: locate first references to duplicates, if any
	 */
	if (duplist) {
-		if (preen)
+		if (preen || usedsoftdep)
			pfatal("INTERNAL ERROR: dups with -p");
		printf("** Phase 1b - Rescan For More DUPS\n");
		pass1b();
	}

	/*
	 * 2: traverse directories from root to mark all connected directories
	 */
	if (preen == 0)
		printf("** Phase 2 - Check Pathnames\n");
	pass2();

	/*
	 * 3: scan inodes looking for disconnected directories
	 */
	if (preen == 0)
		printf("** Phase 3 - Check Connectivity\n");
	pass3();

	/*
	 * 4: scan inodes looking for disconnected files; check reference counts
	 */
	if (preen == 0)
		printf("** Phase 4 - Check Reference Counts\n");
	pass4();

	/*
	 * 5: check and repair resource counts in cylinder groups
	 */
	if (preen == 0)
		printf("** Phase 5 - Check Cyl groups\n");
	pass5();

	/*
	 * print out summary statistics
	 */
	n_ffree = sblock.fs_cstotal.cs_nffree;
	n_bfree = sblock.fs_cstotal.cs_nbfree;
	pwarn("%ld files, %ld used, %ld free ",
	    n_files, n_blks, n_ffree + sblock.fs_frag * n_bfree);
	printf("(%d frags, %d blocks, %.1f%% fragmentation)\n",
	    n_ffree, n_bfree, n_ffree * 100.0 / sblock.fs_dsize);
	if (debug &&
	    (n_files -= maxino - ROOTINO - sblock.fs_cstotal.cs_nifree))
		printf("%d files missing\n", n_files);
	if (debug) {
		n_blks += sblock.fs_ncg *
			(cgdmin(&sblock, 0) - cgsblock(&sblock, 0));
		n_blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0);
		n_blks += howmany(sblock.fs_cssize, sblock.fs_fsize);
		if (n_blks -= maxfsblock - (n_ffree + sblock.fs_frag * n_bfree))
			printf("%d blocks missing\n", n_blks);
		if (duplist != NULL) {
			printf("The following duplicate blocks remain:");
			for (dp = duplist; dp; dp = dp->next)
				printf(" %d,", dp->dup);
			printf("\n");
		}
		if (zlnhead != NULL) {
			printf("The following zero link count inodes remain:");
			for (zlnp = zlnhead; zlnp; zlnp = zlnp->next)
				printf(" %u,", zlnp->zlncnt);
			printf("\n");
		}
	}
	zlnhead = (struct zlncnt *)0;
	duplist = (struct dups *)0;
	muldup = (struct dups *)0;
	inocleanup();
	if (fsmodified) {
		(void)time(&sblock.fs_time);
		sbdirty();
	}
	if (cvtlevel && sblk.b_dirty) {
		/*
		 * Write out the duplicate super blocks
		 */
		for (cylno = 0; cylno < sblock.fs_ncg; cylno++)
			bwrite(fswritefd, (char *)&sblock,
			    fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE);
	}
-	if (!hotroot) {
-		ckfini(1);
-	} else {
+	if (rerun)
+		resolved = 0;
+	flags = 0;
+	if (hotroot) {
		struct statfs stfs_buf;
		/*
		 * Check to see if root is mounted read-write.
		 */
		if (statfs("/", &stfs_buf) == 0)
			flags = stfs_buf.f_flags;
-		else
-			flags = 0;
-		ckfini(flags & MNT_RDONLY);
+		if ((flags & MNT_RDONLY) == 0)
+			resolved = 0;
	}
+	ckfini(resolved);
	free(blockmap);
	free(statemap);
	free((char *)lncntp);
	if (!fsmodified)
		return (0);
	if (!preen)
		printf("\n***** FILE SYSTEM WAS MODIFIED *****\n");
	if (rerun)
		printf("\n***** PLEASE RERUN FSCK *****\n");
	if (hotroot) {
		struct ufs_args args;
		int ret;
		/*
		 * We modified the root.  Do a mount update on
		 * it, unless it is read-write, so we can continue.
*/ if (flags & MNT_RDONLY) { args.fspec = 0; args.export.ex_flags = 0; args.export.ex_root = 0; flags |= MNT_UPDATE | MNT_RELOAD; ret = mount("ufs", "/", flags, &args); if (ret == 0) return (0); } if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (0); } Index: head/sbin/fsck/pass1.c =================================================================== --- head/sbin/fsck/pass1.c (revision 34265) +++ head/sbin/fsck/pass1.c (revision 34266) @@ -1,322 +1,330 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)pass1.c 8.6 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" static ufs_daddr_t badblk; static ufs_daddr_t dupblk; static void checkinode __P((ino_t inumber, struct inodesc *)); void pass1() { ino_t inumber; int c, i, cgd; struct inodesc idesc; /* * Set file system reserved blocks in used block map. */ for (c = 0; c < sblock.fs_ncg; c++) { cgd = cgdmin(&sblock, c); if (c == 0) { i = cgbase(&sblock, c); cgd += howmany(sblock.fs_cssize, sblock.fs_fsize); } else i = cgsblock(&sblock, c); for (; i < cgd; i++) setbmap(i); } /* * Find all allocated blocks. 
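 *
 * Editorial sketch: for each fragment an inode claims, pass1check()
 * either accounts it in the block map or flags a duplicate:
 *
 *	if (!testbmap(blkno)) {
 *		n_blks++;
 *		setbmap(blkno);
 *	} else
 *		blkerror(idesc->id_number, "DUP", blkno);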
*/ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass1check; inumber = 0; n_files = n_blks = 0; resetinodebuf(); for (c = 0; c < sblock.fs_ncg; c++) { for (i = 0; i < sblock.fs_ipg; i++, inumber++) { if (inumber < ROOTINO) continue; checkinode(inumber, &idesc); } } freeinodebuf(); } static void checkinode(inumber, idesc) ino_t inumber; register struct inodesc *idesc; { register struct dinode *dp; struct zlncnt *zlnp; int ndb, j; mode_t mode; char *symbuf; dp = getnextinode(inumber); mode = dp->di_mode & IFMT; if (mode == 0) { if (memcmp(dp->di_db, zino.di_db, NDADDR * sizeof(ufs_daddr_t)) || memcmp(dp->di_ib, zino.di_ib, NIADDR * sizeof(ufs_daddr_t)) || dp->di_mode || dp->di_size) { pfatal("PARTIALLY ALLOCATED INODE I=%lu", inumber); if (reply("CLEAR") == 1) { dp = ginode(inumber); clearinode(dp); inodirty(); } } statemap[inumber] = USTATE; return; } lastino = inumber; if (/* dp->di_size < 0 || */ dp->di_size + sblock.fs_bsize - 1 < dp->di_size || (mode == IFDIR && dp->di_size > MAXDIRSIZE)) { if (debug) printf("bad size %qu:", dp->di_size); goto unknown; } if (!preen && mode == IFMT && reply("HOLD BAD BLOCK") == 1) { dp = ginode(inumber); dp->di_size = sblock.fs_fsize; dp->di_mode = IFREG|0600; inodirty(); } ndb = howmany(dp->di_size, sblock.fs_bsize); if (ndb < 0) { if (debug) printf("bad size %qu ndb %d:", dp->di_size, ndb); goto unknown; } if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { if (doinglevel2 && dp->di_size > 0 && dp->di_size < MAXSYMLINKLEN && dp->di_blocks != 0) { symbuf = alloca(secsize); if (bread(fsreadfd, symbuf, fsbtodb(&sblock, dp->di_db[0]), (long)secsize) != 0) errx(EEXIT, "cannot read symlink"); if (debug) { symbuf[dp->di_size] = 0; printf("convert symlink %ld(%s) of size %ld\n", inumber, symbuf, (long)dp->di_size); } dp = ginode(inumber); memmove(dp->di_shortlink, symbuf, (long)dp->di_size); dp->di_blocks = 0; inodirty(); } /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. 
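 *
 * Worked example (editorial addition): a 10-byte link stored in the
 * inode itself gives ndb = howmany(10, sizeof(ufs_daddr_t)) = 3, so
 * the loops below require di_db[3] through di_db[NDADDR - 1] and all
 * of di_ib[] to be zero; anything else is garbage left over after
 * the symlink string.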
 */
		if (dp->di_size < sblock.fs_maxsymlinklen ||
		    dp->di_blocks == 0) {
			ndb = howmany(dp->di_size, sizeof(ufs_daddr_t));
			if (ndb > NDADDR) {
				j = ndb - NDADDR;
				for (ndb = 1; j > 1; j--)
					ndb *= NINDIR(&sblock);
				ndb += NDADDR;
			}
		}
	}
	for (j = ndb; j < NDADDR; j++)
		if (dp->di_db[j] != 0) {
			if (debug)
				printf("bad direct addr: %ld\n", dp->di_db[j]);
			goto unknown;
		}
	for (j = 0, ndb -= NDADDR; ndb > 0; j++)
		ndb /= NINDIR(&sblock);
	for (; j < NIADDR; j++)
		if (dp->di_ib[j] != 0) {
			if (debug)
				printf("bad indirect addr: %ld\n",
					dp->di_ib[j]);
			goto unknown;
		}
	if (ftypeok(dp) == 0)
		goto unknown;
	n_files++;
	lncntp[inumber] = dp->di_nlink;
	if (dp->di_nlink <= 0) {
		zlnp = (struct zlncnt *)malloc(sizeof *zlnp);
		if (zlnp == NULL) {
			pfatal("LINK COUNT TABLE OVERFLOW");
-			if (reply("CONTINUE") == 0)
+			if (reply("CONTINUE") == 0) {
+				ckfini(0);
				exit(EEXIT);
+			}
		} else {
			zlnp->zlncnt = inumber;
			zlnp->next = zlnhead;
			zlnhead = zlnp;
		}
	}
	if (mode == IFDIR) {
		if (dp->di_size == 0)
			statemap[inumber] = DCLEAR;
		else
			statemap[inumber] = DSTATE;
		cacheino(dp, inumber);
	} else
		statemap[inumber] = FSTATE;
	typemap[inumber] = IFTODT(mode);
	if (doinglevel2 &&
	    (dp->di_ouid != (u_short)-1 || dp->di_ogid != (u_short)-1)) {
		dp = ginode(inumber);
		dp->di_uid = dp->di_ouid;
		dp->di_ouid = -1;
		dp->di_gid = dp->di_ogid;
		dp->di_ogid = -1;
		inodirty();
	}
	badblk = dupblk = 0;
	idesc->id_number = inumber;
	(void)ckinode(dp, idesc);
	idesc->id_entryno *= btodb(sblock.fs_fsize);
	if (dp->di_blocks != idesc->id_entryno) {
		pwarn("INCORRECT BLOCK COUNT I=%lu (%ld should be %ld)",
		    inumber, dp->di_blocks, idesc->id_entryno);
		if (preen)
			printf(" (CORRECTED)\n");
		else if (reply("CORRECT") == 0)
			return;
		dp = ginode(inumber);
		dp->di_blocks = idesc->id_entryno;
		inodirty();
	}
	return;
unknown:
	pfatal("UNKNOWN FILE TYPE I=%lu", inumber);
	statemap[inumber] = FCLEAR;
	if (reply("CLEAR") == 1) {
		statemap[inumber] = USTATE;
		dp = ginode(inumber);
		clearinode(dp);
		inodirty();
	}
}

int
pass1check(idesc)
	register struct inodesc *idesc;
{
	int res = KEEPON;
	int anyout, nfrags;
	ufs_daddr_t blkno = idesc->id_blkno;
	register struct dups *dlp;
	struct dups *new;

	if ((anyout = chkrange(blkno, idesc->id_numfrags)) != 0) {
		blkerror(idesc->id_number, "BAD", blkno);
		if (badblk++ >= MAXBAD) {
			pwarn("EXCESSIVE BAD BLKS I=%lu", idesc->id_number);
			if (preen)
				printf(" (SKIPPING)\n");
-			else if (reply("CONTINUE") == 0)
+			else if (reply("CONTINUE") == 0) {
+				ckfini(0);
				exit(EEXIT);
+			}
			return (STOP);
		}
	}
	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
		if (anyout && chkrange(blkno, 1)) {
			res = SKIP;
		} else if (!testbmap(blkno)) {
			n_blks++;
			setbmap(blkno);
		} else {
			blkerror(idesc->id_number, "DUP", blkno);
			if (dupblk++ >= MAXDUP) {
				pwarn("EXCESSIVE DUP BLKS I=%lu",
					idesc->id_number);
				if (preen)
					printf(" (SKIPPING)\n");
-				else if (reply("CONTINUE") == 0)
+				else if (reply("CONTINUE") == 0) {
+					ckfini(0);
					exit(EEXIT);
+				}
				return (STOP);
			}
			new = (struct dups *)malloc(sizeof(struct dups));
			if (new == NULL) {
				pfatal("DUP TABLE OVERFLOW.");
-				if (reply("CONTINUE") == 0)
+				if (reply("CONTINUE") == 0) {
+					ckfini(0);
					exit(EEXIT);
+				}
				return (STOP);
			}
			new->dup = blkno;
			if (muldup == 0) {
				duplist = muldup = new;
				new->next = 0;
			} else {
				new->next = muldup->next;
				muldup->next = new;
			}
			for (dlp = duplist; dlp != muldup; dlp = dlp->next)
				if (dlp->dup == blkno)
					break;
			if (dlp == muldup && dlp->dup != blkno)
				muldup = new;
		}
		/*
		 * count the number of blocks found in id_entryno
		 */
		idesc->id_entryno++;
	}
	return (res);
}
Index: head/sbin/fsck/pass2.c
=================================================================== --- head/sbin/fsck/pass2.c (revision 34265) +++ head/sbin/fsck/pass2.c (revision 34266) @@ -1,467 +1,482 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass2.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" #define MINDIRSIZE (sizeof (struct dirtemplate)) static int blksort __P((const void *, const void *)); static int pass2check __P((struct inodesc *)); void pass2() { register struct dinode *dp; register struct inoinfo **inpp, *inp; struct inoinfo **inpend; struct inodesc curino; struct dinode dino; char pathbuf[MAXPATHLEN + 1]; switch (statemap[ROOTINO]) { case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; case DCLEAR: pfatal("DUPS/BAD IN ROOT INODE"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: case FCLEAR: pfatal("ROOT INODE NOT DIRECTORY"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; inodirty(); break; case DSTATE: break; default: errx(EEXIT, "BAD STATE %d FOR ROOT INODE", statemap[ROOTINO]); } statemap[ROOTINO] = DFOUND; if (newinofmt) { statemap[WINO] = FSTATE; typemap[WINO] = DT_WHT; } /* * Sort the directory list into disk block order. */ qsort((char *)inpsort, (size_t)inplast, sizeof *inpsort, blksort); /* * Check the integrity of each directory. */ memset(&curino, 0, sizeof(struct inodesc)); curino.id_type = DATA; curino.id_func = pass2check; dp = &dino; inpend = &inpsort[inplast]; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_isize == 0) continue; if (inp->i_isize < MINDIRSIZE) { direrror(inp->i_number, "DIRECTORY TOO SHORT"); inp->i_isize = roundup(MINDIRSIZE, DIRBLKSIZ); if (reply("FIX") == 1) { dp = ginode(inp->i_number); dp->di_size = inp->i_isize; inodirty(); dp = &dino; } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); if (preen || reply("ADJUST") == 1) { dp = ginode(inp->i_number); dp->di_size = roundup(inp->i_isize, DIRBLKSIZ); inodirty(); dp = &dino; } } memset(&dino, 0, sizeof(struct dinode)); dino.di_mode = IFDIR; dp->di_size = inp->i_isize; memmove(&dp->di_db[0], &inp->i_blks[0], (size_t)inp->i_numblks); curino.id_number = inp->i_number; curino.id_parent = inp->i_parent; (void)ckinode(dp, &curino); } /* * Now that the parents of all directories have been found, * make another pass to verify the value of `..' 
*/ for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0 || inp->i_isize == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) statemap[inp->i_number] = DFOUND; if (inp->i_dotdot == inp->i_parent || inp->i_dotdot == (ino_t)-1) continue; if (inp->i_dotdot == 0) { inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, inp->i_number, "MISSING '..'"); if (reply("FIX") == 0) continue; (void)makeentry(inp->i_number, inp->i_parent, ".."); lncntp[inp->i_parent]--; continue; } fileerror(inp->i_parent, inp->i_number, "BAD INODE NUMBER FOR '..'"); if (reply("FIX") == 0) continue; lncntp[inp->i_dotdot]++; lncntp[inp->i_parent]--; inp->i_dotdot = inp->i_parent; (void)changeino(inp->i_number, "..", inp->i_parent); } /* * Mark all the directories that can be found from the root. */ propagate(); } static int pass2check(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; register struct inoinfo *inp; int n, entrysize, ret = 0; struct dinode *dp; char *errmsg; struct direct proto; char namebuf[MAXPATHLEN + 1]; char pathbuf[MAXPATHLEN + 1]; /* * If converting, set directory entry type. */ if (doinglevel2 && dirp->d_ino > 0 && dirp->d_ino < maxino) { dirp->d_type = typemap[dirp->d_ino]; ret |= ALTERED; } /* * check for "." */ if (idesc->id_entryno != 0) goto chk1; if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") == 0) { if (dirp->d_ino != idesc->id_number) { direrror(idesc->id_number, "BAD INODE NUMBER FOR '.'"); dirp->d_ino = idesc->id_number; if (reply("FIX") == 1) ret |= ALTERED; } if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '.'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk1; } direrror(idesc->id_number, "MISSING '.'"); proto.d_ino = idesc->id_number; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 1; (void)strcpy(proto.d_name, "."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (dirp->d_ino != 0 && strcmp(dirp->d_name, "..") != 0) { pfatal("CANNOT FIX, FIRST ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); } else if (dirp->d_reclen < entrysize) { pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '.'\n"); } else if (dirp->d_reclen < 2 * entrysize) { proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } else { n = dirp->d_reclen - entrysize; proto.d_reclen = entrysize; memmove(dirp, &proto, (size_t)entrysize); idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + entrysize); memset(dirp, 0, (size_t)n); dirp->d_reclen = n; if (reply("FIX") == 1) ret |= ALTERED; } chk1: if (idesc->id_entryno > 1) goto chk2; inp = getinoinfo(idesc->id_number); proto.d_ino = inp->i_parent; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 2; (void)strcpy(proto.d_name, ".."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (idesc->id_entryno == 0) { n = DIRSIZ(0, dirp); if (dirp->d_reclen < n + entrysize) goto chk2; proto.d_reclen = dirp->d_reclen - n; dirp->d_reclen = n; idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + n); memset(dirp, 0, (size_t)proto.d_reclen); dirp->d_reclen = proto.d_reclen; } if (dirp->d_ino != 
0 && strcmp(dirp->d_name, "..") == 0) { inp->i_dotdot = dirp->d_ino; if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '..'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk2; } if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") != 0) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, SECOND ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); inp->i_dotdot = (ino_t)-1; } else if (dirp->d_reclen < entrysize) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '..'\n"); inp->i_dotdot = (ino_t)-1; } else if (inp->i_parent != 0) { /* * We know the parent, so fix now. */ inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } idesc->id_entryno++; if (dirp->d_ino != 0) lncntp[dirp->d_ino]--; return (ret|KEEPON); chk2: if (dirp->d_ino == 0) return (ret|KEEPON); if (dirp->d_namlen <= 2 && dirp->d_name[0] == '.' && idesc->id_entryno >= 2) { if (dirp->d_namlen == 1) { direrror(idesc->id_number, "EXTRA '.' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } if (dirp->d_name[1] == '.') { direrror(idesc->id_number, "EXTRA '..' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } } idesc->id_entryno++; n = 0; if (dirp->d_ino > maxino) { fileerror(idesc->id_number, dirp->d_ino, "I OUT OF RANGE"); n = reply("REMOVE"); } else if (newinofmt && ((dirp->d_ino == WINO && dirp->d_type != DT_WHT) || (dirp->d_ino != WINO && dirp->d_type == DT_WHT))) { fileerror(idesc->id_number, dirp->d_ino, "BAD WHITEOUT ENTRY"); dirp->d_ino = WINO; dirp->d_type = DT_WHT; if (reply("FIX") == 1) ret |= ALTERED; } else { again: switch (statemap[dirp->d_ino]) { case USTATE: if (idesc->id_entryno <= 2) break; fileerror(idesc->id_number, dirp->d_ino, "UNALLOCATED"); n = reply("REMOVE"); break; case DCLEAR: case FCLEAR: if (idesc->id_entryno <= 2) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; break; } fileerror(idesc->id_number, dirp->d_ino, errmsg); if ((n = reply("REMOVE")) == 1) break; dp = ginode(dirp->d_ino); statemap[dirp->d_ino] = (dp->di_mode & IFMT) == IFDIR ? 
DSTATE : FSTATE; lncntp[dirp->d_ino] = dp->di_nlink; goto again; case DSTATE: if (statemap[idesc->id_number] == DFOUND) statemap[dirp->d_ino] = DFOUND; /* fall through */ case DFOUND: inp = getinoinfo(dirp->d_ino); if (inp->i_parent != 0 && idesc->id_entryno > 2) { getpathname(pathbuf, idesc->id_number, idesc->id_number); getpathname(namebuf, dirp->d_ino, dirp->d_ino); pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } if (idesc->id_entryno > 2) inp->i_parent = idesc->id_number; /* fall through */ case FSTATE: if (newinofmt && dirp->d_type != typemap[dirp->d_ino]) { fileerror(idesc->id_number, dirp->d_ino, "BAD TYPE VALUE"); dirp->d_type = typemap[dirp->d_ino]; if (reply("FIX") == 1) ret |= ALTERED; } lncntp[dirp->d_ino]--; break; default: errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[dirp->d_ino], dirp->d_ino); } } if (n == 0) return (ret|KEEPON); dirp->d_ino = 0; return (ret|KEEPON|ALTERED); } /* * Routine to sort disk blocks. */ static int blksort(arg1, arg2) const void *arg1, *arg2; { return ((*(struct inoinfo **)arg1)->i_blks[0] - (*(struct inoinfo **)arg2)->i_blks[0]); } Index: head/sbin/fsck/pass5.c =================================================================== --- head/sbin/fsck/pass5.c (revision 34265) +++ head/sbin/fsck/pass5.c (revision 34266) @@ -1,345 +1,375 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
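/*
 * The pass2 changes above split diagnostics by how the damage can arise.
 * With soft updates in use (usedsoftdep) the on-disk ordering guarantees
 * mean a directory length that is not a multiple of DIRBLKSIZ cannot be
 * simple crash fallout, so it is reported through pfatal() instead of
 * pwarn(); likewise an extraneous hard link to a directory found while
 * preening is now removed rather than ignored, since leaving it behind
 * would force a manual rerun.  The severity dispatch reduces to the
 * sketch below, where report_dirlen() is an illustrative name only:
 */
static void
report_dirlen(char *path, int isize)
{

	if (usedsoftdep)	/* unexpected: escalate (pfatal exits in preen) */
		pfatal("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d",
		    path, isize, DIRBLKSIZ);
	else			/* ordinary crash damage: warn, then adjust */
		pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d",
		    path, isize, DIRBLKSIZ);
}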
*/ #ifndef lint static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include "fsck.h" void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; register struct cg *newcg = (struct cg *)buf; struct ocg *ocg = (struct ocg *)buf; statemap[WINO] = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; doinglevel1 = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { char *doit = 0; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); doinglevel1 = 1; sbdirty(); } } } } switch ((int)fs->fs_postblformat) { case FS_42POSTBLFMT: basesize = (char *)(&ocg->cg_btot[0]) - (char *)(&ocg->cg_firstfield); sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; break; case FS_DYNAMICPOSTBLFMT: newcg->cg_btotoff = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); newcg->cg_boff = newcg->cg_btotoff + fs->fs_cpg * sizeof(long); newcg->cg_iusedoff = newcg->cg_boff + fs->fs_cpg * fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(long); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fs->fs_cpg * fs->fs_spc / NSPB(fs), NBBY); } newcg->cg_magic = CG_MAGIC; basesize = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); sumsize = newcg->cg_iusedoff - newcg->cg_btotoff; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) { idesc[i].id_type = ADDR; if 
(doinglevel2) idesc[i].id_fix = FIX; } memset(&cstotal, 0, sizeof(struct csum)); j = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (i = fs->fs_size; i < j; i++) setbmap(i); for (c = 0; c < fs->fs_ncg; c++) { getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize); if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_time = cg->cg_time; newcg->cg_cgx = c; if (c == fs->fs_ncg - 1) newcg->cg_ncyl = fs->fs_ncyl % fs->fs_cpg; else newcg->cg_ncyl = fs->fs_cpg; newcg->cg_ndblk = dmax - dbase; if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor < newcg->cg_niblk) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(&cg_blktot(newcg)[0], 0, (size_t)(sumsize + mapsize)); if (fs->fs_postblformat == FS_42POSTBLFMT) ocg->cg_magic = CG_MAGIC; j = fs->fs_ipg * c; for (i = 0; i < fs->fs_ipg; j++, i++) { switch (statemap[j]) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: newcg->cg_cs.cs_ndir++; /* fall through */ case FSTATE: case FCLEAR: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (j < ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[j], j); } } if (c == 0) for (i = 0; i < ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) continue; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; j = cbtocylno(fs, i); cg_blktot(newcg)[j]++; cg_blks(fs, newcg, j)[cbtorpos(fs, i)]++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; cs = &fs->fs_cs(fs, c); if (memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (doinglevel1) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], 
&cg_blktot(cg)[0], sumsize) != 0) && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); memmove(&cg_blktot(cg)[0], &cg_blktot(newcg)[0], (size_t)sumsize); + cgdirty(); + } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); cgdirty(); } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; if (memcmp(&cstotal, &fs->fs_cstotal, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(&fs->fs_cstotal, &cstotal, sizeof *cs); fs->fs_ronly = 0; sbdirty(); } if (fs->fs_fmod != 0) { pwarn("MODIFIED FLAG SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_fmod = 0; sbdirty(); } } if (fs->fs_clean == 0) { pwarn("CLEAN FLAG NOT SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_clean = 1; sbdirty(); } } } Index: head/sbin/fsck/setup.c =================================================================== --- head/sbin/fsck/setup.c (revision 34265) +++ head/sbin/fsck/setup.c (revision 34266) @@ -1,514 +1,520 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
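/*
 * The new usedsoftdep block in pass5 above runs before the bitmaps are
 * rewritten, while the cylinder group still holds what was on disk, and
 * prints every inode or fragment that fsck computed as allocated but the
 * disk claims is free -- precisely the direction of error soft updates
 * is supposed to rule out.  (Note that the fragment loop masks with
 * cg_inosused(cg)[i]; from the structure of the inode loop the intended
 * mask appears to be cg_blksfree(newcg)[i].)  Both loops are instances
 * of "report bits set in map a but clear in map b"; as a sketch:
 */
static void
report_set_not_in(u_char *a, u_char *b, int mapsize, int base, char *what)
{
	int i, j, k;

	for (i = 0; i < mapsize; i++) {
		j = a[i];
		if ((b[i] & j) == j)	/* every bit of a[i] present in b[i] */
			continue;
		for (k = 0; k < NBBY; k++) {
			if ((j & (1 << k)) == 0)
				continue;	/* bit clear in a: nothing to check */
			if (b[i] & (1 << k))
				continue;	/* bit set in both: consistent */
			pwarn("ALLOCATED %s %d MARKED FREE",
			    what, base + i * NBBY + k);
		}
	}
}
/*
 * The inode check corresponds to report_set_not_in(cg_inosused(newcg),
 * cg_inosused(cg), inomapsize, c * fs->fs_ipg, "INODE"); the fragment
 * check passes the free maps the other way around: (cg_blksfree(cg),
 * cg_blksfree(newcg), blkmapsize, c * fs->fs_fpg, "FRAG").
 */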
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; #endif /* not lint */ #define DKTYPENAMES #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct bufarea asblk; #define altsblock (*asblk.b_un.b_fs) #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static void badsb __P((int listerr, char *s)); static int calcsb __P((char *dev, int devfd, struct fs *fs)); static struct disklabel *getdisklabel __P((char *s, int fd)); static int readsb __P((int listerr)); /* * Read in a superblock finding an alternate if necessary. * Return 1 if successful, 0 if unsuccessful, -1 if filesystem * is already clean (preen mode only). */ int setup(dev) char *dev; { long cg, size, asked, i, j; long skipclean, bmapsize; struct disklabel *lp; off_t sizepb; struct stat statb; struct fs proto; havesb = 0; fswritefd = -1; skipclean = preen; if (stat(dev, &statb) < 0) { printf("Can't stat %s: %s\n", dev, strerror(errno)); return (0); } if ((statb.st_mode & S_IFMT) != S_IFCHR) { pfatal("%s is not a character device", dev); if (reply("CONTINUE") == 0) return (0); } if ((fsreadfd = open(dev, O_RDONLY)) < 0) { printf("Can't open %s: %s\n", dev, strerror(errno)); return (0); } if (preen == 0) printf("** %s", dev); if (nflag || (fswritefd = open(dev, O_WRONLY)) < 0) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if (preen == 0) printf("\n"); fsmodified = 0; lfdir = 0; initbarea(&sblk); initbarea(&asblk); sblk.b_un.b_buf = malloc(SBSIZE); asblk.b_un.b_buf = malloc(SBSIZE); if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL) errx(EEXIT, "cannot allocate space for superblock"); lp = getdisklabel((char *)NULL, fsreadfd); if (lp) dev_bsize = secsize = lp->d_secsize; else dev_bsize = secsize = DEV_BSIZE; /* * Read in the superblock, looking for alternates if necessary */ if (readsb(1) == 0) { skipclean = 0; if (bflag || preen || calcsb(dev, fsreadfd, &proto) == 0) return(0); if (reply("LOOK FOR ALTERNATE SUPERBLOCKS") == 0) return (0); for (cg = 0; cg < proto.fs_ncg; cg++) { bflag = fsbtodb(&proto, cgsblock(&proto, cg)); if (readsb(0) != 0) break; } if (cg >= proto.fs_ncg) { printf("%s %s\n%s %s\n%s %s\n", "SEARCH FOR ALTERNATE SUPER-BLOCK", "FAILED. YOU MUST USE THE", "-b OPTION TO FSCK TO SPECIFY THE", "LOCATION OF AN ALTERNATE", "SUPER-BLOCK TO SUPPLY NEEDED", "INFORMATION; SEE fsck(8)."); bflag = 0; return(0); } pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag); bflag = 0; } maxfsblock = sblock.fs_size; maxino = sblock.fs_ncg * sblock.fs_ipg; /* * Check and potentially fix certain fields in the super block. 
*/ if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) { pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); if (reply("SET TO DEFAULT") == 1) { sblock.fs_optim = FS_OPTTIME; sbdirty(); } } if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) { pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK", sblock.fs_minfree); if (reply("SET TO DEFAULT") == 1) { sblock.fs_minfree = 10; sbdirty(); } } if (sblock.fs_interleave < 1 || sblock.fs_interleave > sblock.fs_nsect) { pwarn("IMPOSSIBLE INTERLEAVE=%d IN SUPERBLOCK", sblock.fs_interleave); sblock.fs_interleave = 1; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_npsect < sblock.fs_nsect || sblock.fs_npsect > sblock.fs_nsect*2) { pwarn("IMPOSSIBLE NPSECT=%d IN SUPERBLOCK", sblock.fs_npsect); sblock.fs_npsect = sblock.fs_nsect; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_inodefmt >= FS_44INODEFMT) { newinofmt = 1; } else { sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; newinofmt = 0; } /* * Convert to new inode format. */ if (cvtlevel >= 2 && sblock.fs_inodefmt < FS_44INODEFMT) { if (preen) pwarn("CONVERTING TO NEW INODE FORMAT\n"); else if (!reply("CONVERT TO NEW INODE FORMAT")) return(0); doinglevel2++; sblock.fs_inodefmt = FS_44INODEFMT; sizepb = sblock.fs_bsize; sblock.fs_maxfilesize = sblock.fs_bsize * NDADDR - 1; for (i = 0; i < NIADDR; i++) { sizepb *= NINDIR(&sblock); sblock.fs_maxfilesize += sizepb; } sblock.fs_maxsymlinklen = MAXSYMLINKLEN; sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; sbdirty(); dirty(&asblk); } /* * Convert to new cylinder group format. */ if (cvtlevel >= 1 && sblock.fs_postblformat == FS_42POSTBLFMT) { if (preen) pwarn("CONVERTING TO NEW CYLINDER GROUP FORMAT\n"); else if (!reply("CONVERT TO NEW CYLINDER GROUP FORMAT")) return(0); doinglevel1++; sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT; sblock.fs_nrpos = 8; sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) - (char *)(&sblock.fs_firstfield); sblock.fs_rotbloff = &sblock.fs_space[0] - (u_char *)(&sblock.fs_firstfield); sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); sbdirty(); dirty(&asblk); } if (asblk.b_dirty && !bflag) { memmove(&altsblock, &sblock, (size_t)sblock.fs_sbsize); flush(fswritefd, &asblk); } /* * read in the summary info. */ asked = 0; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize; sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size); if (bread(fsreadfd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } /* * If we survive the above basic checks and are preening, * quit here unless forced. 
*/ if (skipclean && sblock.fs_clean && !fflag) return (-1); /* * allocate and initialize the necessary maps */ bmapsize = roundup(howmany(maxfsblock, NBBY), sizeof(short)); blockmap = calloc((unsigned)bmapsize, sizeof (char)); if (blockmap == NULL) { printf("cannot alloc %u bytes for blockmap\n", (unsigned)bmapsize); goto badsb; } statemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (statemap == NULL) { printf("cannot alloc %u bytes for statemap\n", (unsigned)(maxino + 1)); goto badsb; } typemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (typemap == NULL) { printf("cannot alloc %u bytes for typemap\n", (unsigned)(maxino + 1)); goto badsb; } lncntp = (short *)calloc((unsigned)(maxino + 1), sizeof(short)); if (lncntp == NULL) { printf("cannot alloc %u bytes for lncntp\n", (unsigned)(maxino + 1) * sizeof(short)); goto badsb; } numdirs = sblock.fs_cstotal.cs_ndir; if (numdirs == 0) { printf("numdirs is zero, try using an alternate superblock\n"); goto badsb; } inplast = 0; listmax = numdirs + 10; inpsort = (struct inoinfo **)calloc((unsigned)listmax, sizeof(struct inoinfo *)); inphead = (struct inoinfo **)calloc((unsigned)numdirs, sizeof(struct inoinfo *)); if (inpsort == NULL || inphead == NULL) { printf("cannot alloc %u bytes for inphead\n", (unsigned)numdirs * sizeof(struct inoinfo *)); goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: ckfini(0); return (0); } /* * Read in the super block and its summary info. */ static int readsb(listerr) int listerr; { ufs_daddr_t super = bflag ? bflag : SBOFF / dev_bsize; if (bread(fsreadfd, (char *)&sblock, super, (long)SBSIZE) != 0) return (0); sblk.b_bno = super; sblk.b_size = SBSIZE; /* * run a few consistency checks of the super block */ if (sblock.fs_magic != FS_MAGIC) { badsb(listerr, "MAGIC NUMBER WRONG"); return (0); } if (sblock.fs_ncg < 1) { badsb(listerr, "NCG OUT OF RANGE"); return (0); } if (sblock.fs_cpg < 1) { badsb(listerr, "CPG OUT OF RANGE"); return (0); } if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl || (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) { badsb(listerr, "NCYL LESS THAN NCG*CPG"); return (0); } if (sblock.fs_sbsize > SBSIZE) { badsb(listerr, "SIZE PREPOSTEROUSLY LARGE"); return (0); } /* * Compute block size that the filesystem is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ super *= dev_bsize; dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); sblk.b_bno = super / dev_bsize; if (bflag) { havesb = 1; return (1); } /* * Set all possible fields that could differ, then do check * of whole super block against an alternate super block. * When an alternate super-block is specified this check is skipped. 
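/*
 * The tail of setup() above is where the commit's global switch gets
 * set: the kernel keeps FS_DOSOFTDEP in fs_flags while a filesystem is
 * mounted with soft updates, and fsck latches it into usedsoftdep, which
 * the passes consult to escalate "impossible" damage to fatal status and
 * to act without prompting where the ordering guarantees allow it.  The
 * added if/else is equivalent to the one-liner:
 */
	usedsoftdep = (sblock.fs_flags & FS_DOSOFTDEP) != 0;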
*/ getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), sblock.fs_sbsize); if (asblk.b_errs) return (0); altsblock.fs_firstfield = sblock.fs_firstfield; altsblock.fs_unused_1 = sblock.fs_unused_1; altsblock.fs_time = sblock.fs_time; altsblock.fs_cstotal = sblock.fs_cstotal; altsblock.fs_cgrotor = sblock.fs_cgrotor; altsblock.fs_fmod = sblock.fs_fmod; altsblock.fs_clean = sblock.fs_clean; altsblock.fs_ronly = sblock.fs_ronly; altsblock.fs_flags = sblock.fs_flags; altsblock.fs_maxcontig = sblock.fs_maxcontig; altsblock.fs_minfree = sblock.fs_minfree; altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp); altsblock.fs_maxcluster = sblock.fs_maxcluster; memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt); memmove(altsblock.fs_sparecon, sblock.fs_sparecon, sizeof sblock.fs_sparecon); /* * The following should not have to be copied. */ altsblock.fs_fsbtodb = sblock.fs_fsbtodb; altsblock.fs_interleave = sblock.fs_interleave; altsblock.fs_npsect = sblock.fs_npsect; altsblock.fs_nrpos = sblock.fs_nrpos; altsblock.fs_state = sblock.fs_state; altsblock.fs_qbmask = sblock.fs_qbmask; altsblock.fs_qfmask = sblock.fs_qfmask; altsblock.fs_state = sblock.fs_state; altsblock.fs_maxfilesize = sblock.fs_maxfilesize; if (memcmp(&sblock, &altsblock, (int)sblock.fs_sbsize)) { if (debug) { long *nlp, *olp, *endlp; printf("superblock mismatches\n"); nlp = (long *)&altsblock; olp = (long *)&sblock; endlp = olp + (sblock.fs_sbsize / sizeof *olp); for ( ; olp < endlp; olp++, nlp++) { if (*olp == *nlp) continue; printf("offset %d, original %d, alternate %d\n", olp - (long *)&sblock, *olp, *nlp); } } badsb(listerr, "VALUES IN SUPER BLOCK DISAGREE WITH THOSE IN FIRST ALTERNATE"); return (0); } havesb = 1; return (1); } static void badsb(listerr, s) int listerr; char *s; { if (!listerr) return; if (preen) printf("%s: ", cdevname); pfatal("BAD SUPER BLOCK: %s\n", s); } /* * Calculate a prototype superblock based on information in the disk label. * When done the cgsblock macro can be calculated and the fs_ncg field * can be used. Do NOT attempt to use other macros without verifying that * their needed information is available! */ static int calcsb(dev, devfd, fs) char *dev; int devfd; register struct fs *fs; { register struct disklabel *lp; register struct partition *pp; register char *cp; int i; cp = strchr(dev, '\0') - 1; if (cp == (char *)-1 || ((*cp < 'a' || *cp > 'h') && !isdigit(*cp))) { pfatal("%s: CANNOT FIGURE OUT FILE SYSTEM PARTITION\n", dev); return (0); } lp = getdisklabel(dev, devfd); if (isdigit(*cp)) pp = &lp->d_partitions[0]; else pp = &lp->d_partitions[*cp - 'a']; if (pp->p_fstype != FS_BSDFFS) { pfatal("%s: NOT LABELED AS A BSD FILE SYSTEM (%s)\n", dev, pp->p_fstype < FSMAXTYPES ? 
fstypenames[pp->p_fstype] : "unknown"); return (0); } if (pp->p_fsize == 0 || pp->p_frag == 0) { pfatal("%s: LABELED AS A %s FILE SYSTEM, BUT BLOCK SIZE IS 0\n", dev, fstypenames[pp->p_fstype]); return (0); } memset(fs, 0, sizeof(struct fs)); fs->fs_fsize = pp->p_fsize; fs->fs_frag = pp->p_frag; fs->fs_cpg = pp->p_cpg; fs->fs_size = pp->p_size; fs->fs_ntrak = lp->d_ntracks; fs->fs_nsect = lp->d_nsectors; fs->fs_spc = lp->d_secpercyl; fs->fs_nspf = fs->fs_fsize / lp->d_secsize; fs->fs_sblkno = roundup( howmany(lp->d_bbsize + lp->d_sbsize, fs->fs_fsize), fs->fs_frag); fs->fs_cgmask = 0xffffffff; for (i = fs->fs_ntrak; i > 1; i >>= 1) fs->fs_cgmask <<= 1; if (!POWEROF2(fs->fs_ntrak)) fs->fs_cgmask <<= 1; fs->fs_cgoffset = roundup( howmany(fs->fs_nsect, NSPF(fs)), fs->fs_frag); fs->fs_fpg = (fs->fs_cpg * fs->fs_spc) / NSPF(fs); fs->fs_ncg = howmany(fs->fs_size / fs->fs_spc, fs->fs_cpg); for (fs->fs_fsbtodb = 0, i = NSPF(fs); i > 1; i >>= 1) fs->fs_fsbtodb++; dev_bsize = lp->d_secsize; return (1); } static struct disklabel * getdisklabel(s, fd) char *s; int fd; { static struct disklabel lab; if (ioctl(fd, DIOCGDINFO, (char *)&lab) < 0) { if (s == NULL) return ((struct disklabel *)NULL); pwarn("ioctl (GCINFO): %s\n", strerror(errno)); errx(EEXIT, "%s: can't read disk label", s); } return (&lab); } Index: head/sbin/fsck/utilities.c =================================================================== --- head/sbin/fsck/utilities.c (revision 34265) +++ head/sbin/fsck/utilities.c (revision 34266) @@ -1,625 +1,648 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" long diskreads, totalreads; /* Disk cache statistics */ static void rwerror __P((char *mesg, ufs_daddr_t blk)); int ftypeok(dp) struct dinode *dp; { switch (dp->di_mode & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", dp->di_mode); return (0); } } int reply(question) char *question; { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = !strcmp(question, "CONTINUE"); printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } /* * Malloc buffers and set up cache. */ void bufinit() { register struct bufarea *bp; long bufcnt, i; char *bufp; pbp = pdirbp = (struct bufarea *)0; bufp = malloc((unsigned int)sblock.fs_bsize); if (bufp == 0) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk); bufhead.b_next = bufhead.b_prev = &bufhead; bufcnt = MAXBUFSPACE / sblock.fs_bsize; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)malloc(sizeof(struct bufarea)); bufp = malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; initbarea(bp); } bufhead.b_size = i; /* save number of buffers */ } /* * Manage a cache of directory blocks. */ struct bufarea * getdatablk(blkno, size) ufs_daddr_t blkno; long size; { register struct bufarea *bp; for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == &bufhead) errx(EEXIT, "deadlocked buffer pool"); getblk(bp, blkno, size); /* fall through */ foundit: totalreads++; bp->b_prev->b_next = bp->b_next; bp->b_next->b_prev = bp->b_prev; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; bp->b_flags |= B_INUSE; return (bp); } void getblk(bp, blk, size) register struct bufarea *bp; ufs_daddr_t blk; long size; { ufs_daddr_t dblk; dblk = fsbtodb(&sblock, blk); if (bp->b_bno != dblk) { flush(fswritefd, bp); diskreads++; bp->b_errs = bread(fsreadfd, bp->b_un.b_buf, dblk, size); bp->b_bno = dblk; bp->b_size = size; } } void flush(fd, bp) int fd; register struct bufarea *bp; { register int i, j; if (!bp->b_dirty) return; if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %d TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? 
"" : "PARTIALLY ", bp->b_bno); bp->b_dirty = 0; bp->b_errs = 0; bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { bwrite(fswritefd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize); } } static void rwerror(mesg, blk) char *mesg; ufs_daddr_t blk; { if (preen == 0) printf("\n"); pfatal("CANNOT %s: BLK %ld", mesg, blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(markclean) int markclean; { register struct bufarea *bp, *nbp; int ofsmodified, cnt = 0; if (fswritefd < 0) { (void)close(fsreadfd); return; } flush(fswritefd, &sblk); if (havesb && sblk.b_bno != SBOFF / dev_bsize && !preen && reply("UPDATE STANDARD SUPERBLOCK")) { sblk.b_bno = SBOFF / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); for (bp = bufhead.b_prev; bp && bp != &bufhead; bp = nbp) { cnt++; flush(fswritefd, bp); nbp = bp->b_prev; free(bp->b_un.b_buf); free((char *)bp); } if (bufhead.b_size != cnt) errx(EEXIT, "Panic: lost %d buffers", bufhead.b_size - cnt); pbp = pdirbp = (struct bufarea *)0; if (markclean && sblock.fs_clean == 0) { sblock.fs_clean = 1; sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) printf("\n***** FILE SYSTEM MARKED CLEAN *****\n"); } if (debug) printf("cache missed %ld of %ld (%d%%)\n", diskreads, totalreads, (int)(diskreads * 100 / totalreads)); (void)close(fsreadfd); (void)close(fswritefd); } int bread(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (read(fd, buf, (int)size) == size) return (0); rwerror("READ", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (read(fd, cp, (int)secsize) != secsize) { (void)lseek(fd, offset + i + secsize, 0); if (secsize != dev_bsize && dev_bsize != 1) printf(" %ld (%ld),", (blk * dev_bsize + i) / secsize, blk + i / dev_bsize); else printf(" %ld,", blk + i / dev_bsize); errs++; } } printf("\n"); return (errs); } void bwrite(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (write(fd, buf, (int)size) == size) { fsmodified = 1; return; } rwerror("WRITE", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (write(fd, cp, (int)dev_bsize) != dev_bsize) { (void)lseek(fd, offset + i + dev_bsize, 0); printf(" %ld,", blk + i / dev_bsize); } printf("\n"); return; } /* * allocate a data block with the specified number of fragments */ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(i + j)) continue; for (k = 1; k < frags; k++) if (testbmap(i + j + k)) break; if (k < frags) { j += k; continue; } - for 
(k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } return (0); } /* * Free a previously allocated block */ void freeblk(blkno, frags) ufs_daddr_t blkno; long frags; { struct inodesc idesc; idesc.id_blkno = blkno; idesc.id_numfrags = frags; (void)pass4check(&idesc); } /* * Find a pathname */ void getpathname(namebuf, curdir, ino) char *namebuf; ino_t curdir, ino; { int len; register char *cp; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || (statemap[curdir] != DSTATE && statemap[curdir] != DFOUND)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = ".."; if ((ckinode(ginode(ino), &idesc) & FOUND) == 0) break; namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(idesc.id_number), &idesc)&FOUND) == 0) break; len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(sig) int sig; { if (!doinglevel2) ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after filesystem checks complete * so that reboot sequence may be interrupted. */ void catchquit(sig) int sig; { printf("returning to single-user after filesystem check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * Ignore a single quit signal; wait and flush just in case. * Used by child processes in preen. */ void voidquit(sig) int sig; { sleep(1); (void)signal(SIGQUIT, SIG_IGN); (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(idesc, msg) register struct inodesc *idesc; char *msg; { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn(msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #if __STDC__ #include #else #include #endif /* * An unexpected inconsistency occured. - * Die if preening, otherwise just print message and continue. + * Die if preening or filesystem is running with soft dependency protocol, + * otherwise just print message and continue. */ void #if __STDC__ pfatal(const char *fmt, ...) 
#else pfatal(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? " SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. + * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ pwarn(const char *fmt, ...) #else pwarn(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (preen) (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void #if __STDC__ panic(const char *fmt, ...) #else panic(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stderr, fmt, ap); va_end(ap); exit(EEXIT); } Index: head/sbin/fsck_ffs/dir.c =================================================================== --- head/sbin/fsck_ffs/dir.c (revision 34265) +++ head/sbin/fsck_ffs/dir.c (revision 34266) @@ -1,734 +1,737 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
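/*
 * The allocblk() rework above follows from the new pass5 cross-check:
 * any block fsck allocates for itself (when expanding a directory, say)
 * must now also be cleared in the owning cylinder group's own free map
 * and charged to its summary counts, or pass5 would immediately flag the
 * allocation as an inconsistency.  With the search loop stripped away
 * and blkno standing for the found address (i + j in the original), the
 * bookkeeping per allocation is:
 */
	cg = dtog(&sblock, blkno);		/* owning cylinder group */
	getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
	if (!cg_chkmagic(cgp))
		pfatal("CG %d: BAD MAGIC NUMBER\n", cg);
	baseblk = dtogd(&sblock, blkno);	/* block offset within the group */
	for (k = 0; k < frags; k++) {
		setbmap(blkno + k);			/* fsck's private map */
		clrbit(cg_blksfree(cgp), baseblk + k);	/* on-disk free map */
	}
	n_blks += frags;
	if (frags == sblock.fs_frag)
		cgp->cg_cs.cs_nbfree--;		/* consumed a full block */
	else
		cgp->cg_cs.cs_nffree -= frags;	/* consumed loose fragments */
	cgdirty();				/* write the group back */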
*/ #ifndef lint static const char sccsid[] = "@(#)dir.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" char *lfname = "lost+found"; int lfmode = 01777; struct dirtemplate emptydir = { 0, DIRBLKSIZ }; struct dirtemplate dirhead = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; struct odirtemplate odirhead = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static int chgino __P((struct inodesc *)); static int dircheck __P((struct inodesc *, struct direct *)); static int expanddir __P((struct dinode *dp, char *name)); static void freedir __P((ino_t ino, ino_t parent)); static struct direct *fsck_readdir __P((struct inodesc *)); static struct bufarea *getdirblk __P((ufs_daddr_t blkno, long size)); static int lftempname __P((char *bufp, ino_t ino)); static int mkentry __P((struct inodesc *)); /* * Propagate connected state through the tree. */ void propagate() { register struct inoinfo **inpp, *inp; struct inoinfo **inpend; long change; inpend = &inpsort[inplast]; do { change = 0; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) { statemap[inp->i_number] = DFOUND; change++; } } } while (change > 0); } /* * Scan each entry in a directory block. */ int dirscan(idesc) register struct inodesc *idesc; { register struct direct *dp; register struct bufarea *bp; int dsize, n; long blksiz; char dbuf[DIRBLKSIZ]; if (idesc->id_type != DATA) errx(EEXIT, "wrong type to dirscan %d", idesc->id_type); if (idesc->id_entryno == 0 && (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); blksiz = idesc->id_numfrags * sblock.fs_fsize; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { idesc->id_filesize -= blksiz; return (SKIP); } idesc->id_loc = 0; for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { dsize = dp->d_reclen; memmove(dbuf, dp, (size_t)dsize); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { struct direct *tdp = (struct direct *)dbuf; u_char tmp; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif idesc->id_dirp = (struct direct *)dbuf; if ((n = (*idesc->id_func)(idesc)) & ALTERED) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt && !doinglevel2) { struct direct *tdp; u_char tmp; tdp = (struct direct *)dbuf; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif bp = getdirblk(idesc->id_blkno, blksiz); memmove(bp->b_un.b_buf + idesc->id_loc - dsize, dbuf, (size_t)dsize); dirty(bp); sbdirty(); } if (n & STOP) return (n); } return (idesc->id_filesize > 0 ? KEEPON : STOP); } /* * get next entry in a directory. 
*/ static struct direct * fsck_readdir(idesc) register struct inodesc *idesc; { register struct direct *dp, *ndp; register struct bufarea *bp; long size, blksiz, fix, dploc; blksiz = idesc->id_numfrags * sblock.fs_fsize; bp = getdirblk(idesc->id_blkno, blksiz); if (idesc->id_loc % DIRBLKSIZ == 0 && idesc->id_filesize > 0 && idesc->id_loc < blksiz) { dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (dircheck(idesc, dp)) goto dpok; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); dp->d_reclen = DIRBLKSIZ; dp->d_ino = 0; dp->d_type = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; if (fix) dirty(bp); idesc->id_loc += DIRBLKSIZ; idesc->id_filesize -= DIRBLKSIZ; return (dp); } dpok: if (idesc->id_filesize <= 0 || idesc->id_loc >= blksiz) return NULL; dploc = idesc->id_loc; dp = (struct direct *)(bp->b_un.b_buf + dploc); idesc->id_loc += dp->d_reclen; idesc->id_filesize -= dp->d_reclen; if ((idesc->id_loc % DIRBLKSIZ) == 0) return (dp); ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (idesc->id_loc < blksiz && idesc->id_filesize > 0 && dircheck(idesc, ndp) == 0) { size = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); idesc->id_loc += size; idesc->id_filesize -= size; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + dploc); dp->d_reclen += size; if (fix) dirty(bp); } return (dp); } /* * Verify that a directory entry is valid. * This is a superset of the checks made in the kernel. */ static int dircheck(idesc, dp) struct inodesc *idesc; register struct direct *dp; { register int size; register char *cp; u_char namlen, type; int spaceleft; spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); if (dp->d_ino >= maxino || dp->d_reclen == 0 || dp->d_reclen > spaceleft || (dp->d_reclen & 0x3) != 0) return (0); if (dp->d_ino == 0) return (1); size = DIRSIZ(!newinofmt, dp); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { type = dp->d_namlen; namlen = dp->d_type; } else { namlen = dp->d_namlen; type = dp->d_type; } # else namlen = dp->d_namlen; type = dp->d_type; # endif if (dp->d_reclen < size || idesc->id_filesize < size || namlen > MAXNAMLEN || type > 15) return (0); for (cp = dp->d_name, size = 0; size < namlen; size++) if (*cp == '\0' || (*cp++ == '/')) return (0); if (*cp != '\0') return (0); return (1); } void direrror(ino, errmesg) ino_t ino; char *errmesg; { fileerror(ino, ino, errmesg); } void fileerror(cwd, ino, errmesg) ino_t cwd, ino; char *errmesg; { register struct dinode *dp; char pathbuf[MAXPATHLEN + 1]; pwarn("%s ", errmesg); pinode(ino); printf("\n"); getpathname(pathbuf, cwd, ino); if (ino < ROOTINO || ino > maxino) { pfatal("NAME=%s\n", pathbuf); return; } dp = ginode(ino); if (ftypeok(dp)) pfatal("%s=%s\n", (dp->di_mode & IFMT) == IFDIR ? "DIR" : "FILE", pathbuf); else pfatal("NAME=%s\n", pathbuf); } void adjust(idesc, lcnt) register struct inodesc *idesc; int lcnt; { register struct dinode *dp; dp = ginode(idesc->id_number); if (dp->di_nlink == lcnt) { if (linkup(idesc->id_number, (ino_t)0) == 0) clri(idesc, "UNREF", 0); } else { pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname : ((dp->di_mode & IFMT) == IFDIR ? 
"DIR" : "FILE")); pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; inodirty(); } } } static int mkentry(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; struct direct newent; int newlen, oldlen; newent.d_namlen = strlen(idesc->id_name); newlen = DIRSIZ(0, &newent); if (dirp->d_ino != 0) oldlen = DIRSIZ(0, dirp); else oldlen = 0; if (dirp->d_reclen - oldlen < newlen) return (KEEPON); newent.d_reclen = dirp->d_reclen - oldlen; dirp->d_reclen = oldlen; dirp = (struct direct *)(((char *)dirp) + oldlen); dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ dirp->d_reclen = newent.d_reclen; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; dirp->d_namlen = newent.d_namlen; memmove(dirp->d_name, idesc->id_name, (size_t)newent.d_namlen + 1); # if (BYTE_ORDER == LITTLE_ENDIAN) /* * If the entry was split, dirscan() will only reverse the byte * order of the original entry, and not the new one, before * writing it back out. So, we reverse the byte order here if * necessary. */ if (oldlen != 0 && !newinofmt && !doinglevel2) { u_char tmp; tmp = dirp->d_namlen; dirp->d_namlen = dirp->d_type; dirp->d_type = tmp; } # endif return (ALTERED|STOP); } static int chgino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (memcmp(dirp->d_name, idesc->id_name, (int)dirp->d_namlen + 1)) return (KEEPON); dirp->d_ino = idesc->id_parent; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; return (ALTERED|STOP); } int linkup(orphan, parentdir) ino_t orphan; ino_t parentdir; { register struct dinode *dp; int lostdir; ino_t oldlfdir; struct inodesc idesc; char tempname[BUFSIZ]; memset(&idesc, 0, sizeof(struct inodesc)); dp = ginode(orphan); lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = ROOTINO; if ((ckinode(dp, &idesc) & FOUND) != 0) { lfdir = idesc.id_parent; } else { pwarn("NO lost+found DIRECTORY"); if (preen || reply("CREATE")) { lfdir = allocdir(ROOTINO, (ino_t)0, lfmode); if (lfdir != 0) { if (makeentry(ROOTINO, lfdir, lfname) != 0) { if (preen) printf(" (CREATED)\n"); } else { freedir(lfdir, ROOTINO); lfdir = 0; if (preen) printf("\n"); } } } } if (lfdir == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY"); printf("\n\n"); return (0); } } dp = ginode(lfdir); if ((dp->di_mode & IFMT) != IFDIR) { pfatal("lost+found IS NOT A DIRECTORY"); if (reply("REALLOCATE") == 0) return (0); oldlfdir = lfdir; if ((lfdir = allocdir(ROOTINO, (ino_t)0, lfmode)) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } if ((changeino(ROOTINO, lfname, lfdir) & ALTERED) == 0) { pfatal("SORRY. 
CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } inodirty(); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = oldlfdir; adjust(&idesc, lncntp[oldlfdir] + 1); lncntp[oldlfdir] = 0; dp = ginode(lfdir); } if (statemap[lfdir] != DFOUND) { pfatal("SORRY. NO lost+found DIRECTORY\n\n"); return (0); } (void)lftempname(tempname, orphan); if (makeentry(lfdir, orphan, tempname) == 0) { pfatal("SORRY. NO SPACE IN lost+found DIRECTORY"); printf("\n\n"); return (0); } lncntp[orphan]--; if (lostdir) { if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && parentdir != (ino_t)-1) (void)makeentry(orphan, lfdir, ".."); dp = ginode(lfdir); dp->di_nlink++; inodirty(); lncntp[lfdir]++; pwarn("DIR I=%lu CONNECTED. ", orphan); if (parentdir != (ino_t)-1) { printf("PARENT WAS I=%lu\n", parentdir); /* * The parent directory, because of the ordering * guarantees, has had the link count incremented * for the child, but no entry was made. This * fixes the parent link count so that fsck does * not need to be rerun. */ lncntp[parentdir]++; } if (preen == 0) printf("\n"); } return (1); } /* * fix an entry in a directory. */ int changeino(dir, name, newnum) ino_t dir; char *name; ino_t newnum; { struct inodesc idesc; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = chgino; idesc.id_number = dir; idesc.id_fix = DONTKNOW; idesc.id_name = name; idesc.id_parent = newnum; /* new value for name */ return (ckinode(ginode(dir), &idesc)); } /* * make an entry in a directory */ int makeentry(parent, ino, name) ino_t parent, ino; char *name; { struct dinode *dp; struct inodesc idesc; char pathbuf[MAXPATHLEN + 1]; if (parent < ROOTINO || parent >= maxino || ino < ROOTINO || ino >= maxino) return (0); memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = mkentry; idesc.id_number = parent; idesc.id_parent = ino; /* this is the inode to enter */ idesc.id_fix = DONTKNOW; idesc.id_name = name; dp = ginode(parent); if (dp->di_size % DIRBLKSIZ) { dp->di_size = roundup(dp->di_size, DIRBLKSIZ); inodirty(); } if ((ckinode(dp, &idesc) & ALTERED) != 0) return (1); getpathname(pathbuf, parent, parent); dp = ginode(parent); if (expanddir(dp, pathbuf) == 0) return (0); return (ckinode(dp, &idesc) & ALTERED); } /* * Attempt to expand the size of a directory */ static int expanddir(dp, name) register struct dinode *dp; char *name; { ufs_daddr_t lastbn, newblk; register struct bufarea *bp; char *cp, firstblk[DIRBLKSIZ]; lastbn = lblkno(&sblock, dp->di_size); if (lastbn >= NDADDR - 1 || dp->di_db[lastbn] == 0 || dp->di_size == 0) return (0); if ((newblk = allocblk(sblock.fs_frag)) == 0) return (0); dp->di_db[lastbn + 1] = dp->di_db[lastbn]; dp->di_db[lastbn] = newblk; dp->di_size += sblock.fs_bsize; dp->di_blocks += btodb(sblock.fs_bsize); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(firstblk, bp->b_un.b_buf, DIRBLKSIZ); bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, firstblk, DIRBLKSIZ); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_bsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, &emptydir, sizeof emptydir); pwarn("NO SPACE LEFT IN %s", name); if (preen) printf(" (EXPANDED)\n"); else if (reply("EXPAND") == 0) goto bad; dirty(bp); inodirty(); return (1); bad: dp->di_db[lastbn] = 
dp->di_db[lastbn + 1]; dp->di_db[lastbn + 1] = 0; dp->di_size -= sblock.fs_bsize; dp->di_blocks -= btodb(sblock.fs_bsize); freeblk(newblk, sblock.fs_frag); return (0); } /* * allocate a new directory */ ino_t allocdir(parent, request, mode) ino_t parent, request; int mode; { ino_t ino; char *cp; struct dinode *dp; register struct bufarea *bp; struct dirtemplate *dirp; ino = allocino(request, IFDIR|mode); if (newinofmt) dirp = &dirhead; else dirp = (struct dirtemplate *)&odirhead; dirp->dot_ino = ino; dirp->dotdot_ino = parent; dp = ginode(ino); bp = getdirblk(dp->di_db[0], sblock.fs_fsize); if (bp->b_errs) { freeino(ino); return (0); } memmove(bp->b_un.b_buf, dirp, sizeof(struct dirtemplate)); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_fsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); dp->di_nlink = 2; inodirty(); if (ino == ROOTINO) { lncntp[ino] = dp->di_nlink; cacheino(dp, ino); return(ino); } if (statemap[parent] != DSTATE && statemap[parent] != DFOUND) { freeino(ino); return (0); } cacheino(dp, ino); statemap[ino] = statemap[parent]; if (statemap[ino] == DSTATE) { lncntp[ino] = dp->di_nlink; lncntp[parent]++; } dp = ginode(parent); dp->di_nlink++; inodirty(); return (ino); } /* * free a directory inode */ static void freedir(ino, parent) ino_t ino, parent; { struct dinode *dp; if (ino != parent) { dp = ginode(parent); dp->di_nlink--; inodirty(); } freeino(ino); } /* * generate a temporary name for the lost+found directory. */ static int lftempname(bufp, ino) char *bufp; ino_t ino; { register ino_t in; register char *cp; int namlen; cp = bufp + 2; for (in = maxino; in > 0; in /= 10) cp++; *--cp = 0; namlen = cp - bufp; in = ino; while (cp > bufp) { *--cp = (in % 10) + '0'; in /= 10; } *cp = '#'; return (namlen); } /* * Get a directory block. * Insure that it is held until another is requested. */ static struct bufarea * getdirblk(blkno, size) ufs_daddr_t blkno; long size; { if (pdirbp != 0) pdirbp->b_flags &= ~B_INUSE; pdirbp = getdatablk(blkno, size); return (pdirbp); } Index: head/sbin/fsck_ffs/fsck.h =================================================================== --- head/sbin/fsck_ffs/fsck.h (revision 34265) +++ head/sbin/fsck_ffs/fsck.h (revision 34266) @@ -1,281 +1,283 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fsck.h 8.4 (Berkeley) 5/9/95 */ #include <unistd.h> #include <stdlib.h> #include <stdio.h> #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MAXBUFSPACE 40*1024 /* maximum space to allocate to buffers */ #define INOBUFSIZE 56*1024 /* size of buffer to read inodes in pass1 */ #ifndef BUFSIZ #define BUFSIZ 1024 #endif #define USTATE 01 /* inode not allocated */ #define FSTATE 02 /* inode is file */ #define DSTATE 03 /* inode is directory */ #define DFOUND 04 /* directory found during descent */ #define DCLEAR 05 /* directory is to be cleared */ #define FCLEAR 06 /* file is to be cleared */ /* * buffer cache structure. */ struct bufarea { struct bufarea *b_next; /* free list queue */ struct bufarea *b_prev; /* free list queue */ ufs_daddr_t b_bno; int b_size; int b_errs; int b_flags; union { char *b_buf; /* buffer space */ ufs_daddr_t *b_indir; /* indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct dinode *b_dinode; /* inode block */ } b_un; char b_dirty; }; #define B_INUSE 1 #define MINBUFS 5 /* minimum number of buffers required */ struct bufarea bufhead; /* head of list of other blks in filesys */ struct bufarea sblk; /* file system superblock */ struct bufarea cgblk; /* cylinder group blocks */ struct bufarea *pdirbp; /* current directory contents */ struct bufarea *pbp; /* current inode block */ #define dirty(bp) (bp)->b_dirty = 1 #define initbarea(bp) \ (bp)->b_dirty = 0; \ (bp)->b_bno = (ufs_daddr_t)-1; \ (bp)->b_flags = 0; #define sbdirty() sblk.b_dirty = 1 #define cgdirty() cgblk.b_dirty = 1 #define sblock (*sblk.b_un.b_fs) #define cgrp (*cgblk.b_un.b_cg) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(); /* function to be applied to blocks of inode */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_daddr_t id_blkno; /* current block number being examined */ int id_numfrags; /* number of frags contained in block */ quad_t id_filesize; /* for DATA nodes, the size of the directory */ int id_loc; /* for DATA nodes, current location in dir */ int id_entryno; /* for DATA nodes, current entry number */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA or ADDR */ }; /* file types */ #define DATA 1 #define ADDR 2 /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup.
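 * A minimal sketch of that membership test (the same bounded walk
 * that pass1check() performs; dlp is a struct dups pointer and blkno
 * the block being checked):
 *
 *	for (dlp = duplist; dlp != NULL; dlp = dlp->next) {
 *		if (dlp->dup == blkno)
 *			return (1);	/* already recorded once */
 *		if (dlp == muldup)
 *			break;		/* end of the unique part */
 *	}
 *	return (0);
 *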
To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs_daddr_t dup; }; struct dups *duplist; /* head of dup list */ struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Linked list of inodes with zero link counts. */ struct zlncnt { struct zlncnt *next; ino_t zlncnt; }; struct zlncnt *zlnhead; /* head of zero link count list */ /* * Inode cache data structures. */ struct inoinfo { struct inoinfo *i_nexthash; /* next entry in hash chain */ ino_t i_number; /* inode number of this entry */ ino_t i_parent; /* inode number of parent */ ino_t i_dotdot; /* inode number of `..' */ size_t i_isize; /* size of inode */ u_int i_numblks; /* size of block array in bytes */ ufs_daddr_t i_blks[1]; /* actually longer */ } **inphead, **inpsort; long numdirs, listmax, inplast; char *cdevname; /* name of device being checked */ long dev_bsize; /* computed value of DEV_BSIZE */ long secsize; /* actual disk sector size */ char fflag; /* force fs check (ignore clean flag) */ char nflag; /* assume a no response */ char yflag; /* assume a yes response */ int bflag; /* location of alternate super block */ int debug; /* output debugging info */ int cvtlevel; /* convert to newer file system format */ int doinglevel1; /* converting to new cylinder group format */ int doinglevel2; /* converting to new inode format */ int newinofmt; /* filesystem has new inode format */ +char usedsoftdep; /* just fix soft dependency inconsistencies */ +char resolved; /* cleared if unresolved changes => not clean */ char preen; /* just fix normal inconsistencies */ char hotroot; /* checking root device */ char havesb; /* superblock has been read */ int fsmodified; /* 1 => write done to file system */ int fsreadfd; /* file descriptor for reading file system */ int fswritefd; /* file descriptor for writing file system */ int returntosingle; /* return to single user mode */ int rerun; /* rerun fsck. Only used in non-preen mode */ ufs_daddr_t maxfsblock; /* number of blocks in the file system */ char *blockmap; /* ptr to primary blk allocation map */ ino_t maxino; /* number of inodes in file system */ ino_t lastino; /* last inode in use */ char *statemap; /* ptr to inode state table */ u_char *typemap; /* ptr to inode type table */ short *lncntp; /* ptr to link count table */ ino_t lfdir; /* lost & found directory inode number */ char *lfname; /* lost & found directory name */ int lfmode; /* lost & found directory creation mode */ ufs_daddr_t n_blks; /* number of blocks in use */ ufs_daddr_t n_files; /* number of files in use */ #define clearinode(dp) (*(dp) = zino) struct dinode zino; #define setbmap(blkno) setbit(blockmap, blkno) #define testbmap(blkno) isset(blockmap, blkno) #define clrbmap(blkno) clrbit(blockmap, blkno) #define STOP 0x01 #define SKIP 0x02 #define KEEPON 0x04 #define ALTERED 0x08 #define FOUND 0x10 #define EEXIT 8 /* Standard error exit. 
*/ struct fstab; void adjust __P((struct inodesc *, int lcnt)); ufs_daddr_t allocblk __P((long frags)); ino_t allocdir __P((ino_t parent, ino_t request, int mode)); ino_t allocino __P((ino_t request, int type)); void blkerror __P((ino_t ino, char *type, ufs_daddr_t blk)); char *blockcheck __P((char *name)); int bread __P((int fd, char *buf, ufs_daddr_t blk, long size)); void bufinit __P((void)); void bwrite __P((int fd, char *buf, ufs_daddr_t blk, long size)); void cacheino __P((struct dinode *dp, ino_t inumber)); void catch __P((int)); void catchquit __P((int)); int changeino __P((ino_t dir, char *name, ino_t newnum)); int checkfstab __P((int preen, int maxrun, int (*docheck)(struct fstab *), int (*chkit)(char *, char *, long, int))); int chkrange __P((ufs_daddr_t blk, int cnt)); void ckfini __P((int markclean)); int ckinode __P((struct dinode *dp, struct inodesc *)); void clri __P((struct inodesc *, char *type, int flag)); void direrror __P((ino_t ino, char *errmesg)); int dirscan __P((struct inodesc *)); int dofix __P((struct inodesc *, char *msg)); void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); void fileerror __P((ino_t cwd, ino_t ino, char *errmesg)); int findino __P((struct inodesc *)); int findname __P((struct inodesc *)); void flush __P((int fd, struct bufarea *bp)); void freeblk __P((ufs_daddr_t blkno, long frags)); void freeino __P((ino_t ino)); void freeinodebuf __P((void)); int ftypeok __P((struct dinode *dp)); void getblk __P((struct bufarea *bp, ufs_daddr_t blk, long size)); struct bufarea *getdatablk __P((ufs_daddr_t blkno, long size)); struct inoinfo *getinoinfo __P((ino_t inumber)); struct dinode *getnextinode __P((ino_t inumber)); void getpathname __P((char *namebuf, ino_t curdir, ino_t ino)); struct dinode *ginode __P((ino_t inumber)); void inocleanup __P((void)); void inodirty __P((void)); int linkup __P((ino_t orphan, ino_t parentdir)); int makeentry __P((ino_t parent, ino_t ino, char *name)); void panic __P((const char *fmt, ...)); void pass1 __P((void)); void pass1b __P((void)); int pass1check __P((struct inodesc *)); void pass2 __P((void)); void pass3 __P((void)); void pass4 __P((void)); int pass4check __P((struct inodesc *)); void pass5 __P((void)); void pfatal __P((const char *fmt, ...)); void pinode __P((ino_t ino)); void propagate __P((void)); void pwarn __P((const char *fmt, ...)); int reply __P((char *question)); void resetinodebuf __P((void)); int setup __P((char *dev)); void voidquit __P((int)); Index: head/sbin/fsck_ffs/inode.c =================================================================== --- head/sbin/fsck_ffs/inode.c (revision 34265) +++ head/sbin/fsck_ffs/inode.c (revision 34266) @@ -1,621 +1,632 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include <sys/param.h> #include <sys/time.h> #include <ufs/ufs/dinode.h> #include <ufs/ufs/dir.h> #include <ufs/ffs/fs.h> #include <err.h> #include <pwd.h> #include <string.h> #include "fsck.h" static ino_t startinum; static int iblock __P((struct inodesc *, long ilevel, quad_t isize)); int ckinode(dp, idesc) struct dinode *dp; register struct inodesc *idesc; { ufs_daddr_t *ap; long ret, n, ndb, offset; struct dinode dino; quad_t remsize, sizepb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_entryno = 0; idesc->id_filesize = dp->di_size; mode = dp->di_mode & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0))) return (KEEPON); dino = *dp; ndb = howmany(dino.di_size, sblock.fs_bsize); for (ap = &dino.di_db[0]; ap < &dino.di_db[NDADDR]; ap++) { if (--ndb == 0 && (offset = blkoff(&sblock, dino.di_size)) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (*ap == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size = (ap - &dino.di_db[0]) * sblock.fs_bsize; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); } } continue; } idesc->id_blkno = *ap; if (idesc->id_type == ADDR) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = dino.di_size - sblock.fs_bsize * NDADDR; sizepb = sblock.fs_bsize; for (ap = &dino.di_ib[0], n = 1; n <= NIADDR; ap++, n++) { if (*ap) { idesc->id_blkno = *ap; ret = iblock(idesc, n, remsize); if (ret & STOP) return (ret); } else { if (idesc->id_type == DATA && remsize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= remsize; remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); break; } } } sizepb *= NINDIR(&sblock); remsize -= sizepb; } return (KEEPON); } static int
iblock(idesc, ilevel, isize) struct inodesc *idesc; long ilevel; quad_t isize; { ufs_daddr_t *ap; ufs_daddr_t *aplim; struct bufarea *bp; int i, n, (*func)(), nif; quad_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; struct dinode *dp; if (idesc->id_type == ADDR) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) return (SKIP); bp = getdatablk(idesc->id_blkno, sblock.fs_bsize); ilevel--; for (sizepb = sblock.fs_bsize, i = 0; i < ilevel; i++) sizepb *= NINDIR(&sblock); nif = howmany(isize , sizepb); if (nif > NINDIR(&sblock)) nif = NINDIR(&sblock); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { aplim = &bp->b_un.b_indir[NINDIR(&sblock)]; for (ap = &bp->b_un.b_indir[nif]; ap < aplim; ap++) { if (*ap == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", idesc->id_number); if (dofix(idesc, buf)) { *ap = 0; dirty(bp); } } flush(fswritefd, bp); } aplim = &bp->b_un.b_indir[nif]; for (ap = bp->b_un.b_indir; ap < aplim; ap++) { if (*ap) { idesc->id_blkno = *ap; if (ilevel == 0) n = (*func)(idesc); else n = iblock(idesc, ilevel, isize); if (n & STOP) { bp->b_flags &= ~B_INUSE; return (n); } } else { if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= isize; isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); bp->b_flags &= ~B_INUSE; return(STOP); } } } isize -= sizepb; } bp->b_flags &= ~B_INUSE; return (KEEPON); } /* * Check that a block in a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(blk, cnt) ufs_daddr_t blk; int cnt; { register int c; if (blk < 0 || blk >= maxfsblock || cnt < 0 || cnt > maxfsblock - blk) return (1); c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", blk + cnt, cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", blk+cnt, sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. */ struct dinode * ginode(inumber) ino_t inumber; { ufs_daddr_t iblk; if (inumber < ROOTINO || inumber > maxino) errx(EEXIT, "bad inode number %d to ginode", inumber); if (startinum == 0 || inumber < startinum || inumber >= startinum + INOPB(&sblock)) { iblk = ino_to_fsba(&sblock, inumber); if (pbp != 0) pbp->b_flags &= ~B_INUSE; pbp = getdatablk(iblk, sblock.fs_bsize); startinum = (inumber / INOPB(&sblock)) * INOPB(&sblock); } return (&pbp->b_un.b_dinode[inumber % INOPB(&sblock)]); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. 
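 * The calling protocol, as pass1() uses it (a sketch of the existing
 * usage, not new functionality):
 *
 *	resetinodebuf();	/* also consumes inodes below ROOTINO */
 *	for (inumber = ROOTINO; inumber < maxino; inumber++)
 *		dp = getnextinode(inumber);
 *	freeinodebuf();
 *
 * Requests must be strictly sequential; the buffer is refilled in
 * INOBUFSIZE-sized reads, with a short partial read at the end of
 * each cylinder group.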
*/ ino_t nextino, lastinum; long readcnt, readpercg, fullcnt, inobufsize, partialcnt, partialsize; struct dinode *inodebuf; struct dinode * getnextinode(inumber) ino_t inumber; { long size; ufs_daddr_t dblk; static struct dinode *dp; if (inumber != nextino++ || inumber > maxino) errx(EEXIT, "bad inode number %d to nextinode", inumber); if (inumber >= lastinum) { readcnt++; dblk = fsbtodb(&sblock, ino_to_fsba(&sblock, lastinum)); if (readcnt % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } (void)bread(fsreadfd, (char *)inodebuf, dblk, size); /* ??? */ dp = inodebuf; } return (dp++); } void resetinodebuf() { startinum = 0; nextino = 0; lastinum = 0; readcnt = 0; inobufsize = blkroundup(&sblock, INOBUFSIZE); fullcnt = inobufsize / sizeof(struct dinode); readpercg = sblock.fs_ipg / fullcnt; partialcnt = sblock.fs_ipg % fullcnt; partialsize = partialcnt * sizeof(struct dinode); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } if (inodebuf == NULL && (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "Cannot allocate space for inode buffer"); while (nextino < ROOTINO) (void)getnextinode(nextino); } void freeinodebuf() { if (inodebuf != NULL) free((char *)inodebuf); inodebuf = NULL; } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ void cacheino(dp, inumber) register struct dinode *dp; ino_t inumber; { register struct inoinfo *inp; struct inoinfo **inpp; unsigned int blks; blks = howmany(dp->di_size, sblock.fs_bsize); if (blks > NDADDR) blks = NDADDR + NIADDR; inp = (struct inoinfo *) malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs_daddr_t)); if (inp == NULL) return; inpp = &inphead[inumber % numdirs]; inp->i_nexthash = *inpp; *inpp = inp; if (inumber == ROOTINO) inp->i_parent = ROOTINO; else inp->i_parent = (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = dp->di_size; inp->i_numblks = blks * sizeof(ufs_daddr_t); memmove(&inp->i_blks[0], &dp->di_db[0], (size_t)inp->i_numblks); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)realloc((char *)inpsort, (unsigned)listmax * sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(inumber) ino_t inumber; { register struct inoinfo *inp; for (inp = inphead[inumber % numdirs]; inp; inp = inp->i_nexthash) { if (inp->i_number != inumber) continue; return (inp); } errx(EEXIT, "cannot find inode %d", inumber); return ((struct inoinfo *)0); } /* * Clean up all the inode cache structure. */ void inocleanup() { register struct inoinfo **inpp; if (inphead == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphead); free((char *)inpsort); inphead = inpsort = NULL; } void inodirty() { dirty(pbp); } void clri(idesc, type, flag) register struct inodesc *idesc; char *type; int flag; { register struct dinode *dp; dp = ginode(idesc->id_number); if (flag == 1) { pwarn("%s %s", type, (dp->di_mode & IFMT) == IFDIR ? 
"DIR" : "FILE"); pinode(idesc->id_number); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; (void)ckinode(dp, idesc); clearinode(dp); statemap[idesc->id_number] = USTATE; inodirty(); } } int findname(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent) return (KEEPON); memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= ROOTINO && dirp->d_ino <= maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } void pinode(ino) ino_t ino; { register struct dinode *dp; register char *p; struct passwd *pw; time_t t; printf(" I=%lu ", ino); if (ino < ROOTINO || ino > maxino) return; dp = ginode(ino); printf(" OWNER="); if ((pw = getpwuid((int)dp->di_uid)) != 0) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)dp->di_uid); printf("MODE=%o\n", dp->di_mode); if (preen) printf("%s: ", cdevname); printf("SIZE=%qu ", dp->di_size); t = dp->di_mtime; p = ctime(&t); printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void blkerror(ino, type, blk) ino_t ino; char *type; ufs_daddr_t blk; { pfatal("%ld %s I=%lu", blk, type, ino); printf("\n"); switch (statemap[ino]) { case FSTATE: statemap[ino] = FCLEAR; return; case DSTATE: statemap[ino] = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", statemap[ino]); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(request, type) ino_t request; int type; { register ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; else if (statemap[request] != USTATE) return (0); for (ino = request; ino < maxino; ino++) if (statemap[ino] == USTATE) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: statemap[ino] = FSTATE; break; default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = dp->di_atime; dp->di_size = sblock.fs_fsize; dp->di_blocks = btodb(sblock.fs_fsize); n_files++; inodirty(); if (newinofmt) typemap[ino] = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino) ino_t ino; { struct inodesc idesc; struct dinode *dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = ino; dp = ginode(ino); (void)ckinode(dp, &idesc); clearinode(dp); inodirty(); statemap[ino] = USTATE; n_files--; } Index: head/sbin/fsck_ffs/main.c =================================================================== --- head/sbin/fsck_ffs/main.c (revision 34265) +++ head/sbin/fsck_ffs/main.c (revision 34266) @@ -1,353 +1,359 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include "fsck.h" int returntosingle; static int argtoi __P((int flag, char *req, char *str, int base)); static int docheck __P((struct fstab *fsp)); static int checkfilesys __P((char *filesys, char *mntpt, long auxdata, int child)); int main __P((int argc, char *argv[])); int main(argc, argv) int argc; char *argv[]; { int ch; int ret, maxrun = 0; sync(); while ((ch = getopt(argc, argv, "dfpnNyYb:c:l:m:")) != -1) { switch (ch) { case 'p': preen++; break; case 'b': bflag = argtoi('b', "number", optarg, 10); printf("Alternate super block location: %d\n", bflag); break; case 'c': cvtlevel = argtoi('c', "conversion level", optarg, 10); break; case 'd': debug++; break; case 'f': fflag++; break; case 'l': maxrun = argtoi('l', "number", optarg, 10); break; case 'm': lfmode = argtoi('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': case 'N': nflag++; yflag = 0; break; case 'y': case 'Y': yflag++; nflag = 0; break; default: errx(EEXIT, "%c option?", ch); } } argc -= optind; argv += optind; if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (preen) (void)signal(SIGQUIT, catchquit); if (argc) { while (argc-- > 0) (void)checkfilesys(blockcheck(*argv++), 0, 0L, 0); exit(0); } ret = checkfstab(preen, maxrun, docheck, checkfilesys); if (returntosingle) exit(2); exit(ret); } static int argtoi(flag, req, str, base) int flag; char *req, *str; int base; { char *cp; int ret; ret = (int)strtol(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Determine whether a filesystem should be checked. */ static int docheck(fsp) register struct fstab *fsp; { if (strcmp(fsp->fs_vfstype, "ufs") || (strcmp(fsp->fs_type, FSTAB_RW) && strcmp(fsp->fs_type, FSTAB_RO)) || fsp->fs_passno == 0) return (0); return (1); } /* * Check the specified filesystem. */ /* ARGSUSED */ static int checkfilesys(filesys, mntpt, auxdata, child) char *filesys, *mntpt; long auxdata; int child; { ufs_daddr_t n_ffree, n_bfree; struct dups *dp; struct zlncnt *zlnp; int cylno, flags; if (preen && child) (void)signal(SIGQUIT, voidquit); cdevname = filesys; if (debug && preen) pwarn("starting\n"); switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (0); case -1: pwarn("clean, %ld free ", sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", sblock.fs_cstotal.cs_nffree, sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
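+ * (resolved is later handed to ckfini(): only a run with no rerun
+ * pending, and with the root not mounted read-write, may mark the
+ * file system clean.)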
+ */ + resolved = 1; + /* * 1: scan inodes tallying blocks used */ if (preen == 0) { printf("** Last Mounted on %s\n", sblock.fs_fsmnt); if (hotroot) printf("** Root file system\n"); printf("** Phase 1 - Check Blocks and Sizes\n"); } pass1(); /* * 1b: locate first references to duplicates, if any */ if (duplist) { - if (preen) + if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with -p"); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); } /* * 2: traverse directories from root to mark all connected directories */ if (preen == 0) printf("** Phase 2 - Check Pathnames\n"); pass2(); /* * 3: scan inodes looking for disconnected directories */ if (preen == 0) printf("** Phase 3 - Check Connectivity\n"); pass3(); /* * 4: scan inodes looking for disconnected files; check reference counts */ if (preen == 0) printf("** Phase 4 - Check Reference Counts\n"); pass4(); /* * 5: check and repair resource counts in cylinder groups */ if (preen == 0) printf("** Phase 5 - Check Cyl groups\n"); pass5(); /* * print out summary statistics */ n_ffree = sblock.fs_cstotal.cs_nffree; n_bfree = sblock.fs_cstotal.cs_nbfree; pwarn("%ld files, %ld used, %ld free ", n_files, n_blks, n_ffree + sblock.fs_frag * n_bfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", n_ffree, n_bfree, n_ffree * 100.0 / sblock.fs_dsize); if (debug && (n_files -= maxino - ROOTINO - sblock.fs_cstotal.cs_nifree)) printf("%d files missing\n", n_files); if (debug) { n_blks += sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0)); n_blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0); n_blks += howmany(sblock.fs_cssize, sblock.fs_fsize); if (n_blks -= maxfsblock - (n_ffree + sblock.fs_frag * n_bfree)) printf("%d blocks missing\n", n_blks); if (duplist != NULL) { printf("The following duplicate blocks remain:"); for (dp = duplist; dp; dp = dp->next) printf(" %d,", dp->dup); printf("\n"); } if (zlnhead != NULL) { printf("The following zero link count inodes remain:"); for (zlnp = zlnhead; zlnp; zlnp = zlnp->next) printf(" %u,", zlnp->zlncnt); printf("\n"); } } zlnhead = (struct zlncnt *)0; duplist = (struct dups *)0; muldup = (struct dups *)0; inocleanup(); if (fsmodified) { (void)time(&sblock.fs_time); sbdirty(); } if (cvtlevel && sblk.b_dirty) { /* * Write out the duplicate super blocks */ for (cylno = 0; cylno < sblock.fs_ncg; cylno++) bwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE); } - if (!hotroot) { - ckfini(1); - } else { + if (rerun) + resolved = 0; + flags = 0; + if (hotroot) { struct statfs stfs_buf; /* * Check to see if root is mounted read-write. */ if (statfs("/", &stfs_buf) == 0) flags = stfs_buf.f_flags; - else - flags = 0; - ckfini(flags & MNT_RDONLY); + if ((flags & MNT_RDONLY) == 0) + resolved = 0; } + ckfini(resolved); free(blockmap); free(statemap); free((char *)lncntp); if (!fsmodified) return (0); if (!preen) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (rerun) printf("\n***** PLEASE RERUN FSCK *****\n"); if (hotroot) { struct ufs_args args; int ret; /* * We modified the root. Do a mount update on * it, unless it is read-write, so we can continue. 
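 * (The MNT_UPDATE | MNT_RELOAD mount() below asks the kernel to
 * re-read the repaired metadata of the already-mounted root in place;
 * when the root is mounted read-write the reload is not attempted and
 * a reboot is required instead.)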
*/ if (flags & MNT_RDONLY) { args.fspec = 0; args.export.ex_flags = 0; args.export.ex_root = 0; flags |= MNT_UPDATE | MNT_RELOAD; ret = mount("ufs", "/", flags, &args); if (ret == 0) return (0); } if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (0); } Index: head/sbin/fsck_ffs/pass1.c =================================================================== --- head/sbin/fsck_ffs/pass1.c (revision 34265) +++ head/sbin/fsck_ffs/pass1.c (revision 34266) @@ -1,322 +1,330 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)pass1.c 8.6 (Berkeley) 4/28/95"; #endif /* not lint */ #include <sys/param.h> #include <sys/time.h> #include <ufs/ufs/dinode.h> #include <ufs/ufs/dir.h> #include <ufs/ffs/fs.h> #include <err.h> #include <string.h> #include "fsck.h" static ufs_daddr_t badblk; static ufs_daddr_t dupblk; static void checkinode __P((ino_t inumber, struct inodesc *)); void pass1() { ino_t inumber; int c, i, cgd; struct inodesc idesc; /* * Set file system reserved blocks in used block map. */ for (c = 0; c < sblock.fs_ncg; c++) { cgd = cgdmin(&sblock, c); if (c == 0) { i = cgbase(&sblock, c); cgd += howmany(sblock.fs_cssize, sblock.fs_fsize); } else i = cgsblock(&sblock, c); for (; i < cgd; i++) setbmap(i); } /* * Find all allocated blocks.
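 * Every inode is visited in numerical order through the
 * getnextinode() read-ahead buffer; checkinode() validates each one
 * and tallies its blocks into the global block map via pass1check().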
*/ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass1check; inumber = 0; n_files = n_blks = 0; resetinodebuf(); for (c = 0; c < sblock.fs_ncg; c++) { for (i = 0; i < sblock.fs_ipg; i++, inumber++) { if (inumber < ROOTINO) continue; checkinode(inumber, &idesc); } } freeinodebuf(); } static void checkinode(inumber, idesc) ino_t inumber; register struct inodesc *idesc; { register struct dinode *dp; struct zlncnt *zlnp; int ndb, j; mode_t mode; char *symbuf; dp = getnextinode(inumber); mode = dp->di_mode & IFMT; if (mode == 0) { if (memcmp(dp->di_db, zino.di_db, NDADDR * sizeof(ufs_daddr_t)) || memcmp(dp->di_ib, zino.di_ib, NIADDR * sizeof(ufs_daddr_t)) || dp->di_mode || dp->di_size) { pfatal("PARTIALLY ALLOCATED INODE I=%lu", inumber); if (reply("CLEAR") == 1) { dp = ginode(inumber); clearinode(dp); inodirty(); } } statemap[inumber] = USTATE; return; } lastino = inumber; if (/* dp->di_size < 0 || */ dp->di_size + sblock.fs_bsize - 1 < dp->di_size || (mode == IFDIR && dp->di_size > MAXDIRSIZE)) { if (debug) printf("bad size %qu:", dp->di_size); goto unknown; } if (!preen && mode == IFMT && reply("HOLD BAD BLOCK") == 1) { dp = ginode(inumber); dp->di_size = sblock.fs_fsize; dp->di_mode = IFREG|0600; inodirty(); } ndb = howmany(dp->di_size, sblock.fs_bsize); if (ndb < 0) { if (debug) printf("bad size %qu ndb %d:", dp->di_size, ndb); goto unknown; } if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { if (doinglevel2 && dp->di_size > 0 && dp->di_size < MAXSYMLINKLEN && dp->di_blocks != 0) { symbuf = alloca(secsize); if (bread(fsreadfd, symbuf, fsbtodb(&sblock, dp->di_db[0]), (long)secsize) != 0) errx(EEXIT, "cannot read symlink"); if (debug) { symbuf[dp->di_size] = 0; printf("convert symlink %ld(%s) of size %ld\n", inumber, symbuf, (long)dp->di_size); } dp = ginode(inumber); memmove(dp->di_shortlink, symbuf, (long)dp->di_size); dp->di_blocks = 0; inodirty(); } /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. 
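 * For example, assuming a 4-byte ufs_daddr_t: a 25-byte path stored
 * in the inode covers howmany(25, 4) = 7 address slots, so
 * di_db[7] through di_db[NDADDR - 1] and all of di_ib[] must still
 * be zero for the inode to be accepted.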
*/ if (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0) { ndb = howmany(dp->di_size, sizeof(ufs_daddr_t)); if (ndb > NDADDR) { j = ndb - NDADDR; for (ndb = 1; j > 1; j--) ndb *= NINDIR(&sblock); ndb += NDADDR; } } } for (j = ndb; j < NDADDR; j++) if (dp->di_db[j] != 0) { if (debug) printf("bad direct addr: %ld\n", dp->di_db[j]); goto unknown; } for (j = 0, ndb -= NDADDR; ndb > 0; j++) ndb /= NINDIR(&sblock); for (; j < NIADDR; j++) if (dp->di_ib[j] != 0) { if (debug) printf("bad indirect addr: %ld\n", dp->di_ib[j]); goto unknown; } if (ftypeok(dp) == 0) goto unknown; n_files++; lncntp[inumber] = dp->di_nlink; if (dp->di_nlink <= 0) { zlnp = (struct zlncnt *)malloc(sizeof *zlnp); if (zlnp == NULL) { pfatal("LINK COUNT TABLE OVERFLOW"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } } else { zlnp->zlncnt = inumber; zlnp->next = zlnhead; zlnhead = zlnp; } } if (mode == IFDIR) { if (dp->di_size == 0) statemap[inumber] = DCLEAR; else statemap[inumber] = DSTATE; cacheino(dp, inumber); } else statemap[inumber] = FSTATE; typemap[inumber] = IFTODT(mode); if (doinglevel2 && (dp->di_ouid != (u_short)-1 || dp->di_ogid != (u_short)-1)) { dp = ginode(inumber); dp->di_uid = dp->di_ouid; dp->di_ouid = -1; dp->di_gid = dp->di_ogid; dp->di_ogid = -1; inodirty(); } badblk = dupblk = 0; idesc->id_number = inumber; (void)ckinode(dp, idesc); idesc->id_entryno *= btodb(sblock.fs_fsize); if (dp->di_blocks != idesc->id_entryno) { pwarn("INCORRECT BLOCK COUNT I=%lu (%ld should be %ld)", inumber, dp->di_blocks, idesc->id_entryno); if (preen) printf(" (CORRECTED)\n"); else if (reply("CORRECT") == 0) return; dp = ginode(inumber); dp->di_blocks = idesc->id_entryno; inodirty(); } return; unknown: pfatal("UNKNOWN FILE TYPE I=%lu", inumber); statemap[inumber] = FCLEAR; if (reply("CLEAR") == 1) { statemap[inumber] = USTATE; dp = ginode(inumber); clearinode(dp); inodirty(); } } int pass1check(idesc) register struct inodesc *idesc; { int res = KEEPON; int anyout, nfrags; ufs_daddr_t blkno = idesc->id_blkno; register struct dups *dlp; struct dups *new; if ((anyout = chkrange(blkno, idesc->id_numfrags)) != 0) { blkerror(idesc->id_number, "BAD", blkno); if (badblk++ >= MAXBAD) { pwarn("EXCESSIVE BAD BLKS I=%lu", idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } } for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (anyout && chkrange(blkno, 1)) { res = SKIP; } else if (!testbmap(blkno)) { n_blks++; setbmap(blkno); } else { blkerror(idesc->id_number, "DUP", blkno); if (dupblk++ >= MAXDUP) { pwarn("EXCESSIVE DUP BLKS I=%lu", idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new = (struct dups *)malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new->dup = blkno; if (muldup == 0) { duplist = muldup = new; new->next = 0; } else { new->next = muldup->next; muldup->next = new; } for (dlp = duplist; dlp != muldup; dlp = dlp->next) if (dlp->dup == blkno) break; if (dlp == muldup && dlp->dup != blkno) muldup = new; } /* * count the number of blocks found in id_entryno */ idesc->id_entryno++; } return (res); } Index: head/sbin/fsck_ffs/pass2.c 
=================================================================== --- head/sbin/fsck_ffs/pass2.c (revision 34265) +++ head/sbin/fsck_ffs/pass2.c (revision 34266) @@ -1,467 +1,482 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass2.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include <sys/param.h> #include <sys/time.h> #include <ufs/ufs/dinode.h> #include <ufs/ufs/dir.h> #include <ufs/ffs/fs.h> #include <err.h> #include <string.h> #include "fsck.h" #define MINDIRSIZE (sizeof (struct dirtemplate)) static int blksort __P((const void *, const void *)); static int pass2check __P((struct inodesc *)); void pass2() { register struct dinode *dp; register struct inoinfo **inpp, *inp; struct inoinfo **inpend; struct inodesc curino; struct dinode dino; char pathbuf[MAXPATHLEN + 1]; switch (statemap[ROOTINO]) { case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; case DCLEAR: pfatal("DUPS/BAD IN ROOT INODE"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: case FCLEAR: pfatal("ROOT INODE NOT DIRECTORY"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; inodirty(); break; case DSTATE: break; default: errx(EEXIT, "BAD STATE %d FOR ROOT INODE", statemap[ROOTINO]); } statemap[ROOTINO] = DFOUND; if (newinofmt) { statemap[WINO] = FSTATE; typemap[WINO] = DT_WHT; } /* * Sort the directory list into disk block order. */ qsort((char *)inpsort, (size_t)inplast, sizeof *inpsort, blksort); /* * Check the integrity of each directory. */ memset(&curino, 0, sizeof(struct inodesc)); curino.id_type = DATA; curino.id_func = pass2check; dp = &dino; inpend = &inpsort[inplast]; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_isize == 0) continue; if (inp->i_isize < MINDIRSIZE) { direrror(inp->i_number, "DIRECTORY TOO SHORT"); inp->i_isize = roundup(MINDIRSIZE, DIRBLKSIZ); if (reply("FIX") == 1) { dp = ginode(inp->i_number); dp->di_size = inp->i_isize; inodirty(); dp = &dino; } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); if (preen || reply("ADJUST") == 1) { dp = ginode(inp->i_number); dp->di_size = roundup(inp->i_isize, DIRBLKSIZ); inodirty(); dp = &dino; } } memset(&dino, 0, sizeof(struct dinode)); dino.di_mode = IFDIR; dp->di_size = inp->i_isize; memmove(&dp->di_db[0], &inp->i_blks[0], (size_t)inp->i_numblks); curino.id_number = inp->i_number; curino.id_parent = inp->i_parent; (void)ckinode(dp, &curino); } /* * Now that the parents of all directories have been found, * make another pass to verify the value of `..'
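 * Three cases are handled below: a `..' that already matches the
 * parent (or was marked unknown, (ino_t)-1) is left alone; a missing
 * `..' (i_dotdot == 0) is recreated with makeentry(); and a `..'
 * naming the wrong inode is rewritten with changeino(), with the
 * expected link counts of the old and new targets adjusted to match.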
*/ for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0 || inp->i_isize == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) statemap[inp->i_number] = DFOUND; if (inp->i_dotdot == inp->i_parent || inp->i_dotdot == (ino_t)-1) continue; if (inp->i_dotdot == 0) { inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, inp->i_number, "MISSING '..'"); if (reply("FIX") == 0) continue; (void)makeentry(inp->i_number, inp->i_parent, ".."); lncntp[inp->i_parent]--; continue; } fileerror(inp->i_parent, inp->i_number, "BAD INODE NUMBER FOR '..'"); if (reply("FIX") == 0) continue; lncntp[inp->i_dotdot]++; lncntp[inp->i_parent]--; inp->i_dotdot = inp->i_parent; (void)changeino(inp->i_number, "..", inp->i_parent); } /* * Mark all the directories that can be found from the root. */ propagate(); } static int pass2check(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; register struct inoinfo *inp; int n, entrysize, ret = 0; struct dinode *dp; char *errmsg; struct direct proto; char namebuf[MAXPATHLEN + 1]; char pathbuf[MAXPATHLEN + 1]; /* * If converting, set directory entry type. */ if (doinglevel2 && dirp->d_ino > 0 && dirp->d_ino < maxino) { dirp->d_type = typemap[dirp->d_ino]; ret |= ALTERED; } /* * check for "." */ if (idesc->id_entryno != 0) goto chk1; if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") == 0) { if (dirp->d_ino != idesc->id_number) { direrror(idesc->id_number, "BAD INODE NUMBER FOR '.'"); dirp->d_ino = idesc->id_number; if (reply("FIX") == 1) ret |= ALTERED; } if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '.'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk1; } direrror(idesc->id_number, "MISSING '.'"); proto.d_ino = idesc->id_number; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 1; (void)strcpy(proto.d_name, "."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (dirp->d_ino != 0 && strcmp(dirp->d_name, "..") != 0) { pfatal("CANNOT FIX, FIRST ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); } else if (dirp->d_reclen < entrysize) { pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '.'\n"); } else if (dirp->d_reclen < 2 * entrysize) { proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } else { n = dirp->d_reclen - entrysize; proto.d_reclen = entrysize; memmove(dirp, &proto, (size_t)entrysize); idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + entrysize); memset(dirp, 0, (size_t)n); dirp->d_reclen = n; if (reply("FIX") == 1) ret |= ALTERED; } chk1: if (idesc->id_entryno > 1) goto chk2; inp = getinoinfo(idesc->id_number); proto.d_ino = inp->i_parent; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 2; (void)strcpy(proto.d_name, ".."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (idesc->id_entryno == 0) { n = DIRSIZ(0, dirp); if (dirp->d_reclen < n + entrysize) goto chk2; proto.d_reclen = dirp->d_reclen - n; dirp->d_reclen = n; idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + n); memset(dirp, 0, (size_t)proto.d_reclen); dirp->d_reclen = proto.d_reclen; } if (dirp->d_ino != 
0 && strcmp(dirp->d_name, "..") == 0) { inp->i_dotdot = dirp->d_ino; if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '..'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk2; } if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") != 0) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, SECOND ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); inp->i_dotdot = (ino_t)-1; } else if (dirp->d_reclen < entrysize) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '..'\n"); inp->i_dotdot = (ino_t)-1; } else if (inp->i_parent != 0) { /* * We know the parent, so fix now. */ inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } idesc->id_entryno++; if (dirp->d_ino != 0) lncntp[dirp->d_ino]--; return (ret|KEEPON); chk2: if (dirp->d_ino == 0) return (ret|KEEPON); if (dirp->d_namlen <= 2 && dirp->d_name[0] == '.' && idesc->id_entryno >= 2) { if (dirp->d_namlen == 1) { direrror(idesc->id_number, "EXTRA '.' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } if (dirp->d_name[1] == '.') { direrror(idesc->id_number, "EXTRA '..' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } } idesc->id_entryno++; n = 0; if (dirp->d_ino > maxino) { fileerror(idesc->id_number, dirp->d_ino, "I OUT OF RANGE"); n = reply("REMOVE"); } else if (newinofmt && ((dirp->d_ino == WINO && dirp->d_type != DT_WHT) || (dirp->d_ino != WINO && dirp->d_type == DT_WHT))) { fileerror(idesc->id_number, dirp->d_ino, "BAD WHITEOUT ENTRY"); dirp->d_ino = WINO; dirp->d_type = DT_WHT; if (reply("FIX") == 1) ret |= ALTERED; } else { again: switch (statemap[dirp->d_ino]) { case USTATE: if (idesc->id_entryno <= 2) break; fileerror(idesc->id_number, dirp->d_ino, "UNALLOCATED"); n = reply("REMOVE"); break; case DCLEAR: case FCLEAR: if (idesc->id_entryno <= 2) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; break; } fileerror(idesc->id_number, dirp->d_ino, errmsg); if ((n = reply("REMOVE")) == 1) break; dp = ginode(dirp->d_ino); statemap[dirp->d_ino] = (dp->di_mode & IFMT) == IFDIR ? 
DSTATE : FSTATE; lncntp[dirp->d_ino] = dp->di_nlink; goto again; case DSTATE: if (statemap[idesc->id_number] == DFOUND) statemap[dirp->d_ino] = DFOUND; /* fall through */ case DFOUND: inp = getinoinfo(dirp->d_ino); if (inp->i_parent != 0 && idesc->id_entryno > 2) { getpathname(pathbuf, idesc->id_number, idesc->id_number); getpathname(namebuf, dirp->d_ino, dirp->d_ino); pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } if (idesc->id_entryno > 2) inp->i_parent = idesc->id_number; /* fall through */ case FSTATE: if (newinofmt && dirp->d_type != typemap[dirp->d_ino]) { fileerror(idesc->id_number, dirp->d_ino, "BAD TYPE VALUE"); dirp->d_type = typemap[dirp->d_ino]; if (reply("FIX") == 1) ret |= ALTERED; } lncntp[dirp->d_ino]--; break; default: errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[dirp->d_ino], dirp->d_ino); } } if (n == 0) return (ret|KEEPON); dirp->d_ino = 0; return (ret|KEEPON|ALTERED); } /* * Routine to sort disk blocks. */ static int blksort(arg1, arg2) const void *arg1, *arg2; { return ((*(struct inoinfo **)arg1)->i_blks[0] - (*(struct inoinfo **)arg2)->i_blks[0]); } Index: head/sbin/fsck_ffs/pass5.c =================================================================== --- head/sbin/fsck_ffs/pass5.c (revision 34265) +++ head/sbin/fsck_ffs/pass5.c (revision 34266) @@ -1,345 +1,375 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include "fsck.h" void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; register struct cg *newcg = (struct cg *)buf; struct ocg *ocg = (struct ocg *)buf; statemap[WINO] = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; doinglevel1 = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { char *doit = 0; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); doinglevel1 = 1; sbdirty(); } } } } switch ((int)fs->fs_postblformat) { case FS_42POSTBLFMT: basesize = (char *)(&ocg->cg_btot[0]) - (char *)(&ocg->cg_firstfield); sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; break; case FS_DYNAMICPOSTBLFMT: newcg->cg_btotoff = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); newcg->cg_boff = newcg->cg_btotoff + fs->fs_cpg * sizeof(long); newcg->cg_iusedoff = newcg->cg_boff + fs->fs_cpg * fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(long); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fs->fs_cpg * fs->fs_spc / NSPB(fs), NBBY); } newcg->cg_magic = CG_MAGIC; basesize = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); sumsize = newcg->cg_iusedoff - newcg->cg_btotoff; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) { idesc[i].id_type = ADDR; if 
(doinglevel2) idesc[i].id_fix = FIX; } memset(&cstotal, 0, sizeof(struct csum)); j = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (i = fs->fs_size; i < j; i++) setbmap(i); for (c = 0; c < fs->fs_ncg; c++) { getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize); if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_time = cg->cg_time; newcg->cg_cgx = c; if (c == fs->fs_ncg - 1) newcg->cg_ncyl = fs->fs_ncyl % fs->fs_cpg; else newcg->cg_ncyl = fs->fs_cpg; newcg->cg_ndblk = dmax - dbase; if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor < newcg->cg_niblk) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(&cg_blktot(newcg)[0], 0, (size_t)(sumsize + mapsize)); if (fs->fs_postblformat == FS_42POSTBLFMT) ocg->cg_magic = CG_MAGIC; j = fs->fs_ipg * c; for (i = 0; i < fs->fs_ipg; j++, i++) { switch (statemap[j]) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: newcg->cg_cs.cs_ndir++; /* fall through */ case FSTATE: case FCLEAR: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (j < ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[j], j); } } if (c == 0) for (i = 0; i < ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) continue; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; j = cbtocylno(fs, i); cg_blktot(newcg)[j]++; cg_blks(fs, newcg, j)[cbtorpos(fs, i)]++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; cs = &fs->fs_cs(fs, c); if (memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (doinglevel1) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], 
&cg_blktot(cg)[0], sumsize) != 0) && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); memmove(&cg_blktot(cg)[0], &cg_blktot(newcg)[0], (size_t)sumsize); + cgdirty(); + } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_blksfree(newcg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); cgdirty(); } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; if (memcmp(&cstotal, &fs->fs_cstotal, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(&fs->fs_cstotal, &cstotal, sizeof *cs); fs->fs_ronly = 0; sbdirty(); } if (fs->fs_fmod != 0) { pwarn("MODIFIED FLAG SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_fmod = 0; sbdirty(); } } if (fs->fs_clean == 0) { pwarn("CLEAN FLAG NOT SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_clean = 1; sbdirty(); } } } Index: head/sbin/fsck_ffs/setup.c =================================================================== --- head/sbin/fsck_ffs/setup.c (revision 34265) +++ head/sbin/fsck_ffs/setup.c (revision 34266) @@ -1,514 +1,520 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; #endif /* not lint */ #define DKTYPENAMES #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct bufarea asblk; #define altsblock (*asblk.b_un.b_fs) #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static void badsb __P((int listerr, char *s)); static int calcsb __P((char *dev, int devfd, struct fs *fs)); static struct disklabel *getdisklabel __P((char *s, int fd)); static int readsb __P((int listerr)); /* * Read in a superblock finding an alternate if necessary. * Return 1 if successful, 0 if unsuccessful, -1 if filesystem * is already clean (preen mode only). */ int setup(dev) char *dev; { long cg, size, asked, i, j; long skipclean, bmapsize; struct disklabel *lp; off_t sizepb; struct stat statb; struct fs proto; havesb = 0; fswritefd = -1; skipclean = preen; if (stat(dev, &statb) < 0) { printf("Can't stat %s: %s\n", dev, strerror(errno)); return (0); } if ((statb.st_mode & S_IFMT) != S_IFCHR) { pfatal("%s is not a character device", dev); if (reply("CONTINUE") == 0) return (0); } if ((fsreadfd = open(dev, O_RDONLY)) < 0) { printf("Can't open %s: %s\n", dev, strerror(errno)); return (0); } if (preen == 0) printf("** %s", dev); if (nflag || (fswritefd = open(dev, O_WRONLY)) < 0) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if (preen == 0) printf("\n"); fsmodified = 0; lfdir = 0; initbarea(&sblk); initbarea(&asblk); sblk.b_un.b_buf = malloc(SBSIZE); asblk.b_un.b_buf = malloc(SBSIZE); if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL) errx(EEXIT, "cannot allocate space for superblock"); lp = getdisklabel((char *)NULL, fsreadfd); if (lp) dev_bsize = secsize = lp->d_secsize; else dev_bsize = secsize = DEV_BSIZE; /* * Read in the superblock, looking for alternates if necessary */ if (readsb(1) == 0) { skipclean = 0; if (bflag || preen || calcsb(dev, fsreadfd, &proto) == 0) return(0); if (reply("LOOK FOR ALTERNATE SUPERBLOCKS") == 0) return (0); for (cg = 0; cg < proto.fs_ncg; cg++) { bflag = fsbtodb(&proto, cgsblock(&proto, cg)); if (readsb(0) != 0) break; } if (cg >= proto.fs_ncg) { printf("%s %s\n%s %s\n%s %s\n", "SEARCH FOR ALTERNATE SUPER-BLOCK", "FAILED. YOU MUST USE THE", "-b OPTION TO FSCK TO SPECIFY THE", "LOCATION OF AN ALTERNATE", "SUPER-BLOCK TO SUPPLY NEEDED", "INFORMATION; SEE fsck(8)."); bflag = 0; return(0); } pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag); bflag = 0; } maxfsblock = sblock.fs_size; maxino = sblock.fs_ncg * sblock.fs_ipg; /* * Check and potentially fix certain fields in the super block. 
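 * Each of the checks below follows the same pattern: complain via
 * pfatal() or pwarn(), substitute a sensible default only if the
 * operator (or preen mode) agrees, and mark the superblock buffer
 * dirty so the repaired value is written back.  A minimal sketch of
 * the idiom (field_is_impossible and sane_default are placeholders,
 * not members of struct fs):
 *
 *	if (field_is_impossible) {
 *		pfatal("IMPOSSIBLE VALUE IN SUPERBLOCK");
 *		if (reply("SET TO DEFAULT") == 1) {
 *			sblock.fs_field = sane_default;
 *			sbdirty();
 *		}
 *	}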
*/ if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) { pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); if (reply("SET TO DEFAULT") == 1) { sblock.fs_optim = FS_OPTTIME; sbdirty(); } } if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) { pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK", sblock.fs_minfree); if (reply("SET TO DEFAULT") == 1) { sblock.fs_minfree = 10; sbdirty(); } } if (sblock.fs_interleave < 1 || sblock.fs_interleave > sblock.fs_nsect) { pwarn("IMPOSSIBLE INTERLEAVE=%d IN SUPERBLOCK", sblock.fs_interleave); sblock.fs_interleave = 1; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_npsect < sblock.fs_nsect || sblock.fs_npsect > sblock.fs_nsect*2) { pwarn("IMPOSSIBLE NPSECT=%d IN SUPERBLOCK", sblock.fs_npsect); sblock.fs_npsect = sblock.fs_nsect; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_inodefmt >= FS_44INODEFMT) { newinofmt = 1; } else { sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; newinofmt = 0; } /* * Convert to new inode format. */ if (cvtlevel >= 2 && sblock.fs_inodefmt < FS_44INODEFMT) { if (preen) pwarn("CONVERTING TO NEW INODE FORMAT\n"); else if (!reply("CONVERT TO NEW INODE FORMAT")) return(0); doinglevel2++; sblock.fs_inodefmt = FS_44INODEFMT; sizepb = sblock.fs_bsize; sblock.fs_maxfilesize = sblock.fs_bsize * NDADDR - 1; for (i = 0; i < NIADDR; i++) { sizepb *= NINDIR(&sblock); sblock.fs_maxfilesize += sizepb; } sblock.fs_maxsymlinklen = MAXSYMLINKLEN; sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; sbdirty(); dirty(&asblk); } /* * Convert to new cylinder group format. */ if (cvtlevel >= 1 && sblock.fs_postblformat == FS_42POSTBLFMT) { if (preen) pwarn("CONVERTING TO NEW CYLINDER GROUP FORMAT\n"); else if (!reply("CONVERT TO NEW CYLINDER GROUP FORMAT")) return(0); doinglevel1++; sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT; sblock.fs_nrpos = 8; sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) - (char *)(&sblock.fs_firstfield); sblock.fs_rotbloff = &sblock.fs_space[0] - (u_char *)(&sblock.fs_firstfield); sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); sbdirty(); dirty(&asblk); } if (asblk.b_dirty && !bflag) { memmove(&altsblock, &sblock, (size_t)sblock.fs_sbsize); flush(fswritefd, &asblk); } /* * read in the summary info. */ asked = 0; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize; sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size); if (bread(fsreadfd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } /* * If we survive the above basic checks and are preening, * quit here unless forced. 
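 * (skipclean starts out equal to preen and is cleared as soon as the
 * primary superblock cannot be used as-is; fflag lets the operator
 * force a full check even when the clean flag is set.)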
*/ if (skipclean && sblock.fs_clean && !fflag) return (-1); /* * allocate and initialize the necessary maps */ bmapsize = roundup(howmany(maxfsblock, NBBY), sizeof(short)); blockmap = calloc((unsigned)bmapsize, sizeof (char)); if (blockmap == NULL) { printf("cannot alloc %u bytes for blockmap\n", (unsigned)bmapsize); goto badsb; } statemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (statemap == NULL) { printf("cannot alloc %u bytes for statemap\n", (unsigned)(maxino + 1)); goto badsb; } typemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (typemap == NULL) { printf("cannot alloc %u bytes for typemap\n", (unsigned)(maxino + 1)); goto badsb; } lncntp = (short *)calloc((unsigned)(maxino + 1), sizeof(short)); if (lncntp == NULL) { printf("cannot alloc %u bytes for lncntp\n", (unsigned)(maxino + 1) * sizeof(short)); goto badsb; } numdirs = sblock.fs_cstotal.cs_ndir; if (numdirs == 0) { printf("numdirs is zero, try using an alternate superblock\n"); goto badsb; } inplast = 0; listmax = numdirs + 10; inpsort = (struct inoinfo **)calloc((unsigned)listmax, sizeof(struct inoinfo *)); inphead = (struct inoinfo **)calloc((unsigned)numdirs, sizeof(struct inoinfo *)); if (inpsort == NULL || inphead == NULL) { printf("cannot alloc %u bytes for inphead\n", (unsigned)numdirs * sizeof(struct inoinfo *)); goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: ckfini(0); return (0); } /* * Read in the super block and its summary info. */ static int readsb(listerr) int listerr; { ufs_daddr_t super = bflag ? bflag : SBOFF / dev_bsize; if (bread(fsreadfd, (char *)&sblock, super, (long)SBSIZE) != 0) return (0); sblk.b_bno = super; sblk.b_size = SBSIZE; /* * run a few consistency checks of the super block */ if (sblock.fs_magic != FS_MAGIC) { badsb(listerr, "MAGIC NUMBER WRONG"); return (0); } if (sblock.fs_ncg < 1) { badsb(listerr, "NCG OUT OF RANGE"); return (0); } if (sblock.fs_cpg < 1) { badsb(listerr, "CPG OUT OF RANGE"); return (0); } if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl || (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) { badsb(listerr, "NCYL LESS THAN NCG*CPG"); return (0); } if (sblock.fs_sbsize > SBSIZE) { badsb(listerr, "SIZE PREPOSTEROUSLY LARGE"); return (0); } /* * Compute block size that the filesystem is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ super *= dev_bsize; dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); sblk.b_bno = super / dev_bsize; if (bflag) { havesb = 1; return (1); } /* * Set all possible fields that could differ, then do check * of whole super block against an alternate super block. * When an alternate super-block is specified this check is skipped. 
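 * (The assignments below deliberately copy every field that may
 * legitimately differ between the primary superblock and the alternate
 * kept in the last cylinder group, so that the final memcmp() only
 * trips on genuine damage.)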
*/ getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), sblock.fs_sbsize); if (asblk.b_errs) return (0); altsblock.fs_firstfield = sblock.fs_firstfield; altsblock.fs_unused_1 = sblock.fs_unused_1; altsblock.fs_time = sblock.fs_time; altsblock.fs_cstotal = sblock.fs_cstotal; altsblock.fs_cgrotor = sblock.fs_cgrotor; altsblock.fs_fmod = sblock.fs_fmod; altsblock.fs_clean = sblock.fs_clean; altsblock.fs_ronly = sblock.fs_ronly; altsblock.fs_flags = sblock.fs_flags; altsblock.fs_maxcontig = sblock.fs_maxcontig; altsblock.fs_minfree = sblock.fs_minfree; altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp); altsblock.fs_maxcluster = sblock.fs_maxcluster; memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt); memmove(altsblock.fs_sparecon, sblock.fs_sparecon, sizeof sblock.fs_sparecon); /* * The following should not have to be copied. */ altsblock.fs_fsbtodb = sblock.fs_fsbtodb; altsblock.fs_interleave = sblock.fs_interleave; altsblock.fs_npsect = sblock.fs_npsect; altsblock.fs_nrpos = sblock.fs_nrpos; altsblock.fs_state = sblock.fs_state; altsblock.fs_qbmask = sblock.fs_qbmask; altsblock.fs_qfmask = sblock.fs_qfmask; altsblock.fs_state = sblock.fs_state; altsblock.fs_maxfilesize = sblock.fs_maxfilesize; if (memcmp(&sblock, &altsblock, (int)sblock.fs_sbsize)) { if (debug) { long *nlp, *olp, *endlp; printf("superblock mismatches\n"); nlp = (long *)&altsblock; olp = (long *)&sblock; endlp = olp + (sblock.fs_sbsize / sizeof *olp); for ( ; olp < endlp; olp++, nlp++) { if (*olp == *nlp) continue; printf("offset %d, original %d, alternate %d\n", olp - (long *)&sblock, *olp, *nlp); } } badsb(listerr, "VALUES IN SUPER BLOCK DISAGREE WITH THOSE IN FIRST ALTERNATE"); return (0); } havesb = 1; return (1); } static void badsb(listerr, s) int listerr; char *s; { if (!listerr) return; if (preen) printf("%s: ", cdevname); pfatal("BAD SUPER BLOCK: %s\n", s); } /* * Calculate a prototype superblock based on information in the disk label. * When done the cgsblock macro can be calculated and the fs_ncg field * can be used. Do NOT attempt to use other macros without verifying that * their needed information is available! */ static int calcsb(dev, devfd, fs) char *dev; int devfd; register struct fs *fs; { register struct disklabel *lp; register struct partition *pp; register char *cp; int i; cp = strchr(dev, '\0') - 1; if (cp == (char *)-1 || ((*cp < 'a' || *cp > 'h') && !isdigit(*cp))) { pfatal("%s: CANNOT FIGURE OUT FILE SYSTEM PARTITION\n", dev); return (0); } lp = getdisklabel(dev, devfd); if (isdigit(*cp)) pp = &lp->d_partitions[0]; else pp = &lp->d_partitions[*cp - 'a']; if (pp->p_fstype != FS_BSDFFS) { pfatal("%s: NOT LABELED AS A BSD FILE SYSTEM (%s)\n", dev, pp->p_fstype < FSMAXTYPES ? 
fstypenames[pp->p_fstype] : "unknown"); return (0); } if (pp->p_fsize == 0 || pp->p_frag == 0) { pfatal("%s: LABELED AS A %s FILE SYSTEM, BUT BLOCK SIZE IS 0\n", dev, fstypenames[pp->p_fstype]); return (0); } memset(fs, 0, sizeof(struct fs)); fs->fs_fsize = pp->p_fsize; fs->fs_frag = pp->p_frag; fs->fs_cpg = pp->p_cpg; fs->fs_size = pp->p_size; fs->fs_ntrak = lp->d_ntracks; fs->fs_nsect = lp->d_nsectors; fs->fs_spc = lp->d_secpercyl; fs->fs_nspf = fs->fs_fsize / lp->d_secsize; fs->fs_sblkno = roundup( howmany(lp->d_bbsize + lp->d_sbsize, fs->fs_fsize), fs->fs_frag); fs->fs_cgmask = 0xffffffff; for (i = fs->fs_ntrak; i > 1; i >>= 1) fs->fs_cgmask <<= 1; if (!POWEROF2(fs->fs_ntrak)) fs->fs_cgmask <<= 1; fs->fs_cgoffset = roundup( howmany(fs->fs_nsect, NSPF(fs)), fs->fs_frag); fs->fs_fpg = (fs->fs_cpg * fs->fs_spc) / NSPF(fs); fs->fs_ncg = howmany(fs->fs_size / fs->fs_spc, fs->fs_cpg); for (fs->fs_fsbtodb = 0, i = NSPF(fs); i > 1; i >>= 1) fs->fs_fsbtodb++; dev_bsize = lp->d_secsize; return (1); } static struct disklabel * getdisklabel(s, fd) char *s; int fd; { static struct disklabel lab; if (ioctl(fd, DIOCGDINFO, (char *)&lab) < 0) { if (s == NULL) return ((struct disklabel *)NULL); pwarn("ioctl (GCINFO): %s\n", strerror(errno)); errx(EEXIT, "%s: can't read disk label", s); } return (&lab); } Index: head/sbin/fsck_ffs/utilities.c =================================================================== --- head/sbin/fsck_ffs/utilities.c (revision 34265) +++ head/sbin/fsck_ffs/utilities.c (revision 34266) @@ -1,625 +1,648 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" long diskreads, totalreads; /* Disk cache statistics */ static void rwerror __P((char *mesg, ufs_daddr_t blk)); int ftypeok(dp) struct dinode *dp; { switch (dp->di_mode & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", dp->di_mode); return (0); } } int reply(question) char *question; { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = !strcmp(question, "CONTINUE"); printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } /* * Malloc buffers and set up cache. */ void bufinit() { register struct bufarea *bp; long bufcnt, i; char *bufp; pbp = pdirbp = (struct bufarea *)0; bufp = malloc((unsigned int)sblock.fs_bsize); if (bufp == 0) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk); bufhead.b_next = bufhead.b_prev = &bufhead; bufcnt = MAXBUFSPACE / sblock.fs_bsize; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)malloc(sizeof(struct bufarea)); bufp = malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; initbarea(bp); } bufhead.b_size = i; /* save number of buffers */ } /* * Manage a cache of directory blocks. */ struct bufarea * getdatablk(blkno, size) ufs_daddr_t blkno; long size; { register struct bufarea *bp; for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == &bufhead) errx(EEXIT, "deadlocked buffer pool"); getblk(bp, blkno, size); /* fall through */ foundit: totalreads++; bp->b_prev->b_next = bp->b_next; bp->b_next->b_prev = bp->b_prev; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; bp->b_flags |= B_INUSE; return (bp); } void getblk(bp, blk, size) register struct bufarea *bp; ufs_daddr_t blk; long size; { ufs_daddr_t dblk; dblk = fsbtodb(&sblock, blk); if (bp->b_bno != dblk) { flush(fswritefd, bp); diskreads++; bp->b_errs = bread(fsreadfd, bp->b_un.b_buf, dblk, size); bp->b_bno = dblk; bp->b_size = size; } } void flush(fd, bp) int fd; register struct bufarea *bp; { register int i, j; if (!bp->b_dirty) return; if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %d TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? 
"" : "PARTIALLY ", bp->b_bno); bp->b_dirty = 0; bp->b_errs = 0; bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { bwrite(fswritefd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize); } } static void rwerror(mesg, blk) char *mesg; ufs_daddr_t blk; { if (preen == 0) printf("\n"); pfatal("CANNOT %s: BLK %ld", mesg, blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(markclean) int markclean; { register struct bufarea *bp, *nbp; int ofsmodified, cnt = 0; if (fswritefd < 0) { (void)close(fsreadfd); return; } flush(fswritefd, &sblk); if (havesb && sblk.b_bno != SBOFF / dev_bsize && !preen && reply("UPDATE STANDARD SUPERBLOCK")) { sblk.b_bno = SBOFF / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); for (bp = bufhead.b_prev; bp && bp != &bufhead; bp = nbp) { cnt++; flush(fswritefd, bp); nbp = bp->b_prev; free(bp->b_un.b_buf); free((char *)bp); } if (bufhead.b_size != cnt) errx(EEXIT, "Panic: lost %d buffers", bufhead.b_size - cnt); pbp = pdirbp = (struct bufarea *)0; if (markclean && sblock.fs_clean == 0) { sblock.fs_clean = 1; sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) printf("\n***** FILE SYSTEM MARKED CLEAN *****\n"); } if (debug) printf("cache missed %ld of %ld (%d%%)\n", diskreads, totalreads, (int)(diskreads * 100 / totalreads)); (void)close(fsreadfd); (void)close(fswritefd); } int bread(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (read(fd, buf, (int)size) == size) return (0); rwerror("READ", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (read(fd, cp, (int)secsize) != secsize) { (void)lseek(fd, offset + i + secsize, 0); if (secsize != dev_bsize && dev_bsize != 1) printf(" %ld (%ld),", (blk * dev_bsize + i) / secsize, blk + i / dev_bsize); else printf(" %ld,", blk + i / dev_bsize); errs++; } } printf("\n"); return (errs); } void bwrite(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (write(fd, buf, (int)size) == size) { fsmodified = 1; return; } rwerror("WRITE", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (write(fd, cp, (int)dev_bsize) != dev_bsize) { (void)lseek(fd, offset + i + dev_bsize, 0); printf(" %ld,", blk + i / dev_bsize); } printf("\n"); return; } /* * allocate a data block with the specified number of fragments */ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(i + j)) continue; for (k = 1; k < frags; k++) if (testbmap(i + j + k)) break; if (k < frags) { j += k; continue; } - for 
(k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } return (0); } /* * Free a previously allocated block */ void freeblk(blkno, frags) ufs_daddr_t blkno; long frags; { struct inodesc idesc; idesc.id_blkno = blkno; idesc.id_numfrags = frags; (void)pass4check(&idesc); } /* * Find a pathname */ void getpathname(namebuf, curdir, ino) char *namebuf; ino_t curdir, ino; { int len; register char *cp; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || (statemap[curdir] != DSTATE && statemap[curdir] != DFOUND)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = ".."; if ((ckinode(ginode(ino), &idesc) & FOUND) == 0) break; namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(idesc.id_number), &idesc)&FOUND) == 0) break; len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(sig) int sig; { if (!doinglevel2) ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after filesystem checks complete * so that reboot sequence may be interrupted. */ void catchquit(sig) int sig; { printf("returning to single-user after filesystem check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * Ignore a single quit signal; wait and flush just in case. * Used by child processes in preen. */ void voidquit(sig) int sig; { sleep(1); (void)signal(SIGQUIT, SIG_IGN); (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(idesc, msg) register struct inodesc *idesc; char *msg; { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn(msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #if __STDC__ #include <stdarg.h> #else #include <varargs.h> #endif /* * An unexpected inconsistency occurred. - * Die if preening, otherwise just print message and continue. + * Die if preening or the filesystem is running with the soft dependency + * protocol, otherwise just print message and continue. */ void #if __STDC__ pfatal(const char *fmt, ...) 
#else pfatal(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? " SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. + * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ pwarn(const char *fmt, ...) #else pwarn(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (preen) (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void #if __STDC__ panic(const char *fmt, ...) #else panic(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stderr, fmt, ap); va_end(ap); exit(EEXIT); } Index: head/sbin/fsck_ifs/dir.c =================================================================== --- head/sbin/fsck_ifs/dir.c (revision 34265) +++ head/sbin/fsck_ifs/dir.c (revision 34266) @@ -1,734 +1,737 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)dir.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" char *lfname = "lost+found"; int lfmode = 01777; struct dirtemplate emptydir = { 0, DIRBLKSIZ }; struct dirtemplate dirhead = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; struct odirtemplate odirhead = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static int chgino __P((struct inodesc *)); static int dircheck __P((struct inodesc *, struct direct *)); static int expanddir __P((struct dinode *dp, char *name)); static void freedir __P((ino_t ino, ino_t parent)); static struct direct *fsck_readdir __P((struct inodesc *)); static struct bufarea *getdirblk __P((ufs_daddr_t blkno, long size)); static int lftempname __P((char *bufp, ino_t ino)); static int mkentry __P((struct inodesc *)); /* * Propagate connected state through the tree. */ void propagate() { register struct inoinfo **inpp, *inp; struct inoinfo **inpend; long change; inpend = &inpsort[inplast]; do { change = 0; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) { statemap[inp->i_number] = DFOUND; change++; } } } while (change > 0); } /* * Scan each entry in a directory block. */ int dirscan(idesc) register struct inodesc *idesc; { register struct direct *dp; register struct bufarea *bp; int dsize, n; long blksiz; char dbuf[DIRBLKSIZ]; if (idesc->id_type != DATA) errx(EEXIT, "wrong type to dirscan %d", idesc->id_type); if (idesc->id_entryno == 0 && (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); blksiz = idesc->id_numfrags * sblock.fs_fsize; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { idesc->id_filesize -= blksiz; return (SKIP); } idesc->id_loc = 0; for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { dsize = dp->d_reclen; memmove(dbuf, dp, (size_t)dsize); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { struct direct *tdp = (struct direct *)dbuf; u_char tmp; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif idesc->id_dirp = (struct direct *)dbuf; if ((n = (*idesc->id_func)(idesc)) & ALTERED) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt && !doinglevel2) { struct direct *tdp; u_char tmp; tdp = (struct direct *)dbuf; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif bp = getdirblk(idesc->id_blkno, blksiz); memmove(bp->b_un.b_buf + idesc->id_loc - dsize, dbuf, (size_t)dsize); dirty(bp); sbdirty(); } if (n & STOP) return (n); } return (idesc->id_filesize > 0 ? KEEPON : STOP); } /* * get next entry in a directory. 
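 * Entries are consumed one struct direct at a time by advancing
 * id_loc by d_reclen.  At each DIRBLKSIZ boundary the candidate entry
 * is validated with dircheck() first; a corrupted block, or garbage
 * between the last valid entry and the end of the block, is patched
 * out under dofix() control.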
*/ static struct direct * fsck_readdir(idesc) register struct inodesc *idesc; { register struct direct *dp, *ndp; register struct bufarea *bp; long size, blksiz, fix, dploc; blksiz = idesc->id_numfrags * sblock.fs_fsize; bp = getdirblk(idesc->id_blkno, blksiz); if (idesc->id_loc % DIRBLKSIZ == 0 && idesc->id_filesize > 0 && idesc->id_loc < blksiz) { dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (dircheck(idesc, dp)) goto dpok; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); dp->d_reclen = DIRBLKSIZ; dp->d_ino = 0; dp->d_type = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; if (fix) dirty(bp); idesc->id_loc += DIRBLKSIZ; idesc->id_filesize -= DIRBLKSIZ; return (dp); } dpok: if (idesc->id_filesize <= 0 || idesc->id_loc >= blksiz) return NULL; dploc = idesc->id_loc; dp = (struct direct *)(bp->b_un.b_buf + dploc); idesc->id_loc += dp->d_reclen; idesc->id_filesize -= dp->d_reclen; if ((idesc->id_loc % DIRBLKSIZ) == 0) return (dp); ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (idesc->id_loc < blksiz && idesc->id_filesize > 0 && dircheck(idesc, ndp) == 0) { size = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); idesc->id_loc += size; idesc->id_filesize -= size; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + dploc); dp->d_reclen += size; if (fix) dirty(bp); } return (dp); } /* * Verify that a directory entry is valid. * This is a superset of the checks made in the kernel. */ static int dircheck(idesc, dp) struct inodesc *idesc; register struct direct *dp; { register int size; register char *cp; u_char namlen, type; int spaceleft; spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); if (dp->d_ino >= maxino || dp->d_reclen == 0 || dp->d_reclen > spaceleft || (dp->d_reclen & 0x3) != 0) return (0); if (dp->d_ino == 0) return (1); size = DIRSIZ(!newinofmt, dp); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { type = dp->d_namlen; namlen = dp->d_type; } else { namlen = dp->d_namlen; type = dp->d_type; } # else namlen = dp->d_namlen; type = dp->d_type; # endif if (dp->d_reclen < size || idesc->id_filesize < size || namlen > MAXNAMLEN || type > 15) return (0); for (cp = dp->d_name, size = 0; size < namlen; size++) if (*cp == '\0' || (*cp++ == '/')) return (0); if (*cp != '\0') return (0); return (1); } void direrror(ino, errmesg) ino_t ino; char *errmesg; { fileerror(ino, ino, errmesg); } void fileerror(cwd, ino, errmesg) ino_t cwd, ino; char *errmesg; { register struct dinode *dp; char pathbuf[MAXPATHLEN + 1]; pwarn("%s ", errmesg); pinode(ino); printf("\n"); getpathname(pathbuf, cwd, ino); if (ino < ROOTINO || ino > maxino) { pfatal("NAME=%s\n", pathbuf); return; } dp = ginode(ino); if (ftypeok(dp)) pfatal("%s=%s\n", (dp->di_mode & IFMT) == IFDIR ? "DIR" : "FILE", pathbuf); else pfatal("NAME=%s\n", pathbuf); } void adjust(idesc, lcnt) register struct inodesc *idesc; int lcnt; { register struct dinode *dp; dp = ginode(idesc->id_number); if (dp->di_nlink == lcnt) { if (linkup(idesc->id_number, (ino_t)0) == 0) clri(idesc, "UNREF", 0); } else { pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname : ((dp->di_mode & IFMT) == IFDIR ? 
"DIR" : "FILE")); pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; inodirty(); } } } static int mkentry(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; struct direct newent; int newlen, oldlen; newent.d_namlen = strlen(idesc->id_name); newlen = DIRSIZ(0, &newent); if (dirp->d_ino != 0) oldlen = DIRSIZ(0, dirp); else oldlen = 0; if (dirp->d_reclen - oldlen < newlen) return (KEEPON); newent.d_reclen = dirp->d_reclen - oldlen; dirp->d_reclen = oldlen; dirp = (struct direct *)(((char *)dirp) + oldlen); dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ dirp->d_reclen = newent.d_reclen; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; dirp->d_namlen = newent.d_namlen; memmove(dirp->d_name, idesc->id_name, (size_t)newent.d_namlen + 1); # if (BYTE_ORDER == LITTLE_ENDIAN) /* * If the entry was split, dirscan() will only reverse the byte * order of the original entry, and not the new one, before * writing it back out. So, we reverse the byte order here if * necessary. */ if (oldlen != 0 && !newinofmt && !doinglevel2) { u_char tmp; tmp = dirp->d_namlen; dirp->d_namlen = dirp->d_type; dirp->d_type = tmp; } # endif return (ALTERED|STOP); } static int chgino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (memcmp(dirp->d_name, idesc->id_name, (int)dirp->d_namlen + 1)) return (KEEPON); dirp->d_ino = idesc->id_parent; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; return (ALTERED|STOP); } int linkup(orphan, parentdir) ino_t orphan; ino_t parentdir; { register struct dinode *dp; int lostdir; ino_t oldlfdir; struct inodesc idesc; char tempname[BUFSIZ]; memset(&idesc, 0, sizeof(struct inodesc)); dp = ginode(orphan); lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = ROOTINO; if ((ckinode(dp, &idesc) & FOUND) != 0) { lfdir = idesc.id_parent; } else { pwarn("NO lost+found DIRECTORY"); if (preen || reply("CREATE")) { lfdir = allocdir(ROOTINO, (ino_t)0, lfmode); if (lfdir != 0) { if (makeentry(ROOTINO, lfdir, lfname) != 0) { if (preen) printf(" (CREATED)\n"); } else { freedir(lfdir, ROOTINO); lfdir = 0; if (preen) printf("\n"); } } } } if (lfdir == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY"); printf("\n\n"); return (0); } } dp = ginode(lfdir); if ((dp->di_mode & IFMT) != IFDIR) { pfatal("lost+found IS NOT A DIRECTORY"); if (reply("REALLOCATE") == 0) return (0); oldlfdir = lfdir; if ((lfdir = allocdir(ROOTINO, (ino_t)0, lfmode)) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } if ((changeino(ROOTINO, lfname, lfdir) & ALTERED) == 0) { pfatal("SORRY. 
CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } inodirty(); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = oldlfdir; adjust(&idesc, lncntp[oldlfdir] + 1); lncntp[oldlfdir] = 0; dp = ginode(lfdir); } if (statemap[lfdir] != DFOUND) { pfatal("SORRY. NO lost+found DIRECTORY\n\n"); return (0); } (void)lftempname(tempname, orphan); if (makeentry(lfdir, orphan, tempname) == 0) { pfatal("SORRY. NO SPACE IN lost+found DIRECTORY"); printf("\n\n"); return (0); } lncntp[orphan]--; if (lostdir) { if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && parentdir != (ino_t)-1) (void)makeentry(orphan, lfdir, ".."); dp = ginode(lfdir); dp->di_nlink++; inodirty(); lncntp[lfdir]++; pwarn("DIR I=%lu CONNECTED. ", orphan); if (parentdir != (ino_t)-1) { printf("PARENT WAS I=%lu\n", parentdir); /* * The parent directory, because of the ordering * guarantees, has had the link count incremented * for the child, but no entry was made. This * fixes the parent link count so that fsck does * not need to be rerun. */ lncntp[parentdir]++; } if (preen == 0) printf("\n"); } return (1); } /* * fix an entry in a directory. */ int changeino(dir, name, newnum) ino_t dir; char *name; ino_t newnum; { struct inodesc idesc; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = chgino; idesc.id_number = dir; idesc.id_fix = DONTKNOW; idesc.id_name = name; idesc.id_parent = newnum; /* new value for name */ return (ckinode(ginode(dir), &idesc)); } /* * make an entry in a directory */ int makeentry(parent, ino, name) ino_t parent, ino; char *name; { struct dinode *dp; struct inodesc idesc; char pathbuf[MAXPATHLEN + 1]; if (parent < ROOTINO || parent >= maxino || ino < ROOTINO || ino >= maxino) return (0); memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = mkentry; idesc.id_number = parent; idesc.id_parent = ino; /* this is the inode to enter */ idesc.id_fix = DONTKNOW; idesc.id_name = name; dp = ginode(parent); if (dp->di_size % DIRBLKSIZ) { dp->di_size = roundup(dp->di_size, DIRBLKSIZ); inodirty(); } if ((ckinode(dp, &idesc) & ALTERED) != 0) return (1); getpathname(pathbuf, parent, parent); dp = ginode(parent); if (expanddir(dp, pathbuf) == 0) return (0); return (ckinode(dp, &idesc) & ALTERED); } /* * Attempt to expand the size of a directory */ static int expanddir(dp, name) register struct dinode *dp; char *name; { ufs_daddr_t lastbn, newblk; register struct bufarea *bp; char *cp, firstblk[DIRBLKSIZ]; lastbn = lblkno(&sblock, dp->di_size); if (lastbn >= NDADDR - 1 || dp->di_db[lastbn] == 0 || dp->di_size == 0) return (0); if ((newblk = allocblk(sblock.fs_frag)) == 0) return (0); dp->di_db[lastbn + 1] = dp->di_db[lastbn]; dp->di_db[lastbn] = newblk; dp->di_size += sblock.fs_bsize; dp->di_blocks += btodb(sblock.fs_bsize); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(firstblk, bp->b_un.b_buf, DIRBLKSIZ); bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, firstblk, DIRBLKSIZ); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_bsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, &emptydir, sizeof emptydir); pwarn("NO SPACE LEFT IN %s", name); if (preen) printf(" (EXPANDED)\n"); else if (reply("EXPAND") == 0) goto bad; dirty(bp); inodirty(); return (1); bad: dp->di_db[lastbn] = 
dp->di_db[lastbn + 1]; dp->di_db[lastbn + 1] = 0; dp->di_size -= sblock.fs_bsize; dp->di_blocks -= btodb(sblock.fs_bsize); freeblk(newblk, sblock.fs_frag); return (0); } /* * allocate a new directory */ ino_t allocdir(parent, request, mode) ino_t parent, request; int mode; { ino_t ino; char *cp; struct dinode *dp; register struct bufarea *bp; struct dirtemplate *dirp; ino = allocino(request, IFDIR|mode); if (newinofmt) dirp = &dirhead; else dirp = (struct dirtemplate *)&odirhead; dirp->dot_ino = ino; dirp->dotdot_ino = parent; dp = ginode(ino); bp = getdirblk(dp->di_db[0], sblock.fs_fsize); if (bp->b_errs) { freeino(ino); return (0); } memmove(bp->b_un.b_buf, dirp, sizeof(struct dirtemplate)); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_fsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); dp->di_nlink = 2; inodirty(); if (ino == ROOTINO) { lncntp[ino] = dp->di_nlink; cacheino(dp, ino); return(ino); } if (statemap[parent] != DSTATE && statemap[parent] != DFOUND) { freeino(ino); return (0); } cacheino(dp, ino); statemap[ino] = statemap[parent]; if (statemap[ino] == DSTATE) { lncntp[ino] = dp->di_nlink; lncntp[parent]++; } dp = ginode(parent); dp->di_nlink++; inodirty(); return (ino); } /* * free a directory inode */ static void freedir(ino, parent) ino_t ino, parent; { struct dinode *dp; if (ino != parent) { dp = ginode(parent); dp->di_nlink--; inodirty(); } freeino(ino); } /* * generate a temporary name for an orphan placed in the lost+found directory. */ static int lftempname(bufp, ino) char *bufp; ino_t ino; { register ino_t in; register char *cp; int namlen; cp = bufp + 2; for (in = maxino; in > 0; in /= 10) cp++; *--cp = 0; namlen = cp - bufp; in = ino; while (cp > bufp) { *--cp = (in % 10) + '0'; in /= 10; } *cp = '#'; return (namlen); } /* * Get a directory block. * Ensure that it is held until another is requested. */ static struct bufarea * getdirblk(blkno, size) ufs_daddr_t blkno; long size; { if (pdirbp != 0) pdirbp->b_flags &= ~B_INUSE; pdirbp = getdatablk(blkno, size); return (pdirbp); } Index: head/sbin/fsck_ifs/fsck.h =================================================================== --- head/sbin/fsck_ifs/fsck.h (revision 34265) +++ head/sbin/fsck_ifs/fsck.h (revision 34266) @@ -1,281 +1,283 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fsck.h 8.4 (Berkeley) 5/9/95 */ #include #include #include #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MAXBUFSPACE 40*1024 /* maximum space to allocate to buffers */ #define INOBUFSIZE 56*1024 /* size of buffer to read inodes in pass1 */ #ifndef BUFSIZ #define BUFSIZ 1024 #endif #define USTATE 01 /* inode not allocated */ #define FSTATE 02 /* inode is file */ #define DSTATE 03 /* inode is directory */ #define DFOUND 04 /* directory found during descent */ #define DCLEAR 05 /* directory is to be cleared */ #define FCLEAR 06 /* file is to be cleared */ /* * buffer cache structure. */ struct bufarea { struct bufarea *b_next; /* free list queue */ struct bufarea *b_prev; /* free list queue */ ufs_daddr_t b_bno; int b_size; int b_errs; int b_flags; union { char *b_buf; /* buffer space */ ufs_daddr_t *b_indir; /* indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct dinode *b_dinode; /* inode block */ } b_un; char b_dirty; }; #define B_INUSE 1 #define MINBUFS 5 /* minimum number of buffers required */ struct bufarea bufhead; /* head of list of other blks in filesys */ struct bufarea sblk; /* file system superblock */ struct bufarea cgblk; /* cylinder group blocks */ struct bufarea *pdirbp; /* current directory contents */ struct bufarea *pbp; /* current inode block */ #define dirty(bp) (bp)->b_dirty = 1 #define initbarea(bp) \ (bp)->b_dirty = 0; \ (bp)->b_bno = (ufs_daddr_t)-1; \ (bp)->b_flags = 0; #define sbdirty() sblk.b_dirty = 1 #define cgdirty() cgblk.b_dirty = 1 #define sblock (*sblk.b_un.b_fs) #define cgrp (*cgblk.b_un.b_cg) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(); /* function to be applied to blocks of inode */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_daddr_t id_blkno; /* current block number being examined */ int id_numfrags; /* number of frags contained in block */ quad_t id_filesize; /* for DATA nodes, the size of the directory */ int id_loc; /* for DATA nodes, current location in dir */ int id_entryno; /* for DATA nodes, current entry number */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA or ADDR */ }; /* file types */ #define DATA 1 #define ADDR 2 /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup. 
To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurrences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs_daddr_t dup; }; struct dups *duplist; /* head of dup list */ struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Linked list of inodes with zero link counts. */ struct zlncnt { struct zlncnt *next; ino_t zlncnt; }; struct zlncnt *zlnhead; /* head of zero link count list */ /* * Inode cache data structures. */ struct inoinfo { struct inoinfo *i_nexthash; /* next entry in hash chain */ ino_t i_number; /* inode number of this entry */ ino_t i_parent; /* inode number of parent */ ino_t i_dotdot; /* inode number of `..' */ size_t i_isize; /* size of inode */ u_int i_numblks; /* size of block array in bytes */ ufs_daddr_t i_blks[1]; /* actually longer */ } **inphead, **inpsort; long numdirs, listmax, inplast; char *cdevname; /* name of device being checked */ long dev_bsize; /* computed value of DEV_BSIZE */ long secsize; /* actual disk sector size */ char fflag; /* force fs check (ignore clean flag) */ char nflag; /* assume a no response */ char yflag; /* assume a yes response */ int bflag; /* location of alternate super block */ int debug; /* output debugging info */ int cvtlevel; /* convert to newer file system format */ int doinglevel1; /* converting to new cylinder group format */ int doinglevel2; /* converting to new inode format */ int newinofmt; /* filesystem has new inode format */ +char usedsoftdep; /* just fix soft dependency inconsistencies */ +char resolved; /* cleared if unresolved changes => not clean */ char preen; /* just fix normal inconsistencies */ char hotroot; /* checking root device */ char havesb; /* superblock has been read */ int fsmodified; /* 1 => write done to file system */ int fsreadfd; /* file descriptor for reading file system */ int fswritefd; /* file descriptor for writing file system */ int returntosingle; /* return to single user mode */ int rerun; /* rerun fsck. Only used in non-preen mode */ ufs_daddr_t maxfsblock; /* number of blocks in the file system */ char *blockmap; /* ptr to primary blk allocation map */ ino_t maxino; /* number of inodes in file system */ ino_t lastino; /* last inode in use */ char *statemap; /* ptr to inode state table */ u_char *typemap; /* ptr to inode type table */ short *lncntp; /* ptr to link count table */ ino_t lfdir; /* lost & found directory inode number */ char *lfname; /* lost & found directory name */ int lfmode; /* lost & found directory creation mode */ ufs_daddr_t n_blks; /* number of blocks in use */ ufs_daddr_t n_files; /* number of files in use */ #define clearinode(dp) (*(dp) = zino) struct dinode zino; #define setbmap(blkno) setbit(blockmap, blkno) #define testbmap(blkno) isset(blockmap, blkno) #define clrbmap(blkno) clrbit(blockmap, blkno) #define STOP 0x01 #define SKIP 0x02 #define KEEPON 0x04 #define ALTERED 0x08 #define FOUND 0x10 #define EEXIT 8 /* Standard error exit. */
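The duplist/muldup invariant described in the comment above admits a compact membership test. The helper below is an illustrative sketch only, not part of the original header: the name blkisdup is hypothetical, and it assumes nothing beyond the struct dups declarations above.

/*
 * Sketch: has blkno already been recorded as a duplicate?  Only the
 * unique prefix of the list (from duplist through the node that
 * muldup points at) needs to be scanned for membership.
 */
static int
blkisdup(blkno)
	ufs_daddr_t blkno;
{
	register struct dups *dlp;

	for (dlp = duplist; dlp != NULL; dlp = dlp->next) {
		if (dlp->dup == blkno)
			return (1);
		if (dlp == muldup)	/* end of unique entries */
			break;
	}
	return (0);
}

This is the same scan pass1check performs before deciding whether to advance muldup when a block recurs.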
struct fstab; void adjust __P((struct inodesc *, int lcnt)); ufs_daddr_t allocblk __P((long frags)); ino_t allocdir __P((ino_t parent, ino_t request, int mode)); ino_t allocino __P((ino_t request, int type)); void blkerror __P((ino_t ino, char *type, ufs_daddr_t blk)); char *blockcheck __P((char *name)); int bread __P((int fd, char *buf, ufs_daddr_t blk, long size)); void bufinit __P((void)); void bwrite __P((int fd, char *buf, ufs_daddr_t blk, long size)); void cacheino __P((struct dinode *dp, ino_t inumber)); void catch __P((int)); void catchquit __P((int)); int changeino __P((ino_t dir, char *name, ino_t newnum)); int checkfstab __P((int preen, int maxrun, int (*docheck)(struct fstab *), int (*chkit)(char *, char *, long, int))); int chkrange __P((ufs_daddr_t blk, int cnt)); void ckfini __P((int markclean)); int ckinode __P((struct dinode *dp, struct inodesc *)); void clri __P((struct inodesc *, char *type, int flag)); void direrror __P((ino_t ino, char *errmesg)); int dirscan __P((struct inodesc *)); int dofix __P((struct inodesc *, char *msg)); void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); void fileerror __P((ino_t cwd, ino_t ino, char *errmesg)); int findino __P((struct inodesc *)); int findname __P((struct inodesc *)); void flush __P((int fd, struct bufarea *bp)); void freeblk __P((ufs_daddr_t blkno, long frags)); void freeino __P((ino_t ino)); void freeinodebuf __P((void)); int ftypeok __P((struct dinode *dp)); void getblk __P((struct bufarea *bp, ufs_daddr_t blk, long size)); struct bufarea *getdatablk __P((ufs_daddr_t blkno, long size)); struct inoinfo *getinoinfo __P((ino_t inumber)); struct dinode *getnextinode __P((ino_t inumber)); void getpathname __P((char *namebuf, ino_t curdir, ino_t ino)); struct dinode *ginode __P((ino_t inumber)); void inocleanup __P((void)); void inodirty __P((void)); int linkup __P((ino_t orphan, ino_t parentdir)); int makeentry __P((ino_t parent, ino_t ino, char *name)); void panic __P((const char *fmt, ...)); void pass1 __P((void)); void pass1b __P((void)); int pass1check __P((struct inodesc *)); void pass2 __P((void)); void pass3 __P((void)); void pass4 __P((void)); int pass4check __P((struct inodesc *)); void pass5 __P((void)); void pfatal __P((const char *fmt, ...)); void pinode __P((ino_t ino)); void propagate __P((void)); void pwarn __P((const char *fmt, ...)); int reply __P((char *question)); void resetinodebuf __P((void)); int setup __P((char *dev)); void voidquit __P((int)); Index: head/sbin/fsck_ifs/inode.c =================================================================== --- head/sbin/fsck_ifs/inode.c (revision 34265) +++ head/sbin/fsck_ifs/inode.c (revision 34266) @@ -1,621 +1,632 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" static ino_t startinum; static int iblock __P((struct inodesc *, long ilevel, quad_t isize)); int ckinode(dp, idesc) struct dinode *dp; register struct inodesc *idesc; { ufs_daddr_t *ap; long ret, n, ndb, offset; struct dinode dino; quad_t remsize, sizepb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_entryno = 0; idesc->id_filesize = dp->di_size; mode = dp->di_mode & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0))) return (KEEPON); dino = *dp; ndb = howmany(dino.di_size, sblock.fs_bsize); for (ap = &dino.di_db[0]; ap < &dino.di_db[NDADDR]; ap++) { if (--ndb == 0 && (offset = blkoff(&sblock, dino.di_size)) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (*ap == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size = (ap - &dino.di_db[0]) * sblock.fs_bsize; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); } } continue; } idesc->id_blkno = *ap; if (idesc->id_type == ADDR) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = dino.di_size - sblock.fs_bsize * NDADDR; sizepb = sblock.fs_bsize; for (ap = &dino.di_ib[0], n = 1; n <= NIADDR; ap++, n++) { if (*ap) { idesc->id_blkno = *ap; ret = iblock(idesc, n, remsize); if (ret & STOP) return (ret); } else { if (idesc->id_type == DATA && remsize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= remsize; remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); break; } } } sizepb *= NINDIR(&sblock); remsize -= sizepb; } return (KEEPON); } static int 
iblock(idesc, ilevel, isize) struct inodesc *idesc; long ilevel; quad_t isize; { ufs_daddr_t *ap; ufs_daddr_t *aplim; struct bufarea *bp; int i, n, (*func)(), nif; quad_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; struct dinode *dp; if (idesc->id_type == ADDR) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) return (SKIP); bp = getdatablk(idesc->id_blkno, sblock.fs_bsize); ilevel--; for (sizepb = sblock.fs_bsize, i = 0; i < ilevel; i++) sizepb *= NINDIR(&sblock); nif = howmany(isize, sizepb); if (nif > NINDIR(&sblock)) nif = NINDIR(&sblock); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { aplim = &bp->b_un.b_indir[NINDIR(&sblock)]; for (ap = &bp->b_un.b_indir[nif]; ap < aplim; ap++) { if (*ap == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", idesc->id_number); if (dofix(idesc, buf)) { *ap = 0; dirty(bp); } } flush(fswritefd, bp); } aplim = &bp->b_un.b_indir[nif]; for (ap = bp->b_un.b_indir; ap < aplim; ap++) { if (*ap) { idesc->id_blkno = *ap; if (ilevel == 0) n = (*func)(idesc); else n = iblock(idesc, ilevel, isize); if (n & STOP) { bp->b_flags &= ~B_INUSE; return (n); } } else { if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= isize; isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); bp->b_flags &= ~B_INUSE; return(STOP); } } } isize -= sizepb; } bp->b_flags &= ~B_INUSE; return (KEEPON); } /* * Check that a block is a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(blk, cnt) ufs_daddr_t blk; int cnt; { register int c; if (blk < 0 || blk >= maxfsblock || cnt < 0 || cnt > maxfsblock - blk) return (1); c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", blk + cnt, cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", blk+cnt, sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. */ struct dinode * ginode(inumber) ino_t inumber; { ufs_daddr_t iblk; if (inumber < ROOTINO || inumber > maxino) errx(EEXIT, "bad inode number %d to ginode", inumber); if (startinum == 0 || inumber < startinum || inumber >= startinum + INOPB(&sblock)) { iblk = ino_to_fsba(&sblock, inumber); if (pbp != 0) pbp->b_flags &= ~B_INUSE; pbp = getdatablk(iblk, sblock.fs_bsize); startinum = (inumber / INOPB(&sblock)) * INOPB(&sblock); } return (&pbp->b_un.b_dinode[inumber % INOPB(&sblock)]); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. */
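A worked example may make the chunking below concrete. The figures are assumptions chosen only for illustration (128-byte dinodes, fs_ipg of 1888, and an 8K fs_bsize, under which blkroundup leaves the 56K INOBUFSIZE unchanged); they are not values taken from the source.

/*
 * Worked example of the buffering that resetinodebuf() sets up,
 * under the assumed parameters named above:
 *	fullcnt     = 57344 / 128 = 448    inodes per full read
 *	readpercg   = 1888 / 448  = 4      full reads per cylinder group
 *	partialcnt  = 1888 % 448  = 96     inodes left for a short read
 *	partialsize = 96 * 128    = 12288  bytes in that short read
 * Because partialcnt is nonzero, readpercg is bumped to 5, so
 * getnextinode() covers each cylinder group's 1888 inodes with five
 * bread() calls rather than one read per inode.
 */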
ino_t nextino, lastinum; long readcnt, readpercg, fullcnt, inobufsize, partialcnt, partialsize; struct dinode *inodebuf; struct dinode * getnextinode(inumber) ino_t inumber; { long size; ufs_daddr_t dblk; static struct dinode *dp; if (inumber != nextino++ || inumber > maxino) errx(EEXIT, "bad inode number %d to nextinode", inumber); if (inumber >= lastinum) { readcnt++; dblk = fsbtodb(&sblock, ino_to_fsba(&sblock, lastinum)); if (readcnt % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } (void)bread(fsreadfd, (char *)inodebuf, dblk, size); /* ??? */ dp = inodebuf; } return (dp++); } void resetinodebuf() { startinum = 0; nextino = 0; lastinum = 0; readcnt = 0; inobufsize = blkroundup(&sblock, INOBUFSIZE); fullcnt = inobufsize / sizeof(struct dinode); readpercg = sblock.fs_ipg / fullcnt; partialcnt = sblock.fs_ipg % fullcnt; partialsize = partialcnt * sizeof(struct dinode); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } if (inodebuf == NULL && (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "Cannot allocate space for inode buffer"); while (nextino < ROOTINO) (void)getnextinode(nextino); } void freeinodebuf() { if (inodebuf != NULL) free((char *)inodebuf); inodebuf = NULL; } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ void cacheino(dp, inumber) register struct dinode *dp; ino_t inumber; { register struct inoinfo *inp; struct inoinfo **inpp; unsigned int blks; blks = howmany(dp->di_size, sblock.fs_bsize); if (blks > NDADDR) blks = NDADDR + NIADDR; inp = (struct inoinfo *) malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs_daddr_t)); if (inp == NULL) return; inpp = &inphead[inumber % numdirs]; inp->i_nexthash = *inpp; *inpp = inp; if (inumber == ROOTINO) inp->i_parent = ROOTINO; else inp->i_parent = (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = dp->di_size; inp->i_numblks = blks * sizeof(ufs_daddr_t); memmove(&inp->i_blks[0], &dp->di_db[0], (size_t)inp->i_numblks); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)realloc((char *)inpsort, (unsigned)listmax * sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(inumber) ino_t inumber; { register struct inoinfo *inp; for (inp = inphead[inumber % numdirs]; inp; inp = inp->i_nexthash) { if (inp->i_number != inumber) continue; return (inp); } errx(EEXIT, "cannot find inode %d", inumber); return ((struct inoinfo *)0); } /* * Clean up all the inode cache structures. */ void inocleanup() { register struct inoinfo **inpp; if (inphead == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphead); free((char *)inpsort); inphead = inpsort = NULL; } void inodirty() { dirty(pbp); } void clri(idesc, type, flag) register struct inodesc *idesc; char *type; int flag; { register struct dinode *dp; dp = ginode(idesc->id_number); if (flag == 1) { pwarn("%s %s", type, (dp->di_mode & IFMT) == IFDIR ?
"DIR" : "FILE"); pinode(idesc->id_number); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; (void)ckinode(dp, idesc); clearinode(dp); statemap[idesc->id_number] = USTATE; inodirty(); } } int findname(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent) return (KEEPON); memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= ROOTINO && dirp->d_ino <= maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } void pinode(ino) ino_t ino; { register struct dinode *dp; register char *p; struct passwd *pw; time_t t; printf(" I=%lu ", ino); if (ino < ROOTINO || ino > maxino) return; dp = ginode(ino); printf(" OWNER="); if ((pw = getpwuid((int)dp->di_uid)) != 0) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)dp->di_uid); printf("MODE=%o\n", dp->di_mode); if (preen) printf("%s: ", cdevname); printf("SIZE=%qu ", dp->di_size); t = dp->di_mtime; p = ctime(&t); printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void blkerror(ino, type, blk) ino_t ino; char *type; ufs_daddr_t blk; { pfatal("%ld %s I=%lu", blk, type, ino); printf("\n"); switch (statemap[ino]) { case FSTATE: statemap[ino] = FCLEAR; return; case DSTATE: statemap[ino] = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", statemap[ino]); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(request, type) ino_t request; int type; { register ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; else if (statemap[request] != USTATE) return (0); for (ino = request; ino < maxino; ino++) if (statemap[ino] == USTATE) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: statemap[ino] = FSTATE; break; default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = dp->di_atime; dp->di_size = sblock.fs_fsize; dp->di_blocks = btodb(sblock.fs_fsize); n_files++; inodirty(); if (newinofmt) typemap[ino] = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino) ino_t ino; { struct inodesc idesc; struct dinode *dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = ino; dp = ginode(ino); (void)ckinode(dp, &idesc); clearinode(dp); inodirty(); statemap[ino] = USTATE; n_files--; } Index: head/sbin/fsck_ifs/main.c =================================================================== --- head/sbin/fsck_ifs/main.c (revision 34265) +++ head/sbin/fsck_ifs/main.c (revision 34266) @@ -1,353 +1,359 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include "fsck.h" int returntosingle; static int argtoi __P((int flag, char *req, char *str, int base)); static int docheck __P((struct fstab *fsp)); static int checkfilesys __P((char *filesys, char *mntpt, long auxdata, int child)); int main __P((int argc, char *argv[])); int main(argc, argv) int argc; char *argv[]; { int ch; int ret, maxrun = 0; sync(); while ((ch = getopt(argc, argv, "dfpnNyYb:c:l:m:")) != -1) { switch (ch) { case 'p': preen++; break; case 'b': bflag = argtoi('b', "number", optarg, 10); printf("Alternate super block location: %d\n", bflag); break; case 'c': cvtlevel = argtoi('c', "conversion level", optarg, 10); break; case 'd': debug++; break; case 'f': fflag++; break; case 'l': maxrun = argtoi('l', "number", optarg, 10); break; case 'm': lfmode = argtoi('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': case 'N': nflag++; yflag = 0; break; case 'y': case 'Y': yflag++; nflag = 0; break; default: errx(EEXIT, "%c option?", ch); } } argc -= optind; argv += optind; if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (preen) (void)signal(SIGQUIT, catchquit); if (argc) { while (argc-- > 0) (void)checkfilesys(blockcheck(*argv++), 0, 0L, 0); exit(0); } ret = checkfstab(preen, maxrun, docheck, checkfilesys); if (returntosingle) exit(2); exit(ret); } static int argtoi(flag, req, str, base) int flag; char *req, *str; int base; { char *cp; int ret; ret = (int)strtol(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Determine whether a filesystem should be checked. */ static int docheck(fsp) register struct fstab *fsp; { if (strcmp(fsp->fs_vfstype, "ufs") || (strcmp(fsp->fs_type, FSTAB_RW) && strcmp(fsp->fs_type, FSTAB_RO)) || fsp->fs_passno == 0) return (0); return (1); } /* * Check the specified filesystem. */ /* ARGSUSED */ static int checkfilesys(filesys, mntpt, auxdata, child) char *filesys, *mntpt; long auxdata; int child; { ufs_daddr_t n_ffree, n_bfree; struct dups *dp; struct zlncnt *zlnp; int cylno, flags; if (preen && child) (void)signal(SIGQUIT, voidquit); cdevname = filesys; if (debug && preen) pwarn("starting\n"); switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (0); case -1: pwarn("clean, %ld free ", sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", sblock.fs_cstotal.cs_nffree, sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
+ */ + resolved = 1; + /* * 1: scan inodes tallying blocks used */ if (preen == 0) { printf("** Last Mounted on %s\n", sblock.fs_fsmnt); if (hotroot) printf("** Root file system\n"); printf("** Phase 1 - Check Blocks and Sizes\n"); } pass1(); /* * 1b: locate first references to duplicates, if any */ if (duplist) { - if (preen) + if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with -p"); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); } /* * 2: traverse directories from root to mark all connected directories */ if (preen == 0) printf("** Phase 2 - Check Pathnames\n"); pass2(); /* * 3: scan inodes looking for disconnected directories */ if (preen == 0) printf("** Phase 3 - Check Connectivity\n"); pass3(); /* * 4: scan inodes looking for disconnected files; check reference counts */ if (preen == 0) printf("** Phase 4 - Check Reference Counts\n"); pass4(); /* * 5: check and repair resource counts in cylinder groups */ if (preen == 0) printf("** Phase 5 - Check Cyl groups\n"); pass5(); /* * print out summary statistics */ n_ffree = sblock.fs_cstotal.cs_nffree; n_bfree = sblock.fs_cstotal.cs_nbfree; pwarn("%ld files, %ld used, %ld free ", n_files, n_blks, n_ffree + sblock.fs_frag * n_bfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", n_ffree, n_bfree, n_ffree * 100.0 / sblock.fs_dsize); if (debug && (n_files -= maxino - ROOTINO - sblock.fs_cstotal.cs_nifree)) printf("%d files missing\n", n_files); if (debug) { n_blks += sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0)); n_blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0); n_blks += howmany(sblock.fs_cssize, sblock.fs_fsize); if (n_blks -= maxfsblock - (n_ffree + sblock.fs_frag * n_bfree)) printf("%d blocks missing\n", n_blks); if (duplist != NULL) { printf("The following duplicate blocks remain:"); for (dp = duplist; dp; dp = dp->next) printf(" %d,", dp->dup); printf("\n"); } if (zlnhead != NULL) { printf("The following zero link count inodes remain:"); for (zlnp = zlnhead; zlnp; zlnp = zlnp->next) printf(" %u,", zlnp->zlncnt); printf("\n"); } } zlnhead = (struct zlncnt *)0; duplist = (struct dups *)0; muldup = (struct dups *)0; inocleanup(); if (fsmodified) { (void)time(&sblock.fs_time); sbdirty(); } if (cvtlevel && sblk.b_dirty) { /* * Write out the duplicate super blocks */ for (cylno = 0; cylno < sblock.fs_ncg; cylno++) bwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE); } - if (!hotroot) { - ckfini(1); - } else { + if (rerun) + resolved = 0; + flags = 0; + if (hotroot) { struct statfs stfs_buf; /* * Check to see if root is mounted read-write. */ if (statfs("/", &stfs_buf) == 0) flags = stfs_buf.f_flags; - else - flags = 0; - ckfini(flags & MNT_RDONLY); + if ((flags & MNT_RDONLY) == 0) + resolved = 0; } + ckfini(resolved); free(blockmap); free(statemap); free((char *)lncntp); if (!fsmodified) return (0); if (!preen) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (rerun) printf("\n***** PLEASE RERUN FSCK *****\n"); if (hotroot) { struct ufs_args args; int ret; /* * We modified the root. Do a mount update on * it, unless it is read-write, so we can continue. 
*/ if (flags & MNT_RDONLY) { args.fspec = 0; args.export.ex_flags = 0; args.export.ex_root = 0; flags |= MNT_UPDATE | MNT_RELOAD; ret = mount("ufs", "/", flags, &args); if (ret == 0) return (0); } if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (0); } Index: head/sbin/fsck_ifs/pass1.c =================================================================== --- head/sbin/fsck_ifs/pass1.c (revision 34265) +++ head/sbin/fsck_ifs/pass1.c (revision 34266) @@ -1,322 +1,330 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)pass1.c 8.6 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" static ufs_daddr_t badblk; static ufs_daddr_t dupblk; static void checkinode __P((ino_t inumber, struct inodesc *)); void pass1() { ino_t inumber; int c, i, cgd; struct inodesc idesc; /* * Set file system reserved blocks in used block map. */ for (c = 0; c < sblock.fs_ncg; c++) { cgd = cgdmin(&sblock, c); if (c == 0) { i = cgbase(&sblock, c); cgd += howmany(sblock.fs_cssize, sblock.fs_fsize); } else i = cgsblock(&sblock, c); for (; i < cgd; i++) setbmap(i); } /* * Find all allocated blocks. 
*/ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass1check; inumber = 0; n_files = n_blks = 0; resetinodebuf(); for (c = 0; c < sblock.fs_ncg; c++) { for (i = 0; i < sblock.fs_ipg; i++, inumber++) { if (inumber < ROOTINO) continue; checkinode(inumber, &idesc); } } freeinodebuf(); } static void checkinode(inumber, idesc) ino_t inumber; register struct inodesc *idesc; { register struct dinode *dp; struct zlncnt *zlnp; int ndb, j; mode_t mode; char *symbuf; dp = getnextinode(inumber); mode = dp->di_mode & IFMT; if (mode == 0) { if (memcmp(dp->di_db, zino.di_db, NDADDR * sizeof(ufs_daddr_t)) || memcmp(dp->di_ib, zino.di_ib, NIADDR * sizeof(ufs_daddr_t)) || dp->di_mode || dp->di_size) { pfatal("PARTIALLY ALLOCATED INODE I=%lu", inumber); if (reply("CLEAR") == 1) { dp = ginode(inumber); clearinode(dp); inodirty(); } } statemap[inumber] = USTATE; return; } lastino = inumber; if (/* dp->di_size < 0 || */ dp->di_size + sblock.fs_bsize - 1 < dp->di_size || (mode == IFDIR && dp->di_size > MAXDIRSIZE)) { if (debug) printf("bad size %qu:", dp->di_size); goto unknown; } if (!preen && mode == IFMT && reply("HOLD BAD BLOCK") == 1) { dp = ginode(inumber); dp->di_size = sblock.fs_fsize; dp->di_mode = IFREG|0600; inodirty(); } ndb = howmany(dp->di_size, sblock.fs_bsize); if (ndb < 0) { if (debug) printf("bad size %qu ndb %d:", dp->di_size, ndb); goto unknown; } if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { if (doinglevel2 && dp->di_size > 0 && dp->di_size < MAXSYMLINKLEN && dp->di_blocks != 0) { symbuf = alloca(secsize); if (bread(fsreadfd, symbuf, fsbtodb(&sblock, dp->di_db[0]), (long)secsize) != 0) errx(EEXIT, "cannot read symlink"); if (debug) { symbuf[dp->di_size] = 0; printf("convert symlink %ld(%s) of size %ld\n", inumber, symbuf, (long)dp->di_size); } dp = ginode(inumber); memmove(dp->di_shortlink, symbuf, (long)dp->di_size); dp->di_blocks = 0; inodirty(); } /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. 
*/ if (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0) { ndb = howmany(dp->di_size, sizeof(ufs_daddr_t)); if (ndb > NDADDR) { j = ndb - NDADDR; for (ndb = 1; j > 1; j--) ndb *= NINDIR(&sblock); ndb += NDADDR; } } } for (j = ndb; j < NDADDR; j++) if (dp->di_db[j] != 0) { if (debug) printf("bad direct addr: %ld\n", dp->di_db[j]); goto unknown; } for (j = 0, ndb -= NDADDR; ndb > 0; j++) ndb /= NINDIR(&sblock); for (; j < NIADDR; j++) if (dp->di_ib[j] != 0) { if (debug) printf("bad indirect addr: %ld\n", dp->di_ib[j]); goto unknown; } if (ftypeok(dp) == 0) goto unknown; n_files++; lncntp[inumber] = dp->di_nlink; if (dp->di_nlink <= 0) { zlnp = (struct zlncnt *)malloc(sizeof *zlnp); if (zlnp == NULL) { pfatal("LINK COUNT TABLE OVERFLOW"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } } else { zlnp->zlncnt = inumber; zlnp->next = zlnhead; zlnhead = zlnp; } } if (mode == IFDIR) { if (dp->di_size == 0) statemap[inumber] = DCLEAR; else statemap[inumber] = DSTATE; cacheino(dp, inumber); } else statemap[inumber] = FSTATE; typemap[inumber] = IFTODT(mode); if (doinglevel2 && (dp->di_ouid != (u_short)-1 || dp->di_ogid != (u_short)-1)) { dp = ginode(inumber); dp->di_uid = dp->di_ouid; dp->di_ouid = -1; dp->di_gid = dp->di_ogid; dp->di_ogid = -1; inodirty(); } badblk = dupblk = 0; idesc->id_number = inumber; (void)ckinode(dp, idesc); idesc->id_entryno *= btodb(sblock.fs_fsize); if (dp->di_blocks != idesc->id_entryno) { pwarn("INCORRECT BLOCK COUNT I=%lu (%ld should be %ld)", inumber, dp->di_blocks, idesc->id_entryno); if (preen) printf(" (CORRECTED)\n"); else if (reply("CORRECT") == 0) return; dp = ginode(inumber); dp->di_blocks = idesc->id_entryno; inodirty(); } return; unknown: pfatal("UNKNOWN FILE TYPE I=%lu", inumber); statemap[inumber] = FCLEAR; if (reply("CLEAR") == 1) { statemap[inumber] = USTATE; dp = ginode(inumber); clearinode(dp); inodirty(); } } int pass1check(idesc) register struct inodesc *idesc; { int res = KEEPON; int anyout, nfrags; ufs_daddr_t blkno = idesc->id_blkno; register struct dups *dlp; struct dups *new; if ((anyout = chkrange(blkno, idesc->id_numfrags)) != 0) { blkerror(idesc->id_number, "BAD", blkno); if (badblk++ >= MAXBAD) { pwarn("EXCESSIVE BAD BLKS I=%lu", idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } } for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (anyout && chkrange(blkno, 1)) { res = SKIP; } else if (!testbmap(blkno)) { n_blks++; setbmap(blkno); } else { blkerror(idesc->id_number, "DUP", blkno); if (dupblk++ >= MAXDUP) { pwarn("EXCESSIVE DUP BLKS I=%lu", idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new = (struct dups *)malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new->dup = blkno; if (muldup == 0) { duplist = muldup = new; new->next = 0; } else { new->next = muldup->next; muldup->next = new; } for (dlp = duplist; dlp != muldup; dlp = dlp->next) if (dlp->dup == blkno) break; if (dlp == muldup && dlp->dup != blkno) muldup = new; } /* * count the number of blocks found in id_entryno */ idesc->id_entryno++; } return (res); } Index: head/sbin/fsck_ifs/pass2.c 
=================================================================== --- head/sbin/fsck_ifs/pass2.c (revision 34265) +++ head/sbin/fsck_ifs/pass2.c (revision 34266) @@ -1,467 +1,482 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass2.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" #define MINDIRSIZE (sizeof (struct dirtemplate)) static int blksort __P((const void *, const void *)); static int pass2check __P((struct inodesc *)); void pass2() { register struct dinode *dp; register struct inoinfo **inpp, *inp; struct inoinfo **inpend; struct inodesc curino; struct dinode dino; char pathbuf[MAXPATHLEN + 1]; switch (statemap[ROOTINO]) { case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; case DCLEAR: pfatal("DUPS/BAD IN ROOT INODE"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: case FCLEAR: pfatal("ROOT INODE NOT DIRECTORY"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; inodirty(); break; case DSTATE: break; default: errx(EEXIT, "BAD STATE %d FOR ROOT INODE", statemap[ROOTINO]); } statemap[ROOTINO] = DFOUND; if (newinofmt) { statemap[WINO] = FSTATE; typemap[WINO] = DT_WHT; } /* * Sort the directory list into disk block order. */ qsort((char *)inpsort, (size_t)inplast, sizeof *inpsort, blksort); /* * Check the integrity of each directory. */ memset(&curino, 0, sizeof(struct inodesc)); curino.id_type = DATA; curino.id_func = pass2check; dp = &dino; inpend = &inpsort[inplast]; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_isize == 0) continue; if (inp->i_isize < MINDIRSIZE) { direrror(inp->i_number, "DIRECTORY TOO SHORT"); inp->i_isize = roundup(MINDIRSIZE, DIRBLKSIZ); if (reply("FIX") == 1) { dp = ginode(inp->i_number); dp->di_size = inp->i_isize; inodirty(); dp = &dino; } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); if (preen || reply("ADJUST") == 1) { dp = ginode(inp->i_number); dp->di_size = roundup(inp->i_isize, DIRBLKSIZ); inodirty(); dp = &dino; } } memset(&dino, 0, sizeof(struct dinode)); dino.di_mode = IFDIR; dp->di_size = inp->i_isize; memmove(&dp->di_db[0], &inp->i_blks[0], (size_t)inp->i_numblks); curino.id_number = inp->i_number; curino.id_parent = inp->i_parent; (void)ckinode(dp, &curino); } /* * Now that the parents of all directories have been found, * make another pass to verify the value of `..' 
*/ for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0 || inp->i_isize == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) statemap[inp->i_number] = DFOUND; if (inp->i_dotdot == inp->i_parent || inp->i_dotdot == (ino_t)-1) continue; if (inp->i_dotdot == 0) { inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, inp->i_number, "MISSING '..'"); if (reply("FIX") == 0) continue; (void)makeentry(inp->i_number, inp->i_parent, ".."); lncntp[inp->i_parent]--; continue; } fileerror(inp->i_parent, inp->i_number, "BAD INODE NUMBER FOR '..'"); if (reply("FIX") == 0) continue; lncntp[inp->i_dotdot]++; lncntp[inp->i_parent]--; inp->i_dotdot = inp->i_parent; (void)changeino(inp->i_number, "..", inp->i_parent); } /* * Mark all the directories that can be found from the root. */ propagate(); } static int pass2check(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; register struct inoinfo *inp; int n, entrysize, ret = 0; struct dinode *dp; char *errmsg; struct direct proto; char namebuf[MAXPATHLEN + 1]; char pathbuf[MAXPATHLEN + 1]; /* * If converting, set directory entry type. */ if (doinglevel2 && dirp->d_ino > 0 && dirp->d_ino < maxino) { dirp->d_type = typemap[dirp->d_ino]; ret |= ALTERED; } /* * check for "." */ if (idesc->id_entryno != 0) goto chk1; if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") == 0) { if (dirp->d_ino != idesc->id_number) { direrror(idesc->id_number, "BAD INODE NUMBER FOR '.'"); dirp->d_ino = idesc->id_number; if (reply("FIX") == 1) ret |= ALTERED; } if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '.'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk1; } direrror(idesc->id_number, "MISSING '.'"); proto.d_ino = idesc->id_number; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 1; (void)strcpy(proto.d_name, "."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (dirp->d_ino != 0 && strcmp(dirp->d_name, "..") != 0) { pfatal("CANNOT FIX, FIRST ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); } else if (dirp->d_reclen < entrysize) { pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '.'\n"); } else if (dirp->d_reclen < 2 * entrysize) { proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } else { n = dirp->d_reclen - entrysize; proto.d_reclen = entrysize; memmove(dirp, &proto, (size_t)entrysize); idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + entrysize); memset(dirp, 0, (size_t)n); dirp->d_reclen = n; if (reply("FIX") == 1) ret |= ALTERED; } chk1: if (idesc->id_entryno > 1) goto chk2; inp = getinoinfo(idesc->id_number); proto.d_ino = inp->i_parent; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 2; (void)strcpy(proto.d_name, ".."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (idesc->id_entryno == 0) { n = DIRSIZ(0, dirp); if (dirp->d_reclen < n + entrysize) goto chk2; proto.d_reclen = dirp->d_reclen - n; dirp->d_reclen = n; idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + n); memset(dirp, 0, (size_t)proto.d_reclen); dirp->d_reclen = proto.d_reclen; } if (dirp->d_ino != 
0 && strcmp(dirp->d_name, "..") == 0) { inp->i_dotdot = dirp->d_ino; if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '..'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk2; } if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") != 0) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, SECOND ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); inp->i_dotdot = (ino_t)-1; } else if (dirp->d_reclen < entrysize) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '..'\n"); inp->i_dotdot = (ino_t)-1; } else if (inp->i_parent != 0) { /* * We know the parent, so fix now. */ inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } idesc->id_entryno++; if (dirp->d_ino != 0) lncntp[dirp->d_ino]--; return (ret|KEEPON); chk2: if (dirp->d_ino == 0) return (ret|KEEPON); if (dirp->d_namlen <= 2 && dirp->d_name[0] == '.' && idesc->id_entryno >= 2) { if (dirp->d_namlen == 1) { direrror(idesc->id_number, "EXTRA '.' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } if (dirp->d_name[1] == '.') { direrror(idesc->id_number, "EXTRA '..' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } } idesc->id_entryno++; n = 0; if (dirp->d_ino > maxino) { fileerror(idesc->id_number, dirp->d_ino, "I OUT OF RANGE"); n = reply("REMOVE"); } else if (newinofmt && ((dirp->d_ino == WINO && dirp->d_type != DT_WHT) || (dirp->d_ino != WINO && dirp->d_type == DT_WHT))) { fileerror(idesc->id_number, dirp->d_ino, "BAD WHITEOUT ENTRY"); dirp->d_ino = WINO; dirp->d_type = DT_WHT; if (reply("FIX") == 1) ret |= ALTERED; } else { again: switch (statemap[dirp->d_ino]) { case USTATE: if (idesc->id_entryno <= 2) break; fileerror(idesc->id_number, dirp->d_ino, "UNALLOCATED"); n = reply("REMOVE"); break; case DCLEAR: case FCLEAR: if (idesc->id_entryno <= 2) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; break; } fileerror(idesc->id_number, dirp->d_ino, errmsg); if ((n = reply("REMOVE")) == 1) break; dp = ginode(dirp->d_ino); statemap[dirp->d_ino] = (dp->di_mode & IFMT) == IFDIR ? 
DSTATE : FSTATE; lncntp[dirp->d_ino] = dp->di_nlink; goto again; case DSTATE: if (statemap[idesc->id_number] == DFOUND) statemap[dirp->d_ino] = DFOUND; /* fall through */ case DFOUND: inp = getinoinfo(dirp->d_ino); if (inp->i_parent != 0 && idesc->id_entryno > 2) { getpathname(pathbuf, idesc->id_number, idesc->id_number); getpathname(namebuf, dirp->d_ino, dirp->d_ino); pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } if (idesc->id_entryno > 2) inp->i_parent = idesc->id_number; /* fall through */ case FSTATE: if (newinofmt && dirp->d_type != typemap[dirp->d_ino]) { fileerror(idesc->id_number, dirp->d_ino, "BAD TYPE VALUE"); dirp->d_type = typemap[dirp->d_ino]; if (reply("FIX") == 1) ret |= ALTERED; } lncntp[dirp->d_ino]--; break; default: errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[dirp->d_ino], dirp->d_ino); } } if (n == 0) return (ret|KEEPON); dirp->d_ino = 0; return (ret|KEEPON|ALTERED); } /* * Routine to sort disk blocks. */ static int blksort(arg1, arg2) const void *arg1, *arg2; { return ((*(struct inoinfo **)arg1)->i_blks[0] - (*(struct inoinfo **)arg2)->i_blks[0]); } Index: head/sbin/fsck_ifs/pass5.c =================================================================== --- head/sbin/fsck_ifs/pass5.c (revision 34265) +++ head/sbin/fsck_ifs/pass5.c (revision 34266) @@ -1,345 +1,375 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include "fsck.h" void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; register struct cg *newcg = (struct cg *)buf; struct ocg *ocg = (struct ocg *)buf; statemap[WINO] = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; doinglevel1 = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { char *doit = 0; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); doinglevel1 = 1; sbdirty(); } } } } switch ((int)fs->fs_postblformat) { case FS_42POSTBLFMT: basesize = (char *)(&ocg->cg_btot[0]) - (char *)(&ocg->cg_firstfield); sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; break; case FS_DYNAMICPOSTBLFMT: newcg->cg_btotoff = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); newcg->cg_boff = newcg->cg_btotoff + fs->fs_cpg * sizeof(long); newcg->cg_iusedoff = newcg->cg_boff + fs->fs_cpg * fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(long); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fs->fs_cpg * fs->fs_spc / NSPB(fs), NBBY); } newcg->cg_magic = CG_MAGIC; basesize = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); sumsize = newcg->cg_iusedoff - newcg->cg_btotoff; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) { idesc[i].id_type = ADDR; if 
(doinglevel2) idesc[i].id_fix = FIX; } memset(&cstotal, 0, sizeof(struct csum)); j = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (i = fs->fs_size; i < j; i++) setbmap(i); for (c = 0; c < fs->fs_ncg; c++) { getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize); if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_time = cg->cg_time; newcg->cg_cgx = c; if (c == fs->fs_ncg - 1) newcg->cg_ncyl = fs->fs_ncyl % fs->fs_cpg; else newcg->cg_ncyl = fs->fs_cpg; newcg->cg_ndblk = dmax - dbase; if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor < newcg->cg_niblk) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(&cg_blktot(newcg)[0], 0, (size_t)(sumsize + mapsize)); if (fs->fs_postblformat == FS_42POSTBLFMT) ocg->cg_magic = CG_MAGIC; j = fs->fs_ipg * c; for (i = 0; i < fs->fs_ipg; j++, i++) { switch (statemap[j]) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: newcg->cg_cs.cs_ndir++; /* fall through */ case FSTATE: case FCLEAR: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (j < ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[j], j); } } if (c == 0) for (i = 0; i < ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) continue; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; j = cbtocylno(fs, i); cg_blktot(newcg)[j]++; cg_blks(fs, newcg, j)[cbtorpos(fs, i)]++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; cs = &fs->fs_cs(fs, c); if (memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (doinglevel1) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], 
&cg_blktot(cg)[0], sumsize) != 0) && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); memmove(&cg_blktot(cg)[0], &cg_blktot(newcg)[0], (size_t)sumsize); + cgdirty(); + } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE\n", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_blksfree(newcg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE\n", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); cgdirty(); } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; if (memcmp(&cstotal, &fs->fs_cstotal, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(&fs->fs_cstotal, &cstotal, sizeof *cs); fs->fs_ronly = 0; sbdirty(); } if (fs->fs_fmod != 0) { pwarn("MODIFIED FLAG SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_fmod = 0; sbdirty(); } } if (fs->fs_clean == 0) { pwarn("CLEAN FLAG NOT SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_clean = 1; sbdirty(); } } } Index: head/sbin/fsck_ifs/setup.c =================================================================== --- head/sbin/fsck_ifs/setup.c (revision 34265) +++ head/sbin/fsck_ifs/setup.c (revision 34266) @@ -1,514 +1,520 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
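Soft updates guarantee that the only bitmap inconsistencies left behind by a crash are resources marked allocated that are really free, so the usedsoftdep block above only has to warn about the opposite, unexpected direction: bits free on disk that fsck's recomputed maps show as allocated. A condensed sketch of the comparison for one byte of the fragment map (helper name and parameters are illustrative):

	/*
	 * diskmap is one byte of cg_blksfree(cg), newmap the same byte
	 * of cg_blksfree(newcg); base is the fragment number of bit 0.
	 */
	static void
	warn_alloc_marked_free(diskmap, newmap, base)
		u_char diskmap, newmap;
		int base;
	{
		int k;

		if ((newmap & diskmap) == diskmap)
			return;		/* every disk-free bit agrees */
		for (k = 0; k < NBBY; k++)
			if ((diskmap & (1 << k)) && !(newmap & (1 << k)))
				pwarn("ALLOCATED FRAG %d MARKED FREE\n",
				    base + k);
	}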
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; #endif /* not lint */ #define DKTYPENAMES #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct bufarea asblk; #define altsblock (*asblk.b_un.b_fs) #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static void badsb __P((int listerr, char *s)); static int calcsb __P((char *dev, int devfd, struct fs *fs)); static struct disklabel *getdisklabel __P((char *s, int fd)); static int readsb __P((int listerr)); /* * Read in a superblock finding an alternate if necessary. * Return 1 if successful, 0 if unsuccessful, -1 if filesystem * is already clean (preen mode only). */ int setup(dev) char *dev; { long cg, size, asked, i, j; long skipclean, bmapsize; struct disklabel *lp; off_t sizepb; struct stat statb; struct fs proto; havesb = 0; fswritefd = -1; skipclean = preen; if (stat(dev, &statb) < 0) { printf("Can't stat %s: %s\n", dev, strerror(errno)); return (0); } if ((statb.st_mode & S_IFMT) != S_IFCHR) { pfatal("%s is not a character device", dev); if (reply("CONTINUE") == 0) return (0); } if ((fsreadfd = open(dev, O_RDONLY)) < 0) { printf("Can't open %s: %s\n", dev, strerror(errno)); return (0); } if (preen == 0) printf("** %s", dev); if (nflag || (fswritefd = open(dev, O_WRONLY)) < 0) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if (preen == 0) printf("\n"); fsmodified = 0; lfdir = 0; initbarea(&sblk); initbarea(&asblk); sblk.b_un.b_buf = malloc(SBSIZE); asblk.b_un.b_buf = malloc(SBSIZE); if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL) errx(EEXIT, "cannot allocate space for superblock"); lp = getdisklabel((char *)NULL, fsreadfd); if (lp) dev_bsize = secsize = lp->d_secsize; else dev_bsize = secsize = DEV_BSIZE; /* * Read in the superblock, looking for alternates if necessary */ if (readsb(1) == 0) { skipclean = 0; if (bflag || preen || calcsb(dev, fsreadfd, &proto) == 0) return(0); if (reply("LOOK FOR ALTERNATE SUPERBLOCKS") == 0) return (0); for (cg = 0; cg < proto.fs_ncg; cg++) { bflag = fsbtodb(&proto, cgsblock(&proto, cg)); if (readsb(0) != 0) break; } if (cg >= proto.fs_ncg) { printf("%s %s\n%s %s\n%s %s\n", "SEARCH FOR ALTERNATE SUPER-BLOCK", "FAILED. YOU MUST USE THE", "-b OPTION TO FSCK TO SPECIFY THE", "LOCATION OF AN ALTERNATE", "SUPER-BLOCK TO SUPPLY NEEDED", "INFORMATION; SEE fsck(8)."); bflag = 0; return(0); } pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag); bflag = 0; } maxfsblock = sblock.fs_size; maxino = sblock.fs_ncg * sblock.fs_ipg; /* * Check and potentially fix certain fields in the super block. 
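The alternate-superblock search above works because every cylinder group carries a spare copy of the superblock. For orientation, the FFS macros of this era locate that copy roughly as follows (paraphrased from <ufs/ffs/fs.h>; consult the header for the authoritative definitions):

	#define cgbase(fs, c)	((ufs_daddr_t)((fs)->fs_fpg * (c)))
	#define cgstart(fs, c) \
		(cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask)))
	#define cgsblock(fs, c)	(cgstart(fs, c) + (fs)->fs_sblkno)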
*/ if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) { pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); if (reply("SET TO DEFAULT") == 1) { sblock.fs_optim = FS_OPTTIME; sbdirty(); } } if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) { pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK", sblock.fs_minfree); if (reply("SET TO DEFAULT") == 1) { sblock.fs_minfree = 10; sbdirty(); } } if (sblock.fs_interleave < 1 || sblock.fs_interleave > sblock.fs_nsect) { pwarn("IMPOSSIBLE INTERLEAVE=%d IN SUPERBLOCK", sblock.fs_interleave); sblock.fs_interleave = 1; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_npsect < sblock.fs_nsect || sblock.fs_npsect > sblock.fs_nsect*2) { pwarn("IMPOSSIBLE NPSECT=%d IN SUPERBLOCK", sblock.fs_npsect); sblock.fs_npsect = sblock.fs_nsect; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_inodefmt >= FS_44INODEFMT) { newinofmt = 1; } else { sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; newinofmt = 0; } /* * Convert to new inode format. */ if (cvtlevel >= 2 && sblock.fs_inodefmt < FS_44INODEFMT) { if (preen) pwarn("CONVERTING TO NEW INODE FORMAT\n"); else if (!reply("CONVERT TO NEW INODE FORMAT")) return(0); doinglevel2++; sblock.fs_inodefmt = FS_44INODEFMT; sizepb = sblock.fs_bsize; sblock.fs_maxfilesize = sblock.fs_bsize * NDADDR - 1; for (i = 0; i < NIADDR; i++) { sizepb *= NINDIR(&sblock); sblock.fs_maxfilesize += sizepb; } sblock.fs_maxsymlinklen = MAXSYMLINKLEN; sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; sbdirty(); dirty(&asblk); } /* * Convert to new cylinder group format. */ if (cvtlevel >= 1 && sblock.fs_postblformat == FS_42POSTBLFMT) { if (preen) pwarn("CONVERTING TO NEW CYLINDER GROUP FORMAT\n"); else if (!reply("CONVERT TO NEW CYLINDER GROUP FORMAT")) return(0); doinglevel1++; sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT; sblock.fs_nrpos = 8; sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) - (char *)(&sblock.fs_firstfield); sblock.fs_rotbloff = &sblock.fs_space[0] - (u_char *)(&sblock.fs_firstfield); sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); sbdirty(); dirty(&asblk); } if (asblk.b_dirty && !bflag) { memmove(&altsblock, &sblock, (size_t)sblock.fs_sbsize); flush(fswritefd, &asblk); } /* * read in the summary info. */ asked = 0; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize; sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size); if (bread(fsreadfd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } /* * If we survive the above basic checks and are preening, * quit here unless forced. 
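The fs_maxfilesize value computed by the inode-format conversion above is the highest byte offset reachable through the direct and indirect block chains. A worked example with assumed parameters (8192-byte blocks, NDADDR = 12 direct pointers, 4-byte block addresses, hence NINDIR = 8192/4 = 2048, and NIADDR = 3 indirect levels):

	u_quad_t sizepb = 8192;			/* bytes mapped per pointer */
	u_quad_t maxsize = 8192 * 12 - 1;	/* the direct blocks */
	int level;

	for (level = 0; level < 3; level++) {
		sizepb *= 2048;		/* each level multiplies by NINDIR */
		maxsize += sizepb;
	}
	/* 8K*12 - 1 + 8K*(2048 + 2048^2 + 2048^3): a bit over 2^46 bytes */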
*/ if (skipclean && sblock.fs_clean && !fflag) return (-1); /* * allocate and initialize the necessary maps */ bmapsize = roundup(howmany(maxfsblock, NBBY), sizeof(short)); blockmap = calloc((unsigned)bmapsize, sizeof (char)); if (blockmap == NULL) { printf("cannot alloc %u bytes for blockmap\n", (unsigned)bmapsize); goto badsb; } statemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (statemap == NULL) { printf("cannot alloc %u bytes for statemap\n", (unsigned)(maxino + 1)); goto badsb; } typemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (typemap == NULL) { printf("cannot alloc %u bytes for typemap\n", (unsigned)(maxino + 1)); goto badsb; } lncntp = (short *)calloc((unsigned)(maxino + 1), sizeof(short)); if (lncntp == NULL) { printf("cannot alloc %u bytes for lncntp\n", (unsigned)(maxino + 1) * sizeof(short)); goto badsb; } numdirs = sblock.fs_cstotal.cs_ndir; if (numdirs == 0) { printf("numdirs is zero, try using an alternate superblock\n"); goto badsb; } inplast = 0; listmax = numdirs + 10; inpsort = (struct inoinfo **)calloc((unsigned)listmax, sizeof(struct inoinfo *)); inphead = (struct inoinfo **)calloc((unsigned)numdirs, sizeof(struct inoinfo *)); if (inpsort == NULL || inphead == NULL) { printf("cannot alloc %u bytes for inphead\n", (unsigned)numdirs * sizeof(struct inoinfo *)); goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: ckfini(0); return (0); } /* * Read in the super block and its summary info. */ static int readsb(listerr) int listerr; { ufs_daddr_t super = bflag ? bflag : SBOFF / dev_bsize; if (bread(fsreadfd, (char *)&sblock, super, (long)SBSIZE) != 0) return (0); sblk.b_bno = super; sblk.b_size = SBSIZE; /* * run a few consistency checks of the super block */ if (sblock.fs_magic != FS_MAGIC) { badsb(listerr, "MAGIC NUMBER WRONG"); return (0); } if (sblock.fs_ncg < 1) { badsb(listerr, "NCG OUT OF RANGE"); return (0); } if (sblock.fs_cpg < 1) { badsb(listerr, "CPG OUT OF RANGE"); return (0); } if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl || (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) { badsb(listerr, "NCYL LESS THAN NCG*CPG"); return (0); } if (sblock.fs_sbsize > SBSIZE) { badsb(listerr, "SIZE PREPOSTEROUSLY LARGE"); return (0); } /* * Compute block size that the filesystem is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ super *= dev_bsize; dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); sblk.b_bno = super / dev_bsize; if (bflag) { havesb = 1; return (1); } /* * Set all possible fields that could differ, then do check * of whole super block against an alternate super block. * When an alternate super-block is specified this check is skipped. 
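The FS_DOSOFTDEP test added at the end of setup() above is the single switch that enables the softdep-aware behavior in pass 5, pfatal(), and the directory checks; the if/else pair is equivalent to the one-liner:

	/* equivalent form of the usedsoftdep assignment above */
	usedsoftdep = (sblock.fs_flags & FS_DOSOFTDEP) != 0;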
*/ getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), sblock.fs_sbsize); if (asblk.b_errs) return (0); altsblock.fs_firstfield = sblock.fs_firstfield; altsblock.fs_unused_1 = sblock.fs_unused_1; altsblock.fs_time = sblock.fs_time; altsblock.fs_cstotal = sblock.fs_cstotal; altsblock.fs_cgrotor = sblock.fs_cgrotor; altsblock.fs_fmod = sblock.fs_fmod; altsblock.fs_clean = sblock.fs_clean; altsblock.fs_ronly = sblock.fs_ronly; altsblock.fs_flags = sblock.fs_flags; altsblock.fs_maxcontig = sblock.fs_maxcontig; altsblock.fs_minfree = sblock.fs_minfree; altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp); altsblock.fs_maxcluster = sblock.fs_maxcluster; memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt); memmove(altsblock.fs_sparecon, sblock.fs_sparecon, sizeof sblock.fs_sparecon); /* * The following should not have to be copied. */ altsblock.fs_fsbtodb = sblock.fs_fsbtodb; altsblock.fs_interleave = sblock.fs_interleave; altsblock.fs_npsect = sblock.fs_npsect; altsblock.fs_nrpos = sblock.fs_nrpos; altsblock.fs_state = sblock.fs_state; altsblock.fs_qbmask = sblock.fs_qbmask; altsblock.fs_qfmask = sblock.fs_qfmask; altsblock.fs_state = sblock.fs_state; altsblock.fs_maxfilesize = sblock.fs_maxfilesize; if (memcmp(&sblock, &altsblock, (int)sblock.fs_sbsize)) { if (debug) { long *nlp, *olp, *endlp; printf("superblock mismatches\n"); nlp = (long *)&altsblock; olp = (long *)&sblock; endlp = olp + (sblock.fs_sbsize / sizeof *olp); for ( ; olp < endlp; olp++, nlp++) { if (*olp == *nlp) continue; printf("offset %d, original %d, alternate %d\n", olp - (long *)&sblock, *olp, *nlp); } } badsb(listerr, "VALUES IN SUPER BLOCK DISAGREE WITH THOSE IN FIRST ALTERNATE"); return (0); } havesb = 1; return (1); } static void badsb(listerr, s) int listerr; char *s; { if (!listerr) return; if (preen) printf("%s: ", cdevname); pfatal("BAD SUPER BLOCK: %s\n", s); } /* * Calculate a prototype superblock based on information in the disk label. * When done the cgsblock macro can be calculated and the fs_ncg field * can be used. Do NOT attempt to use other macros without verifying that * their needed information is available! */ static int calcsb(dev, devfd, fs) char *dev; int devfd; register struct fs *fs; { register struct disklabel *lp; register struct partition *pp; register char *cp; int i; cp = strchr(dev, '\0') - 1; if (cp == (char *)-1 || ((*cp < 'a' || *cp > 'h') && !isdigit(*cp))) { pfatal("%s: CANNOT FIGURE OUT FILE SYSTEM PARTITION\n", dev); return (0); } lp = getdisklabel(dev, devfd); if (isdigit(*cp)) pp = &lp->d_partitions[0]; else pp = &lp->d_partitions[*cp - 'a']; if (pp->p_fstype != FS_BSDFFS) { pfatal("%s: NOT LABELED AS A BSD FILE SYSTEM (%s)\n", dev, pp->p_fstype < FSMAXTYPES ? 
fstypenames[pp->p_fstype] : "unknown"); return (0); } if (pp->p_fsize == 0 || pp->p_frag == 0) { pfatal("%s: LABELED AS A %s FILE SYSTEM, BUT BLOCK SIZE IS 0\n", dev, fstypenames[pp->p_fstype]); return (0); } memset(fs, 0, sizeof(struct fs)); fs->fs_fsize = pp->p_fsize; fs->fs_frag = pp->p_frag; fs->fs_cpg = pp->p_cpg; fs->fs_size = pp->p_size; fs->fs_ntrak = lp->d_ntracks; fs->fs_nsect = lp->d_nsectors; fs->fs_spc = lp->d_secpercyl; fs->fs_nspf = fs->fs_fsize / lp->d_secsize; fs->fs_sblkno = roundup( howmany(lp->d_bbsize + lp->d_sbsize, fs->fs_fsize), fs->fs_frag); fs->fs_cgmask = 0xffffffff; for (i = fs->fs_ntrak; i > 1; i >>= 1) fs->fs_cgmask <<= 1; if (!POWEROF2(fs->fs_ntrak)) fs->fs_cgmask <<= 1; fs->fs_cgoffset = roundup( howmany(fs->fs_nsect, NSPF(fs)), fs->fs_frag); fs->fs_fpg = (fs->fs_cpg * fs->fs_spc) / NSPF(fs); fs->fs_ncg = howmany(fs->fs_size / fs->fs_spc, fs->fs_cpg); for (fs->fs_fsbtodb = 0, i = NSPF(fs); i > 1; i >>= 1) fs->fs_fsbtodb++; dev_bsize = lp->d_secsize; return (1); } static struct disklabel * getdisklabel(s, fd) char *s; int fd; { static struct disklabel lab; if (ioctl(fd, DIOCGDINFO, (char *)&lab) < 0) { if (s == NULL) return ((struct disklabel *)NULL); pwarn("ioctl (GCINFO): %s\n", strerror(errno)); errx(EEXIT, "%s: can't read disk label", s); } return (&lab); } Index: head/sbin/fsck_ifs/utilities.c =================================================================== --- head/sbin/fsck_ifs/utilities.c (revision 34265) +++ head/sbin/fsck_ifs/utilities.c (revision 34266) @@ -1,625 +1,648 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
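calcsb() above rebuilds just enough superblock geometry from the disk label for the cgsblock() search to work. The core relations, shown with an assumed label (512-byte sectors, 1024-byte fragments so NSPF(fs) = 2, spc = 2048 sectors per cylinder, cpg = 16 cylinders per group, and a 1048576-sector partition; all values illustrative):

	int nspf = 1024 / 512;		/* fs_fsize / d_secsize = 2 */
	int fpg = (16 * 2048) / nspf;	/* cpg * spc / NSPF = 16384 frags */
	int ncyl = 1048576 / 2048;	/* partition sectors / spc = 512 */
	int ncg = howmany(ncyl, 16);	/* 32 groups; a tail rounds up */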
*/ #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" long diskreads, totalreads; /* Disk cache statistics */ static void rwerror __P((char *mesg, ufs_daddr_t blk)); int ftypeok(dp) struct dinode *dp; { switch (dp->di_mode & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", dp->di_mode); return (0); } } int reply(question) char *question; { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = !strcmp(question, "CONTINUE"); printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } /* * Malloc buffers and set up cache. */ void bufinit() { register struct bufarea *bp; long bufcnt, i; char *bufp; pbp = pdirbp = (struct bufarea *)0; bufp = malloc((unsigned int)sblock.fs_bsize); if (bufp == 0) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk); bufhead.b_next = bufhead.b_prev = &bufhead; bufcnt = MAXBUFSPACE / sblock.fs_bsize; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)malloc(sizeof(struct bufarea)); bufp = malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; initbarea(bp); } bufhead.b_size = i; /* save number of buffers */ } /* * Manage a cache of directory blocks. */ struct bufarea * getdatablk(blkno, size) ufs_daddr_t blkno; long size; { register struct bufarea *bp; for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == &bufhead) errx(EEXIT, "deadlocked buffer pool"); getblk(bp, blkno, size); /* fall through */ foundit: totalreads++; bp->b_prev->b_next = bp->b_next; bp->b_next->b_prev = bp->b_prev; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; bp->b_flags |= B_INUSE; return (bp); } void getblk(bp, blk, size) register struct bufarea *bp; ufs_daddr_t blk; long size; { ufs_daddr_t dblk; dblk = fsbtodb(&sblock, blk); if (bp->b_bno != dblk) { flush(fswritefd, bp); diskreads++; bp->b_errs = bread(fsreadfd, bp->b_un.b_buf, dblk, size); bp->b_bno = dblk; bp->b_size = size; } } void flush(fd, bp) int fd; register struct bufarea *bp; { register int i, j; if (!bp->b_dirty) return; if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %d TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? 
"" : "PARTIALLY ", bp->b_bno); bp->b_dirty = 0; bp->b_errs = 0; bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { bwrite(fswritefd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize); } } static void rwerror(mesg, blk) char *mesg; ufs_daddr_t blk; { if (preen == 0) printf("\n"); pfatal("CANNOT %s: BLK %ld", mesg, blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(markclean) int markclean; { register struct bufarea *bp, *nbp; int ofsmodified, cnt = 0; if (fswritefd < 0) { (void)close(fsreadfd); return; } flush(fswritefd, &sblk); if (havesb && sblk.b_bno != SBOFF / dev_bsize && !preen && reply("UPDATE STANDARD SUPERBLOCK")) { sblk.b_bno = SBOFF / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); for (bp = bufhead.b_prev; bp && bp != &bufhead; bp = nbp) { cnt++; flush(fswritefd, bp); nbp = bp->b_prev; free(bp->b_un.b_buf); free((char *)bp); } if (bufhead.b_size != cnt) errx(EEXIT, "Panic: lost %d buffers", bufhead.b_size - cnt); pbp = pdirbp = (struct bufarea *)0; if (markclean && sblock.fs_clean == 0) { sblock.fs_clean = 1; sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) printf("\n***** FILE SYSTEM MARKED CLEAN *****\n"); } if (debug) printf("cache missed %ld of %ld (%d%%)\n", diskreads, totalreads, (int)(diskreads * 100 / totalreads)); (void)close(fsreadfd); (void)close(fswritefd); } int bread(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (read(fd, buf, (int)size) == size) return (0); rwerror("READ", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (read(fd, cp, (int)secsize) != secsize) { (void)lseek(fd, offset + i + secsize, 0); if (secsize != dev_bsize && dev_bsize != 1) printf(" %ld (%ld),", (blk * dev_bsize + i) / secsize, blk + i / dev_bsize); else printf(" %ld,", blk + i / dev_bsize); errs++; } } printf("\n"); return (errs); } void bwrite(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (write(fd, buf, (int)size) == size) { fsmodified = 1; return; } rwerror("WRITE", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (write(fd, cp, (int)dev_bsize) != dev_bsize) { (void)lseek(fd, offset + i + dev_bsize, 0); printf(" %ld,", blk + i / dev_bsize); } printf("\n"); return; } /* * allocate a data block with the specified number of fragments */ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(i + j)) continue; for (k = 1; k < frags; k++) if (testbmap(i + j + k)) break; if (k < frags) { j += k; continue; } - for 
(k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } return (0); } /* * Free a previously allocated block */ void freeblk(blkno, frags) ufs_daddr_t blkno; long frags; { struct inodesc idesc; idesc.id_blkno = blkno; idesc.id_numfrags = frags; (void)pass4check(&idesc); } /* * Find a pathname */ void getpathname(namebuf, curdir, ino) char *namebuf; ino_t curdir, ino; { int len; register char *cp; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || (statemap[curdir] != DSTATE && statemap[curdir] != DFOUND)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = ".."; if ((ckinode(ginode(ino), &idesc) & FOUND) == 0) break; namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(idesc.id_number), &idesc)&FOUND) == 0) break; len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(sig) int sig; { if (!doinglevel2) ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after filesystem checks complete * so that reboot sequence may be interrupted. */ void catchquit(sig) int sig; { printf("returning to single-user after filesystem check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * Ignore a single quit signal; wait and flush just in case. * Used by child processes in preen. */ void voidquit(sig) int sig; { sleep(1); (void)signal(SIGQUIT, SIG_IGN); (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(idesc, msg) register struct inodesc *idesc; char *msg; { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn(msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #if __STDC__ #include #else #include #endif /* * An unexpected inconsistency occurred. - * Die if preening, otherwise just print message and continue. + * Die if preening or the filesystem is running with the soft dependency + * protocol, otherwise just print message and continue. */ void #if __STDC__ pfatal(const char *fmt, ...)
#else pfatal(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? " SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. + * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ pwarn(const char *fmt, ...) #else pwarn(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (preen) (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void #if __STDC__ panic(const char *fmt, ...) #else panic(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stderr, fmt, ap); va_end(ap); exit(EEXIT); } Index: head/sbin/mount/mount.c =================================================================== --- head/sbin/mount/mount.c (revision 34265) +++ head/sbin/mount/mount.c (revision 34266) @@ -1,614 +1,618 @@ /*- * Copyright (c) 1980, 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1989, 1993, 1994\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)mount.c 8.25 (Berkeley) 5/8/95"; #else static const char rcsid[] = - "$Id: mount.c,v 1.21 1997/11/13 00:28:49 julian Exp $"; + "$Id: mount.c,v 1.22 1998/02/13 04:54:27 bde Exp $"; #endif #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" #include "pathnames.h" int debug, fstab_style, verbose; char *catopt __P((char *, const char *)); struct statfs *getmntpt __P((const char *)); int hasopt __P((const char *, const char *)); int ismounted __P((struct fstab *, struct statfs *, int)); int isremountable __P((const char *)); void mangle __P((char *, int *, const char **)); int mountfs __P((const char *, const char *, const char *, int, const char *, const char *)); void prmount __P((struct statfs *)); void putfsent __P((const struct statfs *)); void usage __P((void)); /* Map from mount options to printable formats. */ static struct opt { int o_opt; const char *o_name; } optnames[] = { { MNT_ASYNC, "asynchronous" }, { MNT_EXPORTED, "NFS exported" }, { MNT_LOCAL, "local" }, { MNT_NOATIME, "noatime" }, { MNT_NODEV, "nodev" }, { MNT_NOEXEC, "noexec" }, { MNT_NOSUID, "nosuid" }, { MNT_QUOTA, "with quotas" }, { MNT_RDONLY, "read-only" }, { MNT_SYNCHRONOUS, "synchronous" }, { MNT_UNION, "union" }, { MNT_NOCLUSTERR, "noclusterr" }, { MNT_NOCLUSTERW, "noclusterw" }, { MNT_SUIDDIR, "suiddir" }, + { MNT_SOFTDEP, "soft-updates" }, { NULL } }; /* * List of VFS types that can be remounted without becoming mounted on top * of each other. * XXX Is this list correct? */ static const char * remountable_fs_names[] = { "ufs", "ffs", "lfs", "ext2fs", 0 }; int main(argc, argv) int argc; char * const argv[]; { const char *mntfromname, **vfslist, *vfstype; struct fstab *fs; struct statfs *mntbuf; FILE *mountdfp; pid_t pid; int all, ch, i, init_flags, mntsize, rval; char *options; all = init_flags = 0; options = NULL; vfslist = NULL; vfstype = "ufs"; while ((ch = getopt(argc, argv, "adfo:prwt:uv")) != -1) switch (ch) { case 'a': all = 1; break; case 'd': debug = 1; break; case 'f': init_flags |= MNT_FORCE; break; case 'o': if (*optarg) options = catopt(options, optarg); break; case 'p': fstab_style = 1; verbose = 1; break; case 'r': init_flags |= MNT_RDONLY; break; case 't': if (vfslist != NULL) errx(1, "only one -t option may be specified."); vfslist = makevfslist(optarg); vfstype = optarg; break; case 'u': init_flags |= MNT_UPDATE; break; case 'v': verbose = 1; break; case 'w': init_flags &= ~MNT_RDONLY; break; case '?': default: usage(); /* NOTREACHED */ } argc -= optind; argv += optind; #define BADTYPE(type) \ (strcmp(type, FSTAB_RO) && \ strcmp(type, FSTAB_RW) && strcmp(type, FSTAB_RQ)) rval = 0; switch (argc) { case 0: if ((mntsize = getmntinfo(&mntbuf, MNT_NOWAIT)) == 0) err(1, "getmntinfo"); if (all) { while ((fs = getfsent()) != NULL) { if (BADTYPE(fs->fs_type)) continue; if (checkvfsname(fs->fs_vfstype, vfslist)) continue; if (hasopt(fs->fs_mntops, "noauto")) continue; if (ismounted(fs, mntbuf, mntsize)) continue; if (mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops)) rval = 1; } } else if (fstab_style) { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; putfsent(&mntbuf[i]); } } else { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; prmount(&mntbuf[i]); } } exit(rval); case 1: if (vfslist
!= NULL) usage(); if (init_flags & MNT_UPDATE) { if ((mntbuf = getmntpt(*argv)) == NULL) errx(1, "unknown special file or file system %s.", *argv); if ((fs = getfsfile(mntbuf->f_mntonname)) != NULL) mntfromname = fs->fs_spec; else mntfromname = mntbuf->f_mntfromname; rval = mountfs(mntbuf->f_fstypename, mntfromname, mntbuf->f_mntonname, init_flags, options, 0); break; } if ((fs = getfsfile(*argv)) == NULL && (fs = getfsspec(*argv)) == NULL) errx(1, "%s: unknown special file or file system.", *argv); if (BADTYPE(fs->fs_type)) errx(1, "%s has unknown file system type.", *argv); rval = mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops); break; case 2: /* * If -t flag has not been specified, and spec contains either * a ':' or a '@' then assume that an NFS filesystem is being * specified ala Sun. */ if (vfslist == NULL && strpbrk(argv[0], ":@") != NULL) vfstype = "nfs"; rval = mountfs(vfstype, argv[0], argv[1], init_flags, options, NULL); break; default: usage(); /* NOTREACHED */ } /* * If the mount was successful, and done by root, tell mountd the * good news. Pid checks are probably unnecessary, but don't hurt. */ if (rval == 0 && getuid() == 0 && (mountdfp = fopen(_PATH_MOUNTDPID, "r")) != NULL) { if (fscanf(mountdfp, "%d", &pid) == 1 && pid > 0 && kill(pid, SIGHUP) == -1 && errno != ESRCH) err(1, "signal mountd"); (void)fclose(mountdfp); } exit(rval); } int ismounted(fs, mntbuf, mntsize) struct fstab *fs; struct statfs *mntbuf; int mntsize; { int i; if (fs->fs_file[0] == '/' && fs->fs_file[1] == '\0') /* the root file system can always be remounted */ return (0); for (i = mntsize - 1; i >= 0; --i) if (strcmp(fs->fs_file, mntbuf[i].f_mntonname) == 0 && (!isremountable(fs->fs_vfstype) || strcmp(fs->fs_spec, mntbuf[i].f_mntfromname) == 0)) return (1); return (0); } int isremountable(vfsname) const char *vfsname; { const char **cp; for (cp = remountable_fs_names; *cp; cp++) if (strcmp(*cp, vfsname) == 0) return (1); return (0); } int hasopt(mntopts, option) const char *mntopts, *option; { int negative, found; char *opt, *optbuf; if (option[0] == 'n' && option[1] == 'o') { negative = 1; option += 2; } else negative = 0; optbuf = strdup(mntopts); found = 0; for (opt = optbuf; (opt = strtok(opt, ",")) != NULL; opt = NULL) { if (opt[0] == 'n' && opt[1] == 'o') { if (!strcasecmp(opt + 2, option)) found = negative; } else if (!strcasecmp(opt, option)) found = !negative; } free(optbuf); return (found); } int mountfs(vfstype, spec, name, flags, options, mntopts) const char *vfstype, *spec, *name, *options, *mntopts; int flags; { /* List of directories containing mount_xxx subcommands.
*/ static const char *edirs[] = { _PATH_SBIN, _PATH_USRSBIN, NULL }; const char *argv[100], **edir; struct stat sb; struct statfs sf; pid_t pid; int argc, i, status; char *optbuf, execname[MAXPATHLEN + 1], mntpath[MAXPATHLEN]; #if __GNUC__ (void)&optbuf; (void)&name; #endif if (realpath(name, mntpath) != NULL && stat(mntpath, &sb) == 0) { if (!S_ISDIR(sb.st_mode)) { warnx("%s: Not a directory", mntpath); return (1); } } else { warn("%s", mntpath); return (1); } name = mntpath; if (mntopts == NULL) mntopts = ""; if (options == NULL) { if (*mntopts == '\0') { options = "rw"; } else { options = mntopts; mntopts = ""; } } optbuf = catopt(strdup(mntopts), options); if (strcmp(name, "/") == 0) flags |= MNT_UPDATE; if (flags & MNT_FORCE) optbuf = catopt(optbuf, "force"); if (flags & MNT_RDONLY) optbuf = catopt(optbuf, "ro"); /* * XXX * The mount_mfs (newfs) command uses -o to select the * optimisation mode. We don't pass the default "-o rw" * for that reason. */ if (flags & MNT_UPDATE) optbuf = catopt(optbuf, "update"); argc = 0; argv[argc++] = vfstype; mangle(optbuf, &argc, argv); argv[argc++] = spec; argv[argc++] = name; argv[argc] = NULL; if (debug) { (void)printf("exec: mount_%s", vfstype); for (i = 1; i < argc; i++) (void)printf(" %s", argv[i]); (void)printf("\n"); return (0); } switch (pid = fork()) { case -1: /* Error. */ warn("fork"); free(optbuf); return (1); case 0: /* Child. */ if (strcmp(vfstype, "ufs") == 0) exit(mount_ufs(argc, (char * const *) argv)); /* Go find an executable. */ for (edir = edirs; *edir; edir++) { (void)snprintf(execname, sizeof(execname), "%s/mount_%s", *edir, vfstype); execv(execname, (char * const *)argv); } if (errno == ENOENT) { int len = 0; char *cp; for (edir = edirs; *edir; edir++) len += strlen(*edir) + 2; /* ", " */ if ((cp = malloc(len)) == NULL) { warn(NULL); exit(1); } cp[0] = '\0'; for (edir = edirs; *edir; edir++) { strcat(cp, *edir); if (edir[1] != NULL) strcat(cp, ", "); } warn("exec mount_%s not found in %s", vfstype, cp); } exit(1); /* NOTREACHED */ default: /* Parent. */ free(optbuf); if (waitpid(pid, &status, 0) < 0) { warn("waitpid"); return (1); } if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) return (WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { warnx("%s: %s", name, sys_siglist[WTERMSIG(status)]); return (1); } if (verbose) { if (statfs(name, &sf) < 0) { warn("statfs %s", name); return (1); } if (fstab_style) putfsent(&sf); else prmount(&sf); } break; } return (0); } void prmount(sfp) struct statfs *sfp; { int flags; struct opt *o; struct passwd *pw; int f; (void)printf("%s on %s", sfp->f_mntfromname, sfp->f_mntonname); flags = sfp->f_flags & MNT_VISFLAGMASK; for (f = 0, o = optnames; flags && o->o_opt; o++) if (flags & o->o_opt) { (void)printf("%s%s", !f++ ? " (" : ", ", o->o_name); flags &= ~o->o_opt; } if (sfp->f_owner) { (void)printf("%smounted by ", !f++ ? " (" : ", "); if ((pw = getpwuid(sfp->f_owner)) != NULL) (void)printf("%s", pw->pw_name); else (void)printf("%d", sfp->f_owner); } - (void)printf(f ? ")\n" : "\n"); + (void)printf("%swrites: sync %d async %d)\n", !f++ ? 
" (" : ", ", + sfp->f_syncwrites, sfp->f_asyncwrites); } struct statfs * getmntpt(name) const char *name; { struct statfs *mntbuf; int i, mntsize; mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) if (strcmp(mntbuf[i].f_mntfromname, name) == 0 || strcmp(mntbuf[i].f_mntonname, name) == 0) return (&mntbuf[i]); return (NULL); } char * catopt(s0, s1) char *s0; const char *s1; { size_t i; char *cp; if (s0 && *s0) { i = strlen(s0) + strlen(s1) + 1 + 1; if ((cp = malloc(i)) == NULL) err(1, NULL); (void)snprintf(cp, i, "%s,%s", s0, s1); } else cp = strdup(s1); if (s0) free(s0); return (cp); } void mangle(options, argcp, argv) char *options; int *argcp; const char **argv; { char *p, *s; int argc; argc = *argcp; for (s = options; (p = strsep(&s, ",")) != NULL;) if (*p != '\0') if (*p == '-') { argv[argc++] = p; p = strchr(p, '='); if (p) { *p = '\0'; argv[argc++] = p+1; } } else if (strcmp(p, "rw") != 0) { argv[argc++] = "-o"; argv[argc++] = p; } *argcp = argc; } void usage() { (void)fprintf(stderr, "usage: mount %s %s\n mount %s\n mount %s\n", "[-dfpruvw] [-o options] [-t ufs | external_type]", "special node", "[-adfpruvw] [-t ufs | external_type]", "[-dfpruvw] special | node"); exit(1); } void putfsent(ent) const struct statfs *ent; { struct fstab *fst; printf("%s\t%s\t%s %s", ent->f_mntfromname, ent->f_mntonname, ent->f_fstypename, (ent->f_flags & MNT_RDONLY) ? "ro" : "rw"); /* XXX should use optnames[] - put shorter names in it. */ if (ent->f_flags & MNT_SYNCHRONOUS) printf(",sync"); if (ent->f_flags & MNT_NOEXEC) printf(",noexec"); if (ent->f_flags & MNT_NOSUID) printf(",nosuid"); if (ent->f_flags & MNT_NODEV) printf(",nodev"); if (ent->f_flags & MNT_UNION) printf(",union"); if (ent->f_flags & MNT_ASYNC) printf(",async"); if (ent->f_flags & MNT_NOATIME) printf(",noatime"); if (ent->f_flags & MNT_NOCLUSTERR) printf(",noclusterr"); if (ent->f_flags & MNT_NOCLUSTERW) printf(",noclusterw"); + if (ent->f_flags & MNT_SUIDDIR) + printf(",suiddir"); if ((fst = getfsspec(ent->f_mntfromname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if ((fst = getfsfile(ent->f_mntonname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if (strcmp(ent->f_fstypename, "ufs") == 0) printf("\t1 1\n"); else printf("\t0 0\n"); } Index: head/sbin/mount_ifs/mount.c =================================================================== --- head/sbin/mount_ifs/mount.c (revision 34265) +++ head/sbin/mount_ifs/mount.c (revision 34266) @@ -1,614 +1,618 @@ /*- * Copyright (c) 1980, 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1989, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)mount.c 8.25 (Berkeley) 5/8/95"; #else static const char rcsid[] = - "$Id: mount.c,v 1.21 1997/11/13 00:28:49 julian Exp $"; + "$Id: mount.c,v 1.22 1998/02/13 04:54:27 bde Exp $"; #endif #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" #include "pathnames.h" int debug, fstab_style, verbose; char *catopt __P((char *, const char *)); struct statfs *getmntpt __P((const char *)); int hasopt __P((const char *, const char *)); int ismounted __P((struct fstab *, struct statfs *, int)); int isremountable __P((const char *)); void mangle __P((char *, int *, const char **)); int mountfs __P((const char *, const char *, const char *, int, const char *, const char *)); void prmount __P((struct statfs *)); void putfsent __P((const struct statfs *)); void usage __P((void)); /* Map from mount options to printable formats. */ static struct opt { int o_opt; const char *o_name; } optnames[] = { { MNT_ASYNC, "asynchronous" }, { MNT_EXPORTED, "NFS exported" }, { MNT_LOCAL, "local" }, { MNT_NOATIME, "noatime" }, { MNT_NODEV, "nodev" }, { MNT_NOEXEC, "noexec" }, { MNT_NOSUID, "nosuid" }, { MNT_QUOTA, "with quotas" }, { MNT_RDONLY, "read-only" }, { MNT_SYNCHRONOUS, "synchronous" }, { MNT_UNION, "union" }, { MNT_NOCLUSTERR, "noclusterr" }, { MNT_NOCLUSTERW, "noclusterw" }, { MNT_SUIDDIR, "suiddir" }, + { MNT_SOFTDEP, "soft-updates" }, { NULL } }; /* * List of VFS types that can be remounted without becoming mounted on top * of each other. * XXX Is this list correct?
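Adding { MNT_SOFTDEP, "soft-updates" } to optnames[] is all that is needed for the new flag to appear in mount -v output, since prmount() below walks the table against f_flags; and because the new writes counter line is unconditional, it also supplies the closing parenthesis that the old f ? ")\n" : "\n" epilogue provided. A reduced sketch (sfp assumed to point at the struct statfs being printed):

	struct opt *o;
	int f = 0, flags = sfp->f_flags & MNT_VISFLAGMASK;

	for (o = optnames; flags && o->o_opt; o++)
		if (flags & o->o_opt) {
			/* first item opens the list, later ones separate */
			(void)printf("%s%s", !f++ ? " (" : ", ", o->o_name);
			flags &= ~o->o_opt;	/* report each bit once */
		}
	(void)printf("%swrites: sync %d async %d)\n", !f++ ? " (" : ", ",
	    sfp->f_syncwrites, sfp->f_asyncwrites);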
*/ static const char * remountable_fs_names[] = { "ufs", "ffs", "lfs", "ext2fs", 0 }; int main(argc, argv) int argc; char * const argv[]; { const char *mntfromname, **vfslist, *vfstype; struct fstab *fs; struct statfs *mntbuf; FILE *mountdfp; pid_t pid; int all, ch, i, init_flags, mntsize, rval; char *options; all = init_flags = 0; options = NULL; vfslist = NULL; vfstype = "ufs"; while ((ch = getopt(argc, argv, "adfo:prwt:uv")) != -1) switch (ch) { case 'a': all = 1; break; case 'd': debug = 1; break; case 'f': init_flags |= MNT_FORCE; break; case 'o': if (*optarg) options = catopt(options, optarg); break; case 'p': fstab_style = 1; verbose = 1; break; case 'r': init_flags |= MNT_RDONLY; break; case 't': if (vfslist != NULL) errx(1, "only one -t option may be specified."); vfslist = makevfslist(optarg); vfstype = optarg; break; case 'u': init_flags |= MNT_UPDATE; break; case 'v': verbose = 1; break; case 'w': init_flags &= ~MNT_RDONLY; break; case '?': default: usage(); /* NOTREACHED */ } argc -= optind; argv += optind; #define BADTYPE(type) \ (strcmp(type, FSTAB_RO) && \ strcmp(type, FSTAB_RW) && strcmp(type, FSTAB_RQ)) rval = 0; switch (argc) { case 0: if ((mntsize = getmntinfo(&mntbuf, MNT_NOWAIT)) == 0) err(1, "getmntinfo"); if (all) { while ((fs = getfsent()) != NULL) { if (BADTYPE(fs->fs_type)) continue; if (checkvfsname(fs->fs_vfstype, vfslist)) continue; if (hasopt(fs->fs_mntops, "noauto")) continue; if (ismounted(fs, mntbuf, mntsize)) continue; if (mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops)) rval = 1; } } else if (fstab_style) { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; putfsent(&mntbuf[i]); } } else { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; prmount(&mntbuf[i]); } } exit(rval); case 1: if (vfslist != NULL) usage(); if (init_flags & MNT_UPDATE) { if ((mntbuf = getmntpt(*argv)) == NULL) errx(1, "unknown special file or file system %s.", *argv); if ((fs = getfsfile(mntbuf->f_mntonname)) != NULL) mntfromname = fs->fs_spec; else mntfromname = mntbuf->f_mntfromname; rval = mountfs(mntbuf->f_fstypename, mntfromname, mntbuf->f_mntonname, init_flags, options, 0); break; } if ((fs = getfsfile(*argv)) == NULL && (fs = getfsspec(*argv)) == NULL) errx(1, "%s: unknown special file or file system.", *argv); if (BADTYPE(fs->fs_type)) errx(1, "%s has unknown file system type.", *argv); rval = mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops); break; case 2: /* * If -t flag has not been specified, and spec contains either * a ':' or a '@' then assume that an NFS filesystem is being * specified ala Sun. */ if (vfslist == NULL && strpbrk(argv[0], ":@") != NULL) vfstype = "nfs"; rval = mountfs(vfstype, argv[0], argv[1], init_flags, options, NULL); break; default: usage(); /* NOTREACHED */ } /* * If the mount was successful, and done by root, tell mountd the * good news. Pid checks are probably unnecessary, but don't hurt.
*/ if (rval == 0 && getuid() == 0 && (mountdfp = fopen(_PATH_MOUNTDPID, "r")) != NULL) { if (fscanf(mountdfp, "%d", &pid) == 1 && pid > 0 && kill(pid, SIGHUP) == -1 && errno != ESRCH) err(1, "signal mountd"); (void)fclose(mountdfp); } exit(rval); } int ismounted(fs, mntbuf, mntsize) struct fstab *fs; struct statfs *mntbuf; int mntsize; { int i; if (fs->fs_file[0] == '/' && fs->fs_file[1] == '\0') /* the root file system can always be remounted */ return (0); for (i = mntsize - 1; i >= 0; --i) if (strcmp(fs->fs_file, mntbuf[i].f_mntonname) == 0 && (!isremountable(fs->fs_vfstype) || strcmp(fs->fs_spec, mntbuf[i].f_mntfromname) == 0)) return (1); return (0); } int isremountable(vfsname) const char *vfsname; { const char **cp; for (cp = remountable_fs_names; *cp; cp++) if (strcmp(*cp, vfsname) == 0) return (1); return (0); } int hasopt(mntopts, option) const char *mntopts, *option; { int negative, found; char *opt, *optbuf; if (option[0] == 'n' && option[1] == 'o') { negative = 1; option += 2; } else negative = 0; optbuf = strdup(mntopts); found = 0; for (opt = optbuf; (opt = strtok(opt, ",")) != NULL; opt = NULL) { if (opt[0] == 'n' && opt[1] == 'o') { if (!strcasecmp(opt + 2, option)) found = negative; } else if (!strcasecmp(opt, option)) found = !negative; } free(optbuf); return (found); } int mountfs(vfstype, spec, name, flags, options, mntopts) const char *vfstype, *spec, *name, *options, *mntopts; int flags; { /* List of directories containing mount_xxx subcommands. */ static const char *edirs[] = { _PATH_SBIN, _PATH_USRSBIN, NULL }; const char *argv[100], **edir; struct stat sb; struct statfs sf; pid_t pid; int argc, i, status; char *optbuf, execname[MAXPATHLEN + 1], mntpath[MAXPATHLEN]; #if __GNUC__ (void)&optbuf; (void)&name; #endif if (realpath(name, mntpath) != NULL && stat(mntpath, &sb) == 0) { if (!S_ISDIR(sb.st_mode)) { warnx("%s: Not a directory", mntpath); return (1); } } else { warn("%s", mntpath); return (1); } name = mntpath; if (mntopts == NULL) mntopts = ""; if (options == NULL) { if (*mntopts == '\0') { options = "rw"; } else { options = mntopts; mntopts = ""; } } optbuf = catopt(strdup(mntopts), options); if (strcmp(name, "/") == 0) flags |= MNT_UPDATE; if (flags & MNT_FORCE) optbuf = catopt(optbuf, "force"); if (flags & MNT_RDONLY) optbuf = catopt(optbuf, "ro"); /* * XXX * The mount_mfs (newfs) command uses -o to select the * optimisation mode. We don't pass the default "-o rw" * for that reason. */ if (flags & MNT_UPDATE) optbuf = catopt(optbuf, "update"); argc = 0; argv[argc++] = vfstype; mangle(optbuf, &argc, argv); argv[argc++] = spec; argv[argc++] = name; argv[argc] = NULL; if (debug) { (void)printf("exec: mount_%s", vfstype); for (i = 1; i < argc; i++) (void)printf(" %s", argv[i]); (void)printf("\n"); return (0); } switch (pid = fork()) { case -1: /* Error. */ warn("fork"); free(optbuf); return (1); case 0: /* Child. */ if (strcmp(vfstype, "ufs") == 0) exit(mount_ufs(argc, (char * const *) argv)); /* Go find an executable. */ for (edir = edirs; *edir; edir++) { (void)snprintf(execname, sizeof(execname), "%s/mount_%s", *edir, vfstype); execv(execname, (char * const *)argv); } if (errno == ENOENT) { int len = 0; char *cp; for (edir = edirs; *edir; edir++) len += strlen(*edir) + 2; /* ", " */ if ((cp = malloc(len)) == NULL) { warn(NULL); exit(1); } cp[0] = '\0'; for (edir = edirs; *edir; edir++) { strcat(cp, *edir); if (edir[1] != NULL) strcat(cp, ", "); } warn("exec mount_%s not found in %s", vfstype, cp); } exit(1); /* NOTREACHED */ default: /* Parent. 
*/ free(optbuf); if (waitpid(pid, &status, 0) < 0) { warn("waitpid"); return (1); } if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) return (WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { warnx("%s: %s", name, sys_siglist[WTERMSIG(status)]); return (1); } if (verbose) { if (statfs(name, &sf) < 0) { warn("statfs %s", name); return (1); } if (fstab_style) putfsent(&sf); else prmount(&sf); } break; } return (0); } void prmount(sfp) struct statfs *sfp; { int flags; struct opt *o; struct passwd *pw; int f; (void)printf("%s on %s", sfp->f_mntfromname, sfp->f_mntonname); flags = sfp->f_flags & MNT_VISFLAGMASK; for (f = 0, o = optnames; flags && o->o_opt; o++) if (flags & o->o_opt) { (void)printf("%s%s", !f++ ? " (" : ", ", o->o_name); flags &= ~o->o_opt; } if (sfp->f_owner) { (void)printf("%smounted by ", !f++ ? " (" : ", "); if ((pw = getpwuid(sfp->f_owner)) != NULL) (void)printf("%s", pw->pw_name); else (void)printf("%d", sfp->f_owner); } - (void)printf(f ? ")\n" : "\n"); + (void)printf("%swrites: sync %d async %d)\n", !f++ ? " (" : ", ", + sfp->f_syncwrites, sfp->f_asyncwrites); } struct statfs * getmntpt(name) const char *name; { struct statfs *mntbuf; int i, mntsize; mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) if (strcmp(mntbuf[i].f_mntfromname, name) == 0 || strcmp(mntbuf[i].f_mntonname, name) == 0) return (&mntbuf[i]); return (NULL); } char * catopt(s0, s1) char *s0; const char *s1; { size_t i; char *cp; if (s0 && *s0) { i = strlen(s0) + strlen(s1) + 1 + 1; if ((cp = malloc(i)) == NULL) err(1, NULL); (void)snprintf(cp, i, "%s,%s", s0, s1); } else cp = strdup(s1); if (s0) free(s0); return (cp); } void mangle(options, argcp, argv) char *options; int *argcp; const char **argv; { char *p, *s; int argc; argc = *argcp; for (s = options; (p = strsep(&s, ",")) != NULL;) if (*p != '\0') if (*p == '-') { argv[argc++] = p; p = strchr(p, '='); if (p) { *p = '\0'; argv[argc++] = p+1; } } else if (strcmp(p, "rw") != 0) { argv[argc++] = "-o"; argv[argc++] = p; } *argcp = argc; } void usage() { (void)fprintf(stderr, "usage: mount %s %s\n mount %s\n mount %s\n", "[-dfpruvw] [-o options] [-t ufs | external_type]", "special node", "[-adfpruvw] [-t ufs | external_type]", "[-dfpruvw] special | node"); exit(1); } void putfsent(ent) const struct statfs *ent; { struct fstab *fst; printf("%s\t%s\t%s %s", ent->f_mntfromname, ent->f_mntonname, ent->f_fstypename, (ent->f_flags & MNT_RDONLY) ? "ro" : "rw"); /* XXX should use optnames[] - put shorter names in it. 
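For illustration only (this sketch is not in the original source): such a table could pair each flag with its fstab keyword, e.g. { MNT_SYNCHRONOUS, "sync" }, { MNT_NOEXEC, "noexec" }, { MNT_NOSUID, "nosuid" }, { MNT_NODEV, "nodev" }, { MNT_ASYNC, "async" }, { MNT_NOATIME, "noatime" }, and the chain of if statements below would then collapse into a single loop over that table.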
*/ if (ent->f_flags & MNT_SYNCHRONOUS) printf(",sync"); if (ent->f_flags & MNT_NOEXEC) printf(",noexec"); if (ent->f_flags & MNT_NOSUID) printf(",nosuid"); if (ent->f_flags & MNT_NODEV) printf(",nodev"); if (ent->f_flags & MNT_UNION) printf(",union"); if (ent->f_flags & MNT_ASYNC) printf(",async"); if (ent->f_flags & MNT_NOATIME) printf(",noatime"); if (ent->f_flags & MNT_NOCLUSTERR) printf(",noclusterr"); if (ent->f_flags & MNT_NOCLUSTERW) printf(",noclusterw"); + if (ent->f_flags & MNT_SUIDDIR) + printf(",suiddir"); if ((fst = getfsspec(ent->f_mntfromname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if ((fst = getfsfile(ent->f_mntonname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if (strcmp(ent->f_fstypename, "ufs") == 0) printf("\t1 1\n"); else printf("\t0 0\n"); } Index: head/sbin/tunefs/tunefs.8 =================================================================== --- head/sbin/tunefs/tunefs.8 (revision 34265) +++ head/sbin/tunefs/tunefs.8 (revision 34266) @@ -1,150 +1,151 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by the University of .\" California, Berkeley and its contributors. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)tunefs.8 8.2 (Berkeley) 12/11/93 .\" .Dd December 11, 1993 .Dt TUNEFS 8 .Os BSD 4.2 .Sh NAME .Nm tunefs .Nd tune up an existing file system .Sh SYNOPSIS .Nm tunefs .Op Fl A .Op Fl a Ar maxcontig .Op Fl d Ar rotdelay .Op Fl e Ar maxbpg .Op Fl m Ar minfree .Op Fl p .Bk -words +.Op Fl n Ar soft_dependency_enabling .Op Fl o Ar optimize_preference .Ek .Op Ar special | Ar filesys .Sh DESCRIPTION .Nm Tunefs is designed to change the dynamic parameters of a file system which affect the layout policies. The parameters which are to be changed are indicated by the flags given below: .Bl -tag -width Ds .It Fl A The file system has several backups of the super-block. 
Specifying this option will cause all backups to be modified as well as the primary super-block. This is potentially dangerous - use with caution. .It Fl a Ar maxcontig This specifies the maximum number of contiguous blocks that will be laid out before forcing a rotational delay (see .Fl d below). The default value is one, since most device drivers require an interrupt per disk transfer. Device drivers that can chain several buffers together in a single transfer should set this to the maximum chain length. .It Fl d Ar rotdelay This specifies the expected time (in milliseconds) to service a transfer completion interrupt and initiate a new transfer on the same disk. It is used to decide how much rotational spacing to place between successive blocks in a file. .It Fl e Ar maxbpg This indicates the maximum number of blocks any single file can allocate out of a cylinder group before it is forced to begin allocating blocks from another cylinder group. Typically this value is set to about one quarter of the total blocks in a cylinder group. The intent is to prevent any single file from using up all the blocks in a single cylinder group, thus degrading access times for all files subsequently allocated in that cylinder group. The effect of this limit is to cause big files to do long seeks more frequently than if they were allowed to allocate all the blocks in a cylinder group before seeking elsewhere. For file systems with exclusively large files, this parameter should be set higher. .It Fl m Ar minfree This value specifies the percentage of space held back from normal users; the minimum free space threshold. The default value used is 8%. This value can be set to zero; however, up to a factor of three in throughput will be lost over the performance obtained at a 10% threshold. Settings of 5% and less force space optimization to always be used, which will greatly increase the overhead for file writes. Note that if the value is raised above the current usage level, users will be unable to allocate files until enough files have been deleted to get under the higher threshold. +.It Fl n Ar soft_dependency_enabling +Values of `enable' or `disable' turn the soft updates flag on or +off for the file system. .It Fl o Ar optimize_preference The file system can either try to minimize the time spent allocating blocks, or it can attempt to minimize the space fragmentation on the disk. Optimization for space has much higher overhead for file writes. The kernel normally changes the preference automatically as the percent fragmentation changes on the file system. .It Fl p This option shows a summary of what the current tuneable settings are on the selected file system. More detailed information can be obtained in the .Xr dumpfs 8 manual page. .El .Sh SEE ALSO .Xr fs 5 , .Xr dumpfs 8 , .Xr newfs 8 .Rs .%A M. McKusick .%A W. Joy .%A S. Leffler .%A R. Fabry .%T "A Fast File System for UNIX" .%J "ACM Transactions on Computer Systems 2" .%N 3 .%P pp 181-197 .%D August 1984 .%O "(reprinted in the BSD System Manager's Manual, SMM:5)" .Re .Sh BUGS This program should work on mounted and active file systems. Because the super-block is not kept in the buffer cache, the changes will only take effect if the program is run on dismounted file systems. To change the root file system, the system must be rebooted after the file system is tuned. .\" Take this out and a Unix Demon will dog your steps from now until .\" the time_t's wrap around. .Pp You can tune a file system, but you can't tune a fish. .Sh HISTORY The .Nm command appeared in .Bx 4.2 .
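A minimal sketch of the mechanism, assuming only that FS_DOSOFTDEP is one bit in the superblock's fs_flags word (the value below is a stand-in for illustration; the real definition comes from the ufs/ffs headers this commit relies on): the -n handling reduces to this set/clear/test pattern.

#include <stdio.h>

#define FS_DOSOFTDEP 0x02	/* stand-in value, for illustration only */

static long fs_flags;		/* stands in for sblock.fs_flags */

int
main(void)
{
	fs_flags |= FS_DOSOFTDEP;	/* what `tunefs -n enable' does */
	printf("soft updates: %s\n",
	    (fs_flags & FS_DOSOFTDEP) ? "enabled" : "disabled");
	fs_flags &= ~FS_DOSOFTDEP;	/* what `tunefs -n disable' does */
	printf("soft updates: %s\n",
	    (fs_flags & FS_DOSOFTDEP) ? "enabled" : "disabled");
	return (0);
}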
Index: head/sbin/tunefs/tunefs.c =================================================================== --- head/sbin/tunefs/tunefs.c (revision 34265) +++ head/sbin/tunefs/tunefs.c (revision 34266) @@ -1,311 +1,332 @@ /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static char copyright[] = "@(#) Copyright (c) 1983, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)tunefs.c 8.2 (Berkeley) 4/19/94"; #endif /* not lint */ /* * tunefs: change layout parameters to an existing file system. 
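* * Illustrative usage (an editorial note, not in the original header): with * the -n option added in this revision, `tunefs -n enable /dev/rwd0s1a' * turns soft updates on and `tunefs -n disable /dev/rwd0s1a' turns them * off again; `tunefs -p /dev/rwd0s1a' prints the current settings. The * device name is only an example.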
*/ #include #include #include #include #include #include #include #include #include #include #include /* the optimization warning string template */ #define OPTWARN "should optimize for %s with minfree %s %d%%" union { struct fs sb; char pad[MAXBSIZE]; } sbun; #define sblock sbun.sb int fi; long dev_bsize = 1; void bwrite(daddr_t, char *, int); int bread(daddr_t, char *, int); void getsb(struct fs *, char *); void usage __P((void)); void printfs __P((void)); int main(argc, argv) int argc; char *argv[]; { - char *cp, *special, *name; + char *cp, *special, *name, *action; struct stat st; int i; int Aflag = 0; struct fstab *fs; char *chg[2], device[MAXPATHLEN]; argc--, argv++; if (argc < 2) usage(); special = argv[argc - 1]; fs = getfsfile(special); if (fs) special = fs->fs_spec; again: if (stat(special, &st) < 0) { if (*special != '/') { if (*special == 'r') special++; (void)sprintf(device, "%s/%s", _PATH_DEV, special); special = device; goto again; } err(1, "%s", special); } if ((st.st_mode & S_IFMT) != S_IFBLK && (st.st_mode & S_IFMT) != S_IFCHR) errx(10, "%s: not a block or character device", special); getsb(&sblock, special); for (; argc > 0 && argv[0][0] == '-'; argc--, argv++) { for (cp = &argv[0][1]; *cp; cp++) switch (*cp) { case 'A': Aflag++; continue; case 'p': printfs(); exit(0); case 'a': name = "maximum contiguous block count"; if (argc < 1) errx(10, "-a: missing %s", name); argc--, argv++; i = atoi(*argv); if (i < 1) errx(10, "%s must be >= 1 (was %s)", name, *argv); warnx("%s changes from %d to %d", name, sblock.fs_maxcontig, i); sblock.fs_maxcontig = i; continue; case 'd': name = "rotational delay between contiguous blocks"; if (argc < 1) errx(10, "-d: missing %s", name); argc--, argv++; i = atoi(*argv); warnx("%s changes from %dms to %dms", name, sblock.fs_rotdelay, i); sblock.fs_rotdelay = i; continue; case 'e': name = "maximum blocks per file in a cylinder group"; if (argc < 1) errx(10, "-e: missing %s", name); argc--, argv++; i = atoi(*argv); if (i < 1) errx(10, "%s must be >= 1 (was %s)", name, *argv); warnx("%s changes from %d to %d", name, sblock.fs_maxbpg, i); sblock.fs_maxbpg = i; continue; case 'm': name = "minimum percentage of free space"; if (argc < 1) errx(10, "-m: missing %s", name); argc--, argv++; i = atoi(*argv); if (i < 0 || i > 99) errx(10, "bad %s (%s)", name, *argv); warnx("%s changes from %d%% to %d%%", name, sblock.fs_minfree, i); sblock.fs_minfree = i; if (i >= MINFREE && sblock.fs_optim == FS_OPTSPACE) warnx(OPTWARN, "time", ">=", MINFREE); if (i < MINFREE && sblock.fs_optim == FS_OPTTIME) warnx(OPTWARN, "space", "<", MINFREE); continue; + case 'n': + name = "soft updates"; + if (argc < 1) + errx(10, "-n: missing %s", name); + argc--, argv++; + if (strcmp(*argv, "enable") == 0) { + sblock.fs_flags |= FS_DOSOFTDEP; + action = "set"; + } else if (strcmp(*argv, "disable") == 0) { + sblock.fs_flags &= ~FS_DOSOFTDEP; + action = "cleared"; + } else { + errx(10, "bad %s (options are %s)", + name, "`enable' or `disable'"); + } + warnx("%s %s", name, action); + continue; + case 'o': name = "optimization preference"; if (argc < 1) errx(10, "-o: missing %s", name); argc--, argv++; chg[FS_OPTSPACE] = "space"; chg[FS_OPTTIME] = "time"; if (strcmp(*argv, chg[FS_OPTSPACE]) == 0) i = FS_OPTSPACE; else if (strcmp(*argv, chg[FS_OPTTIME]) == 0) i = FS_OPTTIME; else errx(10, "bad %s (options are `space' or `time')", name); if (sblock.fs_optim == i) { warnx("%s remains unchanged as %s", name, chg[i]); continue; } warnx("%s changes from %s to %s", name,
chg[sblock.fs_optim], chg[i]); sblock.fs_optim = i; if (sblock.fs_minfree >= MINFREE && i == FS_OPTSPACE) warnx(OPTWARN, "time", ">=", MINFREE); if (sblock.fs_minfree < MINFREE && i == FS_OPTTIME) warnx(OPTWARN, "space", "<", MINFREE); continue; default: usage(); } } if (argc != 1) usage(); bwrite((daddr_t)SBOFF / dev_bsize, (char *)&sblock, SBSIZE); if (Aflag) for (i = 0; i < sblock.fs_ncg; i++) bwrite(fsbtodb(&sblock, cgsblock(&sblock, i)), (char *)&sblock, SBSIZE); close(fi); exit(0); } void usage() { fprintf(stderr, "usage: tunefs tuneup-options special-device\n"); fprintf(stderr, "where tuneup-options are:\n"); fprintf(stderr, "\t-a maximum contiguous blocks\n"); fprintf(stderr, "\t-d rotational delay between contiguous blocks\n"); fprintf(stderr, "\t-e maximum blocks per file in a cylinder group\n"); fprintf(stderr, "\t-m minimum percentage of free space\n"); + fprintf(stderr, "\t-n soft updates (`enable' or `disable')\n"); fprintf(stderr, "\t-o optimization preference (`space' or `time')\n"); fprintf(stderr, "\t-p no change - just prints current tuneable settings\n"); exit(2); } void getsb(fs, file) register struct fs *fs; char *file; { fi = open(file, 2); if (fi < 0) err(3, "cannot open %s", file); if (bread((daddr_t)SBOFF, (char *)fs, SBSIZE)) err(4, "%s: bad super block", file); if (fs->fs_magic != FS_MAGIC) err(5, "%s: bad magic number", file); dev_bsize = fs->fs_fsize / fsbtodb(fs, 1); } void printfs() { + warnx("soft updates: (-n) %s", + (sblock.fs_flags & FS_DOSOFTDEP)? "enabled" : "disabled"); warnx("maximum contiguous block count: (-a) %d", sblock.fs_maxcontig); warnx("rotational delay between contiguous blocks: (-d) %d ms", sblock.fs_rotdelay); warnx("maximum blocks per file in a cylinder group: (-e) %d", sblock.fs_maxbpg); warnx("minimum percentage of free space: (-m) %d%%", sblock.fs_minfree); warnx("optimization preference: (-o) %s", sblock.fs_optim == FS_OPTSPACE ? "space" : "time"); if (sblock.fs_minfree >= MINFREE && sblock.fs_optim == FS_OPTSPACE) warnx(OPTWARN, "time", ">=", MINFREE); if (sblock.fs_minfree < MINFREE && sblock.fs_optim == FS_OPTTIME) warnx(OPTWARN, "space", "<", MINFREE); } void bwrite(blk, buf, size) daddr_t blk; char *buf; int size; { if (lseek(fi, (off_t)blk * dev_bsize, SEEK_SET) < 0) err(6, "FS SEEK"); if (write(fi, buf, size) != size) err(7, "FS WRITE"); } int bread(bno, buf, cnt) daddr_t bno; char *buf; int cnt; { int i; if (lseek(fi, (off_t)bno * dev_bsize, SEEK_SET) < 0) return(1); if ((i = read(fi, buf, cnt)) != cnt) { for (i = 0; i < cnt; i++) buf[i] = 0; return (1); } return (0); } Index: head/sys/i386/conf/LINT =================================================================== --- head/sys/i386/conf/LINT (revision 34265) +++ head/sys/i386/conf/LINT (revision 34266) # This allows you to actually store this configuration file into # the kernel binary itself, where it may be later read by saying: # strings -n 3 /kernel | sed -n 's/^___//p' > MYKERNEL # options INCLUDE_CONFIG_FILE # Include this file in kernel # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible. Specifying the # dump device here is not recommended. Use dumpon(8). # config kernel root on wd0 dumps on wd0 ##################################################################### # SMP OPTIONS: # # SMP enables building of a Symmetric MultiProcessor Kernel. # APIC_IO enables the use of the IO APIC for Symmetric I/O. # NCPU sets the number of CPUs, defaults to 2. # NBUS sets the number of busses, defaults to 4. # NAPIC sets the number of IO APICs on the motherboard, defaults to 1. # NINTR sets the total number of INTs provided by the motherboard. # # Notes: # # An SMP kernel will ONLY run on an Intel MP spec. qualified motherboard. # # Be sure to disable 'cpu "I386_CPU"' && 'cpu "I486_CPU"' for SMP kernels.
# # Check the 'Rogue SMP hardware' section to see if additional options # are required by your hardware. # # Mandatory: options SMP # Symmetric MultiProcessor Kernel options APIC_IO # Symmetric (APIC) I/O # Optional, these are the defaults plus 1: options NCPU=5 # number of CPUs options NBUS=5 # number of busses options NAPIC=2 # number of IO APICs options NINTR=25 # number of INTs # # Rogue SMP hardware: # # Bridged PCI cards: # # The MP tables of most of the current generation MP motherboards # do NOT properly support bridged PCI cards. To use one of these # cards you should refer to ??? ##################################################################### # CPU OPTIONS # # You must specify at least one CPU (the one you intend to run on); # deleting the specification for CPUs you don't need to use may make # parts of the system run faster. This is especially true when removing # I386_CPU. # cpu "I386_CPU" cpu "I486_CPU" cpu "I586_CPU" # aka Pentium(tm) cpu "I686_CPU" # aka Pentium Pro(tm) # # Options for CPU features. # # CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM # BlueLightning CPU. It works only with Cyrix FPU, and this option # should not be used with Intel FPU. # # CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning # CPU if CPU supports it. The default is double-clock mode on # BlueLightning CPU box. # # CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1). # # CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct # mapped mode. Default is 2-way set associative mode. # # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space # of Cyrix 6x86 and 6x86MX CPUs. If this option is not set and # FAILESAFE is defined, NO_LOCK bit of CCR1 is cleared. (NOTE 3) # # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables # reorder). This option should not be used if you use memory mapped # I/O device(s). # # CPU_FASTER_5X86_FPU enables a faster FPU exception handler. # # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products # for i386 machines. # # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7, respectively # (no clock delay). # # CPU_LOOP_EN prevents flushing the prefetch buffer if the destination # of a jump is already present in the prefetch buffer on Cyrix 5x86 (NOTE # 1). # # CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1). # # CPU_SUSP_HLT enables suspend on HALT. If this option is set, the CPU # enters suspend mode following execution of the HALT instruction. # # CPU_WT_ALLOC enables write-through allocation. # # CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache # flush at hold state. # # CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs # without cache flush at hold state, and (2) write-back CPU cache on # Cyrix 6x86 whose revision < 2.7 (NOTE 2). # # NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY # Pentiums) from locking up when a LOCK CMPXCHG8B instruction is # executed. This should be included for ALL kernels that won't run # on a Pentium. # # NOTE 1: The options CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, # and CPU_RSTK_EN should not be used because of CPU bugs. # These options may crash your system. # # NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled # in write-through mode when revision < 2.7. If revision of Cyrix # 6x86 >= 2.7, CPU cache is always enabled in write-back mode.
# # NOTE 3: This option may cause failures for software that requires # locked cycles in order to operate correctly. # options "CPU_BLUELIGHTNING_FPU_OP_CACHE" options "CPU_BLUELIGHTNING_3X" options "CPU_BTB_EN" options "CPU_DIRECT_MAPPED_CACHE" options "CPU_DISABLE_5X86_LSSER" options "CPU_FASTER_5X86_FPU" options "CPU_I486_ON_386" options "CPU_IORT" options "CPU_LOOP_EN" options "CPU_RSTK_EN" options "CPU_SUSP_HLT" options "CYRIX_CACHE_WORKS" options "CYRIX_CACHE_REALLY_WORKS" #options "NO_F00F_HACK" # # A math emulator is mandatory if you wish to run on hardware which # does not have a floating-point processor. Pick either the original, # bogus (but freely-distributable) math emulator, or a much more # fully-featured but GPL-licensed emulator taken from Linux. # options MATH_EMULATE #Support for x87 emulation # Don't enable both of these in a real config. options GPL_MATH_EMULATE #Support for x87 emulation via #new math emulator ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. # options "COMPAT_43" # # Allow user-mode programs to manipulate their local descriptor tables. # This option is required for the WINE Windows(tm) emulator, and is # not used by anything else (that we know of). # options USER_LDT #allow user-level control of i386 ldt # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG # # This option includes an MD5 routine in the kernel; this is used for # various authentication and privacy uses. # options "MD5" # # Allow processes to switch to vm86 mode, as well as enabling direct # user-mode access to the I/O port space. This option is necessary for # the doscmd emulator to run. # options "VM86" ##################################################################### # DEBUGGING OPTIONS # # Enable the kernel debugger. # options DDB # # Don't drop into DDB for a panic. Intended for unattended operation # where you may want to drop to DDB from the console, but still want # the machine to recover from a panic. # options DDB_UNATTENDED # # If using GDB remote mode to debug the kernel, there's a non-standard # extension to the remote protocol that can be used to use the serial # port as both the debugging port and the system console. It's non- # standard and you're on your own if you enable it. See also the # "remotechat" variables in the FreeBSD specific version of gdb. # options GDB_REMOTE_CHAT # # KTRACE enables the system-call tracing facility ktrace(2). # options KTRACE #kernel tracing # # The DIAGNOSTIC option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options DIAGNOSTIC # # PERFMON causes the driver for Pentium/Pentium Pro performance counters # to be compiled. See perfmon(4) for more information. # options PERFMON # # This option lets some drivers co-exist that can't co-exist in a running # system. It is used to compile all kernel code in one go for # quality assurance purposes (like this file, which the option takes its name # from.)
# options COMPILING_LINT # XXX - this doesn't belong here. # Allow ordinary users to take the console - this is useful for X. options UCONSOLE # XXX - this doesn't belong here either options USERCONFIG #boot -c editor options USERCONFIG_BOOT #imply -c and parse info area options VISUAL_USERCONFIG #visual boot -c editor ##################################################################### # NETWORKING OPTIONS # # Protocol families: # Only the INET (Internet) family is officially supported in FreeBSD. # Source code for the NS (Xerox Network Service) is provided for amusement # value. # options INET #Internet communications protocols options IPX #IPX/SPX communications protocols options IPXIP #IPX in IP encapsulation (not available) options IPTUNNEL #IP in IPX encapsulation (not available) options NETATALK #Appletalk communications protocols # These are currently broken but are shipped due to interest. #options NS #Xerox NS protocols # These are currently broken and are no longer shipped due to lack # of interest. #options CCITT #X.25 network layer #options ISO #options TPIP #ISO TP class 4 over IP #options TPCONS #ISO TP class 0 over X.25 #options LLC #X.25 link layer for Ethernets #options HDLC #X.25 link layer for serial lines #options EON #ISO CLNP over IP #options NSIP #XNS over IP # # Network interfaces: # The `loop' pseudo-device is MANDATORY when networking is enabled. # The `ether' pseudo-device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured. # The 'fddi' pseudo-device provides generic code to support FDDI. # The `sppp' pseudo-device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). # The `sl' pseudo-device implements the Serial Line IP (SLIP) service. # The `ppp' pseudo-device implements the Point-to-Point Protocol. # The `bpfilter' pseudo-device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. The number of devices determines the maximum number of # simultaneous BPF client programs runnable. # The `disc' pseudo-device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing purposes. # The `tun' pseudo-device implements the User Process PPP (iijppp). # # The PPP_BSDCOMP option enables support for compress(1) style entire # packet compression; PPP_DEFLATE is for zlib/gzip style compression. # PPP_FILTER enables code for filtering the ppp data stream and selecting # events for resetting the demand dial activity timer - requires bpfilter. # See pppd(8) for more details. # pseudo-device ether #Generic Ethernet pseudo-device fddi #Generic FDDI pseudo-device sppp #Generic Synchronous PPP pseudo-device loop #Network loopback device pseudo-device bpfilter 4 #Berkeley packet filter pseudo-device disc #Discard device pseudo-device tun 1 #Tunnel driver (user process ppp(8)) pseudo-device sl 2 #Serial Line IP pseudo-device ppp 2 #Point-to-point protocol options PPP_BSDCOMP #PPP BSD-compress support options PPP_DEFLATE #PPP zlib/deflate/gzip support options PPP_FILTER #enable bpf filtering (needs bpfilter) # # Internet family options: # # TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in # 4.2BSD. This option should not be used unless you have a 4.2BSD # machine and TCP connections fail. # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted(8).
# # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE sends # logged packets to the system logger. IPFIREWALL_VERBOSE_LIMIT # limits the number of times a matching entry can be logged. # # WARNING: IPFIREWALL defaults to a policy of "deny ip from any to any" # and if you do not add other rules during startup to allow access, # YOU WILL LOCK YOURSELF OUT. It is suggested that you set firewall=open # in /etc/rc.conf when first enabling this feature, then refine the # firewall rules in /etc/rc.firewall after you've tested that the new kernel # feature works properly. # # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to # allow everything. Use with care: if a cracker can crash your # firewall machine, they can get to your protected machines. However, # if you are using it as an as-needed filter for specific problems as # they arise, then this may be for you. Changing the default to 'allow' # means that you won't get stuck if the kernel and /sbin/ipfw binary get # out of sync. # # IPDIVERT enables the divert IP sockets, used by ``ipfw divert'' # # TCPDEBUG is undocumented. # options "TCP_COMPAT_42" #emulate 4.2BSD TCP bugs options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #print information about # dropped packets options "IPFIREWALL_VERBOSE_LIMIT=100" #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPDIVERT #divert sockets options TCPDEBUG ##################################################################### # FILESYSTEM OPTIONS # # Only the root, /usr, and /tmp filesystems need be statically # compiled; everything else will be automatically loaded at mount # time. (Exception: the UFS family --- FFS and MFS --- cannot # currently be demand-loaded.) Some people still prefer to statically # compile other filesystems as well. # # NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be # buggy, and WILL panic your system if you attempt to do anything with # them. They are included here as an incentive for some enterprising # soul to sit down and fix them. # # One of these is mandatory: options FFS #Fast filesystem options NFS #Network File System # The rest are optional: # options NFS_NOSERVER #Disable the NFS-server code. options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem options "CD9660_ROOT" #CD-ROM usable as root device options FFS_ROOT #FFS usable as root device options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem + +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) # Make space in the kernel for an MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices.
options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas # Add more checking code to various filesystems #options NULLFS_DIAGNOSTIC #options KERNFS_DIAGNOSTIC #options UMAPFS_DIAGNOSTIC #options UNION_DIAGNOSTIC # In particular, multi-session CD-Rs might require a huge amount of # time in order to "settle". If we are about to mount them as the # root f/s, we gotta wait a little. # # The number is supposed to be in seconds. options "CD9660_ROOTDELAY=20" # If you are running a machine just as a fileserver for PC and MAC users # (using SAMBA or Netatalk), then you may consider setting this option # and keeping all those users' directories on a partition that is mounted # with the suiddir option. This gives new files the same ownership as # the directory (similar to group). It's a security hole if you let # these users run programs, so confine it to file-servers (but it'll save you # lots of headaches in that case). Root-owned directories are exempt and X bits # are cleared. The suid bit must be set on the directory as well; see chmod(1). # PC owners can't see/set ownerships so they keep getting their toes # trodden on. This saves you all the support calls as the filesystem # it's used on will act as they expect. ("It's my dir so it must be my file"). # options SUIDDIR # Add some error checking code to the null_bypass routine # in the NULL filesystem #options SAFETY ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit. In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3" then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured.
controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? # SCSI processor type device sctarg0 at scbus? # SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY # Options for the `od' optical disk driver: # # If the drive returns a sense key of 0x02 with a vendor-specific additional # sense code (ASC) and additional sense code qualifier (ASCQ), or an # illegal ASC and ASCQ, this causes an error (NOT READY) and a retry. # To suppress this, use the following option. # options OD_BOGUS_NOT_READY # # For an automatic spindown, try this. Again, preferably as an # option in your config file. # WARNING! Use at your own risk. Joerg's ancient SONY SMO drive # groks it fine, while Shunsuke's Fujitsu chokes on it and times # out. # options OD_AUTO_TURNOFF ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # The `pty' device usually turns out to be ``effectively mandatory'', # as it is required for `telnetd', `rlogind', `screen', `emacs', and # `xterm', among others. pseudo-device pty 16 #Pseudo ttys - can go as high as 256 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. pseudo-device ccd 4 #Concatenated disk driver # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code. pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # EISA support is available for some devices, so they can be auto-probed. # Micro Channel is not supported at all. # # Mandatory ISA devices: isa, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM.
If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards; use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # # PAS_JOYSTICK_ENABLE enables the gameport on the ProAudio Spectrum options "AUTO_EOI_1" #options "AUTO_EOI_2" options BOUNCE_BUFFERS options "MAXMEM=(128*1024)" options "TUNE_1542" #options BROKEN_KEYBOARD_RESET #options PAS_JOYSTICK_ENABLE # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ftp://ftp.udel.edu/pub/ntp/kernel.tar.Z options PPS_SYNC # Enable PnP support in the kernel. This allows you to automatically # attach to PnP cards for drivers that support it and allows you to # configure cards from USERCONFIG. See pnp(4) for more info. controller pnp0 # The pcvt console driver (vt220 compatible). device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options XSERVER # support for running an X server. options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible). device sc0 at isa? port "IO_KBD" tty irq 1 vector scintr options MAXCONS=16 # number of virtual consoles options SLOW_VGA # do byte-wide i/o's to TS and GDC regs options "STD8X16FONT" # Compile font in makeoptions "STD8X16FONT"="cp850" options SC_HISTORY_SIZE=200 # number of history buffer lines # # `flags' for sc0: # 0x01 Use a 'visual' bell # 0x02 Use a 'blink' cursor # 0x04 Use an 'underline' cursor # 0x06 Use a 'blinking underline' (destructive) cursor # 0x08 Force detection of keyboard, else we always assume a keyboard # 0x10 Old-style (XT) keyboard support, useful for older ThinkPads # 0x20 Don't reset keyboard, useful for some newer ThinkPads # # The Numeric Processing eXtension driver. This should be configured if # your machine has a math co-processor, unless the coprocessor is very # buggy. If it is not configured then you *must* configure math emulation # (see above). If both npx0 and emulation are configured, then only npx0 # is used (provided it works). device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr # # `flags' for npx0: # 0x01 don't use the npx registers to optimize bcopy # 0x02 don't use the npx registers to optimize bzero # 0x04 don't use the npx registers to optimize copyin or copyout. # The npx registers are normally used to optimize copying and zeroing when # all of the following conditions are satisfied: # "I586_CPU" is an option # the cpu is an i586 (perhaps not a Pentium) # the probe for npx0 succeeds # INT 16 exception handling works. # Then copying and zeroing using the npx registers is normally 30-100% faster. # The flags can be used to control cases where it doesn't work or is slower. # Setting them at boot time using userconfig works right (the optimizations # are not used until later in the bootstrap when npx0 is attached). # # # `iosiz' for npx0: # This can be used instead of the MAXMEM option to set the memory size. If # it is nonzero, then it overrides both the MAXMEM option and the memory # size reported by the BIOS.
Setting it at boot time using userconfig takes # effect on the next reboot after the change has been recorded in the kernel # binary (the size is used early in the boot before userconfig has a chance # to change it). # # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. The flags may be used in either the controller # definition or in the individual disk definitions. The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. # The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. Bit 14 (0x4000) enables a hack to wake # up powered-down laptop drives. Bit 13 (0x2000) allows # probing for PCI IDE DMA controllers, such as Intel's PIIX # south bridges. See the wd.4 man page. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # If you are using a PCI controller that is not running in compatibility # mode (for example, it is a 2nd IDE PCI interface), then use config line(s) # such as: # #controller wdc2 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd4 at wdc2 drive 0 #disk wd5 at wdc2 drive 1 # #controller wdc3 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd6 at wdc3 drive 0 #disk wd7 at wdc3 drive 1 # # Note that the above config would be useful for a Promise card, when used # on an MB that already has a PIIX controller. Note the bogus irq and port # entries. These are automatically filled in by the IDE/PCI support. # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa?
port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # CMD640 enables serializing access to primary and secondary channel # of the CMD640B IDE Chip. The serializing will only take place # if this option is set *and* the chip is probed by the pci-system. # options "CMD640" #Enable work around for CMD640 h/w bug # # ATAPI enables the support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus options ATAPI_STATIC #Don't do it as an LKM # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # IDE floppy driver - requires wdc controller and ATAPI option device wfd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # gotta turn it actually on by setting the variable fd_debug with DDB, # however. options FDC_DEBUG # This option is undocumented on purpose. options FDC_PRINT_BOGUS_CHIPTYPE # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc. # # lpt: printer port # lpt specials: # port can be specified as ?, this will cause the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted, this # will force the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_HOOKAPM #hook the APM resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event device sio0 at isa? port "IO_COM1" tty flags 0x10 irq 4 vector siointr # # `flags' for serial drivers that support consoles (only for sio now): # 0x10 enable console support for this unit. The other console flags # are ignored unless this is set. Enabling console support does # not make the unit the preferred console - boot with -h or set # the 0x20 flag for that. Currently, at most one unit can have # console support; the first one (in config file order) with # this flag set is preferred. Setting this flag for sio0 gives # the old behaviour. # 0x20 force this unit to be the console (unless there is another # higher priority console). This replaces the COMCONSOLE option. # 0x40 reserve this unit for low level console operations. Do not # # PnP `flags' (set via userconfig using pnp x flags y) # 0x1 disable probing of this device. Used to prevent your modem # from being attached as a PnP modem. # # Options for serial drivers that support consoles (only for sio now): options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. 
options CONSPEED=9600 #default speed for serial console (default 9600) # Options for sio: options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options "EXTRA_SIO=2" #number of extra sio ports to allocate # Other flags for sio that aren't documented in the man page. # 0x20000 enable hardware RTS/CTS and larger FIFOs. Only works for # ST16650A-compatible UARTs. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) # ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; Intel EtherExpress # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp) # wl: Lucent Wavelan (ISA card only). # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa? port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device ex0 at isa? port? net irq? vector exintr device fe0 at isa? port 0x300 net irq ? vector feintr device ie0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector ieintr device ie1 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device sr0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector srintr options WLCACHE # enables the signal-strength cache options WLDEBUG # enables verbose debugging output device wl0 at isa? port 0x300 net irq ? vector wlintr # We can (bogusly) include both the dedicated PCCARD drivers and the generic # support when COMPILING_LINT. device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ATM related options # # The `en' device provides support for Efficient Networks (ENI) # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0). # # atm pseudo-device provides generic atm functions and is required for # atm devices. # NATM enables the netnatm protocol family that can be used to # bypass TCP/IP. # # the current driver supports only PVC operations (no atm-arp, no multicast). 
# for more details, please read the original documents at # http://www.ccrc.wustl.edu/pub/chuck/bsdatm/wucs.html # pseudo-device atm device en0 device en1 options NATM #native ATM # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # css: Crystal Sound System (CSS 423x PnP) # sscape: Ensoniq Soundscape MIDI interface # sscape_mss: Ensoniq Soundscape PCM (requires sscape) # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pcm: PCM audio through various sound cards. # # This is work in progress from Luigi Rizzo. It has support for # CS423x based cards, OPTi931, SB16 PnP, GusPnP. For more information # about this driver, take a look at sys/i386/isa/snd/README. # # The flags of the device tell the driver a bit more info about the # device that normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if you don't know what to put in (and you don't, # since this is unsupported at the moment...). # # This driver will use the new PnP code if it's available. # # pca: PCM audio through your PC speaker # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card, the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''. # # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all "VOXWARE" driver sound devices. See Luigi's driver # below for an alternate which may work better for some cards. # controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 5 drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device awe0 at isa? port 0x620 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? port 0x530 irq 10 drq 1 vector adintr device css0 at isa? port 0x534 irq 5 drq 1 flags 0x08 vector adintr device sscape0 at isa? port 0x330 irq 9 drq 0 vector sscapeintr device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr device sscape_mss0 at isa? port 0x534 irq 5 drq 1 vector sndintr device opl0 at isa? port 0x388 device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # Luigi's snd code (use INSTEAD of snd0 and all VOXWARE drivers!). # You may also wish to enable the pnp controller with this, for pnp # sound cards. # #device pcm0 at isa? port ?
tty irq 10 drq 1 flags 0x0 vector pcmintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # alog: Industrial Computer Source AIO8-P driver # bktr: Bt848 capture boards (http://www.freebsd.org/~fsmp/HomeAuto/Bt848.html) # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # rp: Comtrol Rocketport(ISA) - single card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based) # stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent) # # Notes on APM # The flags takes the following meaning for apm0: # 0x0020 Statclock is broken. # 0x0011 Limit APM protocol to 1.1 or 1.0 # 0x0010 Limit APM protocol to 1.0 # # # Notes on the spigot: # The video spigot is at 0xad6. This port address can not be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users to be able # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. # options SPIGOT_UNSECURE # # Notes on the Comtrol Rocketport driver: # # The exact values used for rp0 depend on how many boards you have # in the system. The manufacturer's sample configs are listed as: # # Comtrol Rocketport ISA single card # device rp0 at isa? port 0x280 tty # # If instead you have two ISA cards, one installed at 0x100 and the # second installed at 0x180, then you should add the following to # your kernel configuration file: # # device rp0 at isa? port 0x100 tty # device rp1 at isa? port 0x180 tty # # For 4 ISA cards, it might be something like this: # # device rp0 at isa? port 0x180 tty # device rp1 at isa? port 0x100 tty # device rp2 at isa? port 0x340 tty # device rp3 at isa? port 0x240 tty # # And for PCI cards, you only need say: # # device rp0 # device rp1 # ... # Note: Make sure that any Rocketport PCI devices are specified BEFORE the # ISA Rocketport devices. # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. # Notes on the Stallion stl and stli drivers: # See src/i386/isa/README.stl for complete instructions. # This is version 0.0.5alpha, unsupported by Stallion. # The stl driver has a secondary IO port hard coded at 0x280. You need # to change src/i386/isa/stallion.c if you reconfigure this on the boards. 
# The "flags" and "iosiz" settings on the stli driver depend on the board: # EasyConnection 8/64 ISA: flags 23 iosiz 0x1000 # EasyConnection 8/64 EISA: flags 24 iosiz 0x10000 # EasyConnection 8/64 MCA: flags 25 iosiz 0x1000 # ONboard ISA: flags 4 iosiz 0x10000 # ONboard EISA: flags 7 iosiz 0x10000 # ONboard MCA: flags 3 iosiz 0x10000 # Brumby: flags 2 iosiz 0x4000 # Stallion: flags 1 iosiz 0x10000 device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device alog0 at isa? port 0x260 tty irq 5 vector alogintr device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr device rp0 at isa? port 0x280 tty # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 device stl0 at isa? port 0x2a0 tty irq 10 vector stlintr device stli0 at isa? port 0x2a0 tty iomem 0xcc000 flags 23 iosiz 0x1000 device loran0 at isa? port ? tty irq 5 vector loranintr # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card responds to EISA probes. # # fea: DEC DEFEA EISA FDDI adapter # controller eisa0 controller ahb0 controller ahc0 device fea0 # enable tagged command queuing, which is a major performance win on # devices that support it (and controllers with enough SCB's) options AHC_TAGENABLE # enable SCB paging - See the ahc.4 man page options AHC_SCBPAGING_ENABLE # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # By default, only 10 EISA slots are probed, since the slot numbers # above clash with the configuration address space of the PCI subsystem, # and the EISA probe is not very smart about this. This is sufficient # for most machines, but in particular the HP NetServer LC series comes # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, # thus you need to bump this figure to 12 for them. options "EISA_SLOTS=12" # # PCI devices: # # The main PCI bus device is `pci'. It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. 
# # The `amd' device provides support for the Tekram DC-390 and 390T # SCSI host adapters, but is expected to work with any AMD 53c974 # PCI SCSI chip and the AMD Ethernet+SCSI Combo chip, after some # local patches were applied to the sources (that had originally # been written by Tekram and limited to work with their SCSI cards). # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `fxp' device provides support for the Intel EtherExpress Pro/100B # PCI Fast Ethernet adapters. # # The `tx' device provides support for the SMC 9432TX cards. # # The `vx' device provides support for the 3Com 3C590 and 3C595 # cards (early support). # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # (e.g. a single 640x480 frame at 2 bytes/pixel with 4k pages needs # (640*480*2*1+4095)/4096 = 150 pages) # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount no action is # taken # options METEOR_SYSTEM_DEFAULT={METEOR_PAL|METEOR_NTSC|METEOR_SECAM}, used # for initialization of fps routine when a signal is not present. # # The 'bktr' device is a PCI video capture board. It also has a TV tuner # on board. # controller pci0 controller ahc1 controller ncr0 controller amd0 device de0 device fxp0 device tx0 device vx0 device fpa0 device meteor0 device bktr0 # # PCCARD/PCMCIA # # card: slot controller # pcic: slots controller card0 controller pcic0 at card? controller pcic1 at card? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # above. # For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port; devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'sd'), best # performance is achieved with ports in EPP 1.9 mode. # nlpt Parallel Printer # ppi General-purpose I/O ("Geek Port") # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # controller ppbus0 controller vpo0 at ppbus? device nlpt0 at ppbus? device ppi0 at ppbus? device pps0 at ppbus? controller ppc0 at isa? disable port ? irq 7 vector ppcintr # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options "BOOTP_NFSV3" # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. # # An obsolete option to test kern_opt.c. # options GATEWAY # If you want to disable loadable kernel modules (LKM), you # might want to use this option. #options NO_LKM # # Add tie-ins for a hardware watchdog. This only enables the hooks; # the user must still supply the actual driver. # options HW_WDOG # More undocumented options for linting.
options CLK_CALIBRATION_LOOP options "CLK_USE_I8254_CALIBRATION" options CLK_USE_TSC_CALIBRATION options CLUSTERDEBUG options COMPAT_LINUX options CPU_UPGRADE_HW_CACHE options DEBUG options "DEBUG_1284" options DEVFS_ROOT #options DISABLE_PSE options "EXT2FS" options "I586_PMC_GUPROF=0x70000" options "IBCS2" # broken: #options IPFILTER options KEY options KEY_DEBUG options LOCKF_DEBUG options LOUTB options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options KBDIO_DEBUG=2 options MSGMNB=2049 options MSGMNI=41 options MSGSEG=2049 options MSGSSZ=16 options MSGTQL=41 options NBUF=512 options NETATALKDEBUG options NMBCLUSTERS=1024 options NPX_DEBUG options NULLFS_DIAGNOSTIC options PANIC_REBOOT_WAIT_TIME=16 options "PCVT_24LINESDEF" options PCVT_CTRL_ALT_DEL options PCVT_EMU_MOUSE options PCVT_FREEBSD=211 options PCVT_META_ESC options PCVT_NSCREENS=9 options PCVT_PRETTYSCRNS options PCVT_SCANSET=2 options PCVT_SCREENSAVER options PCVT_USEKBDSEC options "PCVT_VT220KEYB" options PSM_DEBUG=1 options "SCSI_2_DEF" options SCSI_DELAY=8 # Be pessimistic about Joe SCSI device options SCSI_NCR_DEBUG options SCSI_NCR_DFLT_TAGS=4 options SCSI_NCR_MAX_SYNC=10000 options SCSI_NCR_MAX_WIDE=1 options SCSI_NCR_MYADDR=7 options SEMMAP=31 options SEMMNI=11 options SEMMNS=61 options SEMMNU=31 options SEMMSL=61 options SEMOPM=101 options SEMUME=11 options SHOW_BUSYBUFS # List buffers that prevent root unmount options SHMALL=1025 options "SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)" options SHMMAXPGS=1025 options SHMMIN=2 options SHMMNI=33 options SHMSEG=9 options SI_DEBUG options SIMPLELOCK_DEBUG options SPX_HACK # The 'dpt' driver provides hardware RAID-{0,1,5} support, multi-initiator I/O # See sys/dev/dpt for debugging and other subtle options. # DPT_VERIFY_HINTR Performs some strict hardware interrupt testing. # Only use if you suspect PCI bus corruption problems # DPT_RESTRICTED_FREELIST Normally, the freelist used by the DPT for queueing # will grow to accommodate increased use. This growth # will NOT shrink. To restrict the number of queue # slots to exactly what the DPT can hold at one time, # enable this option. # DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various # instruments are enabled. Assumed to be enabled by # /usr/sbin/dpt_* tools. # DPT_FREELIST_IS_STACK For optimal L{1,2} CPU cache utilization, enable # this option. Otherwise, the transaction queue is # a FIFO. I cannot measure the performance gain; # a sketch contrasting the two disciplines follows # this block. # DPT_HANDLE_TIMEOUTS Normally device timeouts are handled by the DPT. # If you want the driver to handle timeouts, enable # this option. If your system is very busy, this # option will create more trouble than it solves. # DPT_TIMEOUT_FACTOR Used to compute the excessive amount of time to # wait when timing out with the above option. # DPT_DEBUG_xxxx These are controllable from sys/dev/dpt/dpt.h # DPT_LOST_IRQ When enabled, will try, once per second, to catch # any interrupt that got lost. Seems to help in some # DPT-firmware/Motherboard combinations. Minimal # cost, great benefit.
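The stack-versus-queue trade-off behind DPT_FREELIST_IS_STACK is easy to see in miniature. The following is a minimal, hypothetical C sketch (invented names, not code from sys/dev/dpt): freeing to the head and allocating from the head (a stack) hands back the most recently completed command block, the one most likely to still sit in the L1/L2 cache, while freeing to the tail (a queue) always returns the coldest block.

#include <stddef.h>
#include <stdio.h>

struct ccb {				/* stand-in for a DPT command block */
	struct ccb	*cb_next;
	int		cb_id;
};

static struct ccb *cb_head;		/* shared list head */
static struct ccb *cb_tail;		/* used only by the queue discipline */

/* Stack discipline: the just-completed block is reused first (cache-warm). */
static void
cb_free_stack(struct ccb *cb)
{
	cb->cb_next = cb_head;
	cb_head = cb;
	if (cb_tail == NULL)
		cb_tail = cb;
}

/* Queue discipline: the block is reused last (cache-cold by then). */
static void
cb_free_queue(struct ccb *cb)
{
	cb->cb_next = NULL;
	if (cb_tail != NULL)
		cb_tail->cb_next = cb;
	else
		cb_head = cb;
	cb_tail = cb;
}

/* Both disciplines allocate from the head. */
static struct ccb *
cb_alloc(void)
{
	struct ccb *cb = cb_head;

	if (cb != NULL) {
		cb_head = cb->cb_next;
		if (cb_head == NULL)
			cb_tail = NULL;
	}
	return (cb);
}

int
main(void)
{
	static struct ccb pool[4];
	struct ccb *cb;
	int i;

	for (i = 0; i < 4; i++) {
		pool[i].cb_id = i;
		cb_free_queue(&pool[i]);
	}
	cb = cb_alloc();		/* block 0 comes off the head */
	cb_free_stack(cb);		/* stack free: 0 goes back on top... */
	printf("stack reuses ccb %d\n", cb_alloc()->cb_id);	/* ...so 0 again */
	return (0);
}

With cb_free_queue() in place of the cb_free_stack() call, the final allocation would return block 1, the coldest one, instead; the description above suggests the cache effect is real but hard to measure, which matches the author's own caveat.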
controller dpt0 # DPT options options DPT_VERIFY_HINTR options DPT_RESTRICTED_FREELIST options DPT_MEASURE_PERFORMANCE options DPT_FREELIST_IS_STACK options DPT_HANDLE_TIMEOUTS options DPT_TIMEOUT_FACTOR=4 options DPT_INTR_DELAY=200 # Some motherboards need that options DPT_LOST_IRQ Index: head/sys/conf/files =================================================================== --- head/sys/conf/files (revision 34265) +++ head/sys/conf/files (revision 34266) @@ -1,456 +1,458 @@ aicasm optional ahc device-driver \ dependency "$S/dev/aic7xxx/*.[chyl]" \ compile-with "make -f $S/dev/aic7xxx/Makefile MAKESRCPATH=$S/dev/aic7xxx" \ no-obj no-implicit-rule \ clean "aicasm" # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # aic7xxx_{seq,reg}.h optional ahc device-driver \ compile-with "./aicasm ${INCLUDES} -o aic7xxx_seq.h -r aic7xxx_reg.h $S/dev/aic7xxx/aic7xxx.seq" \ no-obj no-implicit-rule before-depend \ clean "aic7xxx_seq.h aic7xxx_reg.h" \ dependency "$S/dev/aic7xxx/aic7xxx.{reg,seq} $S/scsi/scsi_message.h aicasm" cfs/cfs_namecache.c optional vcfs cfs/cfs_nbsd.c optional vcfs cfs/cfs_fbsd.c optional vcfs cfs/cfs_psdev.c optional vcfs cfs/cfs_subr.c optional vcfs cfs/cfs_venus.c optional vcfs cfs/cfs_vfsops.c optional vcfs cfs/cfs_vnodeops.c optional vcfs ddb/db_access.c optional ddb ddb/db_aout.c optional ddb ddb/db_break.c optional ddb ddb/db_command.c optional ddb ddb/db_examine.c optional ddb ddb/db_expr.c optional ddb ddb/db_input.c optional ddb ddb/db_lex.c optional ddb ddb/db_output.c optional ddb ddb/db_print.c optional ddb ddb/db_ps.c optional ddb ddb/db_run.c optional ddb ddb/db_sym.c optional ddb ddb/db_trap.c optional ddb ddb/db_variables.c optional ddb ddb/db_watch.c optional ddb ddb/db_write_cmd.c optional ddb dev/ccd/ccd.c optional ccd device-driver dev/dpt/dpt_control.c optional dpt device-driver dev/dpt/dpt_scsi.c optional dpt device-driver dev/en/midway.c optional en device-driver dev/pdq/pdq.c optional fea device-driver dev/pdq/pdq_ifsubr.c optional fea device-driver dev/pdq/pdq.c optional fpa device-driver dev/pdq/pdq_ifsubr.c optional fpa device-driver dev/ppbus/nlpt.c optional nlpt dev/ppbus/ppb_base.c optional ppbus dev/ppbus/ppb_1284.c optional ppbus dev/ppbus/ppbconf.c optional ppbus dev/ppbus/ppi.c optional ppi dev/ppbus/pps.c optional pps dev/ppbus/vpo.c optional vpo dev/vn/vn.c optional vn dev/vx/if_vx.c optional vx device-driver gnu/ext2fs/ext2_alloc.c optional ext2fs gnu/ext2fs/ext2_balloc.c optional ext2fs gnu/ext2fs/ext2_inode.c optional ext2fs gnu/ext2fs/ext2_inode_cnv.c optional ext2fs gnu/ext2fs/ext2_linux_balloc.c optional ext2fs gnu/ext2fs/ext2_linux_ialloc.c optional ext2fs gnu/ext2fs/ext2_lookup.c optional ext2fs gnu/ext2fs/ext2_subr.c optional ext2fs gnu/ext2fs/ext2_vfsops.c optional ext2fs gnu/ext2fs/ext2_vnops.c optional ext2fs isofs/cd9660/cd9660_bmap.c optional cd9660 isofs/cd9660/cd9660_lookup.c optional cd9660 isofs/cd9660/cd9660_node.c optional cd9660 isofs/cd9660/cd9660_rrip.c optional cd9660 isofs/cd9660/cd9660_util.c optional cd9660 isofs/cd9660/cd9660_vfsops.c optional cd9660 isofs/cd9660/cd9660_vnops.c optional cd9660 kern/imgact_aout.c standard kern/imgact_elf.c standard kern/imgact_gzip.c optional gzip kern/imgact_shell.c standard kern/inflate.c optional gzip kern/init_main.c standard kern/init_sysent.c standard kern/init_sysvec.c standard kern/kern_intr.c standard kern/kern_module.c 
standard kern/kern_linker.c standard kern/link_aout.c standard kern/kern_acct.c standard kern/kern_clock.c standard kern/kern_conf.c standard kern/kern_descrip.c standard kern/kern_exec.c standard kern/kern_exit.c standard kern/kern_fork.c standard kern/kern_ktrace.c standard kern/kern_lkm.c standard kern/kern_lock.c standard kern/kern_lockf.c standard kern/kern_malloc.c standard kern/kern_mib.c standard kern/kern_ntptime.c standard kern/kern_opt.c standard kern/kern_physio.c standard kern/kern_proc.c standard kern/kern_prot.c standard kern/kern_resource.c standard kern/kern_shutdown.c standard kern/kern_sig.c standard kern/kern_subr.c standard kern/kern_synch.c standard kern/kern_sysctl.c standard kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_xxx.c standard kern/md5c.c optional md5 kern/md5c.c optional sppp kern/subr_diskslice.c standard kern/subr_autoconf.c standard kern/subr_dkbad.c standard kern/subr_log.c standard kern/subr_prf.c standard kern/subr_prof.c standard kern/subr_rlist.c standard kern/subr_xxx.c standard kern/sys_generic.c standard kern/sys_pipe.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg kern/sysv_sem.c optional sysvsem kern/sysv_shm.c optional sysvshm kern/tty.c standard kern/tty_compat.c standard kern/tty_conf.c standard kern/tty_pty.c optional pty kern/tty_snoop.c optional snp kern/tty_subr.c standard kern/tty_tb.c optional tb kern/tty_tty.c standard kern/uipc_domain.c standard kern/uipc_mbuf.c standard kern/uipc_proto.c standard kern/uipc_socket.c standard kern/uipc_socket2.c standard kern/uipc_syscalls.c standard kern/uipc_usrreq.c standard kern/vfs_bio.c standard kern/vfs_cache.c standard kern/vfs_cluster.c standard kern/vfs_conf.c standard kern/vfs_default.c standard kern/vfs_init.c standard kern/vfs_lookup.c standard kern/vfs_subr.c standard kern/vfs_syscalls.c standard kern/vfs_vnops.c standard kern/kern_threads.c standard kern/vfs_aio.c standard miscfs/deadfs/dead_vnops.c standard miscfs/devfs/devfs_tree.c optional devfs miscfs/devfs/devfs_vfsops.c optional devfs miscfs/devfs/devfs_vnops.c optional devfs miscfs/fdesc/fdesc_vfsops.c optional fdesc miscfs/fdesc/fdesc_vnops.c optional fdesc miscfs/fifofs/fifo_vnops.c standard miscfs/kernfs/kernfs_vfsops.c optional kernfs miscfs/kernfs/kernfs_vnops.c optional kernfs miscfs/nullfs/null_subr.c optional nullfs miscfs/nullfs/null_vfsops.c optional nullfs miscfs/nullfs/null_vnops.c optional nullfs miscfs/portal/portal_vfsops.c optional portal miscfs/portal/portal_vnops.c optional portal miscfs/procfs/procfs_ctl.c optional procfs miscfs/procfs/procfs_fpregs.c standard miscfs/procfs/procfs_map.c optional procfs miscfs/procfs/procfs_mem.c standard miscfs/procfs/procfs_note.c optional procfs miscfs/procfs/procfs_regs.c standard miscfs/procfs/procfs_status.c optional procfs miscfs/procfs/procfs_subr.c optional procfs miscfs/procfs/procfs_type.c optional procfs miscfs/procfs/procfs_vfsops.c optional procfs miscfs/procfs/procfs_vnops.c optional procfs miscfs/specfs/spec_vnops.c standard miscfs/umapfs/umap_subr.c optional umapfs miscfs/umapfs/umap_vfsops.c optional umapfs miscfs/umapfs/umap_vnops.c optional umapfs miscfs/union/union_subr.c optional union miscfs/union/union_vfsops.c optional union miscfs/union/union_vnops.c optional union msdosfs/msdosfs_conv.c optional msdosfs msdosfs/msdosfs_denode.c optional msdosfs msdosfs/msdosfs_fat.c optional msdosfs msdosfs/msdosfs_lookup.c optional msdosfs msdosfs/msdosfs_vfsops.c optional 
msdosfs msdosfs/msdosfs_vnops.c optional msdosfs net/bpf.c optional bpfilter net/bpf_filter.c optional bpfilter net/bsd_comp.c optional ppp_bsdcomp net/hostcache.c standard net/if.c standard net/if_atmsubr.c optional atm net/if_disc.c optional disc net/if_ethersubr.c optional ether net/if_fddisubr.c optional fddi net/if_loop.c optional loop net/if_media.c standard net/if_mib.c standard net/if_ppp.c optional ppp net/if_sl.c optional sl net/if_spppsubr.c optional sppp net/if_tun.c optional tun net/ppp_deflate.c optional ppp_deflate net/ppp_tty.c optional ppp net/radix.c standard net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard net/rtsock.c standard net/slcompress.c optional ppp net/slcompress.c optional sl net/zlib.c optional ppp_deflate netatalk/aarp.c optional netatalk netatalk/at_control.c optional netatalk netatalk/at_proto.c optional netatalk netatalk/at_rmx.c optional netatalkdebug netatalk/ddp_input.c optional netatalk netatalk/ddp_output.c optional netatalk netatalk/ddp_usrreq.c optional netatalk #netccitt/ccitt_proto.c optional ccitt #netccitt/hd_debug.c optional hdlc #netccitt/hd_input.c optional hdlc #netccitt/hd_output.c optional hdlc #netccitt/hd_subr.c optional hdlc #netccitt/hd_timer.c optional hdlc #netccitt/if_x25subr.c optional ccitt #netccitt/llc_input.c optional llc #netccitt/llc_output.c optional llc #netccitt/llc_subr.c optional llc #netccitt/llc_timer.c optional llc #netccitt/pk_acct.c optional ccitt #netccitt/pk_debug.c optional ccitt #netccitt/pk_input.c optional ccitt #netccitt/pk_llcsubr.c optional hdlc #netccitt/pk_llcsubr.c optional llc #netccitt/pk_output.c optional ccitt #netccitt/pk_subr.c optional ccitt #netccitt/pk_timer.c optional ccitt #netccitt/pk_usrreq.c optional ccitt #netimp/if_imp.c optional imp #netimp/if_imphost.c optional imp #netimp/raw_imp.c optional imp netinet/if_atm.c optional atm netinet/if_ether.c optional ether netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_hostcache.c optional inet netinet/in_pcb.c optional inet netinet/in_proto.c optional inet netinet/in_rmx.c optional inet netinet/ip_divert.c optional ipdivert netinet/ip_fw.c optional ipfirewall netinet/ip_icmp.c optional inet netinet/ip_input.c optional inet netinet/ip_mroute.c optional inet netinet/ip_output.c optional inet netinet/raw_ip.c optional inet netinet/tcp_debug.c optional tcpdebug netinet/tcp_input.c optional inet netinet/tcp_output.c optional inet netinet/tcp_subr.c optional inet netinet/tcp_timer.c optional inet netinet/tcp_usrreq.c optional inet netinet/udp_usrreq.c optional inet netinet/ip_fil.c optional ipfilter inet netinet/fil.c optional ipfilter inet netinet/ip_nat.c optional ipfilter inet netinet/ip_frag.c optional ipfilter inet netinet/ip_state.c optional ipfilter inet netinet/ip_proxy.c optional ipfilter inet netinet/mln_ipl.c optional ipfilter inet netipx/ipx.c optional ipx netipx/ipx_cksum.c optional ipx netipx/ipx_input.c optional ipx netipx/ipx_ip.c optional ipx netipx/ipx_outputfl.c optional ipx netipx/ipx_pcb.c optional ipx netipx/ipx_proto.c optional ipx netipx/ipx_tun.c optional ipx netipx/ipx_usrreq.c optional ipx netipx/spx_debug.c optional ipx netipx/spx_usrreq.c optional ipx #netiso/clnp_debug.c optional iso #netiso/clnp_er.c optional iso #netiso/clnp_frag.c optional iso #netiso/clnp_input.c optional iso #netiso/clnp_options.c optional iso #netiso/clnp_output.c optional iso #netiso/clnp_raw.c optional iso #netiso/clnp_subr.c optional iso #netiso/clnp_timer.c optional iso #netiso/cltp_usrreq.c optional iso 
#netiso/esis.c optional iso #netiso/idrp_usrreq.c optional iso #netiso/if_eon.c optional eon #netiso/iso.c optional iso #netiso/iso_chksum.c optional iso #netiso/iso_pcb.c optional iso #netiso/iso_proto.c optional iso #netiso/iso_snpac.c optional iso #netiso/tp_astring.c optional iso #netiso/tp_astring.c optional tpip #netiso/tp_cons.c optional iso #netiso/tp_driver.c optional iso #netiso/tp_driver.c optional tpip #netiso/tp_emit.c optional iso #netiso/tp_emit.c optional tpip #netiso/tp_inet.c optional iso #netiso/tp_inet.c optional tpip #netiso/tp_input.c optional iso #netiso/tp_input.c optional tpip #netiso/tp_iso.c optional iso #netiso/tp_meas.c optional iso #netiso/tp_meas.c optional tpip #netiso/tp_output.c optional iso #netiso/tp_output.c optional tpip #netiso/tp_pcb.c optional iso #netiso/tp_pcb.c optional tpip #netiso/tp_subr.c optional iso #netiso/tp_subr.c optional tpip #netiso/tp_subr2.c optional iso #netiso/tp_subr2.c optional tpip #netiso/tp_timer.c optional iso #netiso/tp_timer.c optional tpip #netiso/tp_trace.c optional iso #netiso/tp_trace.c optional tpip #netiso/tp_usrreq.c optional iso #netiso/tp_usrreq.c optional tpip #netiso/tuba_subr.c optional iso tuba #netiso/tuba_table.c optional iso tuba #netiso/tuba_usrreq.c optional iso tuba netkey/key.c optional key netkey/key_debug.c optional key_debug netnatm/natm.c optional natm netnatm/natm_pcb.c optional natm netnatm/natm_proto.c optional natm #netns/idp_usrreq.c optional ns #netns/ns.c optional ns #netns/ns_error.c optional ns #netns/ns_input.c optional ns #netns/ns_ip.c optional ns #netns/ns_output.c optional ns #netns/ns_pcb.c optional ns #netns/ns_proto.c optional ns #netns/spp_debug.c optional ns #netns/spp_usrreq.c optional ns nfs/nfs_bio.c optional nfs nfs/nfs_node.c optional nfs nfs/nfs_nqlease.c optional nfs nfs/nfs_serv.c optional nfs nfs/nfs_socket.c optional nfs nfs/nfs_srvcache.c optional nfs nfs/nfs_subs.c optional nfs nfs/nfs_syscalls.c optional nfs nfs/nfs_vfsops.c optional nfs nfs/nfs_vnops.c optional nfs nfs/bootp_subr.c optional bootp nfs/krpc_subr.c optional bootp pccard/pccard.c optional card pccard/pccard_beep.c optional card pccard/pcic.c optional pcic device-driver pci/pcic_p.c optional pcic device-driver pci/aic7870.c optional ahc device-driver \ dependency "aic7xxx_reg.h $S/pci/aic7870.c" pci/brooktree848.c optional bktr device-driver pci/bt9xx.c optional bt device-driver pci/dpt_pci.c optional dpt device-driver pci/cy_pci.c optional cy device-driver pci/if_de.c optional de device-driver pci/if_ed_p.c optional ed device-driver pci/if_en_pci.c optional en device-driver pci/if_fxp.c optional fxp device-driver pci/if_lnc_p.c optional lnc device-driver pci/if_fpa.c optional fpa device-driver pci/if_sr_p.c optional sr device-driver pci/if_tx.c optional tx device-driver pci/if_vx_pci.c optional vx device-driver pci/meteor.c optional meteor device-driver pci/ncr.c optional ncr device-driver pci/pci.c optional pci device-driver pci/pci_compat.c optional pci pci/pcisupport.c optional pci pci/tek390.c optional amd device-driver pci/wdc_p.c optional wdc device-driver posix4/posix4_mib.c optional posix4 posix4/ksched.c optional posix4 scsi/cd.c optional cd scsi/ch.c optional ch scsi/od.c optional od scsi/pt.c optional pt scsi/scsi_base.c optional scbus scsi/scsi_driver.c optional scbus scsi/scsi_ioctl.c optional scbus scsi/scsi_sense.c optional scbus scsi/scsiconf.c optional scbus scsi/sctarg.c optional sctarg scsi/sd.c optional sd scsi/ssc.c optional ssc scsi/st.c optional st scsi/su.c optional su scsi/uk.c 
optional scbus scsi/worm.c optional worm ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_alloc.c optional mfs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_balloc.c optional mfs ufs/ffs/ffs_inode.c optional ffs ufs/ffs/ffs_inode.c optional mfs +ufs/ffs/ffs_softdep_stub.c optional ffs +ufs/ffs/ffs_softdep.c optional softupdates ufs/ffs/ffs_subr.c optional ffs ufs/ffs/ffs_subr.c optional mfs ufs/ffs/ffs_tables.c optional ffs ufs/ffs/ffs_tables.c optional mfs ufs/ffs/ffs_vfsops.c optional ffs ufs/ffs/ffs_vfsops.c optional mfs ufs/ffs/ffs_vnops.c optional ffs ufs/ffs/ffs_vnops.c optional mfs ufs/ifs/ifs_isyscalls.c optional vcfs ufs/ifs/ifs_subr.c optional vcfs ufs/mfs/mfs_vfsops.c optional mfs ufs/mfs/mfs_vnops.c optional mfs ufs/ufs/ufs_bmap.c standard ufs/ufs/ufs_disksubr.c standard ufs/ufs/ufs_ihash.c standard ufs/ufs/ufs_inode.c standard ufs/ufs/ufs_lookup.c standard ufs/ufs/ufs_quota.c standard ufs/ufs/ufs_vfsops.c standard ufs/ufs/ufs_vnops.c standard vm/default_pager.c standard vm/device_pager.c standard vm/swap_pager.c standard vm/vm_fault.c standard vm/vm_glue.c standard vm/vm_init.c standard vm/vm_kern.c standard vm/vm_map.c standard vm/vm_meter.c standard vm/vm_mmap.c standard vm/vm_object.c standard vm/vm_page.c standard vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_swap.c standard vm/vm_unix.c standard vm/vnode_pager.c standard vm/vm_zone.c standard Index: head/sys/conf/options =================================================================== --- head/sys/conf/options (revision 34265) +++ head/sys/conf/options (revision 34266) @@ -1,189 +1,195 @@ -# $Id: options,v 1.63 1998/02/27 10:02:37 itojun Exp $ +# $Id: options,v 1.64 1998/03/04 10:24:08 dufault Exp $ # Format: # Option name filename # Miscellaneous options. BOUNCE_BUFFERS opt_bounce.h COMPAT_43 opt_compat.h COMPAT_SUNOS opt_compat.h COMPILING_LINT opt_lint.h DDB DDB_UNATTENDED opt_ddb.h GDB_REMOTE_CHAT opt_ddb.h DEVFS DEVFS_ROOT opt_devfs.h FAILSAFE HW_WDOG KTRACE MD5 MFS_AUTOLOAD opt_mfs.h MFS_ROOT opt_mfs.h NO_LKM NSWAPDEV opt_swap.h PPS_SYNC opt_ntp.h QUOTA SPX_HACK SUIDDIR opt_suiddir.h SYSVMSG opt_sysvipc.h SYSVSEM opt_sysvipc.h SYSVSHM opt_sysvipc.h UCONSOLE # POSIX 4. POSIX4 opt_posix4.h # Do we want the config file compiled into the kernel? INCLUDE_CONFIG_FILE opt_config.h # Options for static file systems. These should only be used at config # time, since the corresponding lkms cannot work if there are any static # dependencies. Unusability is enforced by hiding the defines for the # options in a never-included header. EXT2FS opt_dontuse.h FDESC opt_dontuse.h KERNFS opt_dontuse.h MFS opt_dontuse.h MSDOSFS opt_dontuse.h NULLFS opt_dontuse.h PORTAL opt_dontuse.h PROCFS opt_dontuse.h UMAPFS opt_dontuse.h # These static filesystems have one slightly bogus static dependency in # sys/i386/i386/autoconf.c. If any of these filesystems are # statically compiled into the kernel, code for mounting them as root # filesystems will be enabled - but look below. Boot-code is purposely # unavailable for the LKM-based versions. CD9660 FFS NFS + +# If you are following the conditions in the copyright, +# you can enable soft-updates which will speed up a lot of things +# and make the system safer from crashes at the same time. +# Otherwise a STUB module will be compiled in. +SOFTUPDATES opt_ffs.h # The above static dependencies are planned to be removed, with a # _ROOT option to control if it is usable as root. This list # allows these options to be present in config files already (though # they won't make any difference yet).
CD9660_ROOT opt_cd9660.h FFS_ROOT opt_ffs.h NFS_ROOT opt_nfs.h # Multi-session CD-Rs might require a huge amount of time in order to # "settle". If we are about to mount them as the root f/s, we gotta # wait a little. CD9660_ROOTDELAY opt_cd9660.h # The union static file system has bogus static dependencies, so it isn't # hidden yet. UNION # Options used only in param.c. EXTRAVNODES opt_defunct.h MSGMNB opt_param.h MSGMNI opt_param.h MSGSEG opt_param.h MSGSSZ opt_param.h MSGTQL opt_param.h NBUF opt_param.h NMBCLUSTERS opt_param.h SEMMAP opt_param.h SEMMNI opt_param.h SEMMNS opt_param.h SEMMNU opt_param.h SEMMSL opt_param.h SEMOPM opt_param.h SEMUME opt_param.h SHMALL opt_param.h SHMMAX opt_param.h SHMMAXPGS opt_param.h SHMMIN opt_param.h SHMMNI opt_param.h SHMSEG opt_param.h # Generic SCSI options. SCSIDEBUG opt_scsi.h SCSI_DELAY opt_scsi.h SCSI_REPORT_GEOMETRY opt_scsi.h SCSI_2_DEF opt_scsi.h # Options used only in scsi/od.c. OD_AUTO_TURNOFF opt_od.h OD_BOGUS_NOT_READY opt_od.h # Options used only in pci/ncr.c SCSI_NCR_DEBUG opt_ncr.h SCSI_NCR_DFLT_TAGS opt_ncr.h SCSI_NCR_MAX_SYNC opt_ncr.h SCSI_NCR_MAX_WIDE opt_ncr.h SCSI_NCR_MYADDR opt_ncr.h # Resource limits. CHILD_MAX opt_defunct.h DFLDSIZ opt_rlimit.h MAXDSIZ opt_rlimit.h OPEN_MAX opt_defunct.h # Net stuff. ARP_PROXYALL opt_defunct.h BOOTP opt_bootp.h BOOTP_COMPAT opt_bootp.h BOOTP_NFSROOT opt_bootp.h BOOTP_NFSV3 opt_bootp.h GATEWAY opt_defunct.h MROUTING opt_mrouting.h INET opt_inet.h IPDIVERT IPFIREWALL opt_ipfw.h IPFIREWALL_VERBOSE opt_ipfw.h IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h IPX opt_ipx.h IPXIP opt_ipx.h IPTUNNEL opt_ipx.h NETATALK opt_atalk.h PPP_BSDCOMP opt_ppp.h PPP_DEFLATE opt_ppp.h PPP_FILTER opt_ppp.h TCP_COMPAT_42 opt_compat.h TCPDEBUG # XXX Conflict: # of devices vs network protocol (Native ATM). # This makes "atm.h" unusable. NATM opt_natm.h # DPT driver debug flags DPT_VERIFY_HINTR opt_dpt.h DPT_USE_SINTR opt_dpt.h DPT_RESTRICTED_FREELIST opt_dpt.h DPT_MEASURE_PERFORMANCE opt_dpt.h DPT_FREELIST_IS_STACK opt_dpt.h DPT_HANDLE_TIMEOUTS opt_dpt.h DPT_TIMEOUT_FACTOR opt_dpt.h DPT_INTR_DELAY opt_dpt.h DPT_LOST_IRQ opt_dpt.h # Misc debug flags. Most of these should probably be replaced with # 'DEBUG', and then let people recompile just the interesting modules # with 'make CC="cc -DDEBUG"'. CLUSTERDEBUG opt_debug_cluster.h DEBUG_1284 opt_debug_1284.h LOCKF_DEBUG opt_debug_lockf.h LOUTB opt_debug_outb.h NPX_DEBUG opt_debug_npx.h NETATALKDEBUG opt_atalk.h NULLFS_DIAGNOSTIC opt_debug_nullfs.h SI_DEBUG opt_debug_si.h # These cause changes all over the kernel DEBUG opt_global.h DIAGNOSTIC opt_global.h SIMPLELOCK_DEBUG opt_global.h # These are VM related options VM_KMEM_SIZE opt_vm.h VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h # sys/netkey KEY KEY_DEBUG opt_key.h Index: head/sys/dev/de/if_de.c =================================================================== --- head/sys/dev/de/if_de.c (revision 34265) +++ head/sys/dev/de/if_de.c (revision 34266) @@ -1,5442 +1,5444 @@ +#undef __FreeBSD__ +#define __FreeBSD__ 3 /* $NetBSD: if_de.c,v 1.56 1997/10/20 14:32:46 matt Exp $ */ -/* $Id: if_de.c,v 1.79 1998/02/06 12:14:08 eivind Exp $ */ +/* $Id: if_de.c,v 1.80 1998/02/20 13:11:50 bde Exp $ */ /*- * Copyright (c) 1994-1997 Matt Thomas (matt@3am-software.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Id: if_de.c,v 1.94 1997/07/03 16:55:07 thomas Exp * */ /* * DEC 21040 PCI Ethernet Controller * * Written by Matt Thomas * BPF support code stolen directly from if_ec.c * * This driver supports the DEC DE435 or any other PCI * board which supports the 21040, 21041, or 21140 (mostly). */ #define TULIP_HDR_DATA #include "opt_inet.h" #include "opt_ipx.h" #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #elif defined(__bsdi__) || defined(__NetBSD__) #include #endif #if defined(__NetBSD__) #include "rnd.h" #if NRND > 0 #include #endif #endif #include #if defined(SIOCSIFMEDIA) && !defined(TULIP_NOIFMEDIA) #include #endif #include #ifdef TULIP_USE_SOFTINTR #include #endif #if defined(__bsdi__) && _BSDI_VERSION >= 199701 #include #include #endif #include "bpfilter.h" #if NBPFILTER > 0 #include #endif #ifdef INET #include #include #endif #ifdef IPX #include #include #endif #ifdef NS #include #include #endif #include #if defined(__FreeBSD__) #include #include #if NPCI > 0 #include #include #define DEVAR_INCLUDE "pci/if_devar.h" #endif #endif /* __FreeBSD__ */ #if defined(__bsdi__) #include #include #include #include #include #include #include #if _BSDI_VERSION < 199510 #include #else #define NEISA 0 #endif #if NEISA > 0 && _BSDI_VERSION >= 199401 #include #define TULIP_EISA #endif #define DEVAR_INCLUDE "i386/pci/if_devar.h" #endif /* __bsdi__ */ #if defined(__NetBSD__) #include #if defined(INET) #include #endif #include #if defined(__alpha__) #include #endif #include #include #include #define DEVAR_INCLUDE "dev/pci/if_devar.h" #endif /* __NetBSD__ */ /* * Intel CPUs should use I/O mapped access. */ #if defined(__i386__) || defined(TULIP_EISA) #define TULIP_IOMAPPED #endif #if 0 /* * This turns on all sorts of debugging stuff and makes the * driver much larger. */ #define TULIP_DEBUG #endif #if 0 #define TULIP_PERFSTATS #endif #if 0 #define TULIP_USE_SOFTINTR #endif #define TULIP_HZ 10 #include DEVAR_INCLUDE /* * This module supports * the DEC 21040 PCI Ethernet Controller. * the DEC 21041 PCI Ethernet Controller. * the DEC 21140 PCI Fast Ethernet Controller.
*/ static void tulip_mii_autonegotiate(tulip_softc_t * const sc, const unsigned phyaddr); static tulip_intrfunc_t tulip_intr_shared(void *arg); static tulip_intrfunc_t tulip_intr_normal(void *arg); static void tulip_init(tulip_softc_t * const sc); static void tulip_reset(tulip_softc_t * const sc); static ifnet_ret_t tulip_ifstart_one(struct ifnet *ifp); static ifnet_ret_t tulip_ifstart(struct ifnet *ifp); static struct mbuf *tulip_txput(tulip_softc_t * const sc, struct mbuf *m); static void tulip_txput_setup(tulip_softc_t * const sc); static void tulip_rx_intr(tulip_softc_t * const sc); static void tulip_addr_filter(tulip_softc_t * const sc); static unsigned tulip_mii_readreg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno); static void tulip_mii_writereg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data); static int tulip_mii_map_abilities(tulip_softc_t * const sc, unsigned abilities); static tulip_media_t tulip_mii_phy_readspecific(tulip_softc_t * const sc); static int tulip_srom_decode(tulip_softc_t * const sc); #if defined(IFM_ETHER) static int tulip_ifmedia_change(struct ifnet * const ifp); static void tulip_ifmedia_status(struct ifnet * const ifp, struct ifmediareq *req); #endif /* static void tulip_21140_map_media(tulip_softc_t *sc); */ static void tulip_timeout_callback( void *arg) { tulip_softc_t * const sc = arg; tulip_spl_t s = TULIP_RAISESPL(); TULIP_PERFSTART(timeout) sc->tulip_flags &= ~TULIP_TIMEOUTPENDING; sc->tulip_probe_timeout -= 1000 / TULIP_HZ; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_TIMER); TULIP_PERFEND(timeout); TULIP_RESTORESPL(s); } static void tulip_timeout( tulip_softc_t * const sc) { if (sc->tulip_flags & TULIP_TIMEOUTPENDING) return; sc->tulip_flags |= TULIP_TIMEOUTPENDING; timeout(tulip_timeout_callback, sc, (hz + TULIP_HZ / 2) / TULIP_HZ); } #if defined(TULIP_NEED_FASTTIMEOUT) static void tulip_fasttimeout_callback( void *arg) { tulip_softc_t * const sc = arg; tulip_spl_t s = TULIP_RAISESPL(); sc->tulip_flags &= ~TULIP_FASTTIMEOUTPENDING; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_FASTTIMER); TULIP_RESTORESPL(s); } static void tulip_fasttimeout( tulip_softc_t * const sc) { if (sc->tulip_flags & TULIP_FASTTIMEOUTPENDING) return; sc->tulip_flags |= TULIP_FASTTIMEOUTPENDING; timeout(tulip_fasttimeout_callback, sc, 1); } #endif static int tulip_txprobe( tulip_softc_t * const sc) { struct mbuf *m; /* * Before we are sure this is the right media we need * to send a small packet to make sure there's carrier. * Strangely, BNC and AUI will "see" receive data if * either is connected so the transmit is the only way * to verify the connectivity. */ MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return 0; /* * Construct a LLC TEST message which will point to ourselves. */ bcopy(sc->tulip_enaddr, mtod(m, struct ether_header *)->ether_dhost, 6); bcopy(sc->tulip_enaddr, mtod(m, struct ether_header *)->ether_shost, 6); mtod(m, struct ether_header *)->ether_type = htons(3); mtod(m, unsigned char *)[14] = 0; mtod(m, unsigned char *)[15] = 0; mtod(m, unsigned char *)[16] = 0xE3; /* LLC Class1 TEST (no poll) */ m->m_len = m->m_pkthdr.len = sizeof(struct ether_header) + 3; /* * send it! 
*/ sc->tulip_cmdmode |= TULIP_CMD_TXRUN; sc->tulip_intrmask |= TULIP_STS_TXINTR; sc->tulip_flags |= TULIP_TXPROBE_ACTIVE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); if ((m = tulip_txput(sc, m)) != NULL) m_freem(m); sc->tulip_probe.probe_txprobes++; return 1; } #ifdef BIG_PACKET #define TULIP_SIAGEN_WATCHDOG (sc->tulip_if.if_mtu > ETHERMTU ? TULIP_WATCHDOG_RXDISABLE|TULIP_WATCHDOG_TXDISABLE : 0) #else #define TULIP_SIAGEN_WATCHDOG 0 #endif static void tulip_media_set( tulip_softc_t * const sc, tulip_media_t media) { const tulip_media_info_t *mi = sc->tulip_mediums[media]; if (mi == NULL) return; /* * If we are switching media, make sure we don't think there's * any stale RX activity */ sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); TULIP_CSR_WRITE(sc, csr_sia_tx_rx, mi->mi_sia_tx_rx); if (sc->tulip_features & TULIP_HAVE_SIAGP) { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_control|mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); DELAY(50); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_data|mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); } else { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); } TULIP_CSR_WRITE(sc, csr_sia_connectivity, mi->mi_sia_connectivity); } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { #define TULIP_GPR_CMDBITS (TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_SCRAMBLER|TULIP_CMD_TXTHRSHLDCTL) /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. */ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, (u_int8_t) mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. 
*/ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpcontrol); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_MII && sc->tulip_probe_state != TULIP_PROBE_INACTIVE) { int idx; if (sc->tulip_features & TULIP_HAVE_SIAGP) { const u_int8_t *dp; dp = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx = 0; idx < mi->mi_reset_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx = 0; idx < mi->mi_gpr_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } } else { for (idx = 0; idx < mi->mi_reset_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx = 0; idx < mi->mi_gpr_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx]); } } if (sc->tulip_flags & TULIP_TRYNWAY) { tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); } else if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { u_int32_t data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_CONTROL); data &= ~(PHYCTL_SELECT_100MB|PHYCTL_FULL_DUPLEX|PHYCTL_AUTONEG_ENABLE); sc->tulip_flags &= ~TULIP_DIDNWAY; if (TULIP_IS_MEDIA_FD(media)) data |= PHYCTL_FULL_DUPLEX; if (TULIP_IS_MEDIA_100MB(media)) data |= PHYCTL_SELECT_100MB; tulip_mii_writereg(sc, sc->tulip_phyaddr, PHYREG_CONTROL, data); } } } static void tulip_linkup( tulip_softc_t * const sc, tulip_media_t media) { if ((sc->tulip_flags & TULIP_LINKUP) == 0) sc->tulip_flags |= TULIP_PRINTLINKUP; sc->tulip_flags |= TULIP_LINKUP; sc->tulip_if.if_flags &= ~IFF_OACTIVE; #if 0 /* XXX how does this work with ifmedia? */ if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { if (sc->tulip_if.if_flags & IFF_FULLDUPLEX) { if (TULIP_CAN_MEDIA_FD(media) && sc->tulip_mediums[TULIP_FD_MEDIA_OF(media)] != NULL) media = TULIP_FD_MEDIA_OF(media); } else { if (TULIP_IS_MEDIA_FD(media) && sc->tulip_mediums[TULIP_HD_MEDIA_OF(media)] != NULL) media = TULIP_HD_MEDIA_OF(media); } } #endif if (sc->tulip_media != media) { #ifdef TULIP_DEBUG sc->tulip_dbg.dbg_last_media = sc->tulip_media; #endif sc->tulip_media = media; sc->tulip_flags |= TULIP_PRINTMEDIA; if (TULIP_IS_MEDIA_FD(sc->tulip_media)) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; } else if (sc->tulip_chipid != TULIP_21041 || (sc->tulip_flags & TULIP_DIDNWAY) == 0) { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } } /* * We could set probe_timeout to 0 but setting to 3000 puts this * in one central place and all that matters is that tulip_linkup is * followed by a tulip_timeout. Therefore setting it should not * result in aberrant behaviour. */ sc->tulip_probe_timeout = 3000; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_TRYNWAY); if (sc->tulip_flags & TULIP_INRESET) { tulip_media_set(sc, sc->tulip_media); } else if (sc->tulip_probe_media != sc->tulip_media) { /* * No reason to change media if we have the right media.
*/ tulip_reset(sc); tulip_init(sc); } } static void tulip_media_print( tulip_softc_t * const sc) { if ((sc->tulip_flags & TULIP_LINKUP) == 0) return; if (sc->tulip_flags & TULIP_PRINTMEDIA) { printf(TULIP_PRINTF_FMT ": enabling %s port\n", TULIP_PRINTF_ARGS, tulip_mediums[sc->tulip_media]); sc->tulip_flags &= ~(TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); } else if (sc->tulip_flags & TULIP_PRINTLINKUP) { printf(TULIP_PRINTF_FMT ": link up\n", TULIP_PRINTF_ARGS); sc->tulip_flags &= ~TULIP_PRINTLINKUP; } } #if defined(TULIP_DO_GPR_SENSE) static tulip_media_t tulip_21140_gpr_media_sense( tulip_softc_t * const sc) { tulip_media_t maybe_media = TULIP_MEDIA_UNKNOWN; tulip_media_t last_media = TULIP_MEDIA_UNKNOWN; tulip_media_t media; /* * If one of the media blocks contained a default media flag, * use that. */ for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { const tulip_media_info_t *mi; /* * Media is not supported (or is full-duplex). */ if ((mi = sc->tulip_mediums[media]) == NULL || TULIP_IS_MEDIA_FD(media)) continue; if (mi->mi_type != TULIP_MEDIAINFO_GPR) continue; /* * Remember the media if this is the "default" media. */ if (mi->mi_default && maybe_media == TULIP_MEDIA_UNKNOWN) maybe_media = media; /* * No activity mask? Can't see if it is active if there's no mask. */ if (mi->mi_actmask == 0) continue; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) != mi->mi_actdata) continue; #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": gpr_media_sense: %s: 0x%02x & 0x%02x == 0x%02x\n", TULIP_PRINTF_ARGS, tulip_mediums[media], TULIP_CSR_READ(sc, csr_gp) & 0xFF, mi->mi_actmask, mi->mi_actdata); #endif /* * It does! If this is the first media we detected, then * remember this media. If it isn't the first, then there were * multiple matches, which we equate to no match (since we don't * know which to select, if any). */ if (last_media == TULIP_MEDIA_UNKNOWN) { last_media = media; } else if (last_media != media) { last_media = TULIP_MEDIA_UNKNOWN; } } return (last_media != TULIP_MEDIA_UNKNOWN) ? last_media : maybe_media; } #endif /* TULIP_DO_GPR_SENSE */ static tulip_link_status_t tulip_media_link_monitor( tulip_softc_t * const sc) { const tulip_media_info_t * const mi = sc->tulip_mediums[sc->tulip_media]; tulip_link_status_t linkup = TULIP_LINK_DOWN; if (mi == NULL) { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_link_monitor: %s: botch at line %d\n", tulip_mediums[sc->tulip_media],__LINE__); #endif return TULIP_LINK_UNKNOWN; } /* * Have we seen some packets? If so, the link must be good. */ if ((sc->tulip_flags & (TULIP_RXACT|TULIP_LINKUP)) == (TULIP_RXACT|TULIP_LINKUP)) { sc->tulip_flags &= ~TULIP_RXACT; sc->tulip_probe_timeout = 3000; return TULIP_LINK_UP; } sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_MII) { u_int32_t status; /* * Read the PHY status register. */ status = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if (status & PHYSTS_AUTONEG_DONE) { /* * If the PHY has completed autonegotiation, see if the * remote system's abilities have changed. If so, upgrade or * downgrade as appropriate.
*/ u_int32_t abilities = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_AUTONEG_ABILITIES); abilities = (abilities << 6) & status; if (abilities != sc->tulip_abilities) { #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation changed: 0x%04x -> 0x%04x\n", TULIP_PRINTF_ARGS, sc->tulip_phyaddr, sc->tulip_abilities, abilities); #endif if (tulip_mii_map_abilities(sc, abilities)) { tulip_linkup(sc, sc->tulip_probe_media); return TULIP_LINK_UP; } /* * if we had selected media because of autonegotiation, * we need to probe for the new media. */ sc->tulip_probe_state = TULIP_PROBE_INACTIVE; if (sc->tulip_flags & TULIP_DIDNWAY) return TULIP_LINK_DOWN; } } /* * The link is now up. If it was down, say it's back up. */ if ((status & (PHYSTS_LINK_UP|PHYSTS_REMOTE_FAULT)) == PHYSTS_LINK_UP) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { /* * No activity sensor? Assume all's well. */ if (mi->mi_actmask == 0) return TULIP_LINK_UNKNOWN; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) == mi->mi_actdata) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { /* * Assume non-TP is ok for now. */ if (!TULIP_IS_MEDIA_TP(sc->tulip_media)) return TULIP_LINK_UNKNOWN; if ((TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) == 0) linkup = TULIP_LINK_UP; #if defined(TULIP_DEBUG) if (sc->tulip_probe_timeout <= 0) printf(TULIP_PRINTF_FMT ": sia status = 0x%08x\n", TULIP_PRINTF_ARGS, TULIP_CSR_READ(sc, csr_sia_status)); #endif } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { return TULIP_LINK_UNKNOWN; } /* * We will wait for 3 seconds until the link goes into suspect mode. */ if (sc->tulip_flags & TULIP_LINKUP) { if (linkup == TULIP_LINK_UP) sc->tulip_probe_timeout = 3000; if (sc->tulip_probe_timeout > 0) return TULIP_LINK_UP; sc->tulip_flags &= ~TULIP_LINKUP; printf(TULIP_PRINTF_FMT ": link down: cable problem?\n", TULIP_PRINTF_ARGS); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_downed++; #endif return TULIP_LINK_DOWN; } static void tulip_media_poll( tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE && event == TULIP_MEDIAPOLL_TIMER) { switch (tulip_media_link_monitor(sc)) { case TULIP_LINK_DOWN: { /* * Link Monitor failed. Probe for new media. */ event = TULIP_MEDIAPOLL_LINKFAIL; break; } case TULIP_LINK_UP: { /* * Check again soon. */ tulip_timeout(sc); return; } case TULIP_LINK_UNKNOWN: { /* * We can't tell so don't bother. */ return; } } } if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) { if (TULIP_DO_AUTOSENSE(sc)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_failures++; #endif sc->tulip_media = TULIP_MEDIA_UNKNOWN; tulip_reset(sc); /* restart probe */ } return; } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_pollintrs++; #endif } if (event == TULIP_MEDIAPOLL_START) { sc->tulip_if.if_flags |= IFF_OACTIVE; if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE) return; sc->tulip_probe_mediamask = 0; sc->tulip_probe_passes = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_media_probes++; #endif /* * If the SROM contained an explicit media to use, use it. */ sc->tulip_cmdmode &= ~(TULIP_CMD_RXRUN|TULIP_CMD_FULLDUPLEX); sc->tulip_flags |= TULIP_TRYNWAY|TULIP_PROBE1STPASS; sc->tulip_flags &= ~(TULIP_DIDNWAY|TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); /* * connidx is defaulted to a media_unknown type.
*/ sc->tulip_probe_media = tulip_srom_conninfo[sc->tulip_connidx].sc_media; if (sc->tulip_probe_media != TULIP_MEDIA_UNKNOWN) { tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_features & TULIP_HAVE_GPR) { sc->tulip_probe_state = TULIP_PROBE_GPRTEST; sc->tulip_probe_timeout = 2000; } else { sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; } } /* * Ignore txprobe failures or spurious callbacks. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED && sc->tulip_probe_state != TULIP_PROBE_MEDIATEST) { sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * If we really transmitted a packet, then that's the media we'll use. */ if (event == TULIP_MEDIAPOLL_TXPROBE_OK || event == TULIP_MEDIAPOLL_LINKPASS) { if (event == TULIP_MEDIAPOLL_LINKPASS) sc->tulip_probe_media = TULIP_MEDIA_10BASET; #if defined(TULIP_DEBUG) else sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_GPRTEST) { #if defined(TULIP_DO_GPR_SENSE) /* * Check for media via the general purpose register. * * Try to sense the media via the GPR. If the same value * occurs 3 times in a row then just use that. */ if (sc->tulip_probe_timeout > 0) { tulip_media_t new_probe_media = tulip_21140_gpr_media_sense(sc); #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": media_poll: gpr sensing = %s\n", TULIP_PRINTF_ARGS, tulip_mediums[new_probe_media]); #endif if (new_probe_media != TULIP_MEDIA_UNKNOWN) { if (new_probe_media == sc->tulip_probe_media) { if (--sc->tulip_probe_count == 0) tulip_linkup(sc, sc->tulip_probe_media); } else { sc->tulip_probe_count = 10; } } sc->tulip_probe_media = new_probe_media; tulip_timeout(sc); return; } #endif /* TULIP_DO_GPR_SENSE */ /* * Brute force. We cycle through each of the media types * and try to transmit a packet. */ sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; tulip_timeout(sc); return; } if (sc->tulip_probe_state != TULIP_PROBE_MEDIATEST && (sc->tulip_features & TULIP_HAVE_MII)) { tulip_media_t old_media = sc->tulip_probe_media; tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); switch (sc->tulip_probe_state) { case TULIP_PROBE_FAILED: case TULIP_PROBE_MEDIATEST: { /* * Try the next media. */ sc->tulip_probe_mediamask |= sc->tulip_mediums[sc->tulip_probe_media]->mi_mediamask; sc->tulip_probe_timeout = 0; #ifdef notyet if (sc->tulip_probe_state == TULIP_PROBE_FAILED) break; if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) break; sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 300; #endif break; } case TULIP_PROBE_PHYAUTONEG: { return; } case TULIP_PROBE_INACTIVE: { /* * Only probe if we autonegotiated a media that hasn't failed. */ sc->tulip_probe_timeout = 0; if (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) { sc->tulip_probe_media = old_media; break; } tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } default: { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } } if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_failed[sc->tulip_probe_media]++; #endif sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * switch to another media if we tried this one enough. 
*/ if (/* event == TULIP_MEDIAPOLL_TXPROBE_FAILED || */ sc->tulip_probe_timeout <= 0) { #if defined(TULIP_DEBUG) if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { printf(TULIP_PRINTF_FMT ": poll media unknown!\n", TULIP_PRINTF_ARGS); sc->tulip_probe_media = TULIP_MEDIA_MAX; } #endif /* * Find the next media type to check for. Full Duplex * types are not allowed. */ do { sc->tulip_probe_media -= 1; if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { if (++sc->tulip_probe_passes == 3) { printf(TULIP_PRINTF_FMT ": autosense failed: cable problem?\n", TULIP_PRINTF_ARGS); if ((sc->tulip_if.if_flags & IFF_UP) == 0) { sc->tulip_if.if_flags &= ~IFF_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } sc->tulip_flags ^= TULIP_TRYNWAY; /* XXX */ sc->tulip_probe_mediamask = 0; sc->tulip_probe_media = TULIP_MEDIA_MAX - 1; } } while (sc->tulip_mediums[sc->tulip_probe_media] == NULL || (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) || TULIP_IS_MEDIA_FD(sc->tulip_probe_media)); #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": %s: probing %s\n", TULIP_PRINTF_ARGS, event == TULIP_MEDIAPOLL_TXPROBE_FAILED ? "txprobe failed" : "timeout", tulip_mediums[sc->tulip_probe_media]); #endif sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 1000; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe.probe_txprobes = 0; tulip_reset(sc); tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; } tulip_timeout(sc); /* * If this is hanging off a phy, we know we are doing NWAY and we have * forced the phy to a specific speed. Wait for link up before * sending a packet. */ switch (sc->tulip_mediums[sc->tulip_probe_media]->mi_type) { case TULIP_MEDIAINFO_MII: { if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) return; break; } case TULIP_MEDIAINFO_SIA: { if (TULIP_IS_MEDIA_TP(sc->tulip_probe_media)) { if (TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) return; tulip_linkup(sc, sc->tulip_probe_media); #ifdef notyet if (sc->tulip_features & TULIP_HAVE_MII) tulip_timeout(sc); #endif return; } break; } case TULIP_MEDIAINFO_RESET: case TULIP_MEDIAINFO_SYM: case TULIP_MEDIAINFO_NONE: case TULIP_MEDIAINFO_GPR: { break; } } /* * Try to send a packet.
*/ tulip_txprobe(sc); } static void tulip_media_select( tulip_softc_t * const sc) { if (sc->tulip_features & TULIP_HAVE_GPR) { TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpdata); } /* * If this board has no media, just return */ if (sc->tulip_features & TULIP_HAVE_NOMEDIA) return; if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); (*sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_START); } else { tulip_media_set(sc, sc->tulip_media); } } static void tulip_21040_mediainfo_init( tulip_softc_t * const sc, tulip_media_t media) { sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_THRSHLD160 |TULIP_CMD_BACKOFFCTR; sc->tulip_if.if_baudrate = 10000000; if (media == TULIP_MEDIA_10BASET || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[0], 21040, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[1], 21040, 10BASET_FD); } if (media == TULIP_MEDIA_AUIBNC || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[2], 21040, AUIBNC); } if (media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[3], 21040, EXTSIA); } } static void tulip_21040_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_UNKNOWN); return; } static void tulip_21040_10baset_only_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_10BASET); tulip_media_set(sc, TULIP_MEDIA_10BASET); sc->tulip_media = TULIP_MEDIA_10BASET; } static void tulip_21040_10baset_only_media_select( tulip_softc_t * const sc) { sc->tulip_flags |= TULIP_LINKUP; if (sc->tulip_media == TULIP_MEDIA_10BASET_FD) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags &= ~TULIP_SQETEST; } else { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_SQETEST; } tulip_media_set(sc, sc->tulip_media); } static void tulip_21040_auibnc_only_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_AUIBNC); sc->tulip_flags |= TULIP_SQETEST|TULIP_LINKUP; tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_media = TULIP_MEDIA_AUIBNC; } static void tulip_21040_auibnc_only_media_select( tulip_softc_t * const sc) { tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } static const tulip_boardsw_t tulip_21040_boardsw = { TULIP_21040_GENERIC, tulip_21040_media_probe, tulip_media_select, tulip_media_poll, }; static const tulip_boardsw_t tulip_21040_10baset_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_10baset_only_media_probe, tulip_21040_10baset_only_media_select, NULL, }; static const tulip_boardsw_t tulip_21040_auibnc_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_auibnc_only_media_probe, tulip_21040_auibnc_only_media_select, NULL, }; static void tulip_21041_mediainfo_init( tulip_softc_t * const sc) { tulip_media_info_t * const mi = sc->tulip_mediainfo; #ifdef notyet if (sc->tulip_revinfo >= 0x20) { TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, BNC); return; } #endif TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[2], 21041, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[3], 21041, BNC); } static void tulip_21041_media_probe( tulip_softc_t * const sc) { 
    sc->tulip_if.if_baudrate = 10000000;
    sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_ENHCAPTEFFCT
	|TULIP_CMD_THRSHLD160|TULIP_CMD_BACKOFFCTR;
    sc->tulip_intrmask |= TULIP_STS_LINKPASS;
    tulip_21041_mediainfo_init(sc);
}

static void
tulip_21041_media_poll(
    tulip_softc_t * const sc,
    const tulip_mediapoll_event_t event)
{
    u_int32_t sia_status;

#if defined(TULIP_DEBUG)
    sc->tulip_dbg.dbg_events[event]++;
#endif

    if (event == TULIP_MEDIAPOLL_LINKFAIL) {
	if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE
		|| !TULIP_DO_AUTOSENSE(sc))
	    return;
	sc->tulip_media = TULIP_MEDIA_UNKNOWN;
	tulip_reset(sc);	/* start probe */
	return;
    }

    /*
     * If we've been asked to start a poll or a link change interrupt
     * occurred, restart the probe (and reset the tulip to a known state).
     */
    if (event == TULIP_MEDIAPOLL_START) {
	sc->tulip_if.if_flags |= IFF_OACTIVE;
	sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_RXRUN);
#ifdef notyet
	if (sc->tulip_revinfo >= 0x20) {
	    sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX;
	    sc->tulip_flags |= TULIP_DIDNWAY;
	}
#endif
	TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode);
	sc->tulip_probe_state = TULIP_PROBE_MEDIATEST;
	sc->tulip_probe_media = TULIP_MEDIA_10BASET;
	sc->tulip_probe_timeout = TULIP_21041_PROBE_10BASET_TIMEOUT;
	tulip_media_set(sc, TULIP_MEDIA_10BASET);
	tulip_timeout(sc);
	return;
    }

    if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE)
	return;

    if (event == TULIP_MEDIAPOLL_TXPROBE_OK) {
#if defined(TULIP_DEBUG)
	sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++;
#endif
	tulip_linkup(sc, sc->tulip_probe_media);
	return;
    }

    sia_status = TULIP_CSR_READ(sc, csr_sia_status);
    TULIP_CSR_WRITE(sc, csr_sia_status, sia_status);
    if ((sia_status & TULIP_SIASTS_LINKFAIL) == 0) {
	if (sc->tulip_revinfo >= 0x20) {
	    if (sia_status & (PHYSTS_10BASET_FD << (16 - 6)))
		sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD;
	}
	/*
	 * If the link has passed LinkPass, 10baseT is the
	 * proper media to use.
	 */
	tulip_linkup(sc, sc->tulip_probe_media);
	return;
    }

    /*
     * Wait for up to 2.4 seconds for the link to reach pass state.
     * Only then start scanning the other media for activity.
     * Choose media with receive activity over those without.
     */
    if (sc->tulip_probe_media == TULIP_MEDIA_10BASET) {
	if (event != TULIP_MEDIAPOLL_TIMER)
	    return;
	if (sc->tulip_probe_timeout > 0
		&& (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) == 0) {
	    tulip_timeout(sc);
	    return;
	}
	sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT;
	sc->tulip_flags |= TULIP_WANTRXACT;
	if (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) {
	    sc->tulip_probe_media = TULIP_MEDIA_BNC;
	} else {
	    sc->tulip_probe_media = TULIP_MEDIA_AUI;
	}
	tulip_media_set(sc, sc->tulip_probe_media);
	tulip_timeout(sc);
	return;
    }

    /*
     * If we failed, clear the txprobe active flag.
     */
    if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED)
	sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE;

    if (event == TULIP_MEDIAPOLL_TIMER) {
	/*
	 * If we've received something, then that's our link!
	 */
	if (sc->tulip_flags & TULIP_RXACT) {
	    tulip_linkup(sc, sc->tulip_probe_media);
	    return;
	}
	/*
	 * If no txprobe is active and we either don't require receive
	 * activity or have seen some, fire off a txprobe.
	 */
	if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0
		&& ((sc->tulip_flags & TULIP_WANTRXACT) == 0
		    || (sia_status & TULIP_SIASTS_RXACTIVITY))) {
	    sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT;
	    tulip_txprobe(sc);
	    tulip_timeout(sc);
	    return;
	}
	/*
	 * Take 2 passes through before deciding to not
	 * wait for receive activity.  Then take another
	 * two passes before spitting out a warning.
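	 * (Each timeout below flips the probe between AUI and BNC; the
	 * first timeout also clears TULIP_WANTRXACT, so the second pair
	 * of passes no longer insists on receive activity before probing.)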
*/ if (sc->tulip_probe_timeout <= 0) { if (sc->tulip_flags & TULIP_WANTRXACT) { sc->tulip_flags &= ~TULIP_WANTRXACT; sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; } else { printf(TULIP_PRINTF_FMT ": autosense failed: cable problem?\n", TULIP_PRINTF_ARGS); if ((sc->tulip_if.if_flags & IFF_UP) == 0) { sc->tulip_if.if_flags &= ~IFF_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } } } /* * Since this media failed to probe, try the other one. */ sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; if (sc->tulip_probe_media == TULIP_MEDIA_AUI) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; tulip_timeout(sc); } static const tulip_boardsw_t tulip_21041_boardsw = { TULIP_21041_GENERIC, tulip_21041_media_probe, tulip_media_select, tulip_21041_media_poll }; static const tulip_phy_attr_t tulip_mii_phy_attrlist[] = { { 0x20005c00, 0, /* 08-00-17 */ { { 0x19, 0x0040, 0x0040 }, /* 10TX */ { 0x19, 0x0040, 0x0000 }, /* 100TX */ }, #if defined(TULIP_DEBUG) "NS DP83840", #endif }, { 0x0281F400, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Seeq 80C240" #endif }, #if 0 { 0x0015F420, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Broadcom BCM5000" #endif }, #endif { 0x0281F400, 0, /* 00-A0-BE */ { { 0x11, 0x8000, 0x0000 }, /* 10T */ { 0x11, 0x8000, 0x8000 }, /* 100TX */ { }, /* 100T4 */ { 0x11, 0x4000, 0x4000 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "ICS 1890" #endif }, { 0 } }; static tulip_media_t tulip_mii_phy_readspecific( tulip_softc_t * const sc) { const tulip_phy_attr_t *attr; u_int16_t data; u_int32_t id; unsigned idx = 0; static const tulip_media_t table[] = { TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET, TULIP_MEDIA_100BASETX, TULIP_MEDIA_100BASET4, TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET_FD, TULIP_MEDIA_100BASETX_FD, TULIP_MEDIA_UNKNOWN }; /* * Don't read phy specific registers if link is not up. 
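 *
 * The table above is indexed 10T = 1, 100TX = 2, 100T4 = 3, with 4
 * added when the PHY reports full duplex; e.g. a PHY in 100TX full
 * duplex yields table[2 + 4] == TULIP_MEDIA_100BASETX_FD.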
*/ data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if ((data & (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) != (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) return TULIP_MEDIA_UNKNOWN; id = (tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDHIGH); for (attr = tulip_mii_phy_attrlist;; attr++) { if (attr->attr_id == 0) return TULIP_MEDIA_UNKNOWN; if ((id & ~0x0F) == attr->attr_id) break; } if (attr->attr_modes[PHY_MODE_100TX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100TX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 2; } if (idx == 0 && attr->attr_modes[PHY_MODE_100T4].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100T4]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 3; } if (idx == 0 && attr->attr_modes[PHY_MODE_10T].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_10T]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 1; } if (idx != 0 && attr->attr_modes[PHY_MODE_FULLDUPLEX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_FULLDUPLEX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); idx += ((data & pm->pm_mask) == pm->pm_value ? 4 : 0); } return table[idx]; } static unsigned tulip_mii_get_phyaddr( tulip_softc_t * const sc, unsigned offset) { unsigned phyaddr; for (phyaddr = 1; phyaddr < 32; phyaddr++) { unsigned status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) continue; if (offset == 0) return phyaddr; offset--; } if (offset == 0) { unsigned status = tulip_mii_readreg(sc, 0, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) return TULIP_MII_NOPHY; return 0; } return TULIP_MII_NOPHY; } static int tulip_mii_map_abilities( tulip_softc_t * const sc, unsigned abilities) { sc->tulip_abilities = abilities; if (abilities & PHYSTS_100BASETX_FD) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX_FD; } else if (abilities & PHYSTS_100BASET4) { sc->tulip_probe_media = TULIP_MEDIA_100BASET4; } else if (abilities & PHYSTS_100BASETX) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX; } else if (abilities & PHYSTS_10BASET_FD) { sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } else if (abilities & PHYSTS_10BASET) { sc->tulip_probe_media = TULIP_MEDIA_10BASET; } else { sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return 0; } sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return 1; } static void tulip_mii_autonegotiate( tulip_softc_t * const sc, const unsigned phyaddr) { switch (sc->tulip_probe_state) { case TULIP_PROBE_MEDIATEST: case TULIP_PROBE_INACTIVE: { sc->tulip_flags |= TULIP_DIDNWAY; tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, PHYCTL_RESET); sc->tulip_probe_timeout = 3000; sc->tulip_intrmask |= TULIP_STS_ABNRMLINTR|TULIP_STS_NORMALINTR; sc->tulip_probe_state = TULIP_PROBE_PHYRESET; /* FALL THROUGH */ } case TULIP_PROBE_PHYRESET: { u_int32_t status; u_int32_t data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); if (data & PHYCTL_RESET) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } printf(TULIP_PRINTF_FMT "(phy%d): error: reset of PHY never completed!\n", TULIP_PRINTF_ARGS, phyaddr); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; sc->tulip_probe_state = TULIP_PROBE_FAILED; 
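	    /*
	     * The PHY never came out of reset; give up and mark the
	     * interface down.
	     */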
sc->tulip_if.if_flags &= ~(IFF_UP|IFF_RUNNING); return; } status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if ((status & PHYSTS_CAN_AUTONEG) == 0) { #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation disabled\n", TULIP_PRINTF_ARGS, phyaddr); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } if (tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT) != ((status >> 6) | 0x01)) tulip_mii_writereg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT, (status >> 6) | 0x01); tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, data|PHYCTL_AUTONEG_RESTART|PHYCTL_AUTONEG_ENABLE); data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); #if defined(TULIP_DEBUG) if ((data & PHYCTL_AUTONEG_ENABLE) == 0) loudprintf(TULIP_PRINTF_FMT "(phy%d): oops: enable autonegotiation failed: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); else loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation restarted: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); sc->tulip_dbg.dbg_nway_starts++; #endif sc->tulip_probe_state = TULIP_PROBE_PHYAUTONEG; sc->tulip_probe_timeout = 3000; /* FALL THROUGH */ } case TULIP_PROBE_PHYAUTONEG: { u_int32_t status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); u_int32_t data; if ((status & PHYSTS_AUTONEG_DONE) == 0) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation timeout: sts=0x%04x, ctl=0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, status, tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL)); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } data = tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ABILITIES); #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation complete: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); #endif data = (data << 6) & status; if (!tulip_mii_map_abilities(sc, data)) sc->tulip_flags &= ~TULIP_DIDNWAY; return; } default: { #if defined(DIAGNOSTIC) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation failure: state = %d\n", TULIP_PRINTF_ARGS, phyaddr, sc->tulip_probe_state); sc->tulip_dbg.dbg_nway_failures++; #endif } static void tulip_2114x_media_preset( tulip_softc_t * const sc) { const tulip_media_info_t *mi = NULL; tulip_media_t media = sc->tulip_media; if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) media = sc->tulip_media; else media = sc->tulip_probe_media; sc->tulip_cmdmode &= ~TULIP_CMD_PORTSELECT; sc->tulip_flags &= ~TULIP_SQETEST; if (media != TULIP_MEDIA_UNKNOWN && media != TULIP_MEDIA_MAX) { #if defined(TULIP_DEBUG) if (media < TULIP_MEDIA_MAX && sc->tulip_mediums[media] != NULL) { #endif mi = sc->tulip_mediums[media]; if (mi->mi_type == TULIP_MEDIAINFO_MII) { sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR || mi->mi_type == TULIP_MEDIAINFO_SYM) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); } #if defined(TULIP_DEBUG) } else { printf(TULIP_PRINTF_FMT ": preset: bad media %d!\n", TULIP_PRINTF_ARGS, media); } #endif } switch (media) { case TULIP_MEDIA_BNC: case TULIP_MEDIA_AUI: case TULIP_MEDIA_10BASET: { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_cmdmode |= TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 10000000; sc->tulip_flags |= 
TULIP_SQETEST; break; } case TULIP_MEDIA_10BASET_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 10000000; break; } case TULIP_MEDIA_100BASEFX: case TULIP_MEDIA_100BASET4: case TULIP_MEDIA_100BASETX: { sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL); sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; sc->tulip_if.if_baudrate = 100000000; break; } case TULIP_MEDIA_100BASEFX_FD: case TULIP_MEDIA_100BASETX_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_PORTSELECT; sc->tulip_cmdmode &= ~TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 100000000; break; } default: { break; } } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } /* ******************************************************************** * Start of 21140/21140A support which does not use the MII interface */ static void tulip_null_media_poll( tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif #if defined(DIAGNOSTIC) printf(TULIP_PRINTF_FMT ": botch(media_poll) at line %d\n", TULIP_PRINTF_ARGS, __LINE__); #endif } __inline__ static void tulip_21140_mediainit( tulip_softc_t * const sc, tulip_media_info_t * const mip, tulip_media_t const media, unsigned gpdata, unsigned cmdmode) { sc->tulip_mediums[media] = mip; mip->mi_type = TULIP_MEDIAINFO_GPR; mip->mi_cmdmode = cmdmode; mip->mi_gpdata = gpdata; } static void tulip_21140_evalboard_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); if ((TULIP_CSR_READ(sc, csr_gp) & TULIP_GP_EB_OK100) != 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_eb_boardsw = { TULIP_21140_DEC_EB, tulip_21140_evalboard_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_accton_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; unsigned gpdata; sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); gpdata = TULIP_CSR_READ(sc, csr_gp); if ((gpdata & TULIP_GP_EN1207_UTP_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { if 
((gpdata & TULIP_GP_EN1207_BNC_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_BNC; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_BNC, TULIP_GP_EN1207_BNC_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_accton_boardsw = { TULIP_21140_EN1207, tulip_21140_accton_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_smc9332_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int idx, cnt = 0; TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT|TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); sc->tulip_gpinit = TULIP_GP_SMC_9332_PINS; sc->tulip_gpdata = TULIP_GP_SMC_9332_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_INIT); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) == (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) { if (++cnt > 100) break; } else if ((csr & TULIP_GP_SMC_9332_OK10) == 0) { break; } else { cnt = 0; } DELAY(1000); } sc->tulip_media = cnt > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_smc9332_boardsw = { TULIP_21140_SMC_9332, tulip_21140_smc9332_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_cogent_em100_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; u_int32_t cmdmode = TULIP_CSR_READ(sc, csr_command); sc->tulip_gpinit = TULIP_GP_EM100_PINS; sc->tulip_gpdata = TULIP_GP_EM100_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_INIT); cmdmode = TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_MUSTBEONE; cmdmode &= ~(TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_SCRAMBLER); if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { TULIP_CSR_WRITE(sc, csr_command, cmdmode); sc->tulip_media = TULIP_MEDIA_100BASEFX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_FULLDUPLEX); } else { TULIP_CSR_WRITE(sc, csr_command, cmdmode|TULIP_CMD_SCRAMBLER); sc->tulip_media = TULIP_MEDIA_100BASETX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } } static const tulip_boardsw_t tulip_21140_cogent_em100_boardsw = { TULIP_21140_COGENT_EM100, tulip_21140_cogent_em100_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset }; static void tulip_21140_znyx_zx34x_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int cnt10 = 0, cnt100 = 0, idx; sc->tulip_gpinit = TULIP_GP_ZX34X_PINS; sc->tulip_gpdata = TULIP_GP_ZX34X_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) == (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) { if (++cnt100 > 100) break; } else if ((csr & TULIP_GP_ZX34X_LNKFAIL) == 0) { if (++cnt10 > 100) break; } else { cnt10 = 0; cnt100 = 0; } DELAY(1000); } sc->tulip_media = cnt100 > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET;
    tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET,
			  TULIP_GP_ZX34X_INIT,
			  TULIP_CMD_TXTHRSHLDCTL);
    tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD,
			  TULIP_GP_ZX34X_INIT,
			  TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX);
    tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX,
			  TULIP_GP_ZX34X_INIT,
			  TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION
			  |TULIP_CMD_SCRAMBLER);
    tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD,
			  TULIP_GP_ZX34X_INIT,
			  TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION
			  |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX);
}

static const tulip_boardsw_t tulip_21140_znyx_zx34x_boardsw = {
    TULIP_21140_ZNYX_ZX34X,
    tulip_21140_znyx_zx34x_media_probe,
    tulip_media_select,
    tulip_null_media_poll,
    tulip_2114x_media_preset,
};

static void
tulip_2114x_media_probe(
    tulip_softc_t * const sc)
{
    sc->tulip_cmdmode |= TULIP_CMD_MUSTBEONE
	|TULIP_CMD_BACKOFFCTR|TULIP_CMD_THRSHLD72;
}

static const tulip_boardsw_t tulip_2114x_isv_boardsw = {
    TULIP_21140_ISV,
    tulip_2114x_media_probe,
    tulip_media_select,
    tulip_media_poll,
    tulip_2114x_media_preset,
};

/*
 * ******** END of chip-specific handlers. ***********
 */

/*
 * Code to read the SROM and MII bit streams (I2C)
 */
static void
tulip_delay_300ns(
    tulip_softc_t * const sc)
{
    int idx;
    for (idx = (300 / 33) + 1; idx > 0; idx--)
	(void) TULIP_CSR_READ(sc, csr_busmode);
}

#define EMIT    do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); tulip_delay_300ns(sc); } while (0)

static void
tulip_srom_idle(
    tulip_softc_t * const sc)
{
    unsigned bit, csr;

    csr  = SROMSEL ; EMIT;
    csr  = SROMSEL | SROMRD; EMIT;
    csr ^= SROMCS; EMIT;
    csr ^= SROMCLKON; EMIT;

    /*
     * Write 25 cycles of 0 which will force the SROM to be idle.
     */
    for (bit = 3 + SROM_BITWIDTH + 16; bit > 0; bit--) {
	csr ^= SROMCLKOFF; EMIT;	/* clock low; data not valid */
	csr ^= SROMCLKON; EMIT;		/* clock high; data valid */
    }
    csr ^= SROMCLKOFF; EMIT;
    csr ^= SROMCS; EMIT;
    csr  = 0; EMIT;
}

static void
tulip_srom_read(
    tulip_softc_t * const sc)
{
    unsigned idx;
    const unsigned bitwidth = SROM_BITWIDTH;
    const unsigned cmdmask = (SROMCMD_RD << bitwidth);
    const unsigned msb = 1 << (bitwidth + 3 - 1);
    unsigned lastidx = (1 << bitwidth) - 1;

    tulip_srom_idle(sc);

    for (idx = 0; idx <= lastidx; idx++) {
	unsigned lastbit, data, bits, bit, csr;
	csr  = SROMSEL ; EMIT;
	csr  = SROMSEL | SROMRD; EMIT;
	csr ^= SROMCSON; EMIT;
	csr ^= SROMCLKON; EMIT;

	lastbit = 0;
	for (bits = idx|cmdmask, bit = bitwidth + 3; bit > 0; bit--, bits <<= 1) {
	    const unsigned thisbit = bits & msb;
	    csr ^= SROMCLKOFF; EMIT;	/* clock low; data not valid */
	    if (thisbit != lastbit) {
		csr ^= SROMDOUT; EMIT;	/* clock low; invert data */
	    } else {
		EMIT;
	    }
	    csr ^= SROMCLKON; EMIT;	/* clock high; data valid */
	    lastbit = thisbit;
	}
	csr ^= SROMCLKOFF; EMIT;

	for (data = 0, bits = 0; bits < 16; bits++) {
	    data <<= 1;
	    csr ^= SROMCLKON; EMIT;	/* clock high; data valid */
	    data |= TULIP_CSR_READ(sc, csr_srom_mii) & SROMDIN ? 1 : 0;
	    csr ^= SROMCLKOFF; EMIT;	/* clock low; data not valid */
	}
	sc->tulip_rombuf[idx*2] = data & 0xFF;
	sc->tulip_rombuf[idx*2+1] = data >> 8;
	csr  = SROMSEL | SROMRD; EMIT;
	csr  = 0; EMIT;
    }
    tulip_srom_idle(sc);
}

#define MII_EMIT    do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); tulip_delay_300ns(sc); } while (0)

static void
tulip_mii_writebits(
    tulip_softc_t * const sc,
    unsigned data,
    unsigned bits)
{
    unsigned msb = 1 << (bits - 1);
    unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK);
    unsigned lastbit = (csr & MII_DOUT) ?
msb : 0; csr |= MII_WR; MII_EMIT; /* clock low; assert write */ for (; bits > 0; bits--, data <<= 1) { const unsigned thisbit = data & msb; if (thisbit != lastbit) { csr ^= MII_DOUT; MII_EMIT; /* clock low; invert data */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ lastbit = thisbit; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } } static void tulip_mii_turnaround( tulip_softc_t * const sc, unsigned cmd) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); if (cmd == MII_WRCMD) { csr |= MII_DOUT; MII_EMIT; /* clock low; change data */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ csr ^= MII_DOUT; MII_EMIT; /* clock low; change data */ } else { csr |= MII_RD; MII_EMIT; /* clock low; switch to read */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } static unsigned tulip_mii_readbits( tulip_softc_t * const sc) { unsigned data; unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); int idx; for (idx = 0, data = 0; idx < 16; idx++) { data <<= 1; /* this is NOOP on the first pass through */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ if (TULIP_CSR_READ(sc, csr_srom_mii) & MII_DIN) data |= 1; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } csr ^= MII_RD; MII_EMIT; /* clock low; turn off read */ return data; } static unsigned tulip_mii_readreg( tulip_softc_t * const sc, unsigned devaddr, unsigned regno) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned data; csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_RDCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_RDCMD); data = tulip_mii_readbits(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][0] = data; sc->tulip_dbg.dbg_phyregs[regno][1]++; #endif return data; } static void tulip_mii_writereg( tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_WRCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_WRCMD); tulip_mii_writebits(sc, data, 16); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][2] = data; sc->tulip_dbg.dbg_phyregs[regno][3]++; #endif } #define tulip_mchash(mca) (tulip_crc32(mca, 6) & 0x1FF) #define tulip_srom_crcok(databuf) ( \ ((tulip_crc32(databuf, 126) & 0xFFFFU) ^ 0xFFFFU) == \ ((databuf)[126] | ((databuf)[127] << 8))) static unsigned tulip_crc32( const unsigned char *databuf, size_t datalen) { u_int idx, bit, data, crc = 0xFFFFFFFFUL; for (idx = 0; idx < datalen; idx++) for (data = *databuf++, bit = 0; bit < 8; bit++, data >>= 1) crc = (crc >> 1) ^ (((crc ^ data) & 1) ? 
TULIP_CRC32_POLY : 0); return crc; } static void tulip_identify_dec_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "DEC "); #define D0 4 if (sc->tulip_chipid <= TULIP_DE425) return; if (bcmp(sc->tulip_rombuf + 29, "DE500", 5) == 0 || bcmp(sc->tulip_rombuf + 29, "DE450", 5) == 0) { bcopy(sc->tulip_rombuf + 29, &sc->tulip_boardid[D0], 8); sc->tulip_boardid[D0+8] = ' '; } #undef D0 } static void tulip_identify_znyx_nic( tulip_softc_t * const sc) { unsigned id = 0; strcpy(sc->tulip_boardid, "ZNYX ZX3XX "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { unsigned znyx_ptr; sc->tulip_boardid[8] = '4'; znyx_ptr = sc->tulip_rombuf[124] + 256 * sc->tulip_rombuf[125]; if (znyx_ptr < 26 || znyx_ptr > 116) { sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; return; } /* ZX344 = 0010 .. 0013FF */ if (sc->tulip_rombuf[znyx_ptr] == 0x4A && sc->tulip_rombuf[znyx_ptr + 1] == 0x52 && sc->tulip_rombuf[znyx_ptr + 2] == 0x01) { id = sc->tulip_rombuf[znyx_ptr + 5] + 256 * sc->tulip_rombuf[znyx_ptr + 4]; if ((id >> 8) == (TULIP_ZNYX_ID_ZX342 >> 8)) { sc->tulip_boardid[9] = '2'; if (id == TULIP_ZNYX_ID_ZX342B) { sc->tulip_boardid[10] = 'B'; sc->tulip_boardid[11] = ' '; } sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX344) { sc->tulip_boardid[10] = '4'; sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX345) { sc->tulip_boardid[9] = (sc->tulip_rombuf[19] > 1) ? '8' : '5'; } else if (id == TULIP_ZNYX_ID_ZX346) { sc->tulip_boardid[9] = '6'; } else if (id == TULIP_ZNYX_ID_ZX351) { sc->tulip_boardid[8] = '5'; sc->tulip_boardid[9] = '1'; } } if (id == 0) { /* * Assume it's a ZX342... */ sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } return; } sc->tulip_boardid[8] = '1'; if (sc->tulip_chipid == TULIP_21041) { sc->tulip_boardid[10] = '1'; return; } if (sc->tulip_rombuf[32] == 0x4A && sc->tulip_rombuf[33] == 0x52) { id = sc->tulip_rombuf[37] + 256 * sc->tulip_rombuf[36]; if (id == TULIP_ZNYX_ID_ZX312T) { sc->tulip_boardid[9] = '2'; sc->tulip_boardid[10] = 'T'; sc->tulip_boardid[11] = ' '; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; } else if (id == TULIP_ZNYX_ID_ZX314_INTA) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX314) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315_INTA) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_BASEROM; } else { id = 0; } } if (id == 0) { if ((sc->tulip_enaddr[3] & ~3) == 0xF0 && (sc->tulip_enaddr[5] & 2) == 0) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xF4 && (sc->tulip_enaddr[5] & 1) == 0) { sc->tulip_boardid[9] = '5'; sc->tulip_boardsw = &tulip_21040_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xEC) { sc->tulip_boardid[9] = '2'; sc->tulip_boardsw = &tulip_21040_boardsw; } } } static void tulip_identify_smc_nic( tulip_softc_t * const sc) { u_int32_t id1, id2, ei; int auibnc = 0, utp = 0; char *cp; strcpy(sc->tulip_boardid, "SMC "); if (sc->tulip_chipid 
== TULIP_21041)
	return;
    if (sc->tulip_chipid != TULIP_21040) {
	if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) {
	    strcpy(&sc->tulip_boardid[4], "9332DST ");
	    sc->tulip_boardsw = &tulip_21140_smc9332_boardsw;
	} else if (sc->tulip_features & (TULIP_HAVE_BASEROM|TULIP_HAVE_SLAVEDROM)) {
	    strcpy(&sc->tulip_boardid[4], "9334BDT ");
	} else {
	    strcpy(&sc->tulip_boardid[4], "9332BDT ");
	}
	return;
    }
    id1 = sc->tulip_rombuf[0x60] | (sc->tulip_rombuf[0x61] << 8);
    id2 = sc->tulip_rombuf[0x62] | (sc->tulip_rombuf[0x63] << 8);
    ei  = sc->tulip_rombuf[0x66] | (sc->tulip_rombuf[0x67] << 8);

    strcpy(&sc->tulip_boardid[4], "8432");
    cp = &sc->tulip_boardid[8];
    if ((id1 & 1) == 0)
	*cp++ = 'B', auibnc = 1;
    if ((id1 & 0xFF) > 0x32)
	*cp++ = 'T', utp = 1;
    if ((id1 & 0x4000) == 0)
	*cp++ = 'A', auibnc = 1;
    if (id2 == 0x15) {
	sc->tulip_boardid[7] = '4';
	*cp++ = '-';
	*cp++ = 'C';
	*cp++ = 'H';
	*cp++ = (ei ? '2' : '1');
    }
    *cp++ = ' ';
    *cp = '\0';
    if (utp && !auibnc)
	sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw;
    else if (!utp && auibnc)
	sc->tulip_boardsw = &tulip_21040_auibnc_only_boardsw;
}

static void
tulip_identify_cogent_nic(
    tulip_softc_t * const sc)
{
    strcpy(sc->tulip_boardid, "Cogent ");
    if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) {
	if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100TX_ID) {
	    strcat(sc->tulip_boardid, "EM100TX ");
	    sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw;
	} else if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) {
	    strcat(sc->tulip_boardid, "EM100FX ");
	    sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw;
	}
	/*
	 * Magic number (0x24001109U) is the SubVendor (0x2400) and
	 * SubDevId (0x1109) for the ANA6944TX (EM440TX).
	 */
	if (*(u_int32_t *) sc->tulip_rombuf == 0x24001109U
		&& (sc->tulip_features & TULIP_HAVE_BASEROM)) {
	    /*
	     * Cogent (Adaptec) is still mapping all INTs to INTA of
	     * first 21140.  Dumb!  Dumb!
	     */
	    strcat(sc->tulip_boardid, "EM440TX ");
	    sc->tulip_features |= TULIP_HAVE_SHAREDINTR;
	}
    } else if (sc->tulip_chipid == TULIP_21040) {
	sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM;
    }
}

static void
tulip_identify_accton_nic(
    tulip_softc_t * const sc)
{
    strcpy(sc->tulip_boardid, "ACCTON ");
    switch (sc->tulip_chipid) {
	case TULIP_21140A:
	    strcat(sc->tulip_boardid, "EN1207 ");
	    sc->tulip_boardsw = &tulip_21140_accton_boardsw;
	    break;
	case TULIP_21140:
	    strcat(sc->tulip_boardid, "EN1207TX ");
	    sc->tulip_boardsw = &tulip_21140_eb_boardsw;
	    break;
	case TULIP_21040:
	    strcat(sc->tulip_boardid, "EN1203 ");
	    sc->tulip_boardsw = &tulip_21040_boardsw;
	    break;
	case TULIP_21041:
	    strcat(sc->tulip_boardid, "EN1203 ");
	    sc->tulip_boardsw = &tulip_21041_boardsw;
	    break;
	default:
	    sc->tulip_boardsw = &tulip_2114x_isv_boardsw;
	    break;
    }
}

static void
tulip_identify_asante_nic(
    tulip_softc_t * const sc)
{
    strcpy(sc->tulip_boardid, "Asante ");
    if ((sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A)
	    && sc->tulip_boardsw != &tulip_2114x_isv_boardsw) {
	tulip_media_info_t *mi = sc->tulip_mediainfo;
	int idx;
	/*
	 * The Asante Fast Ethernet doesn't always ship with a valid
	 * new format SROM.  So if it isn't in the new format, we cheat
	 * and set it up as if it were.
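	 * That is, we hand-build the TULIP_MEDIAINFO_MII record that
	 * decoding an MII block of a new format SROM would have produced:
	 * pulse the PHY reset via the general purpose pins, search for
	 * the PHY address, fill in fixed 10/100 capability masks, and
	 * fall back on the ISV board support.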
*/ sc->tulip_gpinit = TULIP_GP_ASANTE_PINS; sc->tulip_gpdata = 0; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PHYRESET); DELAY(100); TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = 0; mi->mi_gpr_offset = 0; mi->mi_reset_length = 0; mi->mi_reset_offset = 0;; mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx = 20; idx > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, 0); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy 0\n", TULIP_PRINTF_ARGS); return; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_advertisement = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_full_duplex = PHYSTS_10BASET_FD|PHYSTS_100BASETX_FD; mi->mi_tx_threshold = PHYSTS_10BASET|PHYSTS_10BASET_FD; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } } static int tulip_srom_decode( tulip_softc_t * const sc) { unsigned idx1, idx2, idx3; const tulip_srom_header_t *shp = (tulip_srom_header_t *) &sc->tulip_rombuf[0]; const tulip_srom_adapter_info_t *saip = (tulip_srom_adapter_info_t *) (shp + 1); tulip_srom_media_t srom_media; tulip_media_info_t *mi = sc->tulip_mediainfo; const u_int8_t *dp; u_int32_t leaf_offset, blocks, data; for (idx1 = 0; idx1 < shp->sh_adapter_count; idx1++, saip++) { if (shp->sh_adapter_count == 1) break; if (saip->sai_device == sc->tulip_pci_devno) break; } /* * Didn't find the right media block for this card. */ if (idx1 == shp->sh_adapter_count) return 0; /* * Save the hardware address. */ bcopy((caddr_t) shp->sh_ieee802_address, (caddr_t) sc->tulip_enaddr, 6); /* * If this is a multiple port card, add the adapter index to the last * byte of the hardware address. (if it isn't multiport, adding 0 * won't hurt. 
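 * For example, on a hypothetical four-port board whose SROM address
 * ends in 0x10, the ports come up ending in 0x10, 0x11, 0x12 and
 * 0x13.)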
*/ sc->tulip_enaddr[5] += idx1; leaf_offset = saip->sai_leaf_offset_lowbyte + saip->sai_leaf_offset_highbyte * 256; dp = sc->tulip_rombuf + leaf_offset; sc->tulip_conntype = (tulip_srom_connection_t) (dp[0] + dp[1] * 256); dp += 2; for (idx2 = 0;; idx2++) { if (tulip_srom_conninfo[idx2].sc_type == sc->tulip_conntype || tulip_srom_conninfo[idx2].sc_type == TULIP_SROM_CONNTYPE_NOT_USED) break; } sc->tulip_connidx = idx2; if (sc->tulip_chipid == TULIP_21041) { blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { tulip_media_t media; data = *dp++; srom_media = (tulip_srom_media_t) (data & 0x3F); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media != TULIP_MEDIA_UNKNOWN) { if (data & TULIP_SROM_21041_EXTENDED) { mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; mi++; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; break; } default: { break; } } } } if (data & TULIP_SROM_21041_EXTENDED) dp += 6; } #ifdef notdef if (blocks == 0) { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; } #endif } else { unsigned length, type; tulip_media_t gp_media = TULIP_MEDIA_UNKNOWN; if (sc->tulip_features & TULIP_HAVE_GPR) sc->tulip_gpinit = *dp++; blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { const u_int8_t *ep; if ((*dp & 0x80) == 0) { length = 4; type = 0; } else { length = (*dp++ & 0x7f) - 1; type = *dp++ & 0x3f; } ep = dp + length; switch (type & 0x3f) { case 0: { /* 21140[A] GPR block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_GPR; sc->tulip_mediums[media] = mi; mi->mi_gpdata = dp[1]; if (media > gp_media && !TULIP_IS_MEDIA_FD(media)) { sc->tulip_gpdata = mi->mi_gpdata; gp_media = media; } data = dp[2] + dp[3] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { #if 0 mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; #endif mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } case 1: { /* 21140[A] MII block */ const unsigned phyno = *dp++; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += mi->mi_reset_length; /* * Before we probe for a PHY, use the GPR information * to select it. If we don't, it may be inaccessible. 
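			 * The reset sequence and then the GP sequence
			 * from the SROM leaf are replayed below one byte
			 * at a time into csr_gp, with a short delay
			 * between writes.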
*/ TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpinit|TULIP_GP_PINSET); for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx3]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx3]); } /* * At least write something! */ if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy %d\n", TULIP_PRINTF_ARGS, phyno); break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 2: { /* 2114[23] SIA block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; if (type & 0x40) { mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; dp += 6; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, BNC); break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, AUI); break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET); break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET_FD); break; } default: { goto bad_media; } } } mi->mi_sia_gp_control = (dp[0] + dp[1] * 256) << 16; mi->mi_sia_gp_data = (dp[2] + dp[3] * 256) << 16; mi++; bad_media: break; } case 3: { /* 2114[23] MII PHY block */ const unsigned phyno = *dp++; const u_int8_t *dp0; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; dp0 = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp0 = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_sia_general, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if 
(mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy %d\n", TULIP_PRINTF_ARGS, phyno); break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; mi->mi_mii_interrupt = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 4: { /* 21143 SYM block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SYM; sc->tulip_mediums[media] = mi; mi->mi_gpcontrol = (dp[1] + dp[2] * 256) << 16; mi->mi_gpdata = (dp[3] + dp[4] * 256) << 16; data = dp[5] + dp[6] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } #if 0 case 5: { /* 21143 Reset block */ mi->mi_type = TULIP_MEDIAINFO_RESET; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; mi++; break; } #endif default: { } } dp = ep; } } return mi - sc->tulip_mediainfo; } static const struct { void (*vendor_identify_nic)(tulip_softc_t * const sc); unsigned char vendor_oui[3]; } tulip_vendors[] = { { tulip_identify_dec_nic, { 0x08, 0x00, 0x2B } }, { tulip_identify_dec_nic, { 0x00, 0x00, 0xF8 } }, { tulip_identify_smc_nic, { 0x00, 0x00, 0xC0 } }, { tulip_identify_smc_nic, { 0x00, 0xE0, 0x29 } }, { tulip_identify_znyx_nic, { 0x00, 0xC0, 0x95 } }, { tulip_identify_cogent_nic, { 0x00, 0x00, 0x92 } }, { tulip_identify_asante_nic, { 0x00, 0x00, 0x94 } }, { tulip_identify_accton_nic, { 0x00, 0x00, 0xE8 } }, { NULL } }; /* * This deals with the vagaries of the address roms and the * brain-deadness that various vendors commit in using them. 
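 *
 * Returns 0 on success and a negative value on failure: -1 for a ROM
 * checksum mismatch, -2 if the address isn't mirrored where the DEC
 * format expects it, -3 if the DEC test pattern is missing, -4 for an
 * unusable vendor-specific format, -5 if a slaved ROM fails to decode,
 * and -6 if no board support could be selected.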
 */
static int
tulip_read_macaddr(
    tulip_softc_t * const sc)
{
    unsigned cksum, rom_cksum, idx;
    u_int32_t csr;
    unsigned char tmpbuf[8];
    static const u_char testpat[] = { 0xFF, 0, 0x55, 0xAA, 0xFF, 0, 0x55, 0xAA };

    sc->tulip_connidx = TULIP_SROM_LASTCONNIDX;

    if (sc->tulip_chipid == TULIP_21040) {
	TULIP_CSR_WRITE(sc, csr_enetrom, 1);
	for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) {
	    int cnt = 0;
	    while (((csr = TULIP_CSR_READ(sc, csr_enetrom)) & 0x80000000L) && cnt < 10000)
		cnt++;
	    sc->tulip_rombuf[idx] = csr & 0xFF;
	}
	sc->tulip_boardsw = &tulip_21040_boardsw;
#if defined(TULIP_EISA)
    } else if (sc->tulip_chipid == TULIP_DE425) {
	int cnt;
	for (idx = 0, cnt = 0; idx < sizeof(testpat) && cnt < 32; cnt++) {
	    tmpbuf[idx] = TULIP_CSR_READBYTE(sc, csr_enetrom);
	    if (tmpbuf[idx] == testpat[idx])
		++idx;
	    else
		idx = 0;
	}
	for (idx = 0; idx < 32; idx++)
	    sc->tulip_rombuf[idx] = TULIP_CSR_READBYTE(sc, csr_enetrom);
	sc->tulip_boardsw = &tulip_21040_boardsw;
#endif /* TULIP_EISA */
    } else {
	if (sc->tulip_chipid == TULIP_21041) {
	    /*
	     * Thankfully all 21041's act the same.
	     */
	    sc->tulip_boardsw = &tulip_21041_boardsw;
	} else {
	    /*
	     * Assume all 21140 boards are compatible with the
	     * DEC 10/100 evaluation board.  Not really valid but
	     * it's the best we can do until everyone switches to
	     * the new SROM format.
	     */
	    sc->tulip_boardsw = &tulip_21140_eb_boardsw;
	}
	tulip_srom_read(sc);
	if (tulip_srom_crcok(sc->tulip_rombuf)) {
	    /*
	     * SROM CRC is valid, therefore it must be in the
	     * new format.
	     */
	    sc->tulip_features |= TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM;
	} else if (sc->tulip_rombuf[126] == 0xff && sc->tulip_rombuf[127] == 0xFF) {
	    /*
	     * No checksum is present.  See if the SROM id checks out;
	     * the first 18 bytes should be 0 followed by a 1 followed
	     * by the number of adapters (which we don't deal with yet).
	     */
	    for (idx = 0; idx < 18; idx++) {
		if (sc->tulip_rombuf[idx] != 0)
		    break;
	    }
	    if (idx == 18 && sc->tulip_rombuf[18] == 1 && sc->tulip_rombuf[19] != 0)
		sc->tulip_features |= TULIP_HAVE_ISVSROM;
	} else if (sc->tulip_chipid >= TULIP_21142) {
	    sc->tulip_features |= TULIP_HAVE_ISVSROM;
	    sc->tulip_boardsw = &tulip_2114x_isv_boardsw;
	}
	if ((sc->tulip_features & TULIP_HAVE_ISVSROM) && tulip_srom_decode(sc)) {
	    if (sc->tulip_chipid != TULIP_21041)
		sc->tulip_boardsw = &tulip_2114x_isv_boardsw;
	    /*
	     * If the SROM specifies more than one adapter, tag this as a
	     * BASE rom.
	     */
	    if (sc->tulip_rombuf[19] > 1)
		sc->tulip_features |= TULIP_HAVE_BASEROM;
	    if (sc->tulip_boardsw == NULL)
		return -6;
	    goto check_oui;
	}
    }

    if (bcmp(&sc->tulip_rombuf[0], &sc->tulip_rombuf[16], 8) != 0) {
	/*
	 * Some folks don't use the standard ethernet rom format
	 * but instead just put the address in the first 6 bytes
	 * of the rom and let the rest be all 0xffs.  (Can we say
	 * ZNYX???) (well sometimes they put in a checksum so we'll
	 * start at 8).
	 */
	for (idx = 8; idx < 32; idx++) {
	    if (sc->tulip_rombuf[idx] != 0xFF)
		return -4;
	}
	/*
	 * Make sure the address is not multicast or locally assigned
	 * and that the OUI is not 00-00-00.
	 */
	if ((sc->tulip_rombuf[0] & 3) != 0)
	    return -4;
	if (sc->tulip_rombuf[0] == 0 && sc->tulip_rombuf[1] == 0
		&& sc->tulip_rombuf[2] == 0)
	    return -4;
	bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6);
	sc->tulip_features |= TULIP_HAVE_OKROM;
	goto check_oui;
    } else {
	/*
	 * A number of makers of multiport boards (ZNYX and Cogent)
	 * only put on one address ROM on their 21040 boards.
	 * So if the ROM is all zeros (or all 0xFFs), look at the
	 * previously configured boards (as long as they are on the same
	 * PCI bus and the bus number is non-zero) until we find the
	 * master board with an address ROM.  We then use its address ROM
	 * as the base for this board.  (We add our relative board number
	 * to the last byte of its address.)
	 */
	for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) {
	    if (sc->tulip_rombuf[idx] != 0 && sc->tulip_rombuf[idx] != 0xFF)
		break;
	}
	if (idx == sizeof(sc->tulip_rombuf)) {
	    int root_unit;
	    tulip_softc_t *root_sc = NULL;
	    for (root_unit = sc->tulip_unit - 1; root_unit >= 0; root_unit--) {
		root_sc = TULIP_UNIT_TO_SOFTC(root_unit);
		if (root_sc == NULL
			|| (root_sc->tulip_features & (TULIP_HAVE_OKROM|TULIP_HAVE_SLAVEDROM)) == TULIP_HAVE_OKROM)
		    break;
		root_sc = NULL;
	    }
	    if (root_sc != NULL
		    && (root_sc->tulip_features & TULIP_HAVE_BASEROM)
		    && root_sc->tulip_chipid == sc->tulip_chipid
		    && root_sc->tulip_pci_busno == sc->tulip_pci_busno) {
		sc->tulip_features |= TULIP_HAVE_SLAVEDROM;
		sc->tulip_boardsw = root_sc->tulip_boardsw;
		strcpy(sc->tulip_boardid, root_sc->tulip_boardid);
		if (sc->tulip_boardsw->bd_type == TULIP_21140_ISV) {
		    bcopy(root_sc->tulip_rombuf, sc->tulip_rombuf,
			  sizeof(sc->tulip_rombuf));
		    if (!tulip_srom_decode(sc))
			return -5;
		} else {
		    bcopy(root_sc->tulip_enaddr, sc->tulip_enaddr, 6);
		    sc->tulip_enaddr[5] += sc->tulip_unit - root_sc->tulip_unit;
		}
		/*
		 * Now for a truly disgusting kludge: all 4 21040s on
		 * the ZX314 share the same INTA line so the mapping
		 * setup by the BIOS on the PCI bridge is worthless.
		 * Rather than reprogramming the value in the config
		 * register, we will handle this internally.
		 */
		if (root_sc->tulip_features & TULIP_HAVE_SHAREDINTR) {
		    sc->tulip_slaves = root_sc->tulip_slaves;
		    root_sc->tulip_slaves = sc;
		    sc->tulip_features |= TULIP_HAVE_SLAVEDINTR;
		}
		return 0;
	    }
	}
    }

    /*
     * This is the standard DEC address ROM test.
     */
    if (bcmp(&sc->tulip_rombuf[24], testpat, 8) != 0)
	return -3;
    tmpbuf[0] = sc->tulip_rombuf[15]; tmpbuf[1] = sc->tulip_rombuf[14];
    tmpbuf[2] = sc->tulip_rombuf[13]; tmpbuf[3] = sc->tulip_rombuf[12];
    tmpbuf[4] = sc->tulip_rombuf[11]; tmpbuf[5] = sc->tulip_rombuf[10];
    tmpbuf[6] = sc->tulip_rombuf[9];  tmpbuf[7] = sc->tulip_rombuf[8];
    if (bcmp(&sc->tulip_rombuf[0], tmpbuf, 8) != 0)
	return -2;

    bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6);

    cksum = *(u_int16_t *) &sc->tulip_enaddr[0];
    cksum *= 2;
    if (cksum > 65535) cksum -= 65535;
    cksum += *(u_int16_t *) &sc->tulip_enaddr[2];
    if (cksum > 65535) cksum -= 65535;
    cksum *= 2;
    if (cksum > 65535) cksum -= 65535;
    cksum += *(u_int16_t *) &sc->tulip_enaddr[4];
    if (cksum >= 65535) cksum -= 65535;

    rom_cksum = *(u_int16_t *) &sc->tulip_rombuf[6];
    if (cksum != rom_cksum)
	return -1;

  check_oui:
    /*
     * Check for various boards based on OUI.  Did I say braindead?
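     * The tulip_vendors table above maps the OUI of the address we just
     * recovered to a vendor-specific identify routine (DEC, SMC, ZNYX,
     * Cogent, Asante, Accton).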
*/ for (idx = 0; tulip_vendors[idx].vendor_identify_nic != NULL; idx++) { if (bcmp((caddr_t) sc->tulip_enaddr, (caddr_t) tulip_vendors[idx].vendor_oui, 3) == 0) { (*tulip_vendors[idx].vendor_identify_nic)(sc); break; } } sc->tulip_features |= TULIP_HAVE_OKROM; return 0; } #if defined(IFM_ETHER) static void tulip_ifmedia_add( tulip_softc_t * const sc) { tulip_media_t media; int medias = 0; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { ifmedia_add(&sc->tulip_ifmedia, tulip_media_to_ifmedia[media], 0, 0); medias++; } } if (medias == 0) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE); } else if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO); } else { ifmedia_set(&sc->tulip_ifmedia, tulip_media_to_ifmedia[sc->tulip_media]); sc->tulip_flags |= TULIP_PRINTMEDIA; tulip_linkup(sc, sc->tulip_media); } } static int tulip_ifmedia_change( struct ifnet * const ifp) { tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_media = TULIP_MEDIA_UNKNOWN; if (IFM_SUBTYPE(sc->tulip_ifmedia.ifm_media) != IFM_AUTO) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && sc->tulip_ifmedia.ifm_media == tulip_media_to_ifmedia[media]) { sc->tulip_flags |= TULIP_PRINTMEDIA; sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); return 0; } } } sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_WANTRXACT); tulip_reset(sc); tulip_init(sc); return 0; } /* * Media status callback */ static void tulip_ifmedia_status( struct ifnet * const ifp, struct ifmediareq *req) { tulip_softc_t *sc = TULIP_IFP_TO_SOFTC(ifp); #if defined(__bsdi__) if (sc->tulip_mii.mii_instance != 0) { mii_pollstat(&sc->tulip_mii); req->ifm_active = sc->tulip_mii.mii_media_active; req->ifm_status = sc->tulip_mii.mii_media_status; return; } #endif if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) return; req->ifm_status = IFM_AVALID; if (sc->tulip_flags & TULIP_LINKUP) req->ifm_status |= IFM_ACTIVE; req->ifm_active = tulip_media_to_ifmedia[sc->tulip_media]; } #endif static void tulip_addr_filter( tulip_softc_t * const sc) { #if defined(__FreeBSD__) && __FreeBSD__ >= 3 struct ifmultiaddr *ifma; u_char *addrp; #else struct ether_multistep step; struct ether_multi *enm; #endif int multicnt; sc->tulip_flags &= ~(TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY|TULIP_ALLMULTI); sc->tulip_flags |= TULIP_WANTSETUP|TULIP_WANTTXSTART; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; #if defined(IFF_ALLMULTI) sc->tulip_if.if_flags &= ~IFF_ALLMULTI; #endif #if defined(__FreeBSD__) && __FreeBSD__ >= 3 multicnt = 0; for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family == AF_LINK) multicnt++; } #else multicnt = sc->tulip_multicnt; #endif sc->tulip_if.if_start = tulip_ifstart; /* so the setup packet gets queued */ if (multicnt > 14) { u_int32_t *sp = sc->tulip_setupdata; unsigned hash; /* * Some early passes of the 21140 have broken implementations of * hash-perfect mode. 
When we get too many multicasts for perfect * filtering with these chips, we need to switch into hash-only * mode (this is better than all-multicast on networks with lots * of multicast traffic). */ if (sc->tulip_features & TULIP_HAVE_BROKEN_HASH) sc->tulip_flags |= TULIP_WANTHASHONLY; else sc->tulip_flags |= TULIP_WANTHASHPERFECT; /* * If we have more than 14 multicasts, we have to * go into hash-perfect mode (512-bit multicast * hash and one perfect hardware address). */ bzero(sc->tulip_setupdata, sizeof(sc->tulip_setupdata)); #if defined(__FreeBSD__) && __FreeBSD__ >= 3 for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; hash = tulip_mchash(LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); sp[hash >> 4] |= 1 << (hash & 0xF); } #else ETHER_FIRST_MULTI(step, TULIP_ETHERCOM(sc), enm); while (enm != NULL) { if (bcmp(enm->enm_addrlo, enm->enm_addrhi, 6) == 0) { hash = tulip_mchash(enm->enm_addrlo); sp[hash >> 4] |= 1 << (hash & 0xF); } else { sc->tulip_flags |= TULIP_ALLMULTI; sc->tulip_flags &= ~(TULIP_WANTHASHONLY|TULIP_WANTHASHPERFECT); break; } ETHER_NEXT_MULTI(step, enm); } #endif /* * No reason to use a hash if we are going to be * receiving every multicast. */ if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { hash = tulip_mchash(etherbroadcastaddr); sp[hash >> 4] |= 1 << (hash & 0xF); if (sc->tulip_flags & TULIP_WANTHASHONLY) { hash = tulip_mchash(sc->tulip_enaddr); sp[hash >> 4] |= 1 << (hash & 0xF); } else { sp[39] = ((u_int16_t *) sc->tulip_enaddr)[0]; sp[40] = ((u_int16_t *) sc->tulip_enaddr)[1]; sp[41] = ((u_int16_t *) sc->tulip_enaddr)[2]; } } } if ((sc->tulip_flags & (TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY)) == 0) { u_int32_t *sp = sc->tulip_setupdata; int idx = 0; if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { /* * Otherwise we can get perfect filtering for 16 addresses. */ #if defined(__FreeBSD__) && __FreeBSD__ >= 3 for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; addrp = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); *sp++ = ((u_int16_t *) addrp)[0]; *sp++ = ((u_int16_t *) addrp)[1]; *sp++ = ((u_int16_t *) addrp)[2]; idx++; } #else ETHER_FIRST_MULTI(step, TULIP_ETHERCOM(sc), enm); for (; enm != NULL; idx++) { if (bcmp(enm->enm_addrlo, enm->enm_addrhi, 6) == 0) { *sp++ = ((u_int16_t *) enm->enm_addrlo)[0]; *sp++ = ((u_int16_t *) enm->enm_addrlo)[1]; *sp++ = ((u_int16_t *) enm->enm_addrlo)[2]; } else { sc->tulip_flags |= TULIP_ALLMULTI; break; } ETHER_NEXT_MULTI(step, enm); } #endif /* * Add the broadcast address. */ idx++; *sp++ = 0xFFFF; *sp++ = 0xFFFF; *sp++ = 0xFFFF; } /* * Pad the rest with our hardware address. */ for (; idx < 16; idx++) { *sp++ = ((u_int16_t *) sc->tulip_enaddr)[0]; *sp++ = ((u_int16_t *) sc->tulip_enaddr)[1]; *sp++ = ((u_int16_t *) sc->tulip_enaddr)[2]; } } #if defined(IFF_ALLMULTI) if (sc->tulip_flags & TULIP_ALLMULTI) sc->tulip_if.if_flags |= IFF_ALLMULTI; #endif } static void tulip_reset( tulip_softc_t * const sc) { tulip_ringinfo_t *ri; tulip_desc_t *di; u_int32_t inreset = (sc->tulip_flags & TULIP_INRESET); /* * Brilliant. Simply brilliant. When switching modes/speeds * on a 2114*, you need to set the appropriate MII/PCS/SCL/PS * bits in CSR6 and then do a software reset to get the 21140 * to properly reset its internal pathways to the right places. * Grrrr.
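 *
 * N.B. The ordering this imposes, sketched for clarity (the code just
 * below does exactly this; illustrative only):
 *
 *	(*sc->tulip_boardsw->bd_media_preset)(sc);	-- set MII/PCS/SCL/PS
 *	TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET);
 *	DELAY(10);					-- let the reset settle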
*/ if (sc->tulip_boardsw->bd_media_preset != NULL) (*sc->tulip_boardsw->bd_media_preset)(sc); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyway) */ if (!inreset) { sc->tulip_flags |= TULIP_INRESET; sc->tulip_flags &= ~(TULIP_NEEDRESET|TULIP_RXBUFSLOW); sc->tulip_if.if_flags &= ~IFF_OACTIVE; } TULIP_CSR_WRITE(sc, csr_txlist, TULIP_KVATOPHYS(sc, &sc->tulip_txinfo.ri_first[0])); TULIP_CSR_WRITE(sc, csr_rxlist, TULIP_KVATOPHYS(sc, &sc->tulip_rxinfo.ri_first[0])); TULIP_CSR_WRITE(sc, csr_busmode, (1 << (TULIP_BURSTSIZE(sc->tulip_unit) + 8)) |TULIP_BUSMODE_CACHE_ALIGN8 |TULIP_BUSMODE_READMULTIPLE |(BYTE_ORDER != LITTLE_ENDIAN ? TULIP_BUSMODE_BIGENDIAN : 0)); sc->tulip_txtimer = 0; sc->tulip_txq.ifq_maxlen = TULIP_TXDESCS; /* * Free all the mbufs that were on the transmit ring. */ for (;;) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_txq, m); if (m == NULL) break; m_freem(m); } ri = &sc->tulip_txinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) di->d_status = 0; /* * We need to collect all the mbufs that were on the * receive ring before we reinit it either to put * them back on or to know if we have to allocate * more. */ ri = &sc->tulip_rxinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) { di->d_status = 0; di->d_length1 = 0; di->d_addr1 = 0; di->d_length2 = 0; di->d_addr2 = 0; } for (;;) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_rxq, m); if (m == NULL) break; m_freem(m); } /* * If tulip_reset is being called recursively, exit quickly knowing * that when the outer tulip_reset returns all the right stuff will * have happened.
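 *
 * N.B. The guard is the usual reentrancy idiom, sketched here
 * (illustrative only):
 *
 *	if (inreset)		-- nested tulip_reset?
 *	    return;		-- let the outermost call finish the job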
*/ if (inreset) return; sc->tulip_intrmask |= TULIP_STS_NORMALINTR|TULIP_STS_RXINTR|TULIP_STS_TXINTR |TULIP_STS_ABNRMLINTR|TULIP_STS_SYSERROR|TULIP_STS_TXSTOPPED |TULIP_STS_TXUNDERFLOW|TULIP_STS_TXBABBLE|TULIP_STS_LINKFAIL |TULIP_STS_RXSTOPPED; if ((sc->tulip_flags & TULIP_DEVICEPROBE) == 0) (*sc->tulip_boardsw->bd_media_select)(sc); #if defined(TULIP_DEBUG) if ((sc->tulip_flags & TULIP_NEEDRESET) == TULIP_NEEDRESET) printf(TULIP_PRINTF_FMT ": tulip_reset: additional reset needed?!?\n", TULIP_PRINTF_ARGS); #endif tulip_media_print(sc); if (sc->tulip_features & TULIP_HAVE_DUALSENSE) TULIP_CSR_WRITE(sc, csr_sia_status, TULIP_CSR_READ(sc, csr_sia_status)); sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_WANTSETUP|TULIP_INRESET |TULIP_RXACT); tulip_addr_filter(sc); } static void tulip_init( tulip_softc_t * const sc) { if (sc->tulip_if.if_flags & IFF_UP) { if ((sc->tulip_if.if_flags & IFF_RUNNING) == 0) { /* initialize the media */ tulip_reset(sc); } sc->tulip_if.if_flags |= IFF_RUNNING; if (sc->tulip_if.if_flags & IFF_PROMISC) { sc->tulip_flags |= TULIP_PROMISC; sc->tulip_cmdmode |= TULIP_CMD_PROMISCUOUS; sc->tulip_intrmask |= TULIP_STS_TXINTR; } else { sc->tulip_flags &= ~TULIP_PROMISC; sc->tulip_cmdmode &= ~TULIP_CMD_PROMISCUOUS; if (sc->tulip_flags & TULIP_ALLMULTI) { sc->tulip_cmdmode |= TULIP_CMD_ALLMULTI; } else { sc->tulip_cmdmode &= ~TULIP_CMD_ALLMULTI; } } sc->tulip_cmdmode |= TULIP_CMD_TXRUN; if ((sc->tulip_flags & (TULIP_TXPROBE_ACTIVE|TULIP_WANTSETUP)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; } else { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; } TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); } else { sc->tulip_if.if_flags &= ~IFF_RUNNING; tulip_reset(sc); } } static void tulip_rx_intr( tulip_softc_t * const sc) { TULIP_PERFSTART(rxintr) tulip_ringinfo_t * const ri = &sc->tulip_rxinfo; struct ifnet * const ifp = &sc->tulip_if; int fillok = 1; #if defined(TULIP_DEBUG) int cnt = 0; #endif for (;;) { TULIP_PERFSTART(rxget) struct ether_header eh; tulip_desc_t *eop = ri->ri_nextin; int total_len = 0, last_offset = 0; struct mbuf *ms = NULL, *me = NULL; int accept = 0; if (fillok && sc->tulip_rxq.ifq_len < TULIP_RXQ_TARGET) goto queue_mbuf; #if defined(TULIP_DEBUG) if (cnt == ri->ri_max) break; #endif /* * If the TULIP has no descriptors, there can't be any receive * descriptors to process. */ if (eop == ri->ri_nextout) break; /* * 90% of the packets will fit in one descriptor. So we optimize * for that case. */ if ((((volatile tulip_desc_t *) eop)->d_status & (TULIP_DSTS_OWNER|TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) == (TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) { IF_DEQUEUE(&sc->tulip_rxq, ms); me = ms; } else { /* * If still owned by the TULIP, don't touch it. */ if (((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_OWNER) break; /* * It is possible (though improbable unless the BIG_PACKET support * is enabled or MCLBYTES < 1518) for a received packet to cross * more than one receive descriptor. 
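 *
 * N.B. How the status bits delimit a packet, sketched without the
 * ownership tests the real loop below also makes (illustrative only):
 *
 *	both RxFIRSTDESC and RxLASTDESC set:  one-descriptor packet
 *	otherwise: walk forward until RxLASTDESC is found, e.g.
 *
 *	while ((eop->d_status & TULIP_DSTS_RxLASTDESC) == 0)
 *	    eop = (eop + 1 == ri->ri_last) ? ri->ri_first : eop + 1;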
*/ while ((((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_RxLASTDESC) == 0) { if (++eop == ri->ri_last) eop = ri->ri_first; if (eop == ri->ri_nextout || ((((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_OWNER))) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxget); TULIP_PERFEND(rxintr); return; } total_len++; } /* * Dequeue the first buffer for the start of the packet. Hopefully * this will be the only one we need to dequeue. However, if the * packet consumed multiple descriptors, then we need to dequeue * those buffers and chain to the starting mbuf. All buffers but * the last buffer have the same length so we can set that now. * (we add to last_offset instead of multiplying since we normally * won't go into the loop and thereby saving ourselves from * doing a multiplication by 0 in the normal case). */ IF_DEQUEUE(&sc->tulip_rxq, ms); for (me = ms; total_len > 0; total_len--) { me->m_len = TULIP_RX_BUFLEN; last_offset += TULIP_RX_BUFLEN; IF_DEQUEUE(&sc->tulip_rxq, me->m_next); me = me->m_next; } } /* * Now get the size of the received packet (minus the CRC). */ total_len = ((eop->d_status >> 16) & 0x7FFF) - 4; if ((sc->tulip_flags & TULIP_RXIGNORE) == 0 && ((eop->d_status & TULIP_DSTS_ERRSUM) == 0 #ifdef BIG_PACKET || (total_len <= sc->tulip_if.if_mtu + sizeof(struct ether_header) && (eop->d_status & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxRUNT| TULIP_DSTS_RxCOLLSEEN|TULIP_DSTS_RxBADCRC| TULIP_DSTS_RxOVERFLOW)) == 0) #endif )) { me->m_len = total_len - last_offset; eh = *mtod(ms, struct ether_header *); #if NBPFILTER > 0 if (sc->tulip_bpf != NULL) if (me == ms) TULIP_BPF_TAP(sc, mtod(ms, caddr_t), total_len); else TULIP_BPF_MTAP(sc, ms); #endif sc->tulip_flags |= TULIP_RXACT; if ((sc->tulip_flags & (TULIP_PROMISC|TULIP_HASHONLY)) && (eh.ether_dhost[0] & 1) == 0 && !TULIP_ADDREQUAL(eh.ether_dhost, sc->tulip_enaddr)) goto next; accept = 1; total_len -= sizeof(struct ether_header); } else { ifp->if_ierrors++; if (eop->d_status & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxOVERFLOW|TULIP_DSTS_RxWATCHDOG)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; } else { const char *error = NULL; if (eop->d_status & TULIP_DSTS_RxTOOLONG) { sc->tulip_dot3stats.dot3StatsFrameTooLongs++; error = "frame too long"; } if (eop->d_status & TULIP_DSTS_RxBADCRC) { if (eop->d_status & TULIP_DSTS_RxDRBBLBIT) { sc->tulip_dot3stats.dot3StatsAlignmentErrors++; error = "alignment error"; } else { sc->tulip_dot3stats.dot3StatsFCSErrors++; error = "bad crc"; } } if (error != NULL && (sc->tulip_flags & TULIP_NOMESSAGES) == 0) { printf(TULIP_PRINTF_FMT ": receive: " TULIP_EADDR_FMT ": %s\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(mtod(ms, u_char *) + 6), error); sc->tulip_flags |= TULIP_NOMESSAGES; } } } next: #if defined(TULIP_DEBUG) cnt++; #endif ifp->if_ipackets++; if (++eop == ri->ri_last) eop = ri->ri_first; ri->ri_nextin = eop; queue_mbuf: /* * Either we are priming the TULIP with mbufs (ms == NULL) * or we are about to accept an mbuf for the upper layers * so we need to allocate an mbuf to replace it. If we * can't replace it, send it up anyway. This may cause * us to drop packets in the future but that's better than * being caught in livelock. * * Note that if this packet crossed multiple descriptors * we don't even try to reallocate all the mbufs here. * Instead we rely on the test at the beginning of * the loop to refill for the extra consumed mbufs.
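 *
 * N.B. The replace-or-keep-going policy below, in outline (illustrative
 * only):
 *
 *	MGETHDR(m0, M_DONTWAIT, MT_DATA);	-- try for a fresh mbuf
 *	if (m0 != NULL)
 *	    MCLGET(m0, M_DONTWAIT);		-- back it with a cluster
 *	-- on failure the packet still goes up; the ring simply runs one
 *	-- buffer short and TULIP_RXBUFSLOW asks for a refill later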
*/ if (accept || ms == NULL) { struct mbuf *m0; MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 != NULL) { #if defined(TULIP_COPY_RXDATA) if (!accept || total_len >= MHLEN) { #endif MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { m_freem(m0); m0 = NULL; } #if defined(TULIP_COPY_RXDATA) } #endif } if (accept #if defined(TULIP_COPY_RXDATA) && m0 != NULL #endif ) { #if defined(__bsdi__) eh.ether_type = ntohs(eh.ether_type); #endif #if !defined(TULIP_COPY_RXDATA) ms->m_data += sizeof(struct ether_header); ms->m_len -= sizeof(struct ether_header); ms->m_pkthdr.len = total_len; ms->m_pkthdr.rcvif = ifp; ether_input(ifp, &eh, ms); #else #ifdef BIG_PACKET #error BIG_PACKET is incompatible with TULIP_COPY_RXDATA #endif if (ms == me) bcopy(mtod(ms, caddr_t) + sizeof(struct ether_header), mtod(m0, caddr_t), total_len); else m_copydata(ms, 0, total_len, mtod(m0, caddr_t)); m0->m_len = m0->m_pkthdr.len = total_len; m0->m_pkthdr.rcvif = ifp; ether_input(ifp, &eh, m0); m0 = ms; #endif } ms = m0; } if (ms == NULL) { /* * Couldn't allocate a new buffer. Don't bother * trying to replenish the receive queue. */ fillok = 0; sc->tulip_flags |= TULIP_RXBUFSLOW; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxlowbufs++; #endif TULIP_PERFEND(rxget); continue; } /* * Now give the buffer(s) to the TULIP and save them in our * receive queue. */ do { ri->ri_nextout->d_length1 = TULIP_RX_BUFLEN; ri->ri_nextout->d_addr1 = TULIP_KVATOPHYS(sc, mtod(ms, caddr_t)); ri->ri_nextout->d_status = TULIP_DSTS_OWNER; if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; me = ms->m_next; ms->m_next = NULL; IF_ENQUEUE(&sc->tulip_rxq, ms); } while ((ms = me) != NULL); if (sc->tulip_rxq.ifq_len >= TULIP_RXQ_TARGET) sc->tulip_flags &= ~TULIP_RXBUFSLOW; TULIP_PERFEND(rxget); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxintr); } static int tulip_tx_intr( tulip_softc_t * const sc) { TULIP_PERFSTART(txintr) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; struct mbuf *m; int xmits = 0; int descs = 0; while (ri->ri_free < ri->ri_max) { u_int32_t d_flag; if (((volatile tulip_desc_t *) ri->ri_nextin)->d_status & TULIP_DSTS_OWNER) break; d_flag = ri->ri_nextin->d_flag; if (d_flag & TULIP_DFLAG_TxLASTSEG) { if (d_flag & TULIP_DFLAG_TxSETUPPKT) { /* * We've just finished processing a setup packet. * Mark that we finished it. If there's not * another pending, start up the TULIP receiver. * Make sure we ack the RXSTOPPED so we won't get * an abnormal interrupt indication.
*/ sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_HASHONLY); if (ri->ri_nextin->d_flag & TULIP_DFLAG_TxINVRSFILT) sc->tulip_flags |= TULIP_HASHONLY; if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } else { const u_int32_t d_status = ri->ri_nextin->d_status; IF_DEQUEUE(&sc->tulip_txq, m); if (m != NULL) { #if NBPFILTER > 0 if (sc->tulip_bpf != NULL) TULIP_BPF_MTAP(sc, m); #endif m_freem(m); #if defined(TULIP_DEBUG) } else { printf(TULIP_PRINTF_FMT ": tx_intr: failed to dequeue mbuf?!?\n", TULIP_PRINTF_ARGS); #endif } if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { tulip_mediapoll_event_t event = TULIP_MEDIAPOLL_TXPROBE_OK; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxEXCCOLL)) { #if defined(TULIP_DEBUG) if (d_status & TULIP_DSTS_TxNOCARR) sc->tulip_dbg.dbg_txprobe_nocarr++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dbg.dbg_txprobe_exccoll++; #endif event = TULIP_MEDIAPOLL_TXPROBE_FAILED; } (*sc->tulip_boardsw->bd_media_poll)(sc, event); /* * Escape from the loop before media poll has reset the TULIP! */ break; } else { xmits++; if (d_status & TULIP_DSTS_ERRSUM) { sc->tulip_if.if_oerrors++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dot3stats.dot3StatsExcessiveCollisions++; if (d_status & TULIP_DSTS_TxLATECOLL) sc->tulip_dot3stats.dot3StatsLateCollisions++; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxCARRLOSS)) sc->tulip_dot3stats.dot3StatsCarrierSenseErrors++; if (d_status & (TULIP_DSTS_TxUNDERFLOW|TULIP_DSTS_TxBABBLE)) sc->tulip_dot3stats.dot3StatsInternalMacTransmitErrors++; if (d_status & TULIP_DSTS_TxUNDERFLOW) sc->tulip_dot3stats.dot3StatsInternalTransmitUnderflows++; if (d_status & TULIP_DSTS_TxBABBLE) sc->tulip_dot3stats.dot3StatsInternalTransmitBabbles++; } else { u_int32_t collisions = (d_status & TULIP_DSTS_TxCOLLMASK) >> TULIP_DSTS_V_TxCOLLCNT; sc->tulip_if.if_collisions += collisions; if (collisions == 1) sc->tulip_dot3stats.dot3StatsSingleCollisionFrames++; else if (collisions > 1) sc->tulip_dot3stats.dot3StatsMultipleCollisionFrames++; else if (d_status & TULIP_DSTS_TxDEFERRED) sc->tulip_dot3stats.dot3StatsDeferredTransmissions++; /* * SQE is only valid for 10baseT/BNC/AUI when not * running in full-duplex. In order to speed up the * test, the corresponding bit in tulip_flags needs to * be set as well to get us to count SQE Test Errors. */ if (d_status & TULIP_DSTS_TxNOHRTBT & sc->tulip_flags) sc->tulip_dot3stats.dot3StatsSQETestErrors++; } } } if (++ri->ri_nextin == ri->ri_last) ri->ri_nextin = ri->ri_first; ri->ri_free++; descs++; if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) sc->tulip_if.if_flags &= ~IFF_OACTIVE; } /* * If there is nothing left to transmit, disable the timer; * else, if we made progress, reset the timer back to 2 ticks.
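 *
 * N.B. In outline (illustrative only):
 *
 *	if (ring fully reclaimed, or probing)	txtimer = 0;
 *	else if (xmits > 0)			txtimer = TULIP_TXTIMER;
 *
 * tulip_ifwatchdog() counts tulip_txtimer down and forces a reset when
 * it expires.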
*/ if (ri->ri_free == ri->ri_max || (sc->tulip_flags & TULIP_TXPROBE_ACTIVE)) sc->tulip_txtimer = 0; else if (xmits > 0) sc->tulip_txtimer = TULIP_TXTIMER; sc->tulip_if.if_opackets += xmits; TULIP_PERFEND(txintr); return descs; } static void tulip_print_abnormal_interrupt( tulip_softc_t * const sc, u_int32_t csr) { const char * const *msgp = tulip_status_bits; const char *sep; u_int32_t mask; const char thrsh[] = "72|128\0\0\096|256\0\0\0128|512\0\0160|1024\0"; csr &= (1 << (sizeof(tulip_status_bits)/sizeof(tulip_status_bits[0]))) - 1; printf(TULIP_PRINTF_FMT ": abnormal interrupt:", TULIP_PRINTF_ARGS); for (sep = " ", mask = 1; mask <= csr; mask <<= 1, msgp++) { if ((csr & mask) && *msgp != NULL) { printf("%s%s", sep, *msgp); if (mask == TULIP_STS_TXUNDERFLOW && (sc->tulip_flags & TULIP_NEWTXTHRESH)) { sc->tulip_flags &= ~TULIP_NEWTXTHRESH; if (sc->tulip_cmdmode & TULIP_CMD_STOREFWD) { printf(" (switching to store-and-forward mode)"); } else { printf(" (raising TX threshold to %s)", &thrsh[9 * ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) >> 14)]); } } sep = ", "; } } printf("\n"); } static void tulip_intr_handler( tulip_softc_t * const sc, int *progress_p) { TULIP_PERFSTART(intr) u_int32_t csr; #if defined(__NetBSD__) && !defined(TULIP_USE_SOFTINTR) int only_once; only_once = 1; #endif while ((csr = TULIP_CSR_READ(sc, csr_status)) & sc->tulip_intrmask) { #if defined(__NetBSD__) && !defined(TULIP_USE_SOFTINTR) if (only_once == 1) { #if NRND > 0 rnd_add_uint32(&sc->tulip_rndsource, csr); #endif only_once = 0; } #endif *progress_p = 1; TULIP_CSR_WRITE(sc, csr_status, csr); if (csr & TULIP_STS_SYSERROR) { sc->tulip_last_system_error = (csr & TULIP_STS_ERRORMASK) >> TULIP_STS_ERR_SHIFT; if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_flags |= TULIP_SYSTEMERROR; } else { printf(TULIP_PRINTF_FMT ": system error: %s\n", TULIP_PRINTF_ARGS, tulip_system_errors[sc->tulip_last_system_error]); } sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_system_errors++; break; } if (csr & (TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_intrs++; #endif if (sc->tulip_boardsw->bd_media_poll != NULL) { (*sc->tulip_boardsw->bd_media_poll)(sc, csr & TULIP_STS_LINKFAIL ? TULIP_MEDIAPOLL_LINKFAIL : TULIP_MEDIAPOLL_LINKPASS); csr &= ~TULIP_STS_ABNRMLINTR; } tulip_media_print(sc); } if (csr & (TULIP_STS_RXINTR|TULIP_STS_RXNOBUF)) { u_int32_t misses = TULIP_CSR_READ(sc, csr_missed_frames); if (csr & TULIP_STS_RXNOBUF) sc->tulip_dot3stats.dot3StatsMissedFrames += misses & 0xFFFF; /* * Pass 2.[012] of the 21140A-A[CDE] may hang and/or corrupt data * on receive overflows. */ if ((misses & 0x0FFE0000) && (sc->tulip_features & TULIP_HAVE_RXBADOVRFLW)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; /* * Stop the receiver process and spin until it's stopped. * Tell rx_intr to drop the packets it dequeues. */ TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode & ~TULIP_CMD_RXRUN); while ((TULIP_CSR_READ(sc, csr_status) & TULIP_STS_RXSTOPPED) == 0) ; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); sc->tulip_flags |= TULIP_RXIGNORE; } tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_RXIGNORE) { /* * Restart the receiver. 
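 *
 * N.B. The whole overflow workaround above, in outline (illustrative
 * only):
 *
 *	1. clear TULIP_CMD_RXRUN, spin until TULIP_STS_RXSTOPPED is set;
 *	2. ack RXSTOPPED, set TULIP_RXIGNORE, let tulip_rx_intr() drain;
 *	3. clear TULIP_RXIGNORE and rewrite csr_command to restart.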
*/ sc->tulip_flags &= ~TULIP_RXIGNORE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } if (csr & TULIP_STS_ABNRMLINTR) { u_int32_t tmp = csr & sc->tulip_intrmask & ~(TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR); if (csr & TULIP_STS_TXUNDERFLOW) { if ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) != TULIP_CMD_THRSHLD160) { sc->tulip_cmdmode += TULIP_CMD_THRSHLD96; sc->tulip_flags |= TULIP_NEWTXTHRESH; } else if (sc->tulip_features & TULIP_HAVE_STOREFWD) { sc->tulip_cmdmode |= TULIP_CMD_STOREFWD; sc->tulip_flags |= TULIP_NEWTXTHRESH; } } if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_statusbits |= tmp; } else { tulip_print_abnormal_interrupt(sc, tmp); sc->tulip_flags |= TULIP_NOMESSAGES; } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_TXPROBE_ACTIVE|TULIP_DOINGSETUP|TULIP_PROMISC)) { tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) tulip_ifstart(&sc->tulip_if); } } if (sc->tulip_flags & TULIP_NEEDRESET) { tulip_reset(sc); tulip_init(sc); } TULIP_PERFEND(intr); } #if defined(TULIP_USE_SOFTINTR) /* * This is an experimental idea to alleviate problems due to interrupt * livelock. What is interrupt livelock? It's when you spend all your * time servicing device interrupts and never drop below device ipl * to do "useful" work. * * So what we do here is see if the device needs service and if so, * disable interrupts (dismiss the interrupt), place it in a list of devices * needing service, and issue a network software interrupt. * * When our network software interrupt routine gets called, we simply * walk down the list of devices that we have created and deal with them * at splnet/splsoftnet. * */ static void tulip_hardintr_handler( tulip_softc_t * const sc, int *progress_p) { if ((TULIP_CSR_READ(sc, csr_status) & (TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR)) == 0) return; *progress_p = 1; /* * disable interrupts */ TULIP_CSR_WRITE(sc, csr_intr, 0); /* * mark it as needing a software interrupt */ tulip_softintr_mask |= (1U << sc->tulip_unit); #if defined(__NetBSD__) && NRND > 0 /* * This isn't all that random (the value we feed in) but it is * probably better than a constant. It isn't used in entropy * calculation anyway, just to add something to the pool. */ rnd_add_uint32(&sc->tulip_rndsource, sc->tulip_flags); #endif } static void tulip_softintr( void) { u_int32_t softintr_mask, mask; int progress = 0; int unit; tulip_spl_t s; /* * Copy the mask to a local copy and reset the global one to 0. */ s = TULIP_RAISESPL(); softintr_mask = tulip_softintr_mask; tulip_softintr_mask = 0; TULIP_RESTORESPL(s); /* * Optimize for the single unit case. */ if (tulip_softintr_max_unit == 0) { if (softintr_mask & 1) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(0); /* * Handle the "interrupt" and then reenable interrupts */ softintr_mask = 0; tulip_intr_handler(sc, &progress); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } return; } /* * Handle all "queued" interrupts in a round robin fashion. * This is done so as not to favor a particular interface. */ unit = tulip_softintr_last_unit; mask = (1U << unit); while (softintr_mask != 0) { if (tulip_softintr_max_unit == unit) { unit = 0; mask = 1; } else { unit += 1; mask <<= 1; } if (softintr_mask & mask) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(unit); /* * Handle the "interrupt" and then reenable interrupts */ softintr_mask ^= mask; tulip_intr_handler(sc, &progress); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } /* * Save where we ended up.
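 *
 * N.B. One round-robin step, sketched with a hypothetical service()
 * standing in for the handle-and-unmask pair above (illustrative only):
 *
 *	unit = (unit == tulip_softintr_max_unit) ? 0 : unit + 1;
 *	mask = 1U << unit;
 *	if (softintr_mask & mask) {
 *	    softintr_mask ^= mask;	-- claim this unit
 *	    service(unit);		-- tulip_intr_handler + re-enable
 *	}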
*/ tulip_softintr_last_unit = unit; } #endif /* TULIP_USE_SOFTINTR */ static tulip_intrfunc_t tulip_intr_shared( void *arg) { tulip_softc_t * sc = arg; int progress = 0; for (; sc != NULL; sc = sc->tulip_slaves) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif #if defined(TULIP_USE_SOFTINTR) tulip_hardintr_handler(sc, &progress); #else tulip_intr_handler(sc, &progress); #endif } #if defined(TULIP_USE_SOFTINTR) if (progress) schednetisr(NETISR_DE); #endif #if !defined(TULIP_VOID_INTRFUNC) return progress; #endif } static tulip_intrfunc_t tulip_intr_normal( void *arg) { tulip_softc_t * sc = (tulip_softc_t *) arg; int progress = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif #if defined(TULIP_USE_SOFTINTR) tulip_hardintr_handler(sc, &progress); if (progress) schednetisr(NETISR_DE); #else tulip_intr_handler(sc, &progress); #endif #if !defined(TULIP_VOID_INTRFUNC) return progress; #endif } static struct mbuf * tulip_mbuf_compress( struct mbuf *m) { struct mbuf *m0; #if MCLBYTES >= ETHERMTU + 18 && !defined(BIG_PACKET) MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 != NULL) { if (m->m_pkthdr.len > MHLEN) { MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { m_freem(m); m_freem(m0); return NULL; } } m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t)); m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len; } #else int mlen = MHLEN; int len = m->m_pkthdr.len; struct mbuf **mp = &m0; while (len > 0) { if (mlen == MHLEN) { MGETHDR(*mp, M_DONTWAIT, MT_DATA); } else { MGET(*mp, M_DONTWAIT, MT_DATA); } if (*mp == NULL) { m_freem(m0); m0 = NULL; break; } if (len > MLEN) { MCLGET(*mp, M_DONTWAIT); if (((*mp)->m_flags & M_EXT) == 0) { m_freem(m0); m0 = NULL; break; } (*mp)->m_len = len <= MCLBYTES ? len : MCLBYTES; } else { (*mp)->m_len = len <= mlen ? len : mlen; } m_copydata(m, m->m_pkthdr.len - len, (*mp)->m_len, mtod((*mp), caddr_t)); len -= (*mp)->m_len; mp = &(*mp)->m_next; mlen = MLEN; } #endif m_freem(m); return m0; } static struct mbuf * tulip_txput( tulip_softc_t * const sc, struct mbuf *m) { TULIP_PERFSTART(txput) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *eop, *nextout; int segcnt, free; u_int32_t d_status; struct mbuf *m0; #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { printf(TULIP_PRINTF_FMT ": txput%s: tx not running\n", TULIP_PRINTF_ARGS, (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) ? "(probe)" : ""); sc->tulip_flags |= TULIP_WANTTXSTART; goto finish; } #endif /* * Now we try to fill in our transmit descriptors. This is * a bit reminiscent of going on the Ark two by two * since each descriptor for the TULIP can describe * two buffers. So we advance through the packet, filling * each of the two entries at a time, to fill each * descriptor. Clear the first and last segment bits * in each descriptor (actually just clear everything * but the end-of-ring or chain bits) to make sure * we don't get messed up by previously sent packets. * * We may fail to put the entire packet on the ring if * there are either not enough ring entries free or if the * packet has more than MAX_TXSEG segments. In the former * case we will just wait for the ring to empty. In the * latter case we have to recopy.
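 *
 * N.B. The two-buffers-per-descriptor packing used below, in outline
 * ("physaddr" is a hypothetical stand-in; illustrative only):
 *
 *	if (segcnt & 1) {	-- odd segment: claim a fresh descriptor
 *	    eop->d_addr1 = physaddr;  eop->d_length1 = slen;
 *	} else {		-- even segment: second half of the same one
 *	    eop->d_addr2 = physaddr;  eop->d_length2 = slen;
 *	}
 */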
again: d_status = 0; eop = nextout = ri->ri_nextout; m0 = m; segcnt = 0; free = ri->ri_free; do { int len = m0->m_len; caddr_t addr = mtod(m0, caddr_t); unsigned clsize = CLBYTES - (((u_long) addr) & (CLBYTES-1)); while (len > 0) { unsigned slen = min(len, clsize); #ifdef BIG_PACKET int partial = 0; if (slen >= 2048) slen = 2040, partial = 1; #endif segcnt++; if (segcnt > TULIP_MAX_TXSEG) { /* * The packet exceeds the number of transmit buffer * entries that we can use for one packet, so we have * to recopy it into one mbuf and then try again. */ m = tulip_mbuf_compress(m); if (m == NULL) goto finish; goto again; } if (segcnt & 1) { if (--free == 0) { /* * See if there's any unclaimed space in the * transmit ring. */ if ((free += tulip_tx_intr(sc)) == 0) { /* * There's no more room but since nothing * has been committed at this point, just * show output is active, put back the * mbuf and return. */ sc->tulip_flags |= TULIP_WANTTXSTART; goto finish; } } eop = nextout; if (++nextout == ri->ri_last) nextout = ri->ri_first; eop->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; eop->d_status = d_status; eop->d_addr1 = TULIP_KVATOPHYS(sc, addr); eop->d_length1 = slen; } else { /* * Fill in the second half of the descriptor. */ eop->d_addr2 = TULIP_KVATOPHYS(sc, addr); eop->d_length2 = slen; } d_status = TULIP_DSTS_OWNER; len -= slen; addr += slen; #ifdef BIG_PACKET if (partial) continue; #endif clsize = CLBYTES; } } while ((m0 = m0->m_next) != NULL); /* * The descriptors have been filled in. Now get ready * to transmit. */ IF_ENQUEUE(&sc->tulip_txq, m); m = NULL; /* * Make sure the next descriptor after this packet is owned * by us since it may have been set up above if we ran out * of room in the ring. */ nextout->d_status = 0; /* * If we only used the first segment of the last descriptor, * make sure the second segment will not be used. */ if (segcnt & 1) { eop->d_addr2 = 0; eop->d_length2 = 0; } /* * Mark the last and first segments, indicate we want a transmit * complete interrupt, and tell it to transmit! */ eop->d_flag |= TULIP_DFLAG_TxLASTSEG|TULIP_DFLAG_TxWANTINTR; /* * Note that ri->ri_nextout is still the start of the packet * and until we set the OWNER bit, we can still back out of * everything we have done. */ ri->ri_nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG; ri->ri_nextout->d_status = TULIP_DSTS_OWNER; TULIP_CSR_WRITE(sc, csr_txpoll, 1); /* * This advances the ring for us. */ ri->ri_nextout = nextout; ri->ri_free = free; TULIP_PERFEND(txput); if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { sc->tulip_if.if_flags |= IFF_OACTIVE; TULIP_PERFEND(txput); return NULL; } /* * switch back to the single queueing ifstart. */ sc->tulip_flags &= ~TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart_one; if (sc->tulip_txtimer == 0) sc->tulip_txtimer = TULIP_TXTIMER; /* * If we want a txstart, there must not be enough space in the * transmit ring. So we want to enable transmit done interrupts * so we can immediately reclaim some space. When the transmit * interrupt is posted, the interrupt handler will call tx_intr * to reclaim space and then txstart (since WANTTXSTART is set). * txstart will move the packet into the transmit ring and clear * WANTTXSTART thereby causing TXINTR to be cleared.
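 *
 * N.B. The handshake, in outline (illustrative only):
 *
 *	sc->tulip_intrmask |= TULIP_STS_TXINTR;		-- ask for tx-done
 *	TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask);
 *	-- tx-done -> tulip_tx_intr() reclaims -> tulip_ifstart() requeues
 *	-- and clears TULIP_WANTTXSTART, so TXINTR gets masked again
 */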
finish: if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_if.if_start = tulip_ifstart; if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } else if ((sc->tulip_flags & TULIP_PROMISC) == 0) { if (sc->tulip_intrmask & TULIP_STS_TXINTR) { sc->tulip_intrmask &= ~TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } TULIP_PERFEND(txput); return m; } static void tulip_txput_setup( tulip_softc_t * const sc) { tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *nextout; /* * We will transmit, at most, one setup packet per call to ifstart. */ #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { printf(TULIP_PRINTF_FMT ": txput_setup: tx not running\n", TULIP_PRINTF_ARGS); sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart; return; } #endif /* * Try to reclaim some free descriptors. */ if (ri->ri_free < 2) tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_DOINGSETUP) || ri->ri_free == 1) { sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart; return; } bcopy(sc->tulip_setupdata, sc->tulip_setupbuf, sizeof(sc->tulip_setupbuf)); /* * Clear WANTSETUP and set DOINGSETUP. Since we know that WANTSETUP is * set and DOINGSETUP is clear, doing an XOR of the two will DTRT. */ sc->tulip_flags ^= TULIP_WANTSETUP|TULIP_DOINGSETUP; ri->ri_free--; nextout = ri->ri_nextout; nextout->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG|TULIP_DFLAG_TxLASTSEG |TULIP_DFLAG_TxSETUPPKT|TULIP_DFLAG_TxWANTINTR; if (sc->tulip_flags & TULIP_WANTHASHPERFECT) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT; else if (sc->tulip_flags & TULIP_WANTHASHONLY) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT|TULIP_DFLAG_TxINVRSFILT; nextout->d_length1 = sizeof(sc->tulip_setupbuf); nextout->d_addr1 = TULIP_KVATOPHYS(sc, sc->tulip_setupbuf); nextout->d_length2 = 0; nextout->d_addr2 = 0; /* * Advance the ring for the next transmit packet. */ if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; /* * Make sure the next descriptor is owned by us since it * may have been set up above if we ran out of room in the * ring. */ ri->ri_nextout->d_status = 0; nextout->d_status = TULIP_DSTS_OWNER; TULIP_CSR_WRITE(sc, csr_txpoll, 1); if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } /* * This routine is entered at splnet() (splsoftnet() on NetBSD) * and thereby poses no problems whether TULIP_USE_SOFTINTR is * defined or not. */
*/ static int tulip_ifioctl( struct ifnet * ifp, ioctl_cmd_t cmd, caddr_t data) { TULIP_PERFSTART(ifioctl) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); struct ifaddr *ifa = (struct ifaddr *)data; struct ifreq *ifr = (struct ifreq *) data; tulip_spl_t s; int error = 0; #if defined(TULIP_USE_SOFTINTR) s = TULIP_RAISESOFTSPL(); #else s = TULIP_RAISESPL(); #endif switch (cmd) { case SIOCSIFADDR: { ifp->if_flags |= IFF_UP; switch(ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: { tulip_init(sc); TULIP_ARP_IFINIT(sc, ifa); break; } #endif /* INET */ #ifdef IPX case AF_IPX: { struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr); if (ipx_nullhost(*ina)) { ina->x_host = *(union ipx_host *)(sc->tulip_enaddr); } else { ifp->if_flags &= ~IFF_RUNNING; bcopy((caddr_t)ina->x_host.c_host, (caddr_t)sc->tulip_enaddr, sizeof(sc->tulip_enaddr)); } tulip_init(sc); break; } #endif /* IPX */ #ifdef NS /* * This magic copied from if_is.c; I don't use XNS, * so I have no way of telling if this actually * works or not. */ case AF_NS: { struct ns_addr *ina = &(IA_SNS(ifa)->sns_addr); if (ns_nullhost(*ina)) { ina->x_host = *(union ns_host *)(sc->tulip_enaddr); } else { ifp->if_flags &= ~IFF_RUNNING; bcopy((caddr_t)ina->x_host.c_host, (caddr_t)sc->tulip_enaddr, sizeof(sc->tulip_enaddr)); } tulip_init(sc); break; } #endif /* NS */ default: { tulip_init(sc); break; } } break; } case SIOCGIFADDR: { bcopy((caddr_t) sc->tulip_enaddr, (caddr_t) ((struct sockaddr *)&ifr->ifr_data)->sa_data, 6); break; } case SIOCSIFFLAGS: { #if !defined(IFM_ETHER) int flags = 0; if (ifp->if_flags & IFF_LINK0) flags |= 1; if (ifp->if_flags & IFF_LINK1) flags |= 2; if (ifp->if_flags & IFF_LINK2) flags |= 4; if (flags == 7) { ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2); sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP|TULIP_NOAUTOSENSE); tulip_reset(sc); } else if (flags) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && --flags == 0) { sc->tulip_flags |= TULIP_NOAUTOSENSE; if (sc->tulip_media != media || (sc->tulip_flags & TULIP_DIDNWAY)) { sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); } break; } } if (flags) printf(TULIP_PRINTF_FMT ": ignored invalid media request\n", TULIP_PRINTF_ARGS); } #endif tulip_init(sc); break; } #if defined(SIOCSIFMEDIA) case SIOCSIFMEDIA: case SIOCGIFMEDIA: { error = ifmedia_ioctl(ifp, ifr, &sc->tulip_ifmedia, cmd); break; } #endif case SIOCADDMULTI: case SIOCDELMULTI: { /* * Update multicast listeners */ #if defined(__FreeBSD__) && __FreeBSD__ >= 3 tulip_addr_filter(sc); /* reset multicast filtering */ tulip_init(sc); error = 0; #else if (cmd == SIOCADDMULTI) error = ether_addmulti(ifr, TULIP_ETHERCOM(sc)); else error = ether_delmulti(ifr, TULIP_ETHERCOM(sc)); if (error == ENETRESET) { tulip_addr_filter(sc); /* reset multicast filtering */ tulip_init(sc); error = 0; } #endif break; } #if defined(SIOCSIFMTU) #if !defined(ifr_mtu) #define ifr_mtu ifr_metric #endif case SIOCSIFMTU: /* * Set the interface MTU. 
*/ if (ifr->ifr_mtu > ETHERMTU #ifdef BIG_PACKET && sc->tulip_chipid != TULIP_21140 && sc->tulip_chipid != TULIP_21140A && sc->tulip_chipid != TULIP_21041 #endif ) { error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; #ifdef BIG_PACKET tulip_reset(sc); tulip_init(sc); #endif break; #endif /* SIOCSIFMTU */ #ifdef SIOCGADDRROM case SIOCGADDRROM: { error = copyout(sc->tulip_rombuf, ifr->ifr_data, sizeof(sc->tulip_rombuf)); break; } #endif #ifdef SIOCGCHIPID case SIOCGCHIPID: { ifr->ifr_metric = (int) sc->tulip_chipid; break; } #endif default: { error = EINVAL; break; } } TULIP_RESTORESPL(s); TULIP_PERFEND(ifioctl); return error; } /* * These routines get called at device spl (from ether_output). This might * pose a problem for TULIP_USE_SOFTINTR if ether_output is called at * device spl from another driver. */ static ifnet_ret_t tulip_ifstart( struct ifnet * const ifp) { TULIP_PERFSTART(ifstart) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); if (sc->tulip_if.if_flags & IFF_RUNNING) { if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); while (sc->tulip_if.if_snd.ifq_head != NULL) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_if.if_snd, m); if ((m = tulip_txput(sc, m)) != NULL) { IF_PREPEND(&sc->tulip_if.if_snd, m); break; } } } TULIP_PERFEND(ifstart); } static ifnet_ret_t tulip_ifstart_one( struct ifnet * const ifp) { TULIP_PERFSTART(ifstart_one) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); if ((sc->tulip_if.if_flags & IFF_RUNNING) && sc->tulip_if.if_snd.ifq_head != NULL) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_if.if_snd, m); if ((m = tulip_txput(sc, m)) != NULL) IF_PREPEND(&sc->tulip_if.if_snd, m); } TULIP_PERFEND(ifstart_one); } /* * Even though this routine runs at device spl, it does not break * our use of splnet (splsoftnet under NetBSD) for the majority * of this driver (if TULIP_USE_SOFTINTR is defined) since * tulip_ifwatchdog is called from if_watchdog which is called from * splsoftclock which is below spl[soft]net. */ static void tulip_ifwatchdog( struct ifnet *ifp) { TULIP_PERFSTART(ifwatchdog) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); #if defined(TULIP_DEBUG) u_int32_t rxintrs = sc->tulip_dbg.dbg_rxintrs - sc->tulip_dbg.dbg_last_rxintrs; if (rxintrs > sc->tulip_dbg.dbg_high_rxintrs_hz) sc->tulip_dbg.dbg_high_rxintrs_hz = rxintrs; sc->tulip_dbg.dbg_last_rxintrs = sc->tulip_dbg.dbg_rxintrs; #endif /* TULIP_DEBUG */ sc->tulip_if.if_timer = 1; /* * These should be rare so do a bulk test up front so we can just skip * them if needed. */
*/ if (sc->tulip_flags & (TULIP_SYSTEMERROR|TULIP_RXBUFSLOW|TULIP_NOMESSAGES)) { /* * If the number of receive buffer is low, try to refill */ if (sc->tulip_flags & TULIP_RXBUFSLOW) tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_SYSTEMERROR) { printf(TULIP_PRINTF_FMT ": %d system errors: last was %s\n", TULIP_PRINTF_ARGS, sc->tulip_system_errors, tulip_system_errors[sc->tulip_last_system_error]); } if (sc->tulip_statusbits) { tulip_print_abnormal_interrupt(sc, sc->tulip_statusbits); sc->tulip_statusbits = 0; } sc->tulip_flags &= ~(TULIP_NOMESSAGES|TULIP_SYSTEMERROR); } if (sc->tulip_txtimer) tulip_tx_intr(sc); if (sc->tulip_txtimer && --sc->tulip_txtimer == 0) { printf(TULIP_PRINTF_FMT ": transmission timeout\n", TULIP_PRINTF_ARGS); if (TULIP_DO_AUTOSENSE(sc)) { sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP); } tulip_reset(sc); tulip_init(sc); } TULIP_PERFEND(ifwatchdog); TULIP_PERFMERGE(sc, perf_intr_cycles); TULIP_PERFMERGE(sc, perf_ifstart_cycles); TULIP_PERFMERGE(sc, perf_ifioctl_cycles); TULIP_PERFMERGE(sc, perf_ifwatchdog_cycles); TULIP_PERFMERGE(sc, perf_timeout_cycles); TULIP_PERFMERGE(sc, perf_ifstart_one_cycles); TULIP_PERFMERGE(sc, perf_txput_cycles); TULIP_PERFMERGE(sc, perf_txintr_cycles); TULIP_PERFMERGE(sc, perf_rxintr_cycles); TULIP_PERFMERGE(sc, perf_rxget_cycles); TULIP_PERFMERGE(sc, perf_intr); TULIP_PERFMERGE(sc, perf_ifstart); TULIP_PERFMERGE(sc, perf_ifioctl); TULIP_PERFMERGE(sc, perf_ifwatchdog); TULIP_PERFMERGE(sc, perf_timeout); TULIP_PERFMERGE(sc, perf_ifstart_one); TULIP_PERFMERGE(sc, perf_txput); TULIP_PERFMERGE(sc, perf_txintr); TULIP_PERFMERGE(sc, perf_rxintr); TULIP_PERFMERGE(sc, perf_rxget); } #if defined(__bsdi__) || (defined(__FreeBSD__) && BSD < 199506) static ifnet_ret_t tulip_ifwatchdog_wrapper( int unit) { tulip_ifwatchdog(&TULIP_UNIT_TO_SOFTC(unit)->tulip_if); } #define tulip_ifwatchdog tulip_ifwatchdog_wrapper #endif /* * All printf's are real as of now! */ #ifdef printf #undef printf #endif #if !defined(IFF_NOTRAILERS) #define IFF_NOTRAILERS 0 #endif static void tulip_attach( tulip_softc_t * const sc) { struct ifnet * const ifp = &sc->tulip_if; ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST; ifp->if_ioctl = tulip_ifioctl; ifp->if_start = tulip_ifstart; ifp->if_watchdog = tulip_ifwatchdog; ifp->if_timer = 1; #if !defined(__bsdi__) || _BSDI_VERSION < 199401 ifp->if_output = ether_output; #endif #if defined(__bsdi__) && _BSDI_VERSION < 199401 ifp->if_mtu = ETHERMTU; #endif #if defined(__bsdi__) && _BSDI_VERSION >= 199510 aprint_naive(": DEC Ethernet"); aprint_normal(": %s%s", sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid]); aprint_verbose(" pass %d.%d", (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); printf("\n"); sc->tulip_pf = aprint_normal; aprint_normal(TULIP_PRINTF_FMT ": address " TULIP_EADDR_FMT "\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(sc->tulip_enaddr)); #else printf( #if defined(__bsdi__) "\n" #endif TULIP_PRINTF_FMT ": %s%s pass %d.%d%s\n", TULIP_PRINTF_ARGS, sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F, (sc->tulip_features & (TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM)) == TULIP_HAVE_ISVSROM ? 
" (invalid EESPROM checksum)" : ""); printf(TULIP_PRINTF_FMT ": address " TULIP_EADDR_FMT "\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(sc->tulip_enaddr)); #endif #if defined(__alpha__) /* * In case the SRM console told us about a bogus media, * we need to check to be safe. */ if (sc->tulip_mediums[sc->tulip_media] == NULL) sc->tulip_media = TULIP_MEDIA_UNKNOWN; #endif (*sc->tulip_boardsw->bd_media_probe)(sc); #if defined(IFM_ETHER) ifmedia_init(&sc->tulip_ifmedia, 0, tulip_ifmedia_change, tulip_ifmedia_status); #else { tulip_media_t media; int cnt; printf(TULIP_PRINTF_FMT ": media:", TULIP_PRINTF_ARGS); for (media = TULIP_MEDIA_UNKNOWN, cnt = 1; cnt < 7 && media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { printf(" %d=\"%s\"", cnt, tulip_mediums[media]); cnt++; } } if (cnt == 1) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; printf(" none\n"); } else { printf("\n"); } } #endif sc->tulip_flags &= ~TULIP_DEVICEPROBE; #if defined(IFM_ETHER) tulip_ifmedia_add(sc); #endif tulip_reset(sc); #if defined(__bsdi__) && _BSDI_VERSION >= 199510 sc->tulip_pf = printf; TULIP_ETHER_IFATTACH(sc); #else if_attach(ifp); #if defined(__NetBSD__) || (defined(__FreeBSD__) && BSD >= 199506) TULIP_ETHER_IFATTACH(sc); #endif #endif /* __bsdi__ */ #if NBPFILTER > 0 TULIP_BPF_ATTACH(sc); #endif #if defined(__NetBSD__) && NRND > 0 rnd_attach_source(&sc->tulip_rndsource, sc->tulip_dev.dv_xname, RND_TYPE_NET); #endif } static void tulip_initcsrs( tulip_softc_t * const sc, tulip_csrptr_t csr_base, size_t csr_size) { sc->tulip_csrs.csr_busmode = csr_base + 0 * csr_size; sc->tulip_csrs.csr_txpoll = csr_base + 1 * csr_size; sc->tulip_csrs.csr_rxpoll = csr_base + 2 * csr_size; sc->tulip_csrs.csr_rxlist = csr_base + 3 * csr_size; sc->tulip_csrs.csr_txlist = csr_base + 4 * csr_size; sc->tulip_csrs.csr_status = csr_base + 5 * csr_size; sc->tulip_csrs.csr_command = csr_base + 6 * csr_size; sc->tulip_csrs.csr_intr = csr_base + 7 * csr_size; sc->tulip_csrs.csr_missed_frames = csr_base + 8 * csr_size; sc->tulip_csrs.csr_9 = csr_base + 9 * csr_size; sc->tulip_csrs.csr_10 = csr_base + 10 * csr_size; sc->tulip_csrs.csr_11 = csr_base + 11 * csr_size; sc->tulip_csrs.csr_12 = csr_base + 12 * csr_size; sc->tulip_csrs.csr_13 = csr_base + 13 * csr_size; sc->tulip_csrs.csr_14 = csr_base + 14 * csr_size; sc->tulip_csrs.csr_15 = csr_base + 15 * csr_size; #if defined(TULIP_EISA) sc->tulip_csrs.csr_enetrom = csr_base + DE425_ENETROM_OFFSET; #endif } static void tulip_initring( tulip_softc_t * const sc, tulip_ringinfo_t * const ri, tulip_desc_t *descs, int ndescs) { ri->ri_max = ndescs; ri->ri_first = descs; ri->ri_last = ri->ri_first + ri->ri_max; bzero((caddr_t) ri->ri_first, sizeof(ri->ri_first[0]) * ri->ri_max); ri->ri_last[-1].d_flag = TULIP_DFLAG_ENDRING; } /* * This is the PCI configuration support. Since the 21040 is available * on both EISA and PCI boards, one must be careful in how defines the * 21040 in the config file. 
#define PCI_CFID 0x00 /* Configuration ID */ #define PCI_CFCS 0x04 /* Configuration Command/Status */ #define PCI_CFRV 0x08 /* Configuration Revision */ #define PCI_CFLT 0x0c /* Configuration Latency Timer */ #define PCI_CBIO 0x10 /* Configuration Base IO Address */ #define PCI_CBMA 0x14 /* Configuration Base Memory Address */ #define PCI_CFIT 0x3c /* Configuration Interrupt */ #define PCI_CFDA 0x40 /* Configuration Driver Area */ #if defined(TULIP_EISA) static const int tulip_eisa_irqs[4] = { IRQ5, IRQ9, IRQ10, IRQ11 }; #endif #if defined(__FreeBSD__) #define TULIP_PCI_ATTACH_ARGS pcici_t config_id, int unit #define TULIP_SHUTDOWN_ARGS int howto, void * arg #if defined(TULIP_DEVCONF) static void tulip_shutdown(TULIP_SHUTDOWN_ARGS); static int tulip_pci_shutdown( struct kern_devconf * const kdc, int force) { if (kdc->kdc_unit < TULIP_MAX_DEVICES) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(kdc->kdc_unit); if (sc != NULL) tulip_shutdown(0, sc); } (void) dev_detach(kdc); return 0; } #endif static char* tulip_pci_probe( pcici_t config_id, pcidi_t device_id) { if (PCI_VENDORID(device_id) != DEC_VENDORID) return NULL; if (PCI_CHIPID(device_id) == CHIPID_21040) return "Digital 21040 Ethernet"; if (PCI_CHIPID(device_id) == CHIPID_21041) return "Digital 21041 Ethernet"; if (PCI_CHIPID(device_id) == CHIPID_21140) { u_int32_t revinfo = pci_conf_read(config_id, PCI_CFRV) & 0xFF; if (revinfo >= 0x20) return "Digital 21140A Fast Ethernet"; else return "Digital 21140 Fast Ethernet"; } if (PCI_CHIPID(device_id) == CHIPID_21142) { u_int32_t revinfo = pci_conf_read(config_id, PCI_CFRV) & 0xFF; if (revinfo >= 0x20) return "Digital 21143 Fast Ethernet"; else return "Digital 21142 Fast Ethernet"; } return NULL; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); static u_long tulip_pci_count; static struct pci_device dedevice = { "de", tulip_pci_probe, tulip_pci_attach, &tulip_pci_count, #if defined(TULIP_DEVCONF) tulip_pci_shutdown, #endif }; DATA_SET (pcidevice_set, dedevice); #endif /* __FreeBSD__ */ #if defined(__bsdi__) #define TULIP_PCI_ATTACH_ARGS struct device * const parent, struct device * const self, void * const aux #define TULIP_SHUTDOWN_ARGS void *arg static int tulip_pci_match( pci_devaddr_t *pa) { int irq; unsigned id; id = pci_inl(pa, PCI_VENDOR_ID); if (PCI_VENDORID(id) != DEC_VENDORID) return 0; id = PCI_CHIPID(id); if (id != CHIPID_21040 && id != CHIPID_21041 && id != CHIPID_21140 && id != CHIPID_21142) return 0; irq = pci_inl(pa, PCI_I_LINE) & 0xFF; if (irq == 0 || irq >= 16) { printf("de?: invalid IRQ %d; skipping\n", irq); return 0; } return 1; } static int tulip_probe( struct device *parent, struct cfdata *cf, void *aux) { struct isa_attach_args * const ia = (struct isa_attach_args *) aux; unsigned irq, slot; pci_devaddr_t *pa; #if _BSDI_VERSION >= 199401 switch (ia->ia_bustype) { case BUS_PCI: #endif pa = pci_scan(tulip_pci_match); if (pa == NULL) return 0; irq = (1 << (pci_inl(pa, PCI_I_LINE) & 0xFF)); /* Get the base address; assume the BIOS set it up correctly */ #if defined(TULIP_IOMAPPED) ia->ia_maddr = NULL; ia->ia_msize = 0; ia->ia_iobase = pci_inl(pa, PCI_CBIO) & ~7; pci_outl(pa, PCI_CBIO, 0xFFFFFFFF); ia->ia_iosize = ((~pci_inl(pa, PCI_CBIO)) | 7) + 1; pci_outl(pa, PCI_CBIO, (int) ia->ia_iobase); /* Disable memory space access */ pci_outl(pa, PCI_COMMAND, pci_inl(pa, PCI_COMMAND) & ~2); #else ia->ia_maddr = (caddr_t) (pci_inl(pa, PCI_CBMA) & ~7); pci_outl(pa, PCI_CBMA, 0xFFFFFFFF); ia->ia_msize = ((~pci_inl(pa, PCI_CBMA)) | 7) + 1; pci_outl(pa, PCI_CBMA, (int)
ia->ia_maddr); ia->ia_iobase = 0; ia->ia_iosize = 0; /* Disable I/O space access */ pci_outl(pa, PCI_COMMAND, pci_inl(pa, PCI_COMMAND) & ~1); #endif /* TULIP_IOMAPPED */ ia->ia_aux = (void *) pa; #if _BSDI_VERSION >= 199401 break; #if defined(TULIP_EISA) case BUS_EISA: { unsigned tmp; if ((slot = eisa_match(cf, ia)) == 0) return 0; ia->ia_iobase = slot << 12; ia->ia_iosize = EISA_NPORT; eisa_slotalloc(slot); tmp = inb(ia->ia_iobase + DE425_CFG0); irq = tulip_eisa_irqs[(tmp >> 1) & 0x03]; /* * Until BSD/OS likes level interrupts, force * the DE425 into edge-triggered mode. */ if ((tmp & 1) == 0) outb(ia->ia_iobase + DE425_CFG0, tmp | 1); /* * CBIO needs to map to the EISA slot * enable I/O access and Master */ outl(ia->ia_iobase + DE425_CBIO, ia->ia_iobase); outl(ia->ia_iobase + DE425_CFCS, 5 | inl(ia->ia_iobase + DE425_CFCS)); ia->ia_aux = NULL; break; } #endif /* TULIP_EISA */ default: return 0; } #endif /* PCI bus masters don't use host DMA channels */ ia->ia_drq = DRQNONE; if (ia->ia_irq != IRQUNK && irq != ia->ia_irq) { printf("de%d: error: desired IRQ of %d does not match device's " "actual IRQ of %d,\n", cf->cf_unit, ffs(ia->ia_irq) - 1, ffs(irq) - 1); return 0; } if (ia->ia_irq == IRQUNK) ia->ia_irq = irq; #ifdef IRQSHARE ia->ia_irq |= IRQSHARE; #endif return 1; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); #if defined(TULIP_EISA) static char *tulip_eisa_ids[] = { "DEC4250", NULL }; #endif struct cfdriver decd = { 0, "de", tulip_probe, tulip_pci_attach, #if _BSDI_VERSION >= 199401 DV_IFNET, #endif sizeof(tulip_softc_t), #if defined(TULIP_EISA) tulip_eisa_ids #endif }; #endif /* __bsdi__ */ #if defined(__NetBSD__) #define TULIP_PCI_ATTACH_ARGS struct device * const parent, struct device * const self, void * const aux #define TULIP_SHUTDOWN_ARGS void *arg static int tulip_pci_probe( struct device *parent, #ifdef __BROKEN_INDIRECT_CONFIG void *match, #else struct cfdata *match, #endif void *aux) { struct pci_attach_args *pa = (struct pci_attach_args *) aux; if (PCI_VENDORID(pa->pa_id) != DEC_VENDORID) return 0; if (PCI_CHIPID(pa->pa_id) == CHIPID_21040 || PCI_CHIPID(pa->pa_id) == CHIPID_21041 || PCI_CHIPID(pa->pa_id) == CHIPID_21140 || PCI_CHIPID(pa->pa_id) == CHIPID_21142) return 1; return 0; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); struct cfattach de_ca = { sizeof(tulip_softc_t), tulip_pci_probe, tulip_pci_attach }; struct cfdriver de_cd = { 0, "de", DV_IFNET }; #endif /* __NetBSD__ */ static void tulip_shutdown( TULIP_SHUTDOWN_ARGS) { tulip_softc_t * const sc = arg; TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ } static void tulip_pci_attach( TULIP_PCI_ATTACH_ARGS) { #if defined(__FreeBSD__) tulip_softc_t *sc; #define PCI_CONF_WRITE(r, v) pci_conf_write(config_id, (r), (v)) #define PCI_CONF_READ(r) pci_conf_read(config_id, (r)) #if __FreeBSD__ >= 3 #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = (config_id->bus), /* XXX */ \ (sc)->tulip_pci_devno = (config_id->slot))) /* XXX */ #else #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = ((config_id.cfg1 >> 16) & 0xFF), /* XXX */ \ (sc)->tulip_pci_devno = ((config_id.cfg1 >> 11) & 0x1F))) /* XXX */ #endif #endif #if defined(__bsdi__) tulip_softc_t * const sc = (tulip_softc_t *) self; struct isa_attach_args * const ia = (struct isa_attach_args *) aux; pci_devaddr_t *pa = (pci_devaddr_t *) ia->ia_aux; const int unit = sc->tulip_dev.dv_unit; 
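/*
 * N.B. The bsdi probe above sizes each base address register the
 * standard PCI way -- write all ones, read back, invert.  A sketch with
 * a hypothetical "size" variable (illustrative only):
 *
 *	pci_outl(pa, PCI_CBMA, 0xFFFFFFFF);		-- probe the BAR
 *	size = ((~pci_inl(pa, PCI_CBMA)) | 7) + 1;	-- decode its span
 *	pci_outl(pa, PCI_CBMA, (int) ia->ia_maddr);	-- restore the base
 */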
#define PCI_CONF_WRITE(r, v) pci_outl(pa, (r), (v)) #define PCI_CONF_READ(r) pci_inl(pa, (r)) #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = pa->d_bus, \ (sc)->tulip_pci_devno = pa->d_agent)) #endif #if defined(__NetBSD__) tulip_softc_t * const sc = (tulip_softc_t *) self; struct pci_attach_args * const pa = (struct pci_attach_args *) aux; const int unit = sc->tulip_dev.dv_unit; #define PCI_CONF_WRITE(r, v) pci_conf_write(pa->pa_pc, pa->pa_tag, (r), (v)) #define PCI_CONF_READ(r) pci_conf_read(pa->pa_pc, pa->pa_tag, (r)) #define PCI_GETBUSDEVINFO(sc) do { \ (sc)->tulip_pci_busno = parent; \ (sc)->tulip_pci_devno = pa->pa_device; \ } while (0) #endif /* __NetBSD__ */ #if defined(__alpha__) tulip_media_t media = TULIP_MEDIA_UNKNOWN; #endif int retval, idx; u_int32_t revinfo, cfdainfo, id; #if !defined(TULIP_IOMAPPED) && defined(__FreeBSD__) vm_offset_t pa_csrs; #endif unsigned csroffset = TULIP_PCI_CSROFFSET; unsigned csrsize = TULIP_PCI_CSRSIZE; tulip_csrptr_t csr_base; tulip_chipid_t chipid = TULIP_CHIPID_UNKNOWN; if (unit >= TULIP_MAX_DEVICES) { #ifdef __FreeBSD__ printf("de%d", unit); #endif printf(": not configured; limit of %d reached or exceeded\n", TULIP_MAX_DEVICES); return; } #if defined(__bsdi__) if (pa != NULL) { revinfo = pci_inl(pa, PCI_CFRV) & 0xFF; id = pci_inl(pa, PCI_CFID); cfdainfo = pci_inl(pa, PCI_CFDA); #if defined(TULIP_EISA) } else { revinfo = inl(ia->ia_iobase + DE425_CFRV) & 0xFF; csroffset = TULIP_EISA_CSROFFSET; csrsize = TULIP_EISA_CSRSIZE; chipid = TULIP_DE425; cfdainfo = 0; #endif /* TULIP_EISA */ } #else /* __bsdi__ */ revinfo = PCI_CONF_READ(PCI_CFRV) & 0xFF; id = PCI_CONF_READ(PCI_CFID); cfdainfo = PCI_CONF_READ(PCI_CFDA); #endif /* __bsdi__ */ if (PCI_VENDORID(id) == DEC_VENDORID) { if (PCI_CHIPID(id) == CHIPID_21040) chipid = TULIP_21040; else if (PCI_CHIPID(id) == CHIPID_21140) { chipid = (revinfo >= 0x20) ? TULIP_21140A : TULIP_21140; } else if (PCI_CHIPID(id) == CHIPID_21142) { chipid = (revinfo >= 0x20) ? 
TULIP_21143 : TULIP_21142; } else if (PCI_CHIPID(id) == CHIPID_21041) chipid = TULIP_21041; else if (PCI_CHIPID(id) == CHIPID_21142) chipid = TULIP_21142; } if (chipid == TULIP_CHIPID_UNKNOWN) return; if ((chipid == TULIP_21040 || chipid == TULIP_DE425) && revinfo < 0x20) { #ifdef __FreeBSD__ printf("de%d", unit); #endif printf(": not configured; 21040 pass 2.0 required (%d.%d found)\n", revinfo >> 4, revinfo & 0x0f); return; } else if (chipid == TULIP_21140 && revinfo < 0x11) { #ifndef __FreeBSD__ printf("\n"); #endif printf("de%d: not configured; 21140 pass 1.1 required (%d.%d found)\n", unit, revinfo >> 4, revinfo & 0x0f); return; } #if defined(__FreeBSD__) sc = (tulip_softc_t *) malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT); if (sc == NULL) return; bzero(sc, sizeof(*sc)); /* Zero out the softc*/ sc->tulip_rxdescs = (tulip_desc_t *) malloc(sizeof(tulip_desc_t) * TULIP_RXDESCS, M_DEVBUF, M_NOWAIT); sc->tulip_txdescs = (tulip_desc_t *) malloc(sizeof(tulip_desc_t) * TULIP_TXDESCS, M_DEVBUF, M_NOWAIT); if (sc->tulip_rxdescs == NULL || sc->tulip_txdescs == NULL) { if (sc->tulip_rxdescs) free((caddr_t) sc->tulip_rxdescs, M_DEVBUF); if (sc->tulip_txdescs) free((caddr_t) sc->tulip_txdescs, M_DEVBUF); free((caddr_t) sc, M_DEVBUF); return; } #endif PCI_GETBUSDEVINFO(sc); sc->tulip_chipid = chipid; sc->tulip_flags |= TULIP_DEVICEPROBE; if (chipid == TULIP_21140 || chipid == TULIP_21140A) sc->tulip_features |= TULIP_HAVE_GPR|TULIP_HAVE_STOREFWD; if (chipid == TULIP_21140A && revinfo <= 0x22) sc->tulip_features |= TULIP_HAVE_RXBADOVRFLW; if (chipid == TULIP_21140) sc->tulip_features |= TULIP_HAVE_BROKEN_HASH; if (chipid != TULIP_21040 && chipid != TULIP_DE425 && chipid != TULIP_21140) sc->tulip_features |= TULIP_HAVE_POWERMGMT; if (chipid == TULIP_21041 || chipid == TULIP_21142 || chipid == TULIP_21143) { sc->tulip_features |= TULIP_HAVE_DUALSENSE; if (chipid != TULIP_21041 || sc->tulip_revinfo >= 0x20) sc->tulip_features |= TULIP_HAVE_SIANWAY; if (chipid != TULIP_21041) sc->tulip_features |= TULIP_HAVE_SIAGP|TULIP_HAVE_RXBADOVRFLW|TULIP_HAVE_STOREFWD; if (chipid != TULIP_21041 && sc->tulip_revinfo >= 0x20) sc->tulip_features |= TULIP_HAVE_SIA100; } if (sc->tulip_features & TULIP_HAVE_POWERMGMT && (cfdainfo & (TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE))) { cfdainfo &= ~(TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE); PCI_CONF_WRITE(PCI_CFDA, cfdainfo); DELAY(11*1000); } #if defined(__alpha__) && defined(__NetBSD__) /* * The Alpha SRM console encodes a console set media in the driver * part of the CFDA register. Note that the Multia presents a * problem in that its BNC mode is really EXTSIA. So in that case * force a probe. */ switch ((cfdainfo >> 8) & 0xff) { case 1: media = chipid > TULIP_DE425 ? TULIP_MEDIA_AUI : TULIP_MEDIA_AUIBNC; break; case 2: media = chipid > TULIP_DE425 ? 
TULIP_MEDIA_BNC : TULIP_MEDIA_UNKNOWN; break; case 3: media = TULIP_MEDIA_10BASET; break; case 4: media = TULIP_MEDIA_10BASET_FD; break; case 5: media = TULIP_MEDIA_100BASETX; break; case 6: media = TULIP_MEDIA_100BASETX_FD; break; } #endif #if defined(__NetBSD__) bcopy(self->dv_xname, sc->tulip_if.if_xname, IFNAMSIZ); sc->tulip_if.if_softc = sc; sc->tulip_pc = pa->pa_pc; #else sc->tulip_unit = unit; sc->tulip_name = "de"; #endif sc->tulip_revinfo = revinfo; #if defined(__FreeBSD__) #if BSD >= 199506 sc->tulip_if.if_softc = sc; #endif #if defined(TULIP_IOMAPPED) retval = pci_map_port(config_id, PCI_CBIO, &csr_base); #else retval = pci_map_mem(config_id, PCI_CBMA, (vm_offset_t *) &csr_base, &pa_csrs); #endif if (!retval) { free((caddr_t) sc->tulip_rxdescs, M_DEVBUF); free((caddr_t) sc->tulip_txdescs, M_DEVBUF); free((caddr_t) sc, M_DEVBUF); return; } tulips[unit] = sc; #endif /* __FreeBSD__ */ #if defined(__bsdi__) sc->tulip_pf = printf; #if defined(TULIP_IOMAPPED) csr_base = ia->ia_iobase; #else csr_base = (vm_offset_t) mapphys((vm_offset_t) ia->ia_maddr, ia->ia_msize); #endif #endif /* __bsdi__ */ #if defined(__NetBSD__) csr_base = 0; { bus_space_tag_t iot, memt; bus_space_handle_t ioh, memh; int ioh_valid, memh_valid; ioh_valid = (pci_mapreg_map(pa, PCI_CBIO, PCI_MAPREG_TYPE_IO, 0, &iot, &ioh, NULL, NULL) == 0); memh_valid = (pci_mapreg_map(pa, PCI_CBMA, PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT, 0, &memt, &memh, NULL, NULL) == 0); if (memh_valid) { sc->tulip_bustag = memt; sc->tulip_bushandle = memh; } else if (ioh_valid) { sc->tulip_bustag = iot; sc->tulip_bushandle = ioh; } else { printf(": unable to map device registers\n"); return; } } #endif /* __NetBSD__ */ tulip_initcsrs(sc, csr_base + csroffset, csrsize); tulip_initring(sc, &sc->tulip_rxinfo, sc->tulip_rxdescs, TULIP_RXDESCS); tulip_initring(sc, &sc->tulip_txinfo, sc->tulip_txdescs, TULIP_TXDESCS); /* * Make sure there won't be any interrupts or such... 
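 *
 * (Editor's note: the software reset below quiesces the receive and
 * transmit DMA engines before tulip_read_macaddr() goes near the
 * address ROM; the DELAY(100) that follows -- 100 microseconds -- is a
 * generous margin over the roughly 50 PCI cycles the reset needs.)
 */
#if 0
	/*
	 * Sketch only, not in the original driver: interrupts could also
	 * be masked explicitly before the reset; the "csr_intr" register
	 * name is assumed here.
	 */
	TULIP_CSR_WRITE(sc, csr_intr, 0);
#endif
/*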
*/ TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(100); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ if ((retval = tulip_read_macaddr(sc)) < 0) { #if defined(__FreeBSD__) printf(TULIP_PRINTF_FMT, TULIP_PRINTF_ARGS); #endif printf(": can't read ENET ROM (why=%d) (", retval); for (idx = 0; idx < 32; idx++) printf("%02x", sc->tulip_rombuf[idx]); printf("\n"); printf(TULIP_PRINTF_FMT ": %s%s pass %d.%d\n", TULIP_PRINTF_ARGS, sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); printf(TULIP_PRINTF_FMT ": address unknown\n", TULIP_PRINTF_ARGS); } else { tulip_spl_t s; tulip_intrfunc_t (*intr_rtn)(void *) = tulip_intr_normal; if (sc->tulip_features & TULIP_HAVE_SHAREDINTR) intr_rtn = tulip_intr_shared; #if defined(__NetBSD__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { pci_intr_handle_t intrhandle; const char *intrstr; if (pci_intr_map(pa->pa_pc, pa->pa_intrtag, pa->pa_intrpin, pa->pa_intrline, &intrhandle)) { printf(": couldn't map interrupt\n"); return; } intrstr = pci_intr_string(pa->pa_pc, intrhandle); sc->tulip_ih = pci_intr_establish(pa->pa_pc, intrhandle, IPL_NET, intr_rtn, sc); if (sc->tulip_ih == NULL) printf(": couldn't establish interrupt"); if (intrstr != NULL) printf(" at %s", intrstr); printf("\n"); if (sc->tulip_ih == NULL) return; } sc->tulip_ats = shutdownhook_establish(tulip_shutdown, sc); if (sc->tulip_ats == NULL) printf("\n%s: warning: couldn't establish shutdown hook\n", sc->tulip_xname); #endif #if defined(__FreeBSD__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { if (!pci_map_int (config_id, intr_rtn, (void*) sc, &net_imask)) { printf(TULIP_PRINTF_FMT ": couldn't map interrupt\n", TULIP_PRINTF_ARGS); return; } } #if !defined(TULIP_DEVCONF) at_shutdown(tulip_shutdown, sc, SHUTDOWN_POST_SYNC); #endif #endif #if defined(__bsdi__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { isa_establish(&sc->tulip_id, &sc->tulip_dev); sc->tulip_ih.ih_fun = intr_rtn; sc->tulip_ih.ih_arg = (void *) sc; intr_establish(ia->ia_irq, &sc->tulip_ih, DV_NET); } sc->tulip_ats.func = tulip_shutdown; sc->tulip_ats.arg = (void *) sc; atshutdown(&sc->tulip_ats, ATSH_ADD); #endif #if defined(TULIP_USE_SOFTINTR) if (sc->tulip_unit > tulip_softintr_max_unit) tulip_softintr_max_unit = sc->tulip_unit; #endif s = TULIP_RAISESPL(); tulip_reset(sc); tulip_attach(sc); #if defined(__alpha__) && defined(__NetBSD__) if (media != TULIP_MEDIA_UNKNOWN) tulip_linkup(sc, media); #endif TULIP_RESTORESPL(s); } } Index: head/sys/fs/cd9660/cd9660_vfsops.c =================================================================== --- head/sys/fs/cd9660/cd9660_vfsops.c (revision 34265) +++ head/sys/fs/cd9660/cd9660_vfsops.c (revision 34266) @@ -1,891 +1,892 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 - * $Id: cd9660_vfsops.c,v 1.33 1997/12/21 21:40:02 joerg Exp $ + * $Id: cd9660_vfsops.c,v 1.34 1998/03/01 22:46:00 msmith Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); static int cd9660_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int cd9660_start __P((struct mount *, int, struct proc *)); static int cd9660_unmount __P((struct mount *, int, struct proc *)); static int cd9660_root __P((struct mount *, struct vnode **)); static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); static int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); static int cd9660_vrele __P((struct mount *, struct vnode *)); static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int cd9660_vptofh __P((struct vnode *, struct fid *)); static struct vfsops cd9660_vfsops = { cd9660_mount, cd9660_start, cd9660_unmount, cd9660_root, cd9660_quotactl, cd9660_statfs, cd9660_sync, cd9660_vget, cd9660_vrele, cd9660_fhtovp, cd9660_vptofh, cd9660_init }; VFS_SET(cd9660_vfsops, cd9660, MOUNT_CD9660, VFCF_READONLY); /* * Called by vfs_mountroot when iso is going to be mounted as root. */ static int iso_get_ssector __P((dev_t dev, struct proc *p)); static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); static int iso_mountroot __P((struct mount *mp, struct proc *p)); /* * Try to find the start of the last data track on this CD-ROM. This * is used to mount the last session of a multi-session CD. Bail out * and return 0 if we fail, this is always a safe bet. 
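 *
 * (Editor's note: the scan below walks the TOC backwards from the last
 * track; in the Q-channel control field, a set bit 2 marks a data
 * track.)
 */
#if 0
/*
 * Illustrative helper, not part of the original source: the per-track
 * test the loop below applies to each TOC entry.
 */
static int
iso_track_is_data(struct ioc_read_toc_single_entry *t)
{
	return ((t->entry.control & 4) != 0);
}
#endif
/*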
*/ static int iso_get_ssector(dev, p) dev_t dev; struct proc *p; { struct ioc_toc_header h; struct ioc_read_toc_single_entry t; int i; struct bdevsw *bd; d_ioctl_t *ioctlp; bd = bdevsw[major(dev)]; ioctlp = bd->d_ioctl; if (ioctlp == NULL) return 0; if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) == -1) return 0; for (i = h.ending_track; i >= 0; i--) { t.address_format = CD_LBA_FORMAT; t.track = i; if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) == -1) return 0; if ((t.entry.control & 4) != 0) /* found a data track */ break; } if (i < 0) return 0; return ntohl(t.entry.addr.lba); } static int iso_mountroot(mp, p) struct mount *mp; struct proc *p; { struct iso_args args; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("iso_mountroot: can't find rootvp"); return (error); } args.flags = ISOFSMNT_ROOT; args.ssector = iso_get_ssector(rootdev, p); if (bootverbose) printf("iso_mountroot(): using session at block %d\n", args.ssector); if (error = iso_mountfs(rootvp, mp, p, &args)) return (error); (void)cd9660_statfs(mp, &mp->mnt_stat, p); return (0); } /* * VFS Operations. * * mount system call */ static int cd9660_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct iso_args args; u_int size; int error; struct iso_mnt *imp = 0; if ((mp->mnt_flag & MNT_ROOTFS) != 0) { if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; return (iso_mountroot(mp, p)); } if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) return (error); if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EROFS); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR flag, if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { imp = VFSTOISOFS(mp); if (bdevsw[major(imp->im_devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (args.fspec == 0) return (vfs_export(mp, &imp->im_export, &args.export)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
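 *
 * (Editor's note: "sensible" means the vnode namei() returns must be of
 * type VBLK and its major number must fall within bdevsw[], since the
 * code below indexes that table with it.)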
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp))) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return ENOTBLK; } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return ENXIO; } if ((mp->mnt_flag & MNT_UPDATE) == 0) { if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; error = iso_mountfs(devvp, mp, p, &args); } else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return error; } imp = VFSTOISOFS(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) cd9660_statfs(mp, &mp->mnt_stat, p); return 0; } /* * Common code for mount and mountroot */ static int iso_mountfs(devvp, mp, p, argp) register struct vnode *devvp; struct mount *mp; struct proc *p; struct iso_args *argp; { register struct iso_mnt *isomp = (struct iso_mnt *)0; struct buf *bp = NULL; dev_t dev = devvp->v_rdev; int error = EINVAL; int needclose = 0; int high_sierra = 0; int ronly = (mp->mnt_flag & MNT_RDONLY) != 0; int iso_bsize; int iso_blknum; struct iso_volume_descriptor *vdp = 0; struct iso_primary_descriptor *pri; struct iso_sierra_primary_descriptor *pri_sierra; struct iso_directory_record *rootp; int logical_block_size; if (!ronly) return EROFS; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp))) return error; if (vcount(devvp) > 1 && devvp != rootvp) return EBUSY; if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) return (error); if ((error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))) return error; needclose = 1; /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. For now, we'll just use a constant. */ iso_bsize = ISO_DEFAULT_BLOCK_SIZE; for (iso_blknum = 16 + argp->ssector; iso_blknum < 100 + argp->ssector; iso_blknum++) { if (error = bread(devvp, iso_blknum * btodb(iso_bsize), iso_bsize, NOCRED, &bp)) goto out; vdp = (struct iso_volume_descriptor *)bp->b_data; if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, sizeof vdp->id) != 0) { error = EINVAL; goto out; } else high_sierra = 1; } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_END) { error = EINVAL; goto out; } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_PRIMARY) break; brelse(bp); } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) != ISO_VD_PRIMARY) { error = EINVAL; goto out; } pri = (struct iso_primary_descriptor *)vdp; pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; logical_block_size = isonum_723 (high_sierra? pri_sierra->logical_block_size: pri->logical_block_size); if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE || (logical_block_size & (logical_block_size - 1)) != 0) { error = EINVAL; goto out; } rootp = (struct iso_directory_record *) (high_sierra? 
pri_sierra->root_directory_record: pri->root_directory_record); isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK); bzero((caddr_t)isomp, sizeof *isomp); isomp->logical_block_size = logical_block_size; isomp->volume_space_size = isonum_733 (high_sierra? pri_sierra->volume_space_size: pri->volume_space_size); /* * Since an ISO9660 multi-session CD can also access previous * sessions, we have to include them into the space consider- * ations. This doesn't yield a very accurate number since * parts of the old sessions might be inaccessible now, but we * can't do much better. This is also important for the NFS * filehandle validation. */ isomp->volume_space_size += argp->ssector; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->im_bmask = logical_block_size - 1; isomp->im_bshift = 0; while ((1 << isomp->im_bshift) < isomp->logical_block_size) isomp->im_bshift++; bp->b_flags |= B_AGE; brelse(bp); bp = NULL; mp->mnt_data = (qaddr_t)isomp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { if (error = bread(isomp->im_devvp, (isomp->root_extent + isonum_711(rootp->ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT), isomp->logical_block_size, NOCRED, &bp)) goto out; rootp = (struct iso_directory_record *)bp->b_data; if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { argp->flags |= ISOFSMNT_NORRIP; } else { argp->flags &= ~ISOFSMNT_GENS; } /* * The contents are valid, * but they will get reread as part of another vnode, so... */ bp->b_flags |= B_AGE; brelse(bp); bp = NULL; } isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT); if(high_sierra) /* this effectively ignores all the mount flags */ isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; else switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { default: isomp->iso_ftype = ISO_FTYPE_DEFAULT; break; case ISOFSMNT_GENS|ISOFSMNT_NORRIP: isomp->iso_ftype = ISO_FTYPE_9660; break; case 0: isomp->iso_ftype = ISO_FTYPE_RRIP; break; } return 0; out: - devvp->v_specflags &= ~SI_MOUNTEDON; + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (needclose) (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); if (isomp) { free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; } return error; } /* * Make a filesystem operational. * Nothing to do at the moment. 
*/ /* ARGSUSED */ static int cd9660_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return 0; } /* * unmount system call */ static int cd9660_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct iso_mnt *isomp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; #if 0 mntflushbuf(mp, 0); if (mntinvalbuf(mp)) return EBUSY; #endif if ((error = vflush(mp, NULLVP, flags))) return (error); isomp = VFSTOISOFS(mp); - isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; + isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Return root of a filesystem */ static int cd9660_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = (struct iso_directory_record *)imp->root; ino_t ino = isodirino(dp, imp); /* * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. */ return (cd9660_vget_internal(mp, ino, vpp, imp->iso_ftype == ISO_FTYPE_RRIP, dp)); } /* * Do operations associated with quotas, not supported */ /* ARGSUSED */ static int cd9660_quotactl(mp, cmd, uid, arg, p) struct mount *mp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { return (EOPNOTSUPP); } /* * Get file system statistics. */ int cd9660_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); sbp->f_type = MOUNT_CD9660; sbp->f_bsize = isomp->logical_block_size; sbp->f_iosize = sbp->f_bsize; /* XXX */ sbp->f_blocks = isomp->volume_space_size; sbp->f_bfree = 0; /* total free blocks */ sbp->f_bavail = 0; /* blocks free for non superuser */ sbp->f_files = 0; /* total files */ sbp->f_ffree = 0; /* free file nodes */ if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } /* Use the first spare for flags: */ - sbp->f_spare[0] = isomp->im_flags; + /* Don't do this!!! XXX */ + /* sbp->f_spare[0] = isomp->im_flags; */ return 0; } /* ARGSUSED */ static int cd9660_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is in range * - call iget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the generation number matches */ struct ifid { ushort ifid_len; ushort ifid_pad; int ifid_ino; long ifid_start; }; /* ARGSUSED */ int cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct ifid *ifhp = (struct ifid *)fhp; register struct iso_node *ip; register struct netcred *np; register struct iso_mnt *imp = VFSTOISOFS(mp); struct vnode *nvp; int error; #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifhp->ifid_ino, ifhp->ifid_start); #endif /* * Get the export permission structure for this tuple. 
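 *
 * (Editor's note: vfs_export_lookup() maps the client address in "nam"
 * to the netcred for this export; a NULL result means the client has
 * no access and the request fails with EACCES below.)
 */
#if 0
	/*
	 * Sketch, not original code: the handle validated here is the one
	 * cd9660_vptofh() built -- on cd9660 the inode number is the byte
	 * offset of the file's directory record on the volume.
	 */
	struct ifid fh;
	fh.ifid_len = sizeof(fh);
	fh.ifid_ino = ip->i_number;	/* directory record offset */
	fh.ifid_start = ip->iso_start;	/* first block of the extent */
#endif
/*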
*/ np = vfs_export_lookup(mp, &imp->im_export, nam); if (np == NULL) return (EACCES); if (error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int cd9660_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { /* * XXXX * It would be nice if we didn't always set the `relocated' flag * and force the extra read, but I don't want to think about fixing * that right now. */ return (cd9660_vget_internal(mp, ino, vpp, #if 0 VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, #else 0, #endif (struct iso_directory_record *)0)); } /* * Complement to all vpp returning ops. * XXX - initially only to get rid of WILLRELE. */ /* ARGSUSED */ static int cd9660_vrele(mp, vp) struct mount *mp; struct vnode *vp; { return (EOPNOTSUPP); } int cd9660_vget_internal(mp, ino, vpp, relocated, isodir) struct mount *mp; ino_t ino; struct vnode **vpp; int relocated; struct iso_directory_record *isodir; { struct iso_mnt *imp; struct iso_node *ip; struct buf *bp; struct vnode *vp, *nvp; dev_t dev; int error; imp = VFSTOISOFS(mp); dev = imp->im_dev; if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP) return (0); /* Allocate a new vnode/iso_node. */ if (error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) { *vpp = NULLVP; return (error); } MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK); bzero((caddr_t)ip, sizeof(struct iso_node)); lockinit(&ip->i_lock, PINOD, "isonode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_dev = dev; ip->i_number = ino; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ cd9660_ihashins(ip); if (isodir == 0) { int lbn, off; lbn = lblkno(imp, ino); if (lbn >= imp->volume_space_size) { vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); return (ESTALE); } off = blkoff(imp, ino); if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); return (ESTALE); } error = bread(imp->im_devvp, lbn << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { vput(vp); brelse(bp); printf("fhtovp: bread error %d\n",error); return (error); } isodir = (struct iso_directory_record *)(bp->b_data + off); if (off + isonum_711(isodir->length) > imp->logical_block_size) { vput(vp); if (bp != 0) brelse(bp); printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); return (ESTALE); } #if 0 if (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { if (bp != 0) brelse(bp); printf("fhtovp: file start miss %d vs %d\n", isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), ifhp->ifid_start); return (ESTALE); } #endif } else bp = 0; ip->i_mnt = imp; ip->i_devvp = imp->im_devvp; VREF(ip->i_devvp); if (relocated) { /* * On relocated directories we must * read the `.' entry out of a dir. 
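 *
 * (Editor's note: Rock Ridge relocates directories that would exceed
 * ISO9660's eight-level depth limit; the moved directory's real
 * attributes live in its own `.' entry, which is why that record is
 * read here rather than the entry found in the parent.)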
*/ ip->iso_start = ino >> imp->im_bshift; if (bp != 0) brelse(bp); if (error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) { vput(vp); return (error); } isodir = (struct iso_directory_record *)bp->b_data; } ip->iso_extent = isonum_733(isodir->extent); ip->i_size = isonum_733(isodir->size); ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; /* * Setup time stamp, attribute */ vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { struct buf *bp2; int off; if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); else bp2 = NULL; cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); if (bp2) brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } if (bp != 0) brelse(bp); /* * Initialize the associated vnode */ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { case VFIFO: vp->v_op = cd9660_fifoop_p; break; case VCHR: case VBLK: /* * if device, look at device number table for translation */ vp->v_op = cd9660_specop_p; if (nvp = checkalias(vp, ip->inode.iso_rdev, mp)) { /* * Discard unneeded vnode, but save its iso_node. * Note that the lock is carried over in the iso_node * to the replacement vnode. */ nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased inode. */ vp = nvp; ip->i_vnode = vp; } break; } if (ip->iso_extent == imp->root_extent) vp->v_flag |= VROOT; /* * XXX need generation number? */ *vpp = vp; return (0); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int cd9660_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; ifhp = (struct ifid *)fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = ip->i_number; ifhp->ifid_start = ip->iso_start; #ifdef ISOFS_DBG printf("vptofh: ino %d, start %ld\n", ifhp->ifid_ino,ifhp->ifid_start); #endif return 0; } Index: head/sys/fs/msdosfs/msdosfs_vfsops.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_vfsops.c (revision 34265) +++ head/sys/fs/msdosfs/msdosfs_vfsops.c (revision 34266) @@ -1,1051 +1,1054 @@ -/* $Id: msdosfs_vfsops.c,v 1.28 1998/02/23 16:44:32 ache Exp $ */ +/* $Id: msdosfs_vfsops.c,v 1.29 1998/03/01 22:46:27 msmith Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include #include #include #include /* XXX */ /* defines v_rdev */ #include #include #include #include #include /* defines ALLPERMS */ #include #include #include #include #include #include MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure"); static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table"); static int update_mp __P((struct mount *mp, struct msdosfs_args *argp)); static int mountmsdosfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct msdosfs_args *argp)); static int msdosfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int msdosfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int msdosfs_root __P((struct mount *, struct vnode **)); static int msdosfs_start __P((struct mount *, int, struct proc *)); static int msdosfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int msdosfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int msdosfs_unmount __P((struct mount *, int, struct proc *)); static int msdosfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int msdosfs_vptofh __P((struct vnode *, struct fid *)); static int update_mp(mp, argp) struct mount *mp; struct msdosfs_args *argp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error; pmp->pm_gid = argp->gid; pmp->pm_uid = argp->uid; pmp->pm_mask = argp->mask & ALLPERMS; pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) { bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w)); bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u)); bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d)); } if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) { bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul)); bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu)); } #ifndef __FreeBSD__ /* * GEMDOS knows nothing (yet) about win95 */ if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS) pmp->pm_flags |= MSDOSFSMNT_NOWIN95; #endif if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; else if (!(pmp->pm_flags & (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))) { struct vnode *rootvp; /* * Try to divine 
whether to support Win'95 long filenames */ if (FAT32(pmp)) pmp->pm_flags |= MSDOSFSMNT_LONGNAME; else { if ((error = msdosfs_root(mp, &rootvp)) != 0) return error; pmp->pm_flags |= findwin95(VTODE(rootvp)) ? MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME; vput(rootvp); } } return 0; } #ifndef __FreeBSD__ int msdosfs_mountroot() { register struct mount *mp; struct proc *p = curproc; /* XXX */ size_t size; int error; struct msdosfs_args args; if (root_device->dv_class != DV_DISK) return (ENODEV); /* * Get vnodes for swapdev and rootdev. */ if (bdevvp(rootdev, &rootvp)) panic("msdosfs_mountroot: can't setup rootvp"); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &msdosfs_vfsops; mp->mnt_flag = 0; LIST_INIT(&mp->mnt_vnodelist); args.flags = 0; args.uid = 0; args.gid = 0; args.mask = 0777; if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) { free(mp, M_MOUNT); return (error); } if ((error = update_mp(mp, &args)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } if ((error = vfs_lock(mp)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); mp->mnt_vnodecovered = NULLVP; (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)msdosfs_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); return (0); } #endif /* * mp - path - addr in user space of mount point (ie /usr or whatever) * data - addr in user space of mount params including the name of the block * special file to treat as a filesystem. */ static int msdosfs_mount(mp, path, data, ndp, p) struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; /* vnode for blk device to mount */ struct msdosfs_args args; /* will hold data from mount request */ /* msdosfs specific mount control block */ struct msdosfsmount *pmp = NULL; size_t size; int error, flags; mode_t accessmode; error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args)); if (error) return (error); if (args.magic != MSDOSFS_ARGSMAGIC) { printf("Old mount_msdosfs, flags=%d\n", args.flags); args.flags = 0; } /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { pmp = VFSTOMSDOSFS(mp); error = 0; if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); } if (!error && (mp->mnt_flag & MNT_RELOAD)) /* not yet implemented */ error = EOPNOTSUPP; if (error) return (error); if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. 
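 *
 * (Editor's note: this mirrors the mount-time check further down -- a
 * non-root user may upgrade to read/write only if VOP_ACCESS() grants
 * both VREAD and VWRITE on the device with the user's own credentials.)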
*/ if (p->p_ucred->cr_uid != 0) { devvp = pmp->pm_devvp; vn_lock(devvp, LK_EXCLUSIVE, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } pmp->pm_flags &= ~MSDOSFSMNT_RONLY; } if (args.fspec == 0) { #ifdef __notyet__ /* doesn't work correctly with current mountd XXX */ if (args.flags & MSDOSFSMNT_MNTOPT) { pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT; pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; } #endif /* * Process export requests. */ return (vfs_export(mp, &pmp->pm_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); error = namei(ndp); if (error) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = mountmsdosfs(devvp, mp, p, &args); #ifdef MSDOSFS_DEBUG /* only needed for the printf below */ pmp = VFSTOMSDOSFS(mp); #endif } else { if (devvp != pmp->pm_devvp) error = EINVAL; /* XXX needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } error = update_mp(mp, &args); if (error) { msdosfs_unmount(mp, MNT_FORCE, p); return error; } (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) msdosfs_statfs(mp, &mp->mnt_stat, p); #ifdef MSDOSFS_DEBUG printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); #endif return (0); } static int mountmsdosfs(devvp, mp, p, argp) struct vnode *devvp; struct mount *mp; struct proc *p; struct msdosfs_args *argp; { struct msdosfsmount *pmp; struct buf *bp; dev_t dev = devvp->v_rdev; #ifndef __FreeBSD__ struct partinfo dpart; #endif union bootsector *bsp; struct byte_bpb33 *b33; struct byte_bpb50 *b50; #ifdef PC98 u_int pc98_wrk; u_int Phy_Sector_Size; #endif struct byte_bpb710 *b710; u_int8_t SecPerClust; int ronly, error; int bsize = 0, dtype = 0, tmp; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? 
FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); bp = NULL; /* both used in error_exit */ pmp = NULL; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { /* * We need the disklabel to calculate the size of a FAT entry * later on. Also make sure the partition contains a filesystem * of type FS_MSDOS. This doesn't work for floppies, so we have * to check for them too. * * At least some parts of the msdos fs driver seem to assume * that the size of a disk block will always be 512 bytes. * Let's check it... */ error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p); if (error) goto error_exit; tmp = dpart.part->p_fstype; dtype = dpart.disklab->d_type; bsize = dpart.disklab->d_secsize; if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) { error = EINVAL; goto error_exit; } } #endif /* * Read the boot sector of the filesystem, and then check the * boot signature. If not a dos boot sector then error out. */ #ifdef PC98 devvp->v_flag &= 0xffff; error = bread(devvp, 0, 1024, NOCRED, &bp); #else error = bread(devvp, 0, 512, NOCRED, &bp); #endif if (error) goto error_exit; bp->b_flags |= B_AGE; bsp = (union bootsector *)bp->b_data; b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif #ifdef PC98 if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) && (bsp->bs50.bsBootSectSig0 != 0 /* PC98 DOS 3.3x */ || bsp->bs50.bsBootSectSig1 != 0) && (bsp->bs50.bsBootSectSig0 != 0x90 /* PC98 DOS 5.0 */ || bsp->bs50.bsBootSectSig1 != 0x3d) && (bsp->bs50.bsBootSectSig0 != 0x46 /* PC98 DOS 3.3B */ || bsp->bs50.bsBootSectSig1 != 0xfa)) { #else if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK); bzero((caddr_t)pmp, sizeof *pmp); pmp->pm_mountp = mp; /* * Compute several useful quantities from the bpb in the * bootsector. Copy in the dos 5 variant of the bpb then fix up * the fields that are different between dos 5 and dos 3.3. 
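 *
 * (Editor's note: the BPB fields are little-endian on disk; getushort()
 * and getulong() read them portably, so the code below also works on
 * big-endian hosts.)
 */
#if 0
	/*
	 * Sketch of what getushort() amounts to, assuming "p" is a
	 * hypothetical byte pointer at a 16-bit BPB field:
	 */
	u_int16_t bps = ((u_int8_t *)p)[0] | (((u_int8_t *)p)[1] << 8);
#endif
/*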
*/ SecPerClust = b50->bpbSecPerClust; pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); pmp->pm_ResSectors = getushort(b50->bpbResSectors); pmp->pm_FATs = b50->bpbFATs; pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); pmp->pm_Sectors = getushort(b50->bpbSectors); pmp->pm_FATsecs = getushort(b50->bpbFATsecs); pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); pmp->pm_Heads = getushort(b50->bpbHeads); pmp->pm_Media = b50->bpbMedia; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif /* XXX - We should probably check more values here */ if (!pmp->pm_BytesPerSec || !SecPerClust || !pmp->pm_Heads || pmp->pm_Heads > 255 #ifdef PC98 || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) { #else || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif if (pmp->pm_Sectors == 0) { pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); } else { pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); pmp->pm_HugeSectors = pmp->pm_Sectors; } #ifdef PC98 /* for PC98 added Satoshi Yasuda */ Phy_Sector_Size = 512; if ((devvp->v_rdev>>8) == 2) { /* floppy check */ if (((devvp->v_rdev&077) == 2) && (pmp->pm_HugeSectors == 1232)) { Phy_Sector_Size = 1024; /* 2HD */ /* * 1024byte/sector support */ devvp->v_flag |= 0x10000; } else { if ((((devvp->v_rdev&077) == 3) /* 2DD 8 or 9 sector */ && (pmp->pm_HugeSectors == 1440)) /* 9 sector */ || (((devvp->v_rdev&077) == 4) && (pmp->pm_HugeSectors == 1280)) /* 8 sector */ || (((devvp->v_rdev&077) == 5) && (pmp->pm_HugeSectors == 2880))) { /* 1.44M */ Phy_Sector_Size = 512; } else { if (((devvp->v_rdev&077) != 1) && ((devvp->v_rdev&077) != 0)) { /* 2HC */ error = EINVAL; goto error_exit; } } } } pc98_wrk = pmp->pm_BytesPerSec / Phy_Sector_Size; pmp->pm_BytesPerSec = Phy_Sector_Size; SecPerClust = SecPerClust * pc98_wrk; pmp->pm_HugeSectors = pmp->pm_HugeSectors * pc98_wrk; pmp->pm_ResSectors = pmp->pm_ResSectors * pc98_wrk; pmp->pm_FATsecs = pmp->pm_FATsecs * pc98_wrk; pmp->pm_SecPerTrack = pmp->pm_SecPerTrack * pc98_wrk; pmp->pm_HiddenSects = pmp->pm_HiddenSects * pc98_wrk; #endif /* */ if (pmp->pm_HugeSectors > 0xffffffff / pmp->pm_BytesPerSec + 1) { /* * We cannot deal currently with this size of disk * due to fileid limitations (see msdosfs_getattr and * msdosfs_readdir) */ error = EINVAL; goto error_exit; } if (pmp->pm_RootDirEnts == 0) { if (bsp->bs710.bsBootSectSig2 != BOOTSIG2 || bsp->bs710.bsBootSectSig3 != BOOTSIG3 || pmp->pm_Sectors || pmp->pm_FATsecs || getushort(b710->bpbFSVers)) { error = EINVAL; goto error_exit; } pmp->pm_fatmask = FAT32_MASK; pmp->pm_fatmult = 4; pmp->pm_fatdiv = 1; pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); if (getushort(b710->bpbExtFlags) & FATMIRROR) pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM; else pmp->pm_flags |= MSDOSFS_FATMIRROR; } else pmp->pm_flags |= MSDOSFS_FATMIRROR; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (FAT32(pmp)) { /* * GEMDOS doesn't know fat32. 
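 *
 * (Editor's note: FAT32 was recognized a few lines up by bpbRootDirEnts
 * being zero; GEMDOS predates FAT32, so such a volume cannot be a
 * GEMDOS filesystem and the mount is rejected here.)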
*/ error = EINVAL; goto error_exit; } /* * Check a few values (could do some more): * - logical sector size: power of 2, >= block size * - sectors per cluster: power of 2, >= 1 * - number of sectors: >= 1, <= size of partition */ if ( (SecPerClust == 0) || (SecPerClust & (SecPerClust - 1)) || (pmp->pm_BytesPerSec < bsize) || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1)) || (pmp->pm_HugeSectors == 0) || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize) > dpart.part->p_size) ) { error = EINVAL; goto error_exit; } /* * XXX - Many parts of the msdos fs driver seem to assume that * the number of bytes per logical sector (BytesPerSec) will * always be the same as the number of bytes per disk block * Let's pretend it is. */ tmp = pmp->pm_BytesPerSec / bsize; pmp->pm_BytesPerSec = bsize; pmp->pm_HugeSectors *= tmp; pmp->pm_HiddenSects *= tmp; pmp->pm_ResSectors *= tmp; pmp->pm_Sectors *= tmp; pmp->pm_FATsecs *= tmp; SecPerClust *= tmp; } #endif pmp->pm_fatblk = pmp->pm_ResSectors; if (FAT32(pmp)) { pmp->pm_rootdirblk = getulong(b710->bpbRootClust); pmp->pm_firstcluster = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_fsinfo = getushort(b710->bpbFSInfo); } else { pmp->pm_rootdirblk = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry) + pmp->pm_BytesPerSec - 1) / pmp->pm_BytesPerSec;/* in sectors */ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust; pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1; pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if ((pmp->pm_nmbrofclusters <= (0xff0 - 2)) && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE) && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2)))) ) { pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } else #endif if (pmp->pm_fatmask == 0) { if (pmp->pm_maxcluster <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { /* * This will usually be a floppy disk. This size makes * sure that one fat entry will not be split across * multiple blocks. */ pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } if (FAT12(pmp)) pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec; else pmp->pm_fatblocksize = MAXBSIZE; pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec; pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1; /* * Compute mask and shift value for isolating cluster relative byte * offsets and cluster numbers from a file offset. */ pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec; pmp->pm_crbomask = pmp->pm_bpcluster - 1; pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; /* * Check for valid cluster size * must be a power of 2 */ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { error = EINVAL; goto error_exit; } /* * Release the bootsector buffer. */ brelse(bp); bp = NULL; /* * Check FSInfo. 
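 *
 * (Editor's note: the FAT32 FSInfo sector caches a next-free-cluster
 * hint; the block below trusts it only if all four signatures match,
 * and otherwise zeroes pm_fsinfo so the hint is never used.)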
*/ if (pmp->pm_fsinfo) { struct fsinfo *fp; if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0) goto error_exit; fp = (struct fsinfo *)bp->b_data; if (!bcmp(fp->fsisig1, "RRaA", 4) && !bcmp(fp->fsisig2, "rrAa", 4) && !bcmp(fp->fsisig3, "\0\0\125\252", 4) && !bcmp(fp->fsisig4, "\0\0\125\252", 4)) pmp->pm_nxtfree = getulong(fp->fsinxtfree); else pmp->pm_fsinfo = 0; brelse(bp); bp = NULL; } /* * Check and validate (or perhaps invalidate?) the fsinfo structure? XXX */ /* * Allocate memory for the bitmap of allocated clusters, and then * fill it in. */ pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1) / N_INUSEBITS) * sizeof(*pmp->pm_inusemap), M_MSDOSFSFAT, M_WAITOK); /* * fillinusemap() needs pm_devvp. */ pmp->pm_dev = dev; pmp->pm_devvp = devvp; /* * Have the inuse map filled in. */ if ((error = fillinusemap(pmp)) != 0) goto error_exit; /* * If they want fat updates to be synchronous then let them suffer * the performance degradation in exchange for the on disk copy of * the fat being correct just about all the time. I suppose this * would be a good thing to turn on if the kernel is still flakey. */ if (mp->mnt_flag & MNT_SYNCHRONOUS) pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; /* * Finish up. */ if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else pmp->pm_fmod = 1; mp->mnt_data = (qaddr_t) pmp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_flag |= MNT_LOCAL; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; return 0; error_exit: if (bp) brelse(bp); (void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p); if (pmp) { if (pmp->pm_inusemap) free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } static int msdosfs_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return (0); } /* * Unmount the filesystem described by mp. */ static int msdosfs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { struct msdosfsmount *pmp; int error, flags; flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); if (error) return error; pmp = VFSTOMSDOSFS(mp); - pmp->pm_devvp->v_specflags &= ~SI_MOUNTEDON; + pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; printf("msdosfs_umount(): just before calling VOP_CLOSE()\n"); printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n", vp->v_flag, vp->v_usecount, vp->v_writecount, vp->v_holdcnt); printf("lastr %d, id %lu, mount %p, op %p\n", vp->v_lastr, vp->v_id, vp->v_mount, vp->v_op); printf("freef %p, freeb %p, mount %p\n", vp->v_freelist.tqe_next, vp->v_freelist.tqe_prev, vp->v_mount); printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n", vp->v_cleanblkhd.lh_first, vp->v_dirtyblkhd.lh_first, vp->v_numoutput, vp->v_type); printf("union %p, tag %d, data[0] %08x, data[1] %08x\n", vp->v_socket, vp->v_tag, ((u_int *)vp->v_data)[0], ((u_int *)vp->v_data)[1]); } #endif - error = VOP_CLOSE(pmp->pm_devvp, (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE, - NOCRED, p); + error = VOP_CLOSE(pmp->pm_devvp, + (pmp->pm_flags&MSDOSFSMNT_RONLY) ? 
FREAD : FREAD | FWRITE, + NOCRED, p); vrele(pmp->pm_devvp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } static int msdosfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct denode *ndep; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp); #endif error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep); if (error) return (error); *vpp = DETOV(ndep); return (0); } static int msdosfs_quotactl(mp, cmds, uid, arg, p) struct mount *mp; int cmds; uid_t uid; caddr_t arg; struct proc *p; { return EOPNOTSUPP; } static int msdosfs_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; struct proc *p; { struct msdosfsmount *pmp; pmp = VFSTOMSDOSFS(mp); sbp->f_bsize = pmp->pm_bpcluster; sbp->f_iosize = pmp->pm_bpcluster; sbp->f_blocks = pmp->pm_nmbrofclusters; sbp->f_bfree = pmp->pm_freeclustercount; sbp->f_bavail = pmp->pm_freeclustercount; sbp->f_files = pmp->pm_RootDirEnts; /* XXX */ sbp->f_ffree = 0; /* what to put in here? */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } static int msdosfs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *vp, *nvp; struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; /* * If we ever switch to not updating all of the fats all the time, * this would be the place to update them from the first one. */ if (pmp->pm_fmod != 0) if (pmp->pm_flags & MSDOSFSMNT_RONLY) panic("msdosfs_sync: rofs mod"); else { /* update fats here */ } /* * Write back each (modified) denode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * assoicated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; dep = VTODE(vp); - if (vp->v_type == VNON || ((dep->de_flag & - (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) - && vp->v_dirtyblkhd.lh_first == NULL) { + if (vp->v_type == VNON + || (waitfor == MNT_LAZY) /* can this happen with msdosfs? */ + || (((dep->de_flag & + (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) + && (vp->v_dirtyblkhd.lh_first == NULL))) { simple_unlock(&vp->v_interlock); continue; } simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); /* done with this one */ simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); /* * Flush filesystem control info. 
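 *
 * (Editor's note: the loop above pushed out the modified denodes; this
 * final VOP_FSYNC() on pm_devvp flushes the remaining metadata -- FAT
 * and directory blocks -- buffered against the device vnode itself.)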
*/ error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p); if (error) allerror = error; return (allerror); } static int msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct defid *defhp = (struct defid *) fhp; struct denode *dep; struct netcred *np; int error; np = vfs_export_lookup(mp, &pmp->pm_export, nam); if (np == NULL) return (EACCES); error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep); if (error) { *vpp = NULLVP; return (error); } *vpp = DETOV(dep); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } static int msdosfs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { struct denode *dep; struct defid *defhp; dep = VTODE(vp); defhp = (struct defid *)fhp; defhp->defid_len = sizeof(struct defid); defhp->defid_dirclust = dep->de_dirclust; defhp->defid_dirofs = dep->de_diroffset; /* defhp->defid_gen = dep->de_gen; */ return (0); } static int msdosfs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { return EOPNOTSUPP; } static struct vfsops msdosfs_vfsops = { msdosfs_mount, msdosfs_start, msdosfs_unmount, msdosfs_root, msdosfs_quotactl, msdosfs_statfs, msdosfs_sync, msdosfs_vget, vfs_vrele, msdosfs_fhtovp, msdosfs_vptofh, msdosfs_init }; VFS_SET(msdosfs_vfsops, msdos, MOUNT_MSDOS, 0); Index: head/sys/fs/specfs/spec_vnops.c =================================================================== --- head/sys/fs/specfs/spec_vnops.c (revision 34265) +++ head/sys/fs/specfs/spec_vnops.c (revision 34266) @@ -1,911 +1,917 @@ /* * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.58 1998/03/07 21:35:52 dyson Exp $ + * $Id: spec_vnops.c,v 1.59 1998/03/08 08:46:18 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int spec_getattr __P((struct vop_getattr_args *)); static int spec_badop __P((void)); static int spec_strategy __P((struct vop_strategy_args *)); static int spec_print __P((struct vop_print_args *)); static int spec_lookup __P((struct vop_lookup_args *)); static int spec_open __P((struct vop_open_args *)); static int spec_close __P((struct vop_close_args *)); static int spec_read __P((struct vop_read_args *)); static int spec_write __P((struct vop_write_args *)); static int spec_ioctl __P((struct vop_ioctl_args *)); static int spec_poll __P((struct vop_poll_args *)); static int spec_inactive __P((struct vop_inactive_args *)); static int spec_fsync __P((struct vop_fsync_args *)); static int spec_bmap __P((struct vop_bmap_args *)); static int spec_advlock __P((struct vop_advlock_args *)); static int spec_getpages __P((struct vop_getpages_args *)); struct vnode *speclisth[SPECHSZ]; vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) vop_ebadf }, { &vop_advlock_desc, (vop_t *) spec_advlock }, { &vop_bmap_desc, (vop_t *) spec_bmap }, { &vop_close_desc, (vop_t *) spec_close }, { &vop_create_desc, (vop_t *) spec_badop }, { &vop_fsync_desc, (vop_t *) spec_fsync }, { &vop_getattr_desc, (vop_t *) spec_getattr }, { &vop_getpages_desc, (vop_t *) spec_getpages }, { &vop_inactive_desc, (vop_t *) spec_inactive }, { &vop_ioctl_desc, (vop_t *) spec_ioctl }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) spec_badop }, { &vop_lookup_desc, (vop_t *) spec_lookup }, { &vop_mkdir_desc, (vop_t *) spec_badop }, { &vop_mknod_desc, (vop_t *) spec_badop }, { &vop_open_desc, (vop_t *) spec_open }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_poll_desc, (vop_t *) spec_poll }, { &vop_print_desc, (vop_t *) spec_print }, { &vop_read_desc, (vop_t *) spec_read }, { &vop_readdir_desc, (vop_t *) spec_badop }, { &vop_readlink_desc, (vop_t *) spec_badop }, { &vop_reallocblks_desc, (vop_t *) spec_badop }, { &vop_reclaim_desc, (vop_t *) vop_null }, { &vop_remove_desc, (vop_t *) spec_badop }, { &vop_rename_desc, (vop_t *) spec_badop }, { &vop_rmdir_desc, (vop_t *) spec_badop }, { &vop_setattr_desc, (vop_t *) vop_ebadf }, { &vop_strategy_desc, (vop_t *) spec_strategy }, { &vop_symlink_desc, (vop_t *) spec_badop }, { &vop_write_desc, (vop_t *) spec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); int spec_vnoperate(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap)); } static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. */ static int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. 
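 *
 * (Editor's note: the checks below enforce, in order, the MNT_NODEV
 * mount flag, the securelevel restrictions on writing to disk devices,
 * and -- for block devices -- the rule that a mounted device may not be
 * opened again.)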
*/ /* ARGSUSED */ static int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; int maj = major(dev); int error; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: if ((u_int)maj >= nchrdev) return (ENXIO); if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && cdevsw[maj]->d_bdev && (cdevsw[maj]->d_bdev->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } #if 0 /* * Lite2 stuff. We will almost certainly do this * differently with devfs. The only use of this flag * is in dead_read to make ttys return EOF instead of * EIO when they are dead. Pre-lite2 FreeBSD returns * EOF for all character devices. */ if (cdevsw[maj]->d_type == D_TTY) vp->v_flag |= VISTTY; #endif VOP_UNLOCK(vp, 0, p); error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if ((u_int)maj >= nblkdev) return (ENXIO); if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. 
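 * (vfs_mountedon() reports EBUSY if this vnode, or an alias of it,
 * is in use as a mounted filesystem.)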
*/ error = vfs_mountedon(vp); if (error) return (error); return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p)); } return (0); } /* * Vnode op for read */ /* ARGSUSED */ static int spec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; int n, on, majordev; d_ioctl_t *ioctl; int error = 0; dev_t dev; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_read) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; dev = vp->v_rdev; if ((majordev = major(dev)) < nblkdev && (ioctl = bdevsw[majordev]->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = btodb(bsize); do { bn = btodb(uio->uio_offset) & ~(bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ } /* * Vnode op for write */ /* ARGSUSED */ static int spec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_write) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = btodb(bsize) - 1; do { bn = btodb(uio->uio_offset) & ~blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) bawrite(bp); else bdwrite(bp); } while (error == 0 && uio->uio_resid 
> 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. */ /* ARGSUSED */ static int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) if ((bdevsw[major(dev)]->d_flags & D_TYPEMASK) == D_TAPE) return (0); else return (1); return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* ARGSUSED */ static int spec_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { case VCHR: dev = ap->a_vp->v_rdev; return (*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p); default: return (vop_defaultop((struct vop_generic_args *)ap)); } } /* * Synch buffers associated with a block device */ /* ARGSUSED */ static int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. */ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { vfs_bio_awrite(bp); splx(s); } else { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); bawrite(bp); } goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } static int spec_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } /* * Just call the device strategy routine */ static int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { + struct buf *bp; - (*bdevsw[major(ap->a_bp->b_dev)]->d_strategy)(ap->a_bp); + bp = ap->a_bp; + if ((LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + (*bdevsw[major(bp->b_dev)]->d_strategy)(bp); return (0); } /* * This is a noop, simply returning what one has been given. 
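 * For a special file the logical block number is already a physical
 * block number on the device, so the vnode maps to itself and any
 * requested run lengths are reported as zero.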
 */
static int
spec_bmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{
	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

/*
 * Device close routine
 */
/* ARGSUSED */
static int
spec_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	struct proc *p = ap->a_p;
	dev_t dev = vp->v_rdev;
	d_close_t *devclose;
	int mode, error;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && ap->a_p &&
		    (vp->v_flag & VXLOCK) == 0 &&
		    vp == ap->a_p->p_session->s_ttyvp) {
			vrele(vp);
			ap->a_p->p_session->s_ttyvp = NULL;
		}
		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = cdevsw[major(dev)]->d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
+		VOP_UNLOCK(vp, 0, ap->a_p);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = bdevsw[major(dev)]->d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
}

/*
 * Print out the contents of a special device vnode.
 */
static int
spec_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
	    minor(ap->a_vp->v_rdev));
	return (0);
}

/*
 * Special device advisory byte-level locks.
 */
/* ARGSUSED */
static int
spec_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap;
{
	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
}

/*
 * Special device bad operation
 */
static int
spec_badop()
{
	panic("spec_badop called");
	/* NOTREACHED */
}

static void
spec_getpages_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}

static int
spec_getpages(ap)
	struct vop_getpages_args *ap;
{
	vm_offset_t kva;
	int error;
	int i, pcount, size, s;
	daddr_t blkno;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer.
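	 * That is, the byte offset of the first page of the request:
	 *
	 *	offset = IDX_TO_OFF(pindex) + a_offset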
*/ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; /* XXX sanity check before we go into details. */ /* XXX limits should be defined elsewhere. */ #define DADDR_T_BIT 32 #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) if (offset < 0 || offset > OFFSET_MAX) { /* XXX still no %q in kernel. */ printf("spec_getpages: preposterous offset 0x%x%08x\n", (u_int)((u_quad_t)offset >> 32), (u_int)(offset & 0xffffffff)); return (VM_PAGER_ERROR); } blkno = btodb(offset); /* * Round up physical size for real devices, use the * fundamental blocksize of the fs if possible. */ if (vp && vp->v_mount) blksiz = vp->v_mount->mnt_stat.f_bsize; else blksiz = DEV_BSIZE; size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); bp = getpbuf(); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; bp->b_resid = 0; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. */ VOP_STRATEGY(bp); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "spread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) { if (bp->b_error) error = bp->b_error; else error = EIO; } nread = size - bp->b_resid; if (nread < ap->a_count) { bzero((caddr_t)kva + nread, ap->a_count - nread); } pmap_qremove(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= nread) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else if (toff < nread) { int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } else { m->valid = 0; m->dirty = 0; } if (i != ap->a_reqpage) { /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error || (m->valid == VM_PAGE_BITS_ALL)) { if (m->valid) { if (m->flags & PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } PAGE_WAKEUP(m); } else { vm_page_free(m); } } else { vm_page_free(m); } } else if (m->valid) { gotreqpage = 1; } } if (!gotreqpage) { m = ap->a_m[ap->a_reqpage]; #ifndef MAX_PERF printf("spec_getpages: I/O read failure: (error code=%d)\n", error); printf(" size: %d, resid: %d, a_count: %d, valid: 0x%x\n", size, bp->b_resid, ap->a_count, m->valid); printf(" nread: %d, reqpage: %d, pindex: %d, pcount: %d\n", nread, ap->a_reqpage, m->pindex, pcount); #endif /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp); return VM_PAGER_ERROR; } /* * Free the buffer header back to the swap buffer pool. 
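	 * (relpbuf() returns the header to the pool so other paging
	 * operations can reuse it.)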
*/ relpbuf(bp); return VM_PAGER_OK; } /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = dbtob(dpart.disklab->d_partitions [minor(vp->v_rdev)].p_size); vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/gnu/ext2fs/inode.h =================================================================== --- head/sys/gnu/ext2fs/inode.h (revision 34265) +++ head/sys/gnu/ext2fs/inode.h (revision 34266) @@ -1,172 +1,181 @@ /* * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ #define _UFS_UFS_INODE_H_ #include #include /* + * The size of a logical block number. + */ +typedef long ufs_lbn_t; + +/* * This must agree with the definition in . */ #define doff_t int32_t /* * The inode is used to describe each active (or recently active) file in the * UFS filesystem. It is composed of two types of information. 
The first part * is the information that is needed only while the file is active (such as * the identity of the file and linkage to speed its lookup). The second part * is the permanent meta-data associated with the file which is read in * from the permanent dinode from long term storage when the file becomes * active, and is put back when the file is no longer being used. */ struct inode { struct lock i_lock; /* Inode lock. >Keep this first< */ LIST_ENTRY(inode) i_hash;/* Hash chain. */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ struct ext2_sb_info *e2fs; /* EXT2FS */ } inode_u; #define i_fs inode_u.fs #define i_e2fs inode_u.e2fs struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ /* * Side effects; used during directory lookup. */ int32_t i_count; /* Size of free slot in directory. */ doff_t i_endoff; /* End of useful stuff in directory. */ doff_t i_diroff; /* Offset in dir, where we found last entry. */ doff_t i_offset; /* Offset of free space in directory. */ ino_t i_ino; /* Inode number of found directory. */ u_int32_t i_reclen; /* Size of found directory entry. */ int i_spare[5]; /* XXX actually non-spare (for ext2fs). */ /* * The on-disk dinode itself. */ struct dinode i_din; /* 128 bytes of the on-disk dinode. */ }; #define i_atime i_din.di_atime #define i_atimensec i_din.di_atimensec #define i_blocks i_din.di_blocks #define i_ctime i_din.di_ctime #define i_ctimensec i_din.di_ctimensec #define i_db i_din.di_db #define i_flags i_din.di_flags #define i_gen i_din.di_gen #define i_gid i_din.di_gid #define i_ib i_din.di_ib #define i_mode i_din.di_mode #define i_mtime i_din.di_mtime #define i_mtimensec i_din.di_mtimensec #define i_nlink i_din.di_nlink #define i_rdev i_din.di_rdev #define i_shortlink i_din.di_shortlink #define i_size i_din.di_size #define i_uid i_din.di_uid /* These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ #define IN_UPDATE 0x0004 /* Modification time update request. */ #define IN_MODIFIED 0x0008 /* Inode has been modified. */ #define IN_RENAME 0x0010 /* Inode is being renamed. */ #define IN_SHLOCK 0x0020 /* File has shared lock. */ #define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #define IN_HASHED 0x0080 /* Inode is on hash list */ #ifdef KERNEL /* * Structure used to pass around logical block paths generated by * ufs_getlbns and used by truncate and bmap code. */ struct indir { ufs_daddr_t in_lbn; /* Logical block number. */ int in_off; /* Offset in buffer. */ int in_exists; /* Flag if the block exists. */ }; /* Convert between inode pointers and vnode pointers. */ #define VTOI(vp) ((struct inode *)(vp)->v_data) #define ITOV(ip) ((ip)->i_vnode) /* * XXX this is too long to be a macro, and isn't used in any time-critical * place; in fact it is only used in ufs_vnops.c so it shouldn't be in a * header file. 
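 * A typical (hypothetical) call is ITIMES(ip, &time, &time), which
 * folds any pending IN_ACCESS/IN_UPDATE/IN_CHANGE requests into the
 * on-disk timestamp fields and bumps i_modrev when the modification
 * time changes.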
*/ #define ITIMES(ip, t1, t2) { \ long tv_sec = time.tv_sec; \ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ (ip)->i_flag |= IN_MODIFIED; \ if ((ip)->i_flag & IN_ACCESS) \ (ip)->i_atime \ = ((t1) == &time ? tv_sec : (t1)->tv_sec); \ if ((ip)->i_flag & IN_UPDATE) { \ (ip)->i_mtime \ = ((t2) == &time ? tv_sec : (t2)->tv_sec); \ (ip)->i_modrev++; \ } \ if ((ip)->i_flag & IN_CHANGE) \ (ip)->i_ctime = tv_sec; \ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ } + +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ u_int16_t ufid_pad; /* Force 32-bit alignment. */ ino_t ufid_ino; /* File number (ino). */ int32_t ufid_gen; /* Generation number. */ }; #endif /* KERNEL */ #endif /* !_UFS_UFS_INODE_H_ */ Index: head/sys/gnu/fs/ext2fs/inode.h =================================================================== --- head/sys/gnu/fs/ext2fs/inode.h (revision 34265) +++ head/sys/gnu/fs/ext2fs/inode.h (revision 34266) @@ -1,172 +1,181 @@ /* * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ #define _UFS_UFS_INODE_H_ #include #include /* + * The size of a logical block number. 
+ */ +typedef long ufs_lbn_t; + +/* * This must agree with the definition in . */ #define doff_t int32_t /* * The inode is used to describe each active (or recently active) file in the * UFS filesystem. It is composed of two types of information. The first part * is the information that is needed only while the file is active (such as * the identity of the file and linkage to speed its lookup). The second part * is the permanent meta-data associated with the file which is read in * from the permanent dinode from long term storage when the file becomes * active, and is put back when the file is no longer being used. */ struct inode { struct lock i_lock; /* Inode lock. >Keep this first< */ LIST_ENTRY(inode) i_hash;/* Hash chain. */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ struct ext2_sb_info *e2fs; /* EXT2FS */ } inode_u; #define i_fs inode_u.fs #define i_e2fs inode_u.e2fs struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ /* * Side effects; used during directory lookup. */ int32_t i_count; /* Size of free slot in directory. */ doff_t i_endoff; /* End of useful stuff in directory. */ doff_t i_diroff; /* Offset in dir, where we found last entry. */ doff_t i_offset; /* Offset of free space in directory. */ ino_t i_ino; /* Inode number of found directory. */ u_int32_t i_reclen; /* Size of found directory entry. */ int i_spare[5]; /* XXX actually non-spare (for ext2fs). */ /* * The on-disk dinode itself. */ struct dinode i_din; /* 128 bytes of the on-disk dinode. */ }; #define i_atime i_din.di_atime #define i_atimensec i_din.di_atimensec #define i_blocks i_din.di_blocks #define i_ctime i_din.di_ctime #define i_ctimensec i_din.di_ctimensec #define i_db i_din.di_db #define i_flags i_din.di_flags #define i_gen i_din.di_gen #define i_gid i_din.di_gid #define i_ib i_din.di_ib #define i_mode i_din.di_mode #define i_mtime i_din.di_mtime #define i_mtimensec i_din.di_mtimensec #define i_nlink i_din.di_nlink #define i_rdev i_din.di_rdev #define i_shortlink i_din.di_shortlink #define i_size i_din.di_size #define i_uid i_din.di_uid /* These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ #define IN_UPDATE 0x0004 /* Modification time update request. */ #define IN_MODIFIED 0x0008 /* Inode has been modified. */ #define IN_RENAME 0x0010 /* Inode is being renamed. */ #define IN_SHLOCK 0x0020 /* File has shared lock. */ #define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #define IN_HASHED 0x0080 /* Inode is on hash list */ #ifdef KERNEL /* * Structure used to pass around logical block paths generated by * ufs_getlbns and used by truncate and bmap code. */ struct indir { ufs_daddr_t in_lbn; /* Logical block number. */ int in_off; /* Offset in buffer. */ int in_exists; /* Flag if the block exists. */ }; /* Convert between inode pointers and vnode pointers. 
*/ #define VTOI(vp) ((struct inode *)(vp)->v_data) #define ITOV(ip) ((ip)->i_vnode) /* * XXX this is too long to be a macro, and isn't used in any time-critical * place; in fact it is only used in ufs_vnops.c so it shouldn't be in a * header file. */ #define ITIMES(ip, t1, t2) { \ long tv_sec = time.tv_sec; \ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ (ip)->i_flag |= IN_MODIFIED; \ if ((ip)->i_flag & IN_ACCESS) \ (ip)->i_atime \ = ((t1) == &time ? tv_sec : (t1)->tv_sec); \ if ((ip)->i_flag & IN_UPDATE) { \ (ip)->i_mtime \ = ((t2) == &time ? tv_sec : (t2)->tv_sec); \ (ip)->i_modrev++; \ } \ if ((ip)->i_flag & IN_CHANGE) \ (ip)->i_ctime = tv_sec; \ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ } + +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ u_int16_t ufid_pad; /* Force 32-bit alignment. */ ino_t ufid_ino; /* File number (ino). */ int32_t ufid_gen; /* Generation number. */ }; #endif /* KERNEL */ #endif /* !_UFS_UFS_INODE_H_ */ Index: head/sys/i386/conf/LINT =================================================================== --- head/sys/i386/conf/LINT (revision 34265) +++ head/sys/i386/conf/LINT (revision 34266) @@ -1,1497 +1,1504 @@ # # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.412 1998/02/24 22:24:46 phk Exp $ +# $Id: LINT,v 1.413 1998/02/27 10:02:41 itojun Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from # this file as required. # # # This directive is mandatory; it defines the architecture to be # configured for; in this case, the 386 family based IBM-PC and # compatibles. # machine "i386" # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a complicated formula defined in param.c. # maxusers 10 # # Certain applications can grow to be larger than the 128M limit # that FreeBSD initially imposes. Below are some options to # allow that limit to grow to 256MB, and can be increased further # with changing the parameters. MAXDSIZ is the maximum that the # limit can be set to, and the DFLDSIZ is the default value for # the limit. You might want to set the default lower than the # max, and explicitly set the maximum with a shell command for processes # that regularly exceed the limit like INND. # options "MAXDSIZ=(256*1024*1024)" options "DFLDSIZ=(256*1024*1024)" # When this is set, be extra conservative in various parts of the kernel # and choose functionality over speed (on the widest variety of systems). options FAILSAFE # This allows you to actually store this configuration file into # the kernel binary itself, where it may be later read by saying: # strings /kernel | grep ^___ | sed -e 's/^___//' > MYKERNEL # options INCLUDE_CONFIG_FILE # Include this file in kernel # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible. Specifying the # dump device here is not recommended. Use dumpon(8). 
#
config		kernel	root on wd0 dumps on wd0

#####################################################################
# SMP OPTIONS:
#
# SMP enables building of a Symmetric MultiProcessor Kernel.
# APIC_IO enables the use of the IO APIC for Symmetric I/O.
# NCPU sets the number of CPUs, defaults to 2.
# NBUS sets the number of busses, defaults to 4.
# NAPIC sets the number of IO APICs on the motherboard, defaults to 1.
# NINTR sets the total number of INTs provided by the motherboard.
#
# Notes:
#
# An SMP kernel will ONLY run on an Intel MP spec. qualified motherboard.
#
# Be sure to disable 'cpu "I386_CPU"' && 'cpu "I486_CPU"' for SMP kernels.
#
# Check the 'Rogue SMP hardware' section to see if additional options
# are required by your hardware.
#

# Mandatory:
options		SMP		# Symmetric MultiProcessor Kernel
options		APIC_IO		# Symmetric (APIC) I/O

# Optional, these are the defaults plus 1:
options		NCPU=5		# number of CPUs
options		NBUS=5		# number of busses
options		NAPIC=2		# number of IO APICs
options		NINTR=25	# number of INTs

#
# Rogue SMP hardware:
#
# Bridged PCI cards:
#
# The MP tables of most of the current generation MP motherboards
# do NOT properly support bridged PCI cards.  To use one of these
# cards you should refer to ???

#####################################################################
# CPU OPTIONS

#
# You must specify at least one CPU (the one you intend to run on);
# deleting the specification for CPUs you don't need to use may make
# parts of the system run faster.  This is especially true when removing
# I386_CPU.
#
cpu		"I386_CPU"
cpu		"I486_CPU"
cpu		"I586_CPU"	# aka Pentium(tm)
cpu		"I686_CPU"	# aka Pentium Pro(tm)

#
# Options for CPU features.
#
# CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM
# BlueLightning CPU.  It works only with Cyrix FPU, and this option
# should not be used with Intel FPU.
#
# CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning
# CPU if CPU supports it.  The default is double-clock mode on
# BlueLightning CPU box.
#
# CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1).
#
# CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct
# mapped mode.  Default is 2-way set associative mode.
#
# CPU_CYRIX_NO_LOCK enables weak locking for the entire address space
# of Cyrix 6x86 and 6x86MX CPUs.  If this option is not set and
# FAILSAFE is defined, NO_LOCK bit of CCR1 is cleared.  (NOTE 3)
#
# CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables
# reorder).  This option should not be used if you use memory mapped
# I/O device(s).
#
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
#
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
# for i386 machines.
#
# CPU_IORT defines I/O clock delay time (NOTE 1).  Default values of
# I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7, respectively
# (no clock delay).
#
# CPU_LOOP_EN prevents flushing the prefetch buffer if the destination
# of a jump is already present in the prefetch buffer on Cyrix 5x86
# (NOTE 1).
#
# CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1).
#
# CPU_SUSP_HLT enables suspend on HALT.  If this option is set, CPU
# enters suspend mode following execution of HALT instruction.
#
# CPU_WT_ALLOC enables write-through allocation.
#
# CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache
# flush at hold state.
#
# CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs
# without cache flush at hold state, and (2) write-back CPU cache on
# Cyrix 6x86 whose revision < 2.7 (NOTE 2).
#
# NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY
# Pentiums) from locking up when a LOCK CMPXCHG8B instruction is
# executed.  This should be included for ALL kernels that won't run
# on a Pentium.
#
# NOTE 1: The options CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, and
# CPU_RSTK_EN should not be used because of CPU bugs.  These options
# may crash your system.
#
# NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled
# in write-through mode when revision < 2.7.  If revision of Cyrix
# 6x86 >= 2.7, CPU cache is always enabled in write-back mode.
#
# NOTE 3: This option may cause failures for software that requires
# locked cycles in order to operate correctly.
#
options		"CPU_BLUELIGHTNING_FPU_OP_CACHE"
options		"CPU_BLUELIGHTNING_3X"
options		"CPU_BTB_EN"
options		"CPU_DIRECT_MAPPED_CACHE"
options		"CPU_DISABLE_5X86_LSSER"
options		"CPU_FASTER_5X86_FPU"
options		"CPU_I486_ON_386"
options		"CPU_IORT"
options		"CPU_LOOP_EN"
options		"CPU_RSTK_EN"
options		"CPU_SUSP_HLT"
options		"CYRIX_CACHE_WORKS"
options		"CYRIX_CACHE_REALLY_WORKS"
#options	"NO_F00F_HACK"

#
# A math emulator is mandatory if you wish to run on hardware which
# does not have a floating-point processor.  Pick either the original,
# bogus (but freely-distributable) math emulator, or a much more
# fully-featured but GPL-licensed emulator taken from Linux.
#
options		MATH_EMULATE	#Support for x87 emulation
# Don't enable both of these in a real config.
options		GPL_MATH_EMULATE	#Support for x87 emulation via
					#new math emulator

#####################################################################
# COMPATIBILITY OPTIONS

#
# Implement system calls compatible with 4.3BSD and older versions of
# FreeBSD.  You probably do NOT want to remove this as much current code
# still relies on the 4.3 emulation.
#
options		"COMPAT_43"

#
# Allow user-mode programs to manipulate their local descriptor tables.
# This option is required for the WINE Windows(tm) emulator, and is
# not used by anything else (that we know of).
#
options		USER_LDT	#allow user-level control of i386 ldt

#
# These three options provide support for System V Interface
# Definition-style interprocess communication, in the form of shared
# memory, semaphores, and message queues, respectively.
#
options		SYSVSHM
options		SYSVSEM
options		SYSVMSG

#
# This option includes an MD5 routine in the kernel; it is used for
# various authentication and privacy uses.
#
options		"MD5"

#
# Allow processes to switch to vm86 mode, as well as enabling direct
# user-mode access to the I/O port space.  This option is necessary for
# the doscmd emulator to run.
#
options		"VM86"

#####################################################################
# DEBUGGING OPTIONS

#
# Enable the kernel debugger.
#
options		DDB

#
# Don't drop into DDB for a panic.  Intended for unattended operation
# where you may want to drop to DDB from the console, but still want
# the machine to recover from a panic
#
options		DDB_UNATTENDED

#
# If using GDB remote mode to debug the kernel, there's a non-standard
# extension to the remote protocol that can be used to use the serial
# port as both the debugging port and the system console.  It's non-
# standard and you're on your own if you enable it.  See also the
# "remotechat" variables in the FreeBSD specific version of gdb.
#
options		GDB_REMOTE_CHAT

#
# KTRACE enables the system-call tracing facility ktrace(2).
#
options		KTRACE		#kernel tracing

#
# The DIAGNOSTIC option is used in a number of source files to enable
# extra sanity checking of internal structures.  This support is not
# enabled by default because of the extra time it would take to check
# for these conditions, which can only occur as a result of
# programming errors.
#
options		DIAGNOSTIC

#
# PERFMON causes the driver for Pentium/Pentium Pro performance counters
# to be compiled.  See perfmon(4) for more information.
#
options		PERFMON

#
# This option lets some drivers co-exist that can't co-exist in a running
# system.  It is used to be able to compile all kernel code in one go for
# quality assurance purposes (like this file, which the option takes its
# name from.)
#
options		COMPILING_LINT

# XXX - this doesn't belong here.
# Allow ordinary users to take the console - this is useful for X.
options		UCONSOLE

# XXX - this doesn't belong here either
options		USERCONFIG		#boot -c editor
options		USERCONFIG_BOOT		#imply -c and parse info area
options		VISUAL_USERCONFIG	#visual boot -c editor

#####################################################################
# NETWORKING OPTIONS

#
# Protocol families:
#  Only the INET (Internet) family is officially supported in FreeBSD.
#  Source code for the NS (Xerox Network Service) is provided for amusement
#  value.
#
options		INET		#Internet communications protocols
options		IPX		#IPX/SPX communications protocols
options		IPXIP		#IPX in IP encapsulation (not available)
options		IPTUNNEL	#IP in IPX encapsulation (not available)
options		NETATALK	#Appletalk communications protocols

# These are currently broken but are shipped due to interest.
#options	NS		#Xerox NS protocols

# These are currently broken and are no longer shipped due to lack
# of interest.
#options	CCITT		#X.25 network layer
#options	ISO
#options	TPIP		#ISO TP class 4 over IP
#options	TPCONS		#ISO TP class 0 over X.25
#options	LLC		#X.25 link layer for Ethernets
#options	HDLC		#X.25 link layer for serial lines
#options	EON		#ISO CLNP over IP
#options	NSIP		#XNS over IP

#
# Network interfaces:
#  The `loop' pseudo-device is MANDATORY when networking is enabled.
#  The `ether' pseudo-device provides generic code to handle
#  Ethernets; it is MANDATORY when an Ethernet device driver is
#  configured.
#  The 'fddi' pseudo-device provides generic code to support FDDI.
#  The `sppp' pseudo-device serves a similar role for certain types
#  of synchronous PPP links (like `cx', `ar').
#  The `sl' pseudo-device implements the Serial Line IP (SLIP) service.
#  The `ppp' pseudo-device implements the Point-to-Point Protocol.
#  The `bpfilter' pseudo-device enables the Berkeley Packet Filter.  Be
#  aware of the legal and administrative consequences of enabling this
#  option.  The number of devices determines the maximum number of
#  simultaneous BPF client programs runnable.
#  The `disc' pseudo-device implements a minimal network interface,
#  which throws away all packets sent and never receives any.  It is
#  included for testing purposes.
#  The `tun' pseudo-device implements the User Process PPP (iijppp)
#
# The PPP_BSDCOMP option enables support for compress(1) style entire
# packet compression, the PPP_DEFLATE is for zlib/gzip style compression.
# PPP_FILTER enables code for filtering the ppp data stream and selecting
# events for resetting the demand dial activity timer - requires bpfilter.
# See pppd(8) for more details.
#
pseudo-device	ether		#Generic Ethernet
pseudo-device	fddi		#Generic FDDI
pseudo-device	sppp		#Generic Synchronous PPP
pseudo-device	loop		#Network loopback device
pseudo-device	bpfilter 4	#Berkeley packet filter
pseudo-device	disc		#Discard device
pseudo-device	tun 1		#Tunnel driver (user process ppp(8))
pseudo-device	sl 2		#Serial Line IP
pseudo-device	ppp 2		#Point-to-point protocol
options		PPP_BSDCOMP	#PPP BSD-compress support
options		PPP_DEFLATE	#PPP zlib/deflate/gzip support
options		PPP_FILTER	#enable bpf filtering (needs bpfilter)

#
# Internet family options:
#
# TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in
# 4.2BSD.  This option should not be used unless you have a 4.2BSD
# machine and TCP connections fail.
#
# MROUTING enables the kernel multicast packet forwarder, which works
# with mrouted(8).
#
# IPFIREWALL enables support for IP firewall construction, in
# conjunction with the `ipfw' program.  IPFIREWALL_VERBOSE sends
# logged packets to the system logger.  IPFIREWALL_VERBOSE_LIMIT
# limits the number of times a matching entry can be logged.
#
# WARNING:  IPFIREWALL defaults to a policy of "deny ip from any to any"
# and if you do not add other rules during startup to allow access,
# YOU WILL LOCK YOURSELF OUT.  It is suggested that you set firewall=open
# in /etc/rc.conf when first enabling this feature, then refine the
# firewall rules in /etc/rc.firewall after you've tested that the new kernel
# feature works properly.
#
# IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to
# allow everything.  Use with care: if a cracker can crash your
# firewall machine, they can get to your protected machines.  However,
# if you are using it as an as-needed filter for specific problems as
# they arise, then this may be for you.  Changing the default to 'allow'
# means that you won't get stuck if the kernel and /sbin/ipfw binary get
# out of sync.
#
# IPDIVERT enables the divert IP sockets, used by ``ipfw divert''
#
# TCPDEBUG is undocumented.
#
options		"TCP_COMPAT_42"	#emulate 4.2BSD TCP bugs
options		MROUTING	# Multicast routing
options		IPFIREWALL	#firewall
options		IPFIREWALL_VERBOSE	#print information about
					# dropped packets
options		"IPFIREWALL_VERBOSE_LIMIT=100"	#limit verbosity
options		IPFIREWALL_DEFAULT_TO_ACCEPT	#allow everything by default
options		IPDIVERT	#divert sockets
options		TCPDEBUG

#####################################################################
# FILESYSTEM OPTIONS

#
# Only the root, /usr, and /tmp filesystems need be statically
# compiled; everything else will be automatically loaded at mount
# time.  (Exception: the UFS family --- FFS and MFS --- cannot
# currently be demand-loaded.)  Some people still prefer to statically
# compile other filesystems as well.
#
# NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be
# buggy, and WILL panic your system if you attempt to do anything with
# them.  They are included here as an incentive for some enterprising
# soul to sit down and fix them.
#

# One of these is mandatory:
options		FFS		#Fast filesystem
options		NFS		#Network File System

# The rest are optional:
# options	NFS_NOSERVER	#Disable the NFS-server code.
options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem options "CD9660_ROOT" #CD-ROM usable as root device options FFS_ROOT #FFS usable as root device options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem + +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices. options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas # Add more checking code to various filesystems #options NULLFS_DIAGNOSTIC #options KERNFS_DIAGNOSTIC #options UMAPFS_DIAGNOSTIC #options UNION_DIAGNOSTIC # In particular multi-session CD-Rs might require a huge amount of # time in order to "settle". If we are about mounting them as the # root f/s, we gotta wait a little. # # The number is supposed to be in seconds. options "CD9660_ROOTDELAY=20" # If you are running a machine just as a fileserver for PC and MAC users. # (using SAMBA or Netatalk), then you may consider setting this option # and keeping all those user's directories on a partition that is mounted # with the suiddir option. This gives new files the same ownership as # the directory (similiar to group). It's a security hole if you let # these users run programs so confine it to file-servers, (but it'll save you # lots of headaches in that case). Root owned directories are excempt and X bits # are cleared. the suid bit must be set on the directory as well. see chmod(1) # PC owners can't see/set ownerships so they keep getting their toes # trodden on. This saves you all the support calls as the filesystem # it's used on will act as they expect. ("It's my dir so it must be my file"). # options SUIDDIR # Add some error checking code to the null_bypass routine # in the NULL filesystem #options SAFETY ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit. In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. 
This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3" then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured. controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? # SCSI processor type device sctarg0 at scbus? # SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY # Options for the `od' optical disk driver: # # If drive returns sense key as 0x02 with vendor specific additional # sense code (ASC) and additional sense code qualifier (ASCQ), or # illegal ASC and ASCQ. This cause an error (NOT READY) and retrying. # To suppress this, use the following option. # options OD_BOGUS_NOT_READY # # For an automatic spindown, try this. Again, preferably as an # option in your config file. # WARNING! Use at your own risk. Joerg's ancient SONY SMO drive # groks it fine, while Shunsuke's Fujitsu chokes on it and times # out. # options OD_AUTO_TURNOFF ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # The `pty' device usually turns out to be ``effectively mandatory'', # as it is required for `telnetd', `rlogind', `screen', `emacs', and # `xterm', among others. pseudo-device pty 16 #Pseudo ttys - can go as high as 256 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. pseudo-device ccd 4 #Concatenated disk driver # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code. 
pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # EISA support is available for some device, so they can be auto-probed. # Micro Channel is not supported at all. # # Mandatory ISA devices: isa, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM. If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards, use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # # PAS_JOYSTICK_ENABLE enables the gameport on the ProAudio Spectrum options "AUTO_EOI_1" #options "AUTO_EOI_2" options BOUNCE_BUFFERS options "MAXMEM=(128*1024)" options "TUNE_1542" #options BROKEN_KEYBOARD_RESET #options PAS_JOYSTICK_ENABLE # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ftp://ftp.udel.edu/pub/ntp/kernel.tar.Z options PPS_SYNC # Enable PnP support in the kernel. This allows you to automaticly # attach to PnP cards for drivers that support it and allows you to # configure cards from USERCONFIG. See pnp(4) for more info. controller pnp0 # The pcvt console driver (vt220 compatible). device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options XSERVER # support for running an X server. options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible). device sc0 at isa? 
port "IO_KBD" tty irq 1 vector scintr options MAXCONS=16 # number of virtual consoles options SLOW_VGA # do byte-wide i/o's to TS and GDC regs options "STD8X16FONT" # Compile font in makeoptions "STD8X16FONT"="cp850" options SC_HISTORY_SIZE=200 # number of history buffer lines # # `flags' for sc0: # 0x01 Use a 'visual' bell # 0x02 Use a 'blink' cursor # 0x04 Use a 'underline' cursor # 0x06 Use a 'blinking underline' (destructive) cursor # 0x08 Force detection of keyboard, else we always assume a keyboard # 0x10 Old-style (XT) keyboard support, useful for older ThinkPads # 0x20 Don't reset keyboard, useful for some newer ThinkPads # # The Numeric Processing eXtension driver. This should be configured if # your machine has a math co-processor, unless the coprocessor is very # buggy. If it is not configured then you *must* configure math emulation # (see above). If both npx0 and emulation are configured, then only npx0 # is used (provided it works). device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr # # `flags' for npx0: # 0x01 don't use the npx registers to optimize bcopy # 0x02 don't use the npx registers to optimize bzero # 0x04 don't use the npx registers to optimize copyin or copyout. # The npx registers are normally used to optimize copying and zeroing when # all of the following conditions are satisfied: # "I586_CPU" is an option # the cpu is an i586 (perhaps not a Pentium) # the probe for npx0 succeeds # INT 16 exception handling works. # Then copying and zeroing using the npx registers is normally 30-100% faster. # The flags can be used to control cases where it doesn't work or is slower. # Setting them at boot time using userconfig works right (the optimizations # are not used until later in the bootstrap when npx0 is attached). # # # `iosiz' for npx0: # This can be used instead of the MAXMEM option to set the memory size. If # it is nonzero, then it overrides both the MAXMEM option and the memory # size reported by the BIOS. Setting it at boot time using userconfig takes # effect on the next reboot after the change has been recorded in the kernel # binary (the size is used early in the boot before userconfig has a chance # to change it). # # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important in order for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. 
The flags may be used in either the controller # definition or in the individual disk definitions. The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. # The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. Bit 14 (0x4000) enables a hack to wake # up powered-down laptop drives. Bit 13 (0x2000) allows # probing for PCI IDE DMA controllers, such as Intel's PIIX # south bridges. See the wd.4 man page. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # If you are using a PCI controller that is not running in compatibility # mode (for example, it is a 2nd IDE PCI interface), then use config line(s) # such as: # #controller wdc2 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd4 at wdc2 drive 0 #disk wd5 at wdc2 drive 1 # #controller wdc3 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd6 at wdc3 drive 0 #disk wd7 at wdc3 drive 1 # # Note that the above config would be useful for a Promise card, when used # on a MB that already has a PIIX controller. Note the bogus irq and port # entries. These are automatically filled in by the IDE/PCI support. # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa? port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # CMD640 enables serializing access to primary and secondary channel # of the CMD640B IDE Chip. The serializing will only take place # if this option is set *and* the chip is probed by the pci-system. # options "CMD640" #Enable work around for CMD640 h/w bug # # ATAPI enables support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus options ATAPI_STATIC #Don't do it as an LKM # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # IDE floppy driver - requires wdc controller and ATAPI option device wfd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # must still turn it on explicitly by setting the variable fd_debug with # DDB. options FDC_DEBUG # This option is undocumented on purpose. options FDC_PRINT_BOGUS_CHIPTYPE # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc.
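The wd/wdc flags packing just described is easy to get wrong by hand; a minimal C sketch (the macro names are mine, not from the driver) that reproduces the 0x00ff8004 example above:

#include <stdio.h>

/* Per-drive 16-bit flag bits, per the wdc notes above (names are mine). */
#define WD_PROBE_32BIT  0x8000  /* probe for 32 bit transfers */
#define WD_WAKE_LAPTOP  0x4000  /* wake powered-down laptop drives */
#define WD_PROBE_DMA    0x2000  /* probe for PCI IDE DMA (e.g. PIIX) */
#define WD_MULTMASK     0x00ff  /* max multi-sector count; 0xff = drive max */

/* Controller flags word: low 16 bits are drive 0, high 16 bits drive 1. */
#define WDC_FLAGS(d0, d1) (((unsigned long)(d1) << 16) | (unsigned long)(d0))

int main(void)
{
    /* drive 0: 32-bit probe + multcount 4; drive 1: max multcount only */
    printf("flags 0x%08lx\n", WDC_FLAGS(WD_PROBE_32BIT | 4, WD_MULTMASK));
    return 0;   /* prints "flags 0x00ff8004", matching the example above */
}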
# # lpt: printer port # lpt specials: # port can be specified as ?, this will cause the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted, this # will force the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_HOOKAPM #hook the APM resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event device sio0 at isa? port "IO_COM1" tty flags 0x10 irq 4 vector siointr # # `flags' for serial drivers that support consoles (only for sio now): # 0x10 enable console support for this unit. The other console flags # are ignored unless this is set. Enabling console support does # not make the unit the preferred console - boot with -h or set # the 0x20 flag for that. Currently, at most one unit can have # console support; the first one (in config file order) with # this flag set is preferred. Setting this flag for sio0 gives # the old behaviour. # 0x20 force this unit to be the console (unless there is another # higher priority console). This replaces the COMCONSOLE option. # 0x40 reserve this unit for low level console operations. Do not # access the device in any normal way. # # PnP `flags' (set via userconfig using pnp x flags y) # 0x1 disable probing of this device. Used to prevent your modem # from being attached as a PnP modem. # # Options for serial drivers that support consoles (only for sio now): options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. options CONSPEED=9600 #default speed for serial console (default 9600) # Options for sio: options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options "EXTRA_SIO=2" #number of extra sio ports to allocate # Other flags for sio that aren't documented in the man page. # 0x20000 enable hardware RTS/CTS and larger FIFOs. Only works for # ST16650A-compatible UARTs. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) # ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; Intel EtherExpress # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp) # wl: Lucent Wavelan (ISA card only). # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa?
port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device ex0 at isa? port? net irq? vector exintr device fe0 at isa? port 0x300 net irq ? vector feintr device ie0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector ieintr device ie1 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device sr0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector srintr options WLCACHE # enables the signal-strength cache options WLDEBUG # enables verbose debugging output device wl0 at isa? port 0x300 net irq ? vector wlintr # We can (bogusly) include both the dedicated PCCARD drivers and the generic # support when COMPILING_LINT. device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ATM related options # # The `en' device provides support for Efficient Networks (ENI) # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0). # # atm pseudo-device provides generic atm functions and is required for # atm devices. # NATM enables the netnatm protocol family that can be used to # bypass TCP/IP. # # the current driver supports only PVC operations (no atm-arp, no multicast). # for more details, please read the original documents at # http://www.ccrc.wustl.edu/pub/chuck/bsdatm/wucs.html # pseudo-device atm device en0 device en1 options NATM #native ATM # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # css: Crystal Sound System (CSS 423x PnP) # sscape: Ensoniq Soundscape MIDI interface # sscape_mss: Ensoniq Soundscape PCM (requires sscape) # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pcm: PCM audio through various sound cards. # # This is the work in progress from Luigi Rizzo. This has support for # CS423x based cards, OPTi931, SB16 PnP, GusPnP. For more information # about this driver, take a look at sys/i386/isa/snd/README. # # The flags field of the device tells the driver a bit more info about the # device than normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if you don't know what to put in (and you don't, # since this is unsupported at the moment...). # # This driver will use the new PnP code if it's available. # # pca: PCM audio through your PC speaker # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card, the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''.
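A similar sketch for the pcm `flags' bit layout just described (the macro names and the DRQ-5 example are mine, invented for illustration):

#include <stdio.h>

/* pcm 'flags' packing per the bit assignments above (names are mine). */
#define PCM_SECDRQ(n)    ((n) & 0x07)          /* bits 2..0: secondary DMA channel */
#define PCM_DUALDMA      0x10                  /* bit 4: board uses two DMA channels */
#define PCM_BOARDTYPE(t) (((t) & 0xff) << 8)   /* bits 15..8: override autodetection */

int main(void)
{
    /* dual-DMA board with secondary DRQ 5, board type left to autodetect */
    printf("flags 0x%04x\n", PCM_DUALDMA | PCM_SECDRQ(5));    /* prints 0x0015 */
    return 0;
}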
# # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all "VOXWARE" driver sound devices. See Luigi's driver # below for an alternate which may work better for some cards. # controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 5 drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device awe0 at isa? port 0x620 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? port 0x530 irq 10 drq 1 vector adintr device css0 at isa? port 0x534 irq 5 drq 1 flags 0x08 vector adintr device sscape0 at isa? port 0x330 irq 9 drq 0 vector sscapeintr device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr device sscape_mss0 at isa? port 0x534 irq 5 drq 1 vector sndintr device opl0 at isa? port 0x388 device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # Luigi's snd code (use INSTEAD of snd0 and all VOXWARE drivers!). # You may also wish to enable the pnp controller with this, for pnp # sound cards. # #device pcm0 at isa? port ? tty irq 10 drq 1 flags 0x0 vector pcmintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # alog: Industrial Computer Source AIO8-P driver # bktr: Bt848 capture boards (http://www.freebsd.org/~fsmp/HomeAuto/Bt848.html) # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # rp: Comtrol Rocketport(ISA) - single card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based) # stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent) # # Notes on APM # The flags takes the following meaning for apm0: # 0x0020 Statclock is broken. # 0x0011 Limit APM protocol to 1.1 or 1.0 # 0x0010 Limit APM protocol to 1.0 # # # Notes on the spigot: # The video spigot is at 0xad6. This port address can not be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users to be able # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. # options SPIGOT_UNSECURE # # Notes on the Comtrol Rocketport driver: # # The exact values used for rp0 depend on how many boards you have # in the system. 
The manufacturer's sample configs are listed as: # # Comtrol Rocketport ISA single card # device rp0 at isa? port 0x280 tty # # If instead you have two ISA cards, one installed at 0x100 and the # second installed at 0x180, then you should add the following to # your kernel configuration file: # # device rp0 at isa? port 0x100 tty # device rp1 at isa? port 0x180 tty # # For 4 ISA cards, it might be something like this: # # device rp0 at isa? port 0x180 tty # device rp1 at isa? port 0x100 tty # device rp2 at isa? port 0x340 tty # device rp3 at isa? port 0x240 tty # # And for PCI cards, you need only say: # # device rp0 # device rp1 # ... # Note: Make sure that any Rocketport PCI devices are specified BEFORE the # ISA Rocketport devices. # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. # Notes on the Stallion stl and stli drivers: # See src/i386/isa/README.stl for complete instructions. # This is version 0.0.5alpha, unsupported by Stallion. # The stl driver has a secondary IO port hard coded at 0x280. You need # to change src/i386/isa/stallion.c if you reconfigure this on the boards. # The "flags" and "iosiz" settings on the stli driver depend on the board: # EasyConnection 8/64 ISA: flags 23 iosiz 0x1000 # EasyConnection 8/64 EISA: flags 24 iosiz 0x10000 # EasyConnection 8/64 MCA: flags 25 iosiz 0x1000 # ONboard ISA: flags 4 iosiz 0x10000 # ONboard EISA: flags 7 iosiz 0x10000 # ONboard MCA: flags 3 iosiz 0x10000 # Brumby: flags 2 iosiz 0x4000 # Stallion: flags 1 iosiz 0x10000 device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device alog0 at isa? port 0x260 tty irq 5 vector alogintr device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr device rp0 at isa? port 0x280 tty # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 device stl0 at isa? port 0x2a0 tty irq 10 vector stlintr device stli0 at isa? port 0x2a0 tty iomem 0xcc000 flags 23 iosiz 0x1000 device loran0 at isa? port ? tty irq 5 vector loranintr # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card, responds to EISA probes.
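The stli flags/iosiz pairs above are fixed per board type; a small C table, assuming the values quoted above, that prints a candidate config line for each (the struct and names are mine, for illustration only):

#include <stdio.h>

/* stli 'flags'/'iosiz' pairs from the table above. */
struct stli_board { const char *name; int flags; long iosiz; };

static const struct stli_board boards[] = {
    { "EasyConnection 8/64 ISA",  23, 0x1000  },
    { "EasyConnection 8/64 EISA", 24, 0x10000 },
    { "EasyConnection 8/64 MCA",  25, 0x1000  },
    { "ONboard ISA",               4, 0x10000 },
    { "ONboard EISA",              7, 0x10000 },
    { "ONboard MCA",               3, 0x10000 },
    { "Brumby",                    2, 0x4000  },
    { "Stallion",                  1, 0x10000 },
};

int main(void)
{
    for (unsigned i = 0; i < sizeof(boards) / sizeof(boards[0]); i++)
        printf("flags %d iosiz 0x%lx\t# %s\n",
               boards[i].flags, boards[i].iosiz, boards[i].name);
    return 0;
}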
# # fea: DEC DEFEA EISA FDDI adapter # controller eisa0 controller ahb0 controller ahc0 device fea0 # enable tagged command queuing, which is a major performance win on # devices that support it (and controllers with enough SCB's) options AHC_TAGENABLE # enable SCB paging - See the ahc.4 man page options AHC_SCBPAGING_ENABLE # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # By default, only 10 EISA slots are probed, since the slot numbers # above clash with the configuration address space of the PCI subsystem, # and the EISA probe is not very smart about this. This is sufficient # for most machines, but in particular the HP NetServer LC series comes # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, # thus you need to bump this figure to 12 for them. options "EISA_SLOTS=12" # # PCI devices: # # The main PCI bus device is `pci'. It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. # # The `amd' device provides support for the Tekram DC-390 and 390T # SCSI host adapters, but is expected to work with any AMD 53c974 # PCI SCSI chip and the AMD Ethernet+SCSI Combo chip, after some # local patches were applied to the sources (that had originally # been written by Tekram and limited to work with their SCSI cards). # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `fxp' device provides support for the Intel EtherExpress Pro/100B # PCI Fast Ethernet adapters. # # The `tx' device provides support for the SMC 9432TX cards. # # The `vx' device provides support for the 3Com 3C590 and 3C595 # (early support). # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount, no action is # taken # option METEOR_SYSTEM_DEFAULT={METEOR_PAL|METEOR_NTSC|METEOR_SECAM}, used # for initialization of fps routine when a signal is not present. # # The 'bktr' device is a PCI video capture board. It also has a TV tuner # on board. # controller pci0 controller ahc1 controller ncr0 controller amd0 device de0 device fxp0 device tx0 device vx0 device fpa0 device meteor0 device bktr0 # # PCCARD/PCMCIA # # card: slot controller # pcic: slots controller card0 controller pcic0 at card? controller pcic1 at card? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # above.
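The METEOR_ALLOC_PAGES figure comes from the round-up formula quoted above; a C sketch of the arithmetic (the 640x480, 2-bytes-per-pixel, one-frame, 4KB-page numbers are example assumptions of mine, not values from this file):

#include <stdio.h>

/* (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE, per the text above. */
static long meteor_pages(long rows, long cols, long bpp, long frames, long pgsz)
{
    return (rows * cols * bpp * frames + pgsz - 1) / pgsz;  /* round up */
}

int main(void)
{
    /* e.g. 480 rows x 640 columns, 2 bytes/pixel, 1 frame, 4096-byte pages */
    printf("options METEOR_ALLOC_PAGES=%ld\n",
           meteor_pages(480, 640, 2, 1, 4096));   /* prints 150 */
    return 0;
}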
# For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port, devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'sd'), best # performance is achieved with ports in EPP 1.9 mode. # nlpt Parallel Printer # ppi General-purpose I/O ("Geek Port") # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # controller ppbus0 controller vpo0 at ppbus? device nlpt0 at ppbus? device ppi0 at ppbus? device pps0 at ppbus? controller ppc0 at isa? disable port ? irq 7 vector ppcintr # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options "BOOTP_NFSV3" # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. # # An obsolete option to test kern_opt.c. # options GATEWAY # If you want to disable loadable kernel modules (LKM), you # might want to use this option. #options NO_LKM # # Add tie-ins for a hardware watchdog. This only enables the hooks; # the user must still supply the actual driver. # options HW_WDOG # More undocumented options for linting. options CLK_CALIBRATION_LOOP options "CLK_USE_I8254_CALIBRATION" options CLK_USE_TSC_CALIBRATION options CLUSTERDEBUG options COMPAT_LINUX options CPU_UPGRADE_HW_CACHE options DEBUG options "DEBUG_1284" options DEVFS_ROOT #options DISABLE_PSE options "EXT2FS" options "I586_PMC_GUPROF=0x70000" options "IBCS2" # broken: #options IPFILTER options KEY options KEY_DEBUG options LOCKF_DEBUG options LOUTB options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options KBDIO_DEBUG=2 options MSGMNB=2049 options MSGMNI=41 options MSGSEG=2049 options MSGSSZ=16 options MSGTQL=41 options NBUF=512 options NETATALKDEBUG options NMBCLUSTERS=1024 options NPX_DEBUG options NULLFS_DIAGNOSTIC options PANIC_REBOOT_WAIT_TIME=16 options "PCVT_24LINESDEF" options PCVT_CTRL_ALT_DEL options PCVT_EMU_MOUSE options PCVT_FREEBSD=211 options PCVT_META_ESC options PCVT_NSCREENS=9 options PCVT_PRETTYSCRNS options PCVT_SCANSET=2 options PCVT_SCREENSAVER options PCVT_USEKBDSEC options "PCVT_VT220KEYB" options PSM_DEBUG=1 options "SCSI_2_DEF" options SCSI_DELAY=8 # Be pessimistic about Joe SCSI device options SCSI_NCR_DEBUG options SCSI_NCR_DFLT_TAGS=4 options SCSI_NCR_MAX_SYNC=10000 options SCSI_NCR_MAX_WIDE=1 options SCSI_NCR_MYADDR=7 options SEMMAP=31 options SEMMNI=11 options SEMMNS=61 options SEMMNU=31 options SEMMSL=61 options SEMOPM=101 options SEMUME=11 options SHOW_BUSYBUFS # List buffers that prevent root unmount options SHMALL=1025 options "SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)" options SHMMAXPGS=1025 options SHMMIN=2 options SHMMNI=33 options SHMSEG=9 options SI_DEBUG options SIMPLELOCK_DEBUG options SPX_HACK # The 'dpt' driver provides hardware RAID-{0,1,5} support, multi-initiator I/O # See sys/dev/dpt for debugging and other subtle options. # DPT_VERIFY_HINTR Performs some strict hardware interrupts testing. # Only use if you suspect PCI bus corruption problems # DPT_RESTRICTED_FREELIST Normally, the freelist used by the DPT for queues # will grow to accommodate increased use. This growth # will NOT shrink.
To restrict the number of queue # slots to exactly what the DPT can hold at one time, # enable this option. # DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various # instruments are enabled. Assumed to be enabled by # /usr/sbin/dpt_* tools. # DPT_FREELIST_IS_STACK For optimal L{1,2} CPU cache utilization, enable # this option. Otherwise, the transaction queue is # a LIFO. I cannot measure the performance gain. # DPT_HANDLE_TIMEOUTS Normally device timeouts are handled by the DPT. # If you want the driver to handle timeouts, enable # this option. If your system is very busy, this # option will create more trouble than it solves. # DPT_TIMEOUT_FACTOR Used to compute the excessive amount of time to # wait when timing out with the above option. # DPT_DEBUG_xxxx These are controllable from sys/dev/dpt/dpt.h # DPT_LOST_IRQ When enabled, will try, once per second, to catch # any interrupt that got lost. Seems to help in some # DPT-firmware/Motherboard combinations. Minimal # cost, great benefit. controller dpt0 # DPT options options DPT_VERIFY_HINTR options DPT_RESTRICTED_FREELIST options DPT_MEASURE_PERFORMANCE options DPT_FREELIST_IS_STACK options DPT_HANDLE_TIMEOUTS options DPT_TIMEOUT_FACTOR=4 options DPT_INTR_DELAY=200 # Some motherboards need that options DPT_LOST_IRQ Index: head/sys/i386/conf/NOTES =================================================================== --- head/sys/i386/conf/NOTES (revision 34265) +++ head/sys/i386/conf/NOTES (revision 34266) @@ -1,1497 +1,1504 @@ # # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.412 1998/02/24 22:24:46 phk Exp $ +# $Id: LINT,v 1.413 1998/02/27 10:02:41 itojun Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from # this file as required. # # # This directive is mandatory; it defines the architecture to be # configured for; in this case, the 386 family based IBM-PC and # compatibles. # machine "i386" # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a complicated formula defined in param.c. # maxusers 10 # # Certain applications can grow to be larger than the 128M limit # that FreeBSD initially imposes. Below are some options to # allow that limit to grow to 256MB, and it can be increased further # by changing the parameters. MAXDSIZ is the maximum that the # limit can be set to, and DFLDSIZ is the default value for # the limit. You might want to set the default lower than the # max, and explicitly set the maximum with a shell command for processes # that regularly exceed the limit, like INND. # options "MAXDSIZ=(256*1024*1024)" options "DFLDSIZ=(256*1024*1024)" # When this is set, be extra conservative in various parts of the kernel # and choose functionality over speed (on the widest variety of systems). options FAILSAFE # This allows you to actually store this configuration file into # the kernel binary itself, where it may be later read by saying: # strings /kernel | grep ^___ | sed -e 's/^___//' > MYKERNEL # options INCLUDE_CONFIG_FILE # Include this file in kernel # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible.
Specifying the # dump device here is not recommended. Use dumpon(8). # config kernel root on wd0 dumps on wd0 ##################################################################### # SMP OPTIONS: # # SMP enables building of a Symmetric MultiProcessor Kernel. # APIC_IO enables the use of the IO APIC for Symmetric I/O. # NCPU sets the number of CPUs, defaults to 2. # NBUS sets the number of busses, defaults to 4. # NAPIC sets the number of IO APICs on the motherboard, defaults to 1. # NINTR sets the total number of INTs provided by the motherboard. # # Notes: # # An SMP kernel will ONLY run on an Intel MP spec. qualified motherboard. # # Be sure to disable 'cpu "I386_CPU"' && 'cpu "I486_CPU"' for SMP kernels. # # Check the 'Rogue SMP hardware' section to see if additional options # are required by your hardware. # # Mandatory: options SMP # Symmetric MultiProcessor Kernel options APIC_IO # Symmetric (APIC) I/O # Optional, these are the defaults plus 1: options NCPU=5 # number of CPUs options NBUS=5 # number of busses options NAPIC=2 # number of IO APICs options NINTR=25 # number of INTs # # Rogue SMP hardware: # # Bridged PCI cards: # # The MP tables of most of the current generation MP motherboards # do NOT properly support bridged PCI cards. To use one of these # cards you should refer to ??? ##################################################################### # CPU OPTIONS # # You must specify at least one CPU (the one you intend to run on); # deleting the specification for CPUs you don't need to use may make # parts of the system run faster. This is especially true when removing # I386_CPU. # cpu "I386_CPU" cpu "I486_CPU" cpu "I586_CPU" # aka Pentium(tm) cpu "I686_CPU" # aka Pentium Pro(tm) # # Options for CPU features. # # CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM # BlueLightning CPU. It works only with Cyrix FPU, and this option # should not be used with Intel FPU. # # CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning # CPU if CPU supports it. The default is double-clock mode on # BlueLightning CPU box. # # CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1). # # CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct # mapped mode. Default is 2-way set associative mode. # # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space # of Cyrix 6x86 and 6x86MX CPUs. If this option is not set and # FAILSAFE is defined, NO_LOCK bit of CCR1 is cleared. (NOTE 3) # # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables # reorder). This option should not be used if you use memory mapped # I/O device(s). # # CPU_FASTER_5X86_FPU enables faster FPU exception handler. # # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products # for i386 machines. # # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7, respectively # (no clock delay). # # CPU_LOOP_EN prevents flushing the prefetch buffer if the destination # of a jump is already present in the prefetch buffer on Cyrix 5x86 (NOTE # 1). # # CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1). # # CPU_SUSP_HLT enables suspend on HALT. If this option is set, CPU # enters suspend mode following execution of HALT instruction. # # CPU_WT_ALLOC enables write-through allocation. # # CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache # flush at hold state.
# # CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs # without cache flush at hold state, and (2) write-back CPU cache on # Cyrix 6x86 whose revision < 2.7 (NOTE 2). # # NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY # Pentiums) from locking up when a LOCK CMPXCHG8B instruction is # executed. This should be included for ALL kernels that won't run # on a Pentium. # # NOTE 1: The options CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, # and CPU_RSTK_EN should not be used because of CPU bugs. # These options may crash your system. # # NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled # in write-through mode when revision < 2.7. If revision of Cyrix # 6x86 >= 2.7, CPU cache is always enabled in write-back mode. # # NOTE 3: This option may cause failures for software that requires # locked cycles in order to operate correctly. # options "CPU_BLUELIGHTNING_FPU_OP_CACHE" options "CPU_BLUELIGHTNING_3X" options "CPU_BTB_EN" options "CPU_DIRECT_MAPPED_CACHE" options "CPU_DISABLE_5X86_LSSER" options "CPU_FASTER_5X86_FPU" options "CPU_I486_ON_386" options "CPU_IORT" options "CPU_LOOP_EN" options "CPU_RSTK_EN" options "CPU_SUSP_HLT" options "CYRIX_CACHE_WORKS" options "CYRIX_CACHE_REALLY_WORKS" #options "NO_F00F_HACK" # # A math emulator is mandatory if you wish to run on hardware which # does not have a floating-point processor. Pick either the original, # bogus (but freely-distributable) math emulator, or a much more # fully-featured but GPL-licensed emulator taken from Linux. # options MATH_EMULATE #Support for x87 emulation # Don't enable both of these in a real config. options GPL_MATH_EMULATE #Support for x87 emulation via #new math emulator ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. # options "COMPAT_43" # # Allow user-mode programs to manipulate their local descriptor tables. # This option is required for the WINE Windows(tm) emulator, and is # not used by anything else (that we know of). # options USER_LDT #allow user-level control of i386 ldt # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG # # This option includes an MD5 routine in the kernel; this is used for # various authentication and privacy purposes. # options "MD5" # # Allow processes to switch to vm86 mode, as well as enabling direct # user-mode access to the I/O port space. This option is necessary for # the doscmd emulator to run. # options "VM86" ##################################################################### # DEBUGGING OPTIONS # # Enable the kernel debugger. # options DDB # # Don't drop into DDB for a panic. Intended for unattended operation # where you may want to drop to DDB from the console, but still want # the machine to recover from a panic # options DDB_UNATTENDED # # If using GDB remote mode to debug the kernel, there's a non-standard # extension to the remote protocol that can be used to use the serial # port as both the debugging port and the system console. It's non- # standard and you're on your own if you enable it. See also the # "remotechat" variables in the FreeBSD specific version of gdb.
# options GDB_REMOTE_CHAT # # KTRACE enables the system-call tracing facility ktrace(2). # options KTRACE #kernel tracing # # The DIAGNOSTIC option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options DIAGNOSTIC # # PERFMON causes the driver for Pentium/Pentium Pro performance counters # to be compiled. See perfmon(4) for more information. # options PERFMON # # This option lets some drivers co-exist that can't co-exist in a running # system. This is used to be able to compile all kernel code in one go for # quality assurance purposes (like this file, which the option takes its name # from.) # options COMPILING_LINT # XXX - this doesn't belong here. # Allow ordinary users to take the console - this is useful for X. options UCONSOLE # XXX - this doesn't belong here either options USERCONFIG #boot -c editor options USERCONFIG_BOOT #imply -c and parse info area options VISUAL_USERCONFIG #visual boot -c editor ##################################################################### # NETWORKING OPTIONS # # Protocol families: # Only the INET (Internet) family is officially supported in FreeBSD. # Source code for the NS (Xerox Network Service) is provided for amusement # value. # options INET #Internet communications protocols options IPX #IPX/SPX communications protocols options IPXIP #IPX in IP encapsulation (not available) options IPTUNNEL #IP in IPX encapsulation (not available) options NETATALK #Appletalk communications protocols # These are currently broken but are shipped due to interest. #options NS #Xerox NS protocols # These are currently broken and are no longer shipped due to lack # of interest. #options CCITT #X.25 network layer #options ISO #options TPIP #ISO TP class 4 over IP #options TPCONS #ISO TP class 0 over X.25 #options LLC #X.25 link layer for Ethernets #options HDLC #X.25 link layer for serial lines #options EON #ISO CLNP over IP #options NSIP #XNS over IP # # Network interfaces: # The `loop' pseudo-device is MANDATORY when networking is enabled. # The `ether' pseudo-device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured. # The 'fddi' pseudo-device provides generic code to support FDDI. # The `sppp' pseudo-device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). # The `sl' pseudo-device implements the Serial Line IP (SLIP) service. # The `ppp' pseudo-device implements the Point-to-Point Protocol. # The `bpfilter' pseudo-device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. The number of devices determines the maximum number of # simultaneous BPF client programs runnable. # The `disc' pseudo-device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing purposes. # The `tun' pseudo-device implements the User Process PPP (iijppp) # # The PPP_BSDCOMP option enables support for compress(1) style entire # packet compression, while PPP_DEFLATE is for zlib/gzip style compression. # PPP_FILTER enables code for filtering the ppp data stream and selecting # events for resetting the demand dial activity timer - requires bpfilter. # See pppd(8) for more details.
# pseudo-device ether #Generic Ethernet pseudo-device fddi #Generic FDDI pseudo-device sppp #Generic Synchronous PPP pseudo-device loop #Network loopback device pseudo-device bpfilter 4 #Berkeley packet filter pseudo-device disc #Discard device pseudo-device tun 1 #Tunnel driver (user process ppp(8)) pseudo-device sl 2 #Serial Line IP pseudo-device ppp 2 #Point-to-point protocol options PPP_BSDCOMP #PPP BSD-compress support options PPP_DEFLATE #PPP zlib/deflate/gzip support options PPP_FILTER #enable bpf filtering (needs bpfilter) # # Internet family options: # # TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in # 4.2BSD. This option should not be used unless you have a 4.2BSD # machine and TCP connections fail. # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted(8). # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE sends # logged packets to the system logger. IPFIREWALL_VERBOSE_LIMIT # limits the number of times a matching entry can be logged. # # WARNING: IPFIREWALL defaults to a policy of "deny ip from any to any" # and if you do not add other rules during startup to allow access, # YOU WILL LOCK YOURSELF OUT. It is suggested that you set firewall=open # in /etc/rc.conf when first enabling this feature, then refine the # firewall rules in /etc/rc.firewall after you've tested that the new kernel # feature works properly. # # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to # allow everything. Use with care: if a cracker can crash your # firewall machine, they can get to your protected machines. However, # if you are using it as an as-needed filter for specific problems as # they arise, then this may be for you. Changing the default to 'allow' # means that you won't get stuck if the kernel and /sbin/ipfw binary get # out of sync. # # IPDIVERT enables the divert IP sockets, used by ``ipfw divert'' # # TCPDEBUG is undocumented. # options "TCP_COMPAT_42" #emulate 4.2BSD TCP bugs options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #print information about # dropped packets options "IPFIREWALL_VERBOSE_LIMIT=100" #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPDIVERT #divert sockets options TCPDEBUG ##################################################################### # FILESYSTEM OPTIONS # # Only the root, /usr, and /tmp filesystems need be statically # compiled; everything else will be automatically loaded at mount # time. (Exception: the UFS family --- FFS and MFS --- cannot # currently be demand-loaded.) Some people still prefer to statically # compile other filesystems as well. # # NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be # buggy, and WILL panic your system if you attempt to do anything with # them. They are included here as an incentive for some enterprising # soul to sit down and fix them. # # One of these is mandatory: options FFS #Fast filesystem options NFS #Network File System # The rest are optional: # options NFS_NOSERVER #Disable the NFS-server code.
options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem options "CD9660_ROOT" #CD-ROM usable as root device options FFS_ROOT #FFS usable as root device options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem + +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices. options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas # Add more checking code to various filesystems #options NULLFS_DIAGNOSTIC #options KERNFS_DIAGNOSTIC #options UMAPFS_DIAGNOSTIC #options UNION_DIAGNOSTIC # In particular multi-session CD-Rs might require a huge amount of # time in order to "settle". If we are about mounting them as the # root f/s, we gotta wait a little. # # The number is supposed to be in seconds. options "CD9660_ROOTDELAY=20" # If you are running a machine just as a fileserver for PC and MAC users. # (using SAMBA or Netatalk), then you may consider setting this option # and keeping all those user's directories on a partition that is mounted # with the suiddir option. This gives new files the same ownership as # the directory (similiar to group). It's a security hole if you let # these users run programs so confine it to file-servers, (but it'll save you # lots of headaches in that case). Root owned directories are excempt and X bits # are cleared. the suid bit must be set on the directory as well. see chmod(1) # PC owners can't see/set ownerships so they keep getting their toes # trodden on. This saves you all the support calls as the filesystem # it's used on will act as they expect. ("It's my dir so it must be my file"). # options SUIDDIR # Add some error checking code to the null_bypass routine # in the NULL filesystem #options SAFETY ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit. In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. 
This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3", then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured. controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? # SCSI processor type device sctarg0 at scbus? # SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY # Options for the `od' optical disk driver: # # If the drive returns sense key 0x02 with a vendor-specific additional # sense code (ASC) and additional sense code qualifier (ASCQ), or an # illegal ASC and ASCQ, this causes an error (NOT READY) and a retry. # To suppress this, use the following option. # options OD_BOGUS_NOT_READY # # For an automatic spindown, try this. Again, preferably as an # option in your config file. # WARNING! Use at your own risk. Joerg's ancient SONY SMO drive # groks it fine, while Shunsuke's Fujitsu chokes on it and times # out. # options OD_AUTO_TURNOFF ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # The `pty' device usually turns out to be ``effectively mandatory'', # as it is required for `telnetd', `rlogind', `screen', `emacs', and # `xterm', among others. pseudo-device pty 16 #Pseudo ttys - can go as high as 256 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. pseudo-device ccd 4 #Concatenated disk driver # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code.
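A toy C model of the wired/non-wired unit numbering rule described above (this is just the arithmetic, not the kernel's actual probe code): with a disk wired down as sd3, probed non-wired disks start at sd4.

#include <stdio.h>

int main(void)
{
    int wired_max = 3;  /* e.g. a disk wired down as sd3 in the config */

    /* Non-wired units are assigned after the highest wired-down unit. */
    for (int probe = 0; probe < 3; probe++)
        printf("non-wired disk %d attaches as sd%d\n",
               probe, wired_max + 1 + probe);
    return 0;   /* prints sd4, sd5, sd6 */
}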
pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # EISA support is available for some device, so they can be auto-probed. # Micro Channel is not supported at all. # # Mandatory ISA devices: isa, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM. If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards, use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # # PAS_JOYSTICK_ENABLE enables the gameport on the ProAudio Spectrum options "AUTO_EOI_1" #options "AUTO_EOI_2" options BOUNCE_BUFFERS options "MAXMEM=(128*1024)" options "TUNE_1542" #options BROKEN_KEYBOARD_RESET #options PAS_JOYSTICK_ENABLE # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ftp://ftp.udel.edu/pub/ntp/kernel.tar.Z options PPS_SYNC # Enable PnP support in the kernel. This allows you to automaticly # attach to PnP cards for drivers that support it and allows you to # configure cards from USERCONFIG. See pnp(4) for more info. controller pnp0 # The pcvt console driver (vt220 compatible). device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options XSERVER # support for running an X server. options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible). device sc0 at isa? 
port "IO_KBD" tty irq 1 vector scintr options MAXCONS=16 # number of virtual consoles options SLOW_VGA # do byte-wide i/o's to TS and GDC regs options "STD8X16FONT" # Compile font in makeoptions "STD8X16FONT"="cp850" options SC_HISTORY_SIZE=200 # number of history buffer lines # # `flags' for sc0: # 0x01 Use a 'visual' bell # 0x02 Use a 'blink' cursor # 0x04 Use a 'underline' cursor # 0x06 Use a 'blinking underline' (destructive) cursor # 0x08 Force detection of keyboard, else we always assume a keyboard # 0x10 Old-style (XT) keyboard support, useful for older ThinkPads # 0x20 Don't reset keyboard, useful for some newer ThinkPads # # The Numeric Processing eXtension driver. This should be configured if # your machine has a math co-processor, unless the coprocessor is very # buggy. If it is not configured then you *must* configure math emulation # (see above). If both npx0 and emulation are configured, then only npx0 # is used (provided it works). device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr # # `flags' for npx0: # 0x01 don't use the npx registers to optimize bcopy # 0x02 don't use the npx registers to optimize bzero # 0x04 don't use the npx registers to optimize copyin or copyout. # The npx registers are normally used to optimize copying and zeroing when # all of the following conditions are satisfied: # "I586_CPU" is an option # the cpu is an i586 (perhaps not a Pentium) # the probe for npx0 succeeds # INT 16 exception handling works. # Then copying and zeroing using the npx registers is normally 30-100% faster. # The flags can be used to control cases where it doesn't work or is slower. # Setting them at boot time using userconfig works right (the optimizations # are not used until later in the bootstrap when npx0 is attached). # # # `iosiz' for npx0: # This can be used instead of the MAXMEM option to set the memory size. If # it is nonzero, then it overrides both the MAXMEM option and the memory # size reported by the BIOS. Setting it at boot time using userconfig takes # effect on the next reboot after the change has been recorded in the kernel # binary (the size is used early in the boot before userconfig has a chance # to change it). # # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important in order for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. 
The flags may be used in either the controller # definition or in the individual disk definitions. The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. # The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. Bit 14 (0x4000) enables a hack to wake # up powered-down laptop drives. Bit 13 (0x2000) allows # probing for PCI IDE DMA controllers, such as Intel's PIIX # south bridges. See the wd.4 man page. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # If you are using a PCI controller that is not running in compatibility # mode (for example, it is a 2nd IDE PCI interface), then use config line(s) # such as: # #controller wdc2 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd4 at wdc2 drive 0 #disk wd5 at wdc2 drive 1 # #controller wdc3 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd6 at wdc3 drive 0 #disk wd7 at wdc3 drive 1 # # Note that the above config would be useful for a Promise card, when used # on a MB that already has a PIIX controller. Note the bogus irq and port # entries. These are automatically filled in by the IDE/PCI support. # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa? port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # CMD640 enables serializing access to primary and secondary channel # of the CMD640B IDE Chip. The serializing will only take place # if this option is set *and* the chip is probed by the pci-system. # options "CMD640" #Enable work around for CMD640 h/w bug # # ATAPI enables the support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus options ATAPI_STATIC #Don't do it as an LKM # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # IDE floppy driver - requires wdc controller and ATAPI option device wfd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # gotta turn it actually on by setting the variable fd_debug with DDB, # however. options FDC_DEBUG # This option is undocumented on purpose. options FDC_PRINT_BOGUS_CHIPTYPE # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc. 
# # lpt: printer port # lpt specials: # port can be specified as ?; this causes the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted; this # forces the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_HOOKAPM #hook the APM resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event device sio0 at isa? port "IO_COM1" tty flags 0x10 irq 4 vector siointr # # `flags' for serial drivers that support consoles (only for sio now): # 0x10 enable console support for this unit. The other console flags # are ignored unless this is set. Enabling console support does # not make the unit the preferred console - boot with -h or set # the 0x20 flag for that. Currently, at most one unit can have # console support; the first one (in config file order) with # this flag set is preferred. Setting this flag for sio0 gives # the old behaviour. # 0x20 force this unit to be the console (unless there is another # higher priority console). This replaces the COMCONSOLE option. # 0x40 reserve this unit for low level console operations. Do not # use this unit for normal I/O. # # PnP `flags' (set via userconfig using pnp x flags y) # 0x1 disable probing of this device. Used to prevent your modem # from being attached as a PnP modem. # # Options for serial drivers that support consoles (only for sio now): options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. options CONSPEED=9600 #default speed for serial console (default 9600) # Options for sio: options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options "EXTRA_SIO=2" #number of extra sio ports to allocate # Other flags for sio that aren't documented in the man page. # 0x20000 enable hardware RTS/CTS and larger FIFOs. Only works for # ST16650A-compatible UARTs. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) # ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; Intel EtherExpress # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp) # wl: Lucent Wavelan (ISA card only). # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa?
port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device ex0 at isa? port? net irq? vector exintr device fe0 at isa? port 0x300 net irq ? vector feintr device ie0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector ieintr device ie1 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device sr0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector srintr options WLCACHE # enables the signal-strength cache options WLDEBUG # enables verbose debugging output device wl0 at isa? port 0x300 net irq ? vector wlintr # We can (bogusly) include both the dedicated PCCARD drivers and the generic # support when COMPILING_LINT. device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ATM related options # # The `en' device provides support for Efficient Networks (ENI) # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0). # # atm pseudo-device provides generic atm functions and is required for # atm devices. # NATM enables the netnatm protocol family that can be used to # bypass TCP/IP. # # The current driver supports only PVC operations (no atm-arp, no multicast). # For more details, please read the original documents at # http://www.ccrc.wustl.edu/pub/chuck/bsdatm/wucs.html # pseudo-device atm device en0 device en1 options NATM #native ATM # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # css: Crystal Sound System (CSS 423x PnP) # sscape: Ensoniq Soundscape MIDI interface # sscape_mss: Ensoniq Soundscape PCM (requires sscape) # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pcm: PCM audio through various sound cards. # # This is the work in progress from Luigi Rizzo. This has support for # CS423x based cards, OPTi931, SB16 PnP, GusPnP. For more information # about this driver, take a look at sys/i386/isa/snd/README. # # The flags of the device tell the driver a bit more info about the # device that normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if you don't know what to put in (and you don't, # since this is unsupported at the moment...). # # This driver will use the new PnP code if it's available. # # pca: PCM audio through your PC speaker # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card, the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''.
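# # (Added illustration, not part of the original file: encoding the pcm # `flags' bits described above, a board whose secondary DMA channel is 3 # [bits 2..0] and which uses two DMA channels [bit 4] would get flags 0x13: # #device pcm0 at isa? port ? tty irq 10 drq 1 flags 0x13 vector pcmintr # The line above is commented out and purely illustrative.)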
# # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all "VOXWARE" driver sound devices. See Luigi's driver # below for an alternate which may work better for some cards. # controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 5 drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device awe0 at isa? port 0x620 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? port 0x530 irq 10 drq 1 vector adintr device css0 at isa? port 0x534 irq 5 drq 1 flags 0x08 vector adintr device sscape0 at isa? port 0x330 irq 9 drq 0 vector sscapeintr device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr device sscape_mss0 at isa? port 0x534 irq 5 drq 1 vector sndintr device opl0 at isa? port 0x388 device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # Luigi's snd code (use INSTEAD of snd0 and all VOXWARE drivers!). # You may also wish to enable the pnp controller with this, for pnp # sound cards. # #device pcm0 at isa? port ? tty irq 10 drq 1 flags 0x0 vector pcmintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # alog: Industrial Computer Source AIO8-P driver # bktr: Bt848 capture boards (http://www.freebsd.org/~fsmp/HomeAuto/Bt848.html) # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # rp: Comtrol Rocketport(ISA) - single card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based) # stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent) # # Notes on APM # The flags take the following meanings for apm0: # 0x0020 Statclock is broken. # 0x0011 Limit APM protocol to 1.1 or 1.0 # 0x0010 Limit APM protocol to 1.0 # (see the added example under `Laptop/Notebook options' below) # # # Notes on the spigot: # The video spigot is at 0xad6. This port address cannot be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. # options SPIGOT_UNSECURE # # Notes on the Comtrol Rocketport driver: # # The exact values used for rp0 depend on how many boards you have # in the system.
The manufacturer's sample configs are listed as: # # Comtrol Rocketport ISA single card # device rp0 at isa? port 0x280 tty # # If instead you have two ISA cards, one installed at 0x100 and the # second installed at 0x180, then you should add the following to # your kernel configuration file: # # device rp0 at isa? port 0x100 tty # device rp1 at isa? port 0x180 tty # # For 4 ISA cards, it might be something like this: # # device rp0 at isa? port 0x180 tty # device rp1 at isa? port 0x100 tty # device rp2 at isa? port 0x340 tty # device rp3 at isa? port 0x240 tty # # And for PCI cards, you need only say: # # device rp0 # device rp1 # ... # Note: Make sure that any Rocketport PCI devices are specified BEFORE the # ISA Rocketport devices. # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. # Notes on the Stallion stl and stli drivers: # See src/i386/isa/README.stl for complete instructions. # This is version 0.0.5alpha, unsupported by Stallion. # The stl driver has a secondary IO port hard coded at 0x280. You need # to change src/i386/isa/stallion.c if you reconfigure this on the boards. # The "flags" and "iosiz" settings on the stli driver depend on the board: # EasyConnection 8/64 ISA: flags 23 iosiz 0x1000 # EasyConnection 8/64 EISA: flags 24 iosiz 0x10000 # EasyConnection 8/64 MCA: flags 25 iosiz 0x1000 # ONboard ISA: flags 4 iosiz 0x10000 # ONboard EISA: flags 7 iosiz 0x10000 # ONboard MCA: flags 3 iosiz 0x10000 # Brumby: flags 2 iosiz 0x4000 # Stallion: flags 1 iosiz 0x10000 device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device alog0 at isa? port 0x260 tty irq 5 vector alogintr device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr device rp0 at isa? port 0x280 tty # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 device stl0 at isa? port 0x2a0 tty irq 10 vector stlintr device stli0 at isa? port 0x2a0 tty iomem 0xcc000 flags 23 iosiz 0x1000 device loran0 at isa? port ? tty irq 5 vector loranintr # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card, responds to EISA probes.
# # fea: DEC DEFEA EISA FDDI adapter # controller eisa0 controller ahb0 controller ahc0 device fea0 # enable tagged command queuing, which is a major performance win on # devices that support it (and controllers with enough SCB's) options AHC_TAGENABLE # enable SCB paging - See the ahc.4 man page options AHC_SCBPAGING_ENABLE # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # By default, only 10 EISA slots are probed, since the slot numbers # above clash with the configuration address space of the PCI subsystem, # and the EISA probe is not very smart about this. This is sufficient # for most machines, but in particular the HP NetServer LC series comes # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, # thus you need to bump this figure to 12 for them. options "EISA_SLOTS=12" # # PCI devices: # # The main PCI bus device is `pci'. It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. # # The `amd' device provides support for the Tekram DC-390 and 390T # SCSI host adapters, but is expected to work with any AMD 53c974 # PCI SCSI chip and the AMD Ethernet+SCSI Combo chip, after some # local patches were applied to the sources (that had originally # been written by Tekram and limited to work with their SCSI cards). # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `fxp' device provides support for the Intel EtherExpress Pro/100B # PCI Fast Ethernet adapters. # # The `tx' device provides support for the SMC 9432TX cards. # # The `vx' device provides early support for the 3Com 3C590 and 3C595 # cards. # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount no action is # taken # option METEOR_SYSTEM_DEFAULT={METEOR_PAL|METEOR_NTSC|METEOR_SECAM}, used # for initialization of the fps routine when a signal is not present. # # The 'bktr' device is a PCI video capture board. It also has a TV tuner # on board. # controller pci0 controller ahc1 controller ncr0 controller amd0 device de0 device fxp0 device tx0 device vx0 device fpa0 device meteor0 device bktr0 # # PCCARD/PCMCIA # # card: slot controller # pcic: slots controller card0 controller pcic0 at card? controller pcic1 at card? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # above.
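# # (Added example, not part of the original file: the apm0 `flags' bits # listed under `Notes on APM' above are simply OR'd together, so a laptop # with a broken statclock that should also be limited to APM protocol 1.0 # would use flags 0x0030: # #device apm0 at isa? flags 0x0030 # The line above is commented out and purely illustrative.)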
# For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port; devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'sd'), best # performance is achieved with ports in EPP 1.9 mode. # nlpt Parallel Printer # ppi General-purpose I/O ("Geek Port") # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # controller ppbus0 controller vpo0 at ppbus? device nlpt0 at ppbus? device ppi0 at ppbus? device pps0 at ppbus? controller ppc0 at isa? disable port ? irq 7 vector ppcintr # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options "BOOTP_NFSV3" # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. # # An obsolete option to test kern_opt.c. # options GATEWAY # If you want to disable loadable kernel modules (LKM), you # might want to use this option. #options NO_LKM # # Add tie-ins for a hardware watchdog. This only enables the hooks; # the user must still supply the actual driver. # options HW_WDOG # More undocumented options for linting. options CLK_CALIBRATION_LOOP options "CLK_USE_I8254_CALIBRATION" options CLK_USE_TSC_CALIBRATION options CLUSTERDEBUG options COMPAT_LINUX options CPU_UPGRADE_HW_CACHE options DEBUG options "DEBUG_1284" options DEVFS_ROOT #options DISABLE_PSE options "EXT2FS" options "I586_PMC_GUPROF=0x70000" options "IBCS2" # broken: #options IPFILTER options KEY options KEY_DEBUG options LOCKF_DEBUG options LOUTB options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options KBDIO_DEBUG=2 options MSGMNB=2049 options MSGMNI=41 options MSGSEG=2049 options MSGSSZ=16 options MSGTQL=41 options NBUF=512 options NETATALKDEBUG options NMBCLUSTERS=1024 options NPX_DEBUG options NULLFS_DIAGNOSTIC options PANIC_REBOOT_WAIT_TIME=16 options "PCVT_24LINESDEF" options PCVT_CTRL_ALT_DEL options PCVT_EMU_MOUSE options PCVT_FREEBSD=211 options PCVT_META_ESC options PCVT_NSCREENS=9 options PCVT_PRETTYSCRNS options PCVT_SCANSET=2 options PCVT_SCREENSAVER options PCVT_USEKBDSEC options "PCVT_VT220KEYB" options PSM_DEBUG=1 options "SCSI_2_DEF" options SCSI_DELAY=8 # Be pessimistic about Joe SCSI device options SCSI_NCR_DEBUG options SCSI_NCR_DFLT_TAGS=4 options SCSI_NCR_MAX_SYNC=10000 options SCSI_NCR_MAX_WIDE=1 options SCSI_NCR_MYADDR=7 options SEMMAP=31 options SEMMNI=11 options SEMMNS=61 options SEMMNU=31 options SEMMSL=61 options SEMOPM=101 options SEMUME=11 options SHOW_BUSYBUFS # List buffers that prevent root unmount options SHMALL=1025 options "SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)" options SHMMAXPGS=1025 options SHMMIN=2 options SHMMNI=33 options SHMSEG=9 options SI_DEBUG options SIMPLELOCK_DEBUG options SPX_HACK # The 'dpt' driver provides hardware RAID-{0,1,5} support, multi-initiator I/O # See sys/dev/dpt for debugging and other subtle options. # DPT_VERIFY_HINTR Performs some strict hardware interrupt testing. # Only use if you suspect PCI bus corruption problems # DPT_RESTRICTED_FREELIST Normally, the freelist used by the DPT for queueing # will grow to accommodate increased use. This growth # will NOT shrink.
To restrict the number of queue # slots to exactly what the DPT can hold at one time, # enable this option. # DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various # instruments are enabled. Assumed to be enabled by # the /usr/sbin/dpt_* tools. # DPT_FREELIST_IS_STACK For optimal L{1,2} CPU cache utilization, enable # this option. Otherwise, the transaction queue is # a LIFO. I cannot measure the performance gain. # DPT_HANDLE_TIMEOUTS Normally device timeouts are handled by the DPT. # If you want the driver to handle timeouts, enable # this option. If your system is very busy, this # option will create more trouble than it solves. # DPT_TIMEOUT_FACTOR Used to compute the excessive amount of time to # wait when timing out with the above option. # DPT_DEBUG_xxxx These are controllable from sys/dev/dpt/dpt.h # DPT_LOST_IRQ When enabled, will try, once per second, to catch # any interrupt that got lost. Seems to help in some # DPT-firmware/Motherboard combinations. Minimal # cost, great benefit. controller dpt0 # DPT options options DPT_VERIFY_HINTR options DPT_RESTRICTED_FREELIST options DPT_MEASURE_PERFORMANCE options DPT_FREELIST_IS_STACK options DPT_HANDLE_TIMEOUTS options DPT_TIMEOUT_FACTOR=4 options DPT_INTR_DELAY=200 # Some motherboards need this options DPT_LOST_IRQ Index: head/sys/isofs/cd9660/cd9660_vfsops.c =================================================================== --- head/sys/isofs/cd9660/cd9660_vfsops.c (revision 34265) +++ head/sys/isofs/cd9660/cd9660_vfsops.c (revision 34266) @@ -1,891 +1,892 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 - * $Id: cd9660_vfsops.c,v 1.33 1997/12/21 21:40:02 joerg Exp $ + * $Id: cd9660_vfsops.c,v 1.34 1998/03/01 22:46:00 msmith Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); static int cd9660_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int cd9660_start __P((struct mount *, int, struct proc *)); static int cd9660_unmount __P((struct mount *, int, struct proc *)); static int cd9660_root __P((struct mount *, struct vnode **)); static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); static int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); static int cd9660_vrele __P((struct mount *, struct vnode *)); static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int cd9660_vptofh __P((struct vnode *, struct fid *)); static struct vfsops cd9660_vfsops = { cd9660_mount, cd9660_start, cd9660_unmount, cd9660_root, cd9660_quotactl, cd9660_statfs, cd9660_sync, cd9660_vget, cd9660_vrele, cd9660_fhtovp, cd9660_vptofh, cd9660_init }; VFS_SET(cd9660_vfsops, cd9660, MOUNT_CD9660, VFCF_READONLY); /* * Called by vfs_mountroot when iso is going to be mounted as root. */ static int iso_get_ssector __P((dev_t dev, struct proc *p)); static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); static int iso_mountroot __P((struct mount *mp, struct proc *p)); /* * Try to find the start of the last data track on this CD-ROM. This * is used to mount the last session of a multi-session CD. Bail out * and return 0 if we fail, this is always a safe bet. */ static int iso_get_ssector(dev, p) dev_t dev; struct proc *p; { struct ioc_toc_header h; struct ioc_read_toc_single_entry t; int i; struct bdevsw *bd; d_ioctl_t *ioctlp; bd = bdevsw[major(dev)]; ioctlp = bd->d_ioctl; if (ioctlp == NULL) return 0; if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) == -1) return 0; for (i = h.ending_track; i >= 0; i--) { t.address_format = CD_LBA_FORMAT; t.track = i; if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) == -1) return 0; if ((t.entry.control & 4) != 0) /* found a data track */ break; } if (i < 0) return 0; return ntohl(t.entry.addr.lba); } static int iso_mountroot(mp, p) struct mount *mp; struct proc *p; { struct iso_args args; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("iso_mountroot: can't find rootvp"); return (error); } args.flags = ISOFSMNT_ROOT; args.ssector = iso_get_ssector(rootdev, p); if (bootverbose) printf("iso_mountroot(): using session at block %d\n", args.ssector); if (error = iso_mountfs(rootvp, mp, p, &args)) return (error); (void)cd9660_statfs(mp, &mp->mnt_stat, p); return (0); } /* * VFS Operations. 
* * mount system call */ static int cd9660_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct iso_args args; u_int size; int error; struct iso_mnt *imp = 0; if ((mp->mnt_flag & MNT_ROOTFS) != 0) { if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; return (iso_mountroot(mp, p)); } if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) return (error); if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EROFS); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR flag, if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { imp = VFSTOISOFS(mp); if (bdevsw[major(imp->im_devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (args.fspec == 0) return (vfs_export(mp, &imp->im_export, &args.export)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp))) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return ENOTBLK; } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return ENXIO; } if ((mp->mnt_flag & MNT_UPDATE) == 0) { if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; error = iso_mountfs(devvp, mp, p, &args); } else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return error; } imp = VFSTOISOFS(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) cd9660_statfs(mp, &mp->mnt_stat, p); return 0; } /* * Common code for mount and mountroot */ static int iso_mountfs(devvp, mp, p, argp) register struct vnode *devvp; struct mount *mp; struct proc *p; struct iso_args *argp; { register struct iso_mnt *isomp = (struct iso_mnt *)0; struct buf *bp = NULL; dev_t dev = devvp->v_rdev; int error = EINVAL; int needclose = 0; int high_sierra = 0; int ronly = (mp->mnt_flag & MNT_RDONLY) != 0; int iso_bsize; int iso_blknum; struct iso_volume_descriptor *vdp = 0; struct iso_primary_descriptor *pri; struct iso_sierra_primary_descriptor *pri_sierra; struct iso_directory_record *rootp; int logical_block_size; if (!ronly) return EROFS; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp))) return error; if (vcount(devvp) > 1 && devvp != rootvp) return EBUSY; if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) return (error); if ((error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))) return error; needclose = 1; /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. For now, we'll just use a constant. 
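* (Added note, not in the original source: ISO_DEFAULT_BLOCK_SIZE is the * 2048-byte constant; the loop below then scans logical sectors 16..99 of * the selected session looking for the primary volume descriptor.)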
*/ iso_bsize = ISO_DEFAULT_BLOCK_SIZE; for (iso_blknum = 16 + argp->ssector; iso_blknum < 100 + argp->ssector; iso_blknum++) { if (error = bread(devvp, iso_blknum * btodb(iso_bsize), iso_bsize, NOCRED, &bp)) goto out; vdp = (struct iso_volume_descriptor *)bp->b_data; if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, sizeof vdp->id) != 0) { error = EINVAL; goto out; } else high_sierra = 1; } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_END) { error = EINVAL; goto out; } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_PRIMARY) break; brelse(bp); } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) != ISO_VD_PRIMARY) { error = EINVAL; goto out; } pri = (struct iso_primary_descriptor *)vdp; pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; logical_block_size = isonum_723 (high_sierra? pri_sierra->logical_block_size: pri->logical_block_size); if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE || (logical_block_size & (logical_block_size - 1)) != 0) { error = EINVAL; goto out; } rootp = (struct iso_directory_record *) (high_sierra? pri_sierra->root_directory_record: pri->root_directory_record); isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK); bzero((caddr_t)isomp, sizeof *isomp); isomp->logical_block_size = logical_block_size; isomp->volume_space_size = isonum_733 (high_sierra? pri_sierra->volume_space_size: pri->volume_space_size); /* * Since an ISO9660 multi-session CD can also access previous * sessions, we have to include them in the space * considerations. This doesn't yield a very accurate number since * parts of the old sessions might be inaccessible now, but we * can't do much better. This is also important for the NFS * filehandle validation. */ isomp->volume_space_size += argp->ssector; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->im_bmask = logical_block_size - 1; isomp->im_bshift = 0; while ((1 << isomp->im_bshift) < isomp->logical_block_size) isomp->im_bshift++; bp->b_flags |= B_AGE; brelse(bp); bp = NULL; mp->mnt_data = (qaddr_t)isomp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extension support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { if (error = bread(isomp->im_devvp, (isomp->root_extent + isonum_711(rootp->ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT), isomp->logical_block_size, NOCRED, &bp)) goto out; rootp = (struct iso_directory_record *)bp->b_data; if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { argp->flags |= ISOFSMNT_NORRIP; } else { argp->flags &= ~ISOFSMNT_GENS; } /* * The contents are valid, * but they will get reread as part of another vnode, so...
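* (Added note, not in the original source: the B_AGE flag set below marks * the buffer for early reuse, since this root directory block will be read * again through the root vnode.)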
*/ bp->b_flags |= B_AGE; brelse(bp); bp = NULL; } isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT); if(high_sierra) /* this effectively ignores all the mount flags */ isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; else switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { default: isomp->iso_ftype = ISO_FTYPE_DEFAULT; break; case ISOFSMNT_GENS|ISOFSMNT_NORRIP: isomp->iso_ftype = ISO_FTYPE_9660; break; case 0: isomp->iso_ftype = ISO_FTYPE_RRIP; break; } return 0; out: - devvp->v_specflags &= ~SI_MOUNTEDON; + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (needclose) (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); if (isomp) { free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; } return error; } /* * Make a filesystem operational. * Nothing to do at the moment. */ /* ARGSUSED */ static int cd9660_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return 0; } /* * unmount system call */ static int cd9660_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct iso_mnt *isomp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; #if 0 mntflushbuf(mp, 0); if (mntinvalbuf(mp)) return EBUSY; #endif if ((error = vflush(mp, NULLVP, flags))) return (error); isomp = VFSTOISOFS(mp); - isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; + isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Return root of a filesystem */ static int cd9660_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = (struct iso_directory_record *)imp->root; ino_t ino = isodirino(dp, imp); /* * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. */ return (cd9660_vget_internal(mp, ino, vpp, imp->iso_ftype == ISO_FTYPE_RRIP, dp)); } /* * Do operations associated with quotas, not supported */ /* ARGSUSED */ static int cd9660_quotactl(mp, cmd, uid, arg, p) struct mount *mp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { return (EOPNOTSUPP); } /* * Get file system statistics. */ int cd9660_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); sbp->f_type = MOUNT_CD9660; sbp->f_bsize = isomp->logical_block_size; sbp->f_iosize = sbp->f_bsize; /* XXX */ sbp->f_blocks = isomp->volume_space_size; sbp->f_bfree = 0; /* total free blocks */ sbp->f_bavail = 0; /* blocks free for non superuser */ sbp->f_files = 0; /* total files */ sbp->f_ffree = 0; /* free file nodes */ if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } /* Use the first spare for flags: */ - sbp->f_spare[0] = isomp->im_flags; + /* Don't do this!!! 
XXX */ + /* sbp->f_spare[0] = isomp->im_flags; */ return 0; } /* ARGSUSED */ static int cd9660_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is in range * - call iget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the generation number matches */ struct ifid { ushort ifid_len; ushort ifid_pad; int ifid_ino; long ifid_start; }; /* ARGSUSED */ int cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct ifid *ifhp = (struct ifid *)fhp; register struct iso_node *ip; register struct netcred *np; register struct iso_mnt *imp = VFSTOISOFS(mp); struct vnode *nvp; int error; #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifhp->ifid_ino, ifhp->ifid_start); #endif /* * Get the export permission structure for this tuple. */ np = vfs_export_lookup(mp, &imp->im_export, nam); if (np == NULL) return (EACCES); if (error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int cd9660_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { /* * XXXX * It would be nice if we didn't always set the `relocated' flag * and force the extra read, but I don't want to think about fixing * that right now. */ return (cd9660_vget_internal(mp, ino, vpp, #if 0 VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, #else 0, #endif (struct iso_directory_record *)0)); } /* * Complement to all vpp returning ops. * XXX - initially only to get rid of WILLRELE. */ /* ARGSUSED */ static int cd9660_vrele(mp, vp) struct mount *mp; struct vnode *vp; { return (EOPNOTSUPP); } int cd9660_vget_internal(mp, ino, vpp, relocated, isodir) struct mount *mp; ino_t ino; struct vnode **vpp; int relocated; struct iso_directory_record *isodir; { struct iso_mnt *imp; struct iso_node *ip; struct buf *bp; struct vnode *vp, *nvp; dev_t dev; int error; imp = VFSTOISOFS(mp); dev = imp->im_dev; if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP) return (0); /* Allocate a new vnode/iso_node. */ if (error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) { *vpp = NULLVP; return (error); } MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK); bzero((caddr_t)ip, sizeof(struct iso_node)); lockinit(&ip->i_lock, PINOD, "isonode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_dev = dev; ip->i_number = ino; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. 
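* (Added note, not in the original source: cd9660_ihashget() above returns * any vnode already hashed for this (dev, ino) pair, so inserting into the * hash before the disk read makes concurrent lookups block on this inode * rather than race the read.)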
*/ cd9660_ihashins(ip); if (isodir == 0) { int lbn, off; lbn = lblkno(imp, ino); if (lbn >= imp->volume_space_size) { vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); return (ESTALE); } off = blkoff(imp, ino); if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); return (ESTALE); } error = bread(imp->im_devvp, lbn << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { vput(vp); brelse(bp); printf("fhtovp: bread error %d\n",error); return (error); } isodir = (struct iso_directory_record *)(bp->b_data + off); if (off + isonum_711(isodir->length) > imp->logical_block_size) { vput(vp); if (bp != 0) brelse(bp); printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); return (ESTALE); } #if 0 if (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { if (bp != 0) brelse(bp); printf("fhtovp: file start miss %d vs %d\n", isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), ifhp->ifid_start); return (ESTALE); } #endif } else bp = 0; ip->i_mnt = imp; ip->i_devvp = imp->im_devvp; VREF(ip->i_devvp); if (relocated) { /* * On relocated directories we must * read the `.' entry out of a dir. */ ip->iso_start = ino >> imp->im_bshift; if (bp != 0) brelse(bp); if (error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) { vput(vp); return (error); } isodir = (struct iso_directory_record *)bp->b_data; } ip->iso_extent = isonum_733(isodir->extent); ip->i_size = isonum_733(isodir->size); ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; /* * Setup time stamp, attribute */ vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { struct buf *bp2; int off; if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); else bp2 = NULL; cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); if (bp2) brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } if (bp != 0) brelse(bp); /* * Initialize the associated vnode */ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { case VFIFO: vp->v_op = cd9660_fifoop_p; break; case VCHR: case VBLK: /* * if device, look at device number table for translation */ vp->v_op = cd9660_specop_p; if (nvp = checkalias(vp, ip->inode.iso_rdev, mp)) { /* * Discard unneeded vnode, but save its iso_node. * Note that the lock is carried over in the iso_node * to the replacement vnode. */ nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased inode. */ vp = nvp; ip->i_vnode = vp; } break; } if (ip->iso_extent == imp->root_extent) vp->v_flag |= VROOT; /* * XXX need generation number? 
*/ *vpp = vp; return (0); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int cd9660_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; ifhp = (struct ifid *)fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = ip->i_number; ifhp->ifid_start = ip->iso_start; #ifdef ISOFS_DBG printf("vptofh: ino %d, start %ld\n", ifhp->ifid_ino,ifhp->ifid_start); #endif return 0; } Index: head/sys/kern/kern_malloc.c =================================================================== --- head/sys/kern/kern_malloc.c (revision 34265) +++ head/sys/kern/kern_malloc.c (revision 34266) @@ -1,454 +1,454 @@ /* * Copyright (c) 1987, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 - * $Id: kern_malloc.c,v 1.43 1998/02/09 06:09:22 eivind Exp $ + * $Id: kern_malloc.c,v 1.44 1998/02/23 07:41:23 dyson Exp $ */ #include "opt_vm.h" #include #include #include #define MALLOC_INSTANTIATE #include #include #include #include #include #include #include #include #include #include static void kmeminit __P((void *)); static void malloc_init __P((struct malloc_type *)); SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL) static MALLOC_DEFINE(M_FREE, "free", "should be on free list"); static struct malloc_type *kmemstatistics = M_FREE; static struct kmembuckets bucket[MINBUCKET + 16]; static struct kmemusage *kmemusage; static char *kmembase; static char *kmemlimit; static int vm_kmem_size; #ifdef DIAGNOSTIC /* * This structure provides a set of masks to catch unaligned frees. 
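* (Added note, not in the original source: addrmask[i] is (1 << i) - 1, so * free() can mask an address with its bucket's entry and panic if any low * bits are set, i.e. if the address is not aligned to the bucket's * allocation size.)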
*/ static long addrmask[] = { 0, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, }; /* * The WEIRD_ADDR is used as known text to copy into free objects so * that modifications after frees can be detected. */ #define WEIRD_ADDR 0xdeadc0de #define MAX_COPY 64 /* * Normally the first word of the structure is used to hold the list * pointer for free objects. However, when running with diagnostics, * we use the third and fourth fields, so as to catch modifications * in the most commonly trashed first two words. */ struct freelist { long spare0; struct malloc_type *type; long spare1; caddr_t next; }; #else /* !DIAGNOSTIC */ struct freelist { caddr_t next; }; #endif /* DIAGNOSTIC */ /* * Allocate a block of memory */ void * malloc(size, type, flags) unsigned long size; struct malloc_type *type; int flags; { register struct kmembuckets *kbp; register struct kmemusage *kup; register struct freelist *freep; long indx, npg, allocsize; int s; caddr_t va, cp, savedlist; #ifdef DIAGNOSTIC long *end, *lp; int copysize; char *savedtype; #endif register struct malloc_type *ksp = type; if (!type->ks_next) malloc_init(type); indx = BUCKETINDX(size); kbp = &bucket[indx]; - s = splhigh(); + s = splmem(); while (ksp->ks_memuse >= ksp->ks_limit) { if (flags & M_NOWAIT) { splx(s); return ((void *) NULL); } if (ksp->ks_limblocks < 65535) ksp->ks_limblocks++; tsleep((caddr_t)ksp, PSWP+2, type->ks_shortdesc, 0); } ksp->ks_size |= 1 << indx; #ifdef DIAGNOSTIC copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY; #endif if (kbp->kb_next == NULL) { kbp->kb_last = NULL; if (size > MAXALLOCSAVE) allocsize = roundup(size, PAGE_SIZE); else allocsize = 1 << indx; npg = btoc(allocsize); va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg), flags); if (va == NULL) { splx(s); return ((void *) NULL); } kbp->kb_total += kbp->kb_elmpercl; kup = btokup(va); kup->ku_indx = indx; if (allocsize > MAXALLOCSAVE) { if (npg > 65535) panic("malloc: allocation too large"); kup->ku_pagecnt = npg; ksp->ks_memuse += allocsize; goto out; } kup->ku_freecnt = kbp->kb_elmpercl; kbp->kb_totalfree += kbp->kb_elmpercl; /* * Just in case we blocked while allocating memory, * and someone else also allocated memory for this * bucket, don't assume the list is still empty. */ savedlist = kbp->kb_next; kbp->kb_next = cp = va + (npg * PAGE_SIZE) - allocsize; for (;;) { freep = (struct freelist *)cp; #ifdef DIAGNOSTIC /* * Copy in known text to detect modification * after freeing. 
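* (Added note, not in the original source: each word of the first copysize * bytes is filled with WEIRD_ADDR here; when the chunk is later handed out, * malloc() rescans the same region and reports any word that changed while * the chunk sat on the freelist.)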
*/ end = (long *)&cp[copysize]; for (lp = (long *)cp; lp < end; lp++) *lp = WEIRD_ADDR; freep->type = M_FREE; #endif /* DIAGNOSTIC */ if (cp <= va) break; cp -= allocsize; freep->next = cp; } freep->next = savedlist; if (kbp->kb_last == NULL) kbp->kb_last = (caddr_t)freep; } va = kbp->kb_next; kbp->kb_next = ((struct freelist *)va)->next; #ifdef DIAGNOSTIC freep = (struct freelist *)va; savedtype = (char *) type->ks_shortdesc; #if BYTE_ORDER == BIG_ENDIAN freep->type = (struct malloc_type *)WEIRD_ADDR >> 16; #endif #if BYTE_ORDER == LITTLE_ENDIAN freep->type = (struct malloc_type *)WEIRD_ADDR; #endif if (((long)(&freep->next)) & 0x2) freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16)); else freep->next = (caddr_t)WEIRD_ADDR; end = (long *)&va[copysize]; for (lp = (long *)va; lp < end; lp++) { if (*lp == WEIRD_ADDR) continue; printf("%s %d of object %p size %ld %s %s (0x%lx != 0x%x)\n", "Data modified on freelist: word", lp - (long *)va, va, size, "previous type", savedtype, *lp, WEIRD_ADDR); break; } freep->spare0 = 0; #endif /* DIAGNOSTIC */ kup = btokup(va); if (kup->ku_indx != indx) panic("malloc: wrong bucket"); if (kup->ku_freecnt == 0) panic("malloc: lost data"); kup->ku_freecnt--; kbp->kb_totalfree--; ksp->ks_memuse += 1 << indx; out: kbp->kb_calls++; ksp->ks_inuse++; ksp->ks_calls++; if (ksp->ks_memuse > ksp->ks_maxused) ksp->ks_maxused = ksp->ks_memuse; splx(s); return ((void *) va); } /* * Free a block of memory allocated by malloc. */ void free(addr, type) void *addr; struct malloc_type *type; { register struct kmembuckets *kbp; register struct kmemusage *kup; register struct freelist *freep; long size; int s; #ifdef DIAGNOSTIC struct freelist *fp; long *end, *lp, alloc, copysize; #endif register struct malloc_type *ksp = type; if (!type->ks_next) panic("freeing with unknown type (%s)", type->ks_shortdesc); #ifdef DIAGNOSTIC if ((char *)addr < kmembase || (char *)addr >= kmemlimit) { panic("free: address 0x%x out of range", addr); } #endif kup = btokup(addr); size = 1 << kup->ku_indx; kbp = &bucket[kup->ku_indx]; - s = splhigh(); + s = splmem(); #ifdef DIAGNOSTIC /* * Check for returns of data that do not point to the * beginning of the allocation. */ if (size > PAGE_SIZE) alloc = addrmask[BUCKETINDX(PAGE_SIZE)]; else alloc = addrmask[kup->ku_indx]; if (((u_long)addr & alloc) != 0) panic("free: unaligned addr 0x%x, size %d, type %s, mask %d", addr, size, type->ks_shortdesc, alloc); #endif /* DIAGNOSTIC */ if (size > MAXALLOCSAVE) { kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt)); size = kup->ku_pagecnt << PAGE_SHIFT; ksp->ks_memuse -= size; kup->ku_indx = 0; kup->ku_pagecnt = 0; if (ksp->ks_memuse + size >= ksp->ks_limit && ksp->ks_memuse < ksp->ks_limit) wakeup((caddr_t)ksp); ksp->ks_inuse--; kbp->kb_total -= 1; splx(s); return; } freep = (struct freelist *)addr; #ifdef DIAGNOSTIC /* * Check for multiple frees. Use a quick check to see if * it looks free before laboriously searching the freelist. */ if (freep->spare0 == WEIRD_ADDR) { fp = (struct freelist *)kbp->kb_next; while (fp) { if (fp->spare0 != WEIRD_ADDR) { printf("trashed free item %p\n", fp); panic("free: free item modified"); } else if (addr == (caddr_t)fp) { printf("multiple freed item %p\n", addr); panic("free: multiple free"); } fp = (struct freelist *)fp->next; } } /* * Copy in known text to detect modification after freeing * and to make it look free. Also, save the type being freed * so we can list likely culprit if modification is detected * when the object is reallocated. 
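* (Added note, not in the original source: the first word, spare0, is left * holding WEIRD_ADDR; malloc() zeroes it when it hands the chunk out, so * the quick check above can treat spare0 == WEIRD_ADDR as `this chunk * already looks free'.)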
*/ copysize = size < MAX_COPY ? size : MAX_COPY; end = (long *)&((caddr_t)addr)[copysize]; for (lp = (long *)addr; lp < end; lp++) *lp = WEIRD_ADDR; freep->type = type; #endif /* DIAGNOSTIC */ kup->ku_freecnt++; if (kup->ku_freecnt >= kbp->kb_elmpercl) if (kup->ku_freecnt > kbp->kb_elmpercl) panic("free: multiple frees"); else if (kbp->kb_totalfree > kbp->kb_highwat) kbp->kb_couldfree++; kbp->kb_totalfree++; ksp->ks_memuse -= size; if (ksp->ks_memuse + size >= ksp->ks_limit && ksp->ks_memuse < ksp->ks_limit) wakeup((caddr_t)ksp); ksp->ks_inuse--; #ifdef OLD_MALLOC_MEMORY_POLICY if (kbp->kb_next == NULL) kbp->kb_next = addr; else ((struct freelist *)kbp->kb_last)->next = addr; freep->next = NULL; kbp->kb_last = addr; #else /* * Return memory to the head of the queue for quick reuse. This * can improve performance by improving the probability of the * item being in the cache when it is reused. */ if (kbp->kb_next == NULL) { kbp->kb_next = addr; kbp->kb_last = addr; freep->next = NULL; } else { freep->next = kbp->kb_next; kbp->kb_next = addr; } #endif splx(s); } /* * Initialize the kernel memory allocator */ /* ARGSUSED*/ static void kmeminit(dummy) void *dummy; { register long indx; int npg; int mem_size; #if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0) #error "kmeminit: MAXALLOCSAVE not power of 2" #endif #if (MAXALLOCSAVE > MINALLOCSIZE * 32768) #error "kmeminit: MAXALLOCSAVE too big" #endif #if (MAXALLOCSAVE < PAGE_SIZE) #error "kmeminit: MAXALLOCSAVE too small" #endif /* * Try to auto-tune the kernel memory size, so that it is * more applicable for a wider range of machine sizes. * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while * a VM_KMEM_SIZE of 12MB is a fair compromise. The * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space * available, and on an X86 with a total KVA space of 256MB, * try to keep VM_KMEM_SIZE_MAX at 80MB or below. * * Note that the kmem_map is also used by the zone allocator, * so make sure that there is enough space. */ vm_kmem_size = VM_KMEM_SIZE; mem_size = cnt.v_page_count * PAGE_SIZE; #if defined(VM_KMEM_SIZE_SCALE) if ((mem_size / VM_KMEM_SIZE_SCALE) > vm_kmem_size) vm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE; #endif #if defined(VM_KMEM_SIZE_MAX) if (vm_kmem_size >= VM_KMEM_SIZE_MAX) vm_kmem_size = VM_KMEM_SIZE_MAX; #endif if (vm_kmem_size > 2 * (cnt.v_page_count * PAGE_SIZE)) vm_kmem_size = 2 * (cnt.v_page_count * PAGE_SIZE); npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + vm_kmem_size) / PAGE_SIZE; kmemusage = (struct kmemusage *) kmem_alloc(kernel_map, (vm_size_t)(npg * sizeof(struct kmemusage))); kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE)); kmem_map->system_map = 1; for (indx = 0; indx < MINBUCKET + 16; indx++) { if (1 << indx >= PAGE_SIZE) bucket[indx].kb_elmpercl = 1; else bucket[indx].kb_elmpercl = PAGE_SIZE / (1 << indx); bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl; } } static void malloc_init(type) struct malloc_type *type; { int npg; int mem_size; if (type->ks_magic != M_MAGIC) panic("malloc type lacks magic"); if (cnt.v_page_count == 0) panic("malloc_init not allowed before vm init"); /* * The default limits for each malloc region is 1/2 of the * malloc portion of the kmem map size. 
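* (Added note, not in the original source: vm_kmem_size was computed in * kmeminit() above, so every malloc type starts with the same ks_limit; * malloc() sleeps in its ks_memuse >= ks_limit loop until free() brings the * type back under the limit and issues a wakeup.)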
*/ type->ks_limit = vm_kmem_size / 2; type->ks_next = kmemstatistics; kmemstatistics = type; } Index: head/sys/kern/kern_shutdown.c =================================================================== --- head/sys/kern/kern_shutdown.c (revision 34265) +++ head/sys/kern/kern_shutdown.c (revision 34266) @@ -1,503 +1,513 @@ /*- * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 - * $Id: kern_shutdown.c,v 1.27 1997/11/25 07:07:43 julian Exp $ + * $Id: kern_shutdown.c,v 1.28 1998/02/16 23:57:44 eivind Exp $ */ #include "opt_ddb.h" #include "opt_hw_wdog.h" #include "opt_panic.h" #include "opt_show_busybufs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include /* smp_active, cpuid */ #endif #include #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. */ #include #ifdef DDB #ifdef DDB_UNATTENDED static int debugger_on_panic = 0; #else static int debugger_on_panic = 1; #endif SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW, &debugger_on_panic, 0, ""); #endif #ifdef HW_WDOG /* * If there is a hardware watchdog, point this at the function needed to * hold it off. * It's needed when the kernel needs to do some lengthy operations. * e.g. in wd.c when dumping core.. 
It's most annoying to have * your precious core-dump only half written because the wdog kicked in. */ watchdog_tickle_fn wdog_tickler = NULL; #endif /* HW_WDOG */ /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. */ const char *panicstr; /* * callout list for things to do at shutdown */ typedef struct shutdown_list_element { struct shutdown_list_element *next; bootlist_fn function; void *arg; } *sle_p; /* * there are two shutdown lists. Some things need to be shut down * earlier than others. */ static sle_p shutdown_list1; static sle_p shutdown_list2; static void boot __P((int)) __dead2; static void dumpsys __P((void)); #ifndef _SYS_SYSPROTO_H_ struct reboot_args { int opt; }; #endif /* ARGSUSED */ /* * The system call that results in a reboot */ int reboot(p, uap) struct proc *p; struct reboot_args *uap; { int error; if ((error = suser(p->p_ucred, &p->p_acflag))) return (error); boot(uap->opt); return (0); } /* * Called by events that want to shut down.. e.g. on a PC */ void shutdown_nice() { /* Send a signal to init(8) and have it shut down the world */ if (initproc != NULL) { psignal(initproc, SIGINT); } else { /* No init(8) running, so simply reboot */ boot(RB_NOSYNC); } return; } static int waittime = -1; static struct pcb dumppcb; /* * Go through the rigmarole of shutting down.. * this used to be in machdep.c but I'll be damned if I could see * anything machine dependent in it. */ static void boot(howto) int howto; { sle_p ep; #ifdef SMP int c, spins; /* The MPSPEC says that the BSP must do the shutdown */ if (smp_active) { smp_active = 0; spins = 100; printf("boot() called on cpu#%d\n", cpuid); while ((c = cpuid) != 0) { if (spins-- < 1) { printf("timeout waiting for cpu #0!\n"); break; } printf("I'm on cpu#%d, I need to be on cpu#0, sleeping..\n", c); tsleep((caddr_t)&smp_active, PZERO, "cpu0wt", 10); } } #endif /* * Do any callouts that should be done BEFORE syncing the filesystems. */ ep = shutdown_list1; while (ep) { shutdown_list1 = ep->next; (*ep->function)(howto, ep->arg); ep = ep->next; } /* * Now sync filesystems */ if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { register struct buf *bp; int iter, nbusy; waittime = 0; printf("\nsyncing disks... "); sync(&proc0, NULL); + /* + * With soft updates, some buffers that are + * written will be remarked as dirty until other + * buffers are written. + */ for (iter = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { nbusy++; + } else if ((bp->b_flags & (B_DELWRI | B_INVAL)) + == B_DELWRI) { + /* bawrite(bp);*/ + nbusy++; } } if (nbusy == 0) break; printf("%d ", nbusy); - DELAY(40000 * iter); + sync(&proc0, NULL); + DELAY(50000 * iter); } if (nbusy) { /* * Failed to sync all blocks. Indicate this and don't * unmount filesystems (thus forcing an fsck on reboot). */ printf("giving up\n"); #ifdef SHOW_BUSYBUFS nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { nbusy++; printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno); } } DELAY(5000000); /* 5 seconds */ #endif } else { printf("done\n"); /* * Unmount filesystems */ if (panicstr == 0) vfs_unmountall(); } DELAY(100000); /* wait for console output to finish */ } /* * Ok, now do things that assume all filesystem activity has * been completed.
*/ ep = shutdown_list2; while (ep) { shutdown_list2 = ep->next; (*ep->function)(howto, ep->arg); ep = ep->next; } splhigh(); if (howto & RB_HALT) { cpu_power_down(); printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); switch (cngetc()) { case -1: /* No console, just die */ cpu_halt(); /* NOTREACHED */ default: break; } } else { if (howto & RB_DUMP) { if (!cold) { savectx(&dumppcb); dumppcb.pcb_cr3 = rcr3(); dumpsys(); } if (PANIC_REBOOT_WAIT_TIME != 0) { if (PANIC_REBOOT_WAIT_TIME != -1) { int loop; printf("Automatic reboot in %d seconds - press a key on the console to abort\n", PANIC_REBOOT_WAIT_TIME); for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? */ if (cncheckc() != -1) break; } if (!loop) goto die; } } else { /* zero time specified - reboot NOW */ goto die; } printf("--> Press a key on the console to reboot <--\n"); cngetc(); } } die: printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); for(;;) ; /* NOTREACHED */ } /* * Magic number for savecore * * exported (symorder) and used at least by savecore(8) * */ static u_long const dumpmag = 0x8fca0101UL; static int dumpsize = 0; /* also for savecore */ static int dodump = 1; SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, ""); /* ARGSUSED */ static void dump_conf __P((void *dummy)); static void dump_conf(dummy) void *dummy; { cpu_dumpconf(); } SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL) /* * Doadump comes here after turning off memory management and * getting on the dump stack, either when called above, or by * the auto-restart code. */ static void dumpsys(void) { if (!dodump) return; if (dumpdev == NODEV) return; if ((minor(dumpdev)&07) != 1) return; if (!(bdevsw[major(dumpdev)])) return; if (!(bdevsw[major(dumpdev)]->d_dump)) return; dumpsize = Maxmem; printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo); printf("dump "); switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) { case ENXIO: printf("device bad\n"); break; case EFAULT: printf("device not ready\n"); break; case EINVAL: printf("area improper\n"); break; case EIO: printf("i/o error\n"); break; case EINTR: printf("aborted from console\n"); break; default: printf("succeeded\n"); break; } } /* * Panic is called on unresolvable fatal errors. It prints "panic: mesg", * and then reboots. If we are called twice, then we avoid trying to sync * the disks as this often leads to recursive panics. */ void panic(const char *fmt, ...) { int bootopt; va_list ap; bootopt = RB_AUTOBOOT | RB_DUMP; if (panicstr) bootopt |= RB_NOSYNC; else panicstr = fmt; printf("panic: "); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); #ifdef SMP /* three separate prints in case of an unmapped page and trap */ printf("mp_lock = %08x; ", mp_lock); printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif #if defined(DDB) if (debugger_on_panic) Debugger ("panic"); #endif boot(bootopt); } /* * Two routines to handle adding/deleting items on the * shutdown callout lists * * at_shutdown(): * Take the arguments given and put them onto the shutdown callout list. * However, first make sure that it's not already there. * returns 0 on success.
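 *
 * (An editor's usage sketch precedes the function below.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision: how a driver
 * might hook the pre-sync callout list described above.  mydev_stop
 * and mydev_attach are hypothetical names.
 */
static void
mydev_stop(int howto, void *arg)
{
	/* quiesce the (hypothetical) device before the disks are synced */
}

static int
mydev_attach(void *softc)
{
	/* boot() will call mydev_stop(howto, softc) before syncing */
	return (at_shutdown(mydev_stop, softc, SHUTDOWN_PRE_SYNC));
}
#endif
/*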
*/ int at_shutdown(bootlist_fn function, void *arg, int position) { sle_p ep, *epp; switch(position) { case SHUTDOWN_PRE_SYNC: epp = &shutdown_list1; break; case SHUTDOWN_POST_SYNC: epp = &shutdown_list2; break; default: printf("bad exit callout list specified\n"); return (EINVAL); } if (rm_at_shutdown(function, arg)) printf("exit callout entry already present\n"); ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); if (ep == NULL) return (ENOMEM); ep->next = *epp; ep->function = function; ep->arg = arg; *epp = ep; return (0); } /* * Scan the exit callout lists for the given items and remove them. * Returns the number of items removed. */ int rm_at_shutdown(bootlist_fn function, void *arg) { sle_p *epp, ep; int count; count = 0; epp = &shutdown_list1; ep = *epp; while (ep) { if ((ep->function == function) && (ep->arg == arg)) { *epp = ep->next; free(ep, M_TEMP); count++; } else { epp = &ep->next; } ep = *epp; } epp = &shutdown_list2; ep = *epp; while (ep) { if ((ep->function == function) && (ep->arg == arg)) { *epp = ep->next; free(ep, M_TEMP); count++; } else { epp = &ep->next; } ep = *epp; } return (count); } Index: head/sys/kern/kern_synch.c =================================================================== --- head/sys/kern/kern_synch.c (revision 34265) +++ head/sys/kern/kern_synch.c (revision 34266) @@ -1,752 +1,752 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 - * $Id: kern_synch.c,v 1.47 1998/02/25 06:04:46 bde Exp $ + * $Id: kern_synch.c,v 1.48 1998/03/04 10:25:55 dufault Exp $ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include /* for UCHAR_MAX = typeof(p_priority)_MAX */ static void rqinit __P((void *)); SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL) u_char curpriority; /* usrpri of curproc */ int lbolt; /* once a second sleep address */ static void endtsleep __P((void *)); static void roundrobin __P((void *arg)); static void schedcpu __P((void *arg)); static void updatepri __P((struct proc *p)); #define MAXIMUM_SCHEDULE_QUANTUM (1000000) /* arbitrary limit */ #ifndef DEFAULT_SCHEDULE_QUANTUM #define DEFAULT_SCHEDULE_QUANTUM 10 #endif static int quantum = DEFAULT_SCHEDULE_QUANTUM; /* default value */ static int sysctl_kern_quantum SYSCTL_HANDLER_ARGS { int error; int new_val = quantum; new_val = quantum; error = sysctl_handle_int(oidp, &new_val, 0, req); if (error == 0) { if ((new_val > 0) && (new_val < MAXIMUM_SCHEDULE_QUANTUM)) { quantum = new_val; } else { error = EINVAL; } } return (error); } SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof quantum, sysctl_kern_quantum, "I", ""); /* maybe_resched: Decide if you need to reschedule or not * taking the priorities and schedulers into account. */ static void maybe_resched(struct proc *chk) { struct proc *p = curproc; /* XXX */ if (p == 0 || ((chk->p_priority < curpriority) && ((RTP_PRIO_BASE(chk->p_rtprio.type) == RTP_PRIO_BASE(p->p_rtprio.type))))) need_resched(); } #define ROUNDROBIN_INTERVAL (hz / quantum) int roundrobin_interval(void) { return ROUNDROBIN_INTERVAL; } /* * Force switch among equal priority processes every 100ms. */ /* ARGSUSED */ static void roundrobin(arg) void *arg; { struct proc *p = curproc; /* XXX */ if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type)) need_resched(); timeout(roundrobin, NULL, ROUNDROBIN_INTERVAL); } /* * Constants for digital decay and forget: * 90% of (p_estcpu) usage in 5 * loadav time * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) * Note that, as ps(1) mentions, this can let percentages * total over 100% (I've seen 137.9% for 3 processes). * * Note that statclock() updates p_estcpu and p_cpticks asynchronously. * * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. * That is, the system wants to compute a value of decay such * that the following for loop: * for (i = 0; i < (5 * loadavg); i++) * p_estcpu *= decay; * will compute * p_estcpu *= 0.1; * for all values of loadavg: * * Mathematically this loop can be expressed by saying: * decay ** (5 * loadavg) ~= .1 * * The system computes decay as: * decay = (2 * loadavg) / (2 * loadavg + 1) * * We wish to prove that the system's computation of decay * will always fulfill the equation: * decay ** (5 * loadavg) ~= .1 * * If we compute b as: * b = 2 * loadavg * then * decay = b / (b + 1) * * We now need to prove two things: * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) * * Facts: * For x close to zero, exp(x) =~ 1 + x, since * exp(x) = 0! + x**1/1! + x**2/2! + ... . * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. * For x close to zero, ln(1+x) =~ x, since * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 
* ln(.1) =~ -2.30 * * Proof of (1): * Solve (factor)**(power) =~ .1 given power (5*loadav): * solving for factor, * ln(factor) =~ (-2.30/5*loadav), or * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED * * Proof of (2): * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): * solving for power, * power*ln(b/(b+1)) =~ -2.30, or * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED * * Actual power values for the implemented algorithm are as follows: * loadav: 1 2 3 4 * power: 5.68 10.32 14.94 19.55 */ /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ #define loadfactor(loadav) (2 * (loadav)) #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ /* * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). * * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). * * If you don't want to bother with the faster/more-accurate formula, you * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate * (more general) method of calculating the %age of CPU used by a process. */ #define CCPU_SHIFT 11 /* * Recompute process priorities, every hz ticks. */ /* ARGSUSED */ static void schedcpu(arg) void *arg; { register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); register struct proc *p; register int s; register unsigned int newcpu; - wakeup((caddr_t)&lbolt); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { /* * Increment time in/out of memory and sleep time * (if sleeping). We ignore overflow; with 16-bit int's * (remember them?) overflow takes 45 days. */ p->p_swtime++; if (p->p_stat == SSLEEP || p->p_stat == SSTOP) p->p_slptime++; p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; /* * If the process has slept the entire second, * stop recalculating its priority until it wakes up. */ if (p->p_slptime > 1) continue; s = splhigh(); /* prevent state changes and protect run queue */ /* * p_pctcpu is only for ps. */ #if (FSHIFT >= CCPU_SHIFT) p->p_pctcpu += (hz == 100)? ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 100 * (((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT)) / hz; #else p->p_pctcpu += ((FSCALE - ccpu) * (p->p_cpticks * FSCALE / hz)) >> FSHIFT; #endif p->p_cpticks = 0; newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice; p->p_estcpu = min(newcpu, UCHAR_MAX); resetpriority(p); if (p->p_priority >= PUSER) { #define PPQ (128 / NQS) /* priorities per queue */ if ((p != curproc) && #ifdef SMP (u_char)p->p_oncpu == 0xff && /* idle */ #endif p->p_stat == SRUN && (p->p_flag & P_INMEM) && (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { remrq(p); p->p_priority = p->p_usrpri; setrunqueue(p); } else p->p_priority = p->p_usrpri; } splx(s); } vmmeter(); + wakeup((caddr_t)&lbolt); timeout(schedcpu, (void *)0, hz); } /* * Recalculate the priority of a process after it has slept for a while. * For all load averages >= 1 and max p_estcpu of 255, sleeping for at * least six times the loadfactor will decay p_estcpu to zero. 
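 *
 * (An editor's worked example of the decay step follows.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision: one decay step as
 * used by schedcpu() and updatepri() above.  example_decay_once is a
 * hypothetical name.  With a load average of 1.0, loadfactor() yields
 * 2*FSCALE and decay_cpu() multiplies by 2/3, so an estcpu of 90
 * decays 60, 40, 26, 17, 11 -- close to the 90% decay over five
 * steps derived above.
 */
static u_int
example_decay_once(u_int estcpu, fixpt_t loadav)
{
	fixpt_t loadfac = loadfactor(loadav);

	return ((u_int)decay_cpu(loadfac, estcpu));
}
#endif
/*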
*/ static void updatepri(p) register struct proc *p; { register unsigned int newcpu = p->p_estcpu; register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); if (p->p_slptime > 5 * loadfac) p->p_estcpu = 0; else { p->p_slptime--; /* the first time was done in schedcpu */ while (newcpu && --p->p_slptime) newcpu = (int) decay_cpu(loadfac, newcpu); p->p_estcpu = min(newcpu, UCHAR_MAX); } resetpriority(p); } /* * We're only looking at 7 bits of the address; everything is * aligned to 4, lots of things are aligned to greater powers * of 2. Shift right by 8, i.e. drop the bottom 256 worth. */ #define TABLESIZE 128 static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE]; #define LOOKUP(x) (((long)(x) >> 8) & (TABLESIZE - 1)) /* * During autoconfiguration or after a panic, a sleep will simply * lower the priority briefly to allow interrupts, then return. * The priority to be used (safepri) is machine-dependent, thus this * value is initialized and maintained in the machine-dependent layers. * This priority will typically be 0, or the lowest priority * that is safe for use on the interrupt stack; it can be made * higher to block network software interrupts after panics. */ int safepri; void sleepinit() { int i; for (i = 0; i < TABLESIZE; i++) TAILQ_INIT(&slpque[i]); } /* * General sleep call. Suspends the current process until a wakeup is * performed on the specified identifier. The process will then be made * runnable with the specified priority. Sleeps at most timo/hz seconds * (0 means no timeout). If pri includes PCATCH flag, signals are checked * before and after sleeping, else signals are not checked. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal needs to be delivered, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). */ int tsleep(ident, priority, wmesg, timo) void *ident; int priority, timo; const char *wmesg; { struct proc *p = curproc; int s, sig, catch = priority & PCATCH; struct callout_handle thandle; #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 1, 0); #endif s = splhigh(); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, * just give interrupts a chance, then just return; * don't run any other procs or panic below, * in case this is the idle process and already asleep. */ splx(safepri); splx(s); return (0); } #ifdef DIAGNOSTIC if(p == NULL) panic("tsleep1"); if (ident == NULL || p->p_stat != SRUN) panic("tsleep"); /* XXX This is not exhaustive, just the most common case */ if ((p->p_procq.tqe_prev != NULL) && (*p->p_procq.tqe_prev == p)) panic("sleeping process already on another queue"); #endif p->p_wchan = ident; p->p_wmesg = wmesg; p->p_slptime = 0; p->p_priority = priority & PRIMASK; TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq); if (timo) thandle = timeout(endtsleep, (void *)p, timo); /* * We put ourselves on the sleep queue and start our timeout * before calling CURSIG, as we could stop there, and a wakeup * or a SIGCONT (or both) could occur while we were stopped. * A SIGCONT would cause us to be marked as SSLEEP * without resuming us, thus we must be ready for sleep * when CURSIG is called. If the wakeup happens while we're * stopped, p->p_wchan will be 0 upon return from CURSIG. 
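 *
 * (A minimal editor's usage sketch for the tsleep/wakeup pair
 * described above follows.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision.  example_flag and
 * the function names are hypothetical; the channel passed to tsleep()
 * and wakeup() is simply the address of the flag.
 */
static int example_flag;

static int
example_wait(void)
{
	int error;

	while (example_flag == 0) {
		/* catch signals, give up the CPU for at most one second */
		error = tsleep(&example_flag, PRIBIO | PCATCH, "exwait", hz);
		if (error == EWOULDBLOCK)
			continue;		/* timed out; test again */
		if (error)
			return (error);		/* EINTR or ERESTART */
	}
	return (0);
}

static void
example_post(void)
{
	example_flag = 1;
	wakeup(&example_flag);	/* make all sleepers on the channel runnable */
}
#endif
/*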
*/ if (catch) { p->p_flag |= P_SINTR; if ((sig = CURSIG(p))) { if (p->p_wchan) unsleep(p); p->p_stat = SRUN; goto resume; } if (p->p_wchan == 0) { catch = 0; goto resume; } } else sig = 0; p->p_stat = SSLEEP; p->p_stats->p_ru.ru_nvcsw++; mi_switch(); resume: curpriority = p->p_usrpri; splx(s); p->p_flag &= ~P_SINTR; if (p->p_flag & P_TIMEOUT) { p->p_flag &= ~P_TIMEOUT; if (sig == 0) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (EWOULDBLOCK); } } else if (timo) untimeout(endtsleep, (void *)p, thandle); if (catch && (sig != 0 || (sig = CURSIG(p)))) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif if (p->p_sigacts->ps_sigintr & sigmask(sig)) return (EINTR); return (ERESTART); } #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (0); } /* * Implement timeout for tsleep. * If process hasn't been awakened (wchan non-zero), * set timeout flag and undo the sleep. If proc * is stopped, just unsleep so it will remain stopped. */ static void endtsleep(arg) void *arg; { register struct proc *p; int s; p = (struct proc *)arg; s = splhigh(); if (p->p_wchan) { if (p->p_stat == SSLEEP) setrunnable(p); else unsleep(p); p->p_flag |= P_TIMEOUT; } splx(s); } /* * Remove a process from its wait queue */ void unsleep(p) register struct proc *p; { int s; s = splhigh(); if (p->p_wchan) { TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq); p->p_wchan = 0; } splx(s); } /* * Make all processes sleeping on the specified identifier runnable. */ void wakeup(ident) register void *ident; { register struct slpquehead *qp; register struct proc *p; int s; s = splhigh(); qp = &slpque[LOOKUP(ident)]; restart: for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) { #ifdef DIAGNOSTIC if (p->p_stat != SSLEEP && p->p_stat != SSTOP) panic("wakeup"); #endif if (p->p_wchan == ident) { TAILQ_REMOVE(qp, p, p_procq); p->p_wchan = 0; if (p->p_stat == SSLEEP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; p->p_stat = SRUN; if (p->p_flag & P_INMEM) { setrunqueue(p); maybe_resched(p); } else { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ goto restart; } } } splx(s); } /* * Make a process sleeping on the specified identifier runnable. * May wake more than one process if a target process is currently * swapped out. */ void wakeup_one(ident) register void *ident; { register struct slpquehead *qp; register struct proc *p; int s; s = splhigh(); qp = &slpque[LOOKUP(ident)]; for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) { #ifdef DIAGNOSTIC if (p->p_stat != SSLEEP && p->p_stat != SSTOP) panic("wakeup_one"); #endif if (p->p_wchan == ident) { TAILQ_REMOVE(qp, p, p_procq); p->p_wchan = 0; if (p->p_stat == SSLEEP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; p->p_stat = SRUN; if (p->p_flag & P_INMEM) { setrunqueue(p); maybe_resched(p); break; } else { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ } } } splx(s); } /* * The machine independent parts of mi_switch(). * Must be called at splstatclock() or higher. */ void mi_switch() { register struct proc *p = curproc; /* XXX */ register struct rlimit *rlim; register long s, u; int x; struct timeval tv; /* * XXX this spl is almost unnecessary. It is partly to allow for * sloppy callers that don't do it (issignal() via CURSIG() is the * main offender).
It is partly to work around a bug in the i386 * cpu_switch() (the ipl is not preserved). We ran for years * without it. I think there was only an interrupt latency problem. * The main caller, tsleep(), does an splx() a couple of instructions * after calling here. The buggy caller, issignal(), usually calls * here at spl0() and sometimes returns at splhigh(). The process * then runs for a little too long at splhigh(). The ipl gets fixed * when the process returns to user mode (or earlier). * * It would probably be better to always call here at spl0(). Callers * are prepared to give up control to another process, so they must * be prepared to be interrupted. The clock stuff here may not * actually need splstatclock(). */ x = splstatclock(); #ifdef SIMPLELOCK_DEBUG if (p->p_simple_locks) printf("sleep: holding simple lock\n"); #endif /* * Compute the amount of time during which the current * process was running, and add that to its total so far. */ microtime(&tv); u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec); s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec); if (u < 0) { u += 1000000; s--; } else if (u >= 1000000) { u -= 1000000; s++; } #ifdef SMP if (s < 0) s = u = 0; #endif p->p_rtime.tv_usec = u; p->p_rtime.tv_sec = s; /* * Check if the process exceeds its cpu resource allocation. * If over max, kill it. */ if (p->p_stat != SZOMB) { rlim = &p->p_rlimit[RLIMIT_CPU]; if (s >= rlim->rlim_cur) { if (s >= rlim->rlim_max) killproc(p, "exceeded maximum CPU limit"); else { psignal(p, SIGXCPU); if (rlim->rlim_cur < rlim->rlim_max) rlim->rlim_cur += 5; } } } /* * Pick a new current process and record its start time. */ cnt.v_swtch++; cpu_switch(p); microtime(&runtime); splx(x); } /* * Initialize the (doubly-linked) run queues * to be empty. */ /* ARGSUSED*/ static void rqinit(dummy) void *dummy; { register int i; for (i = 0; i < NQS; i++) { qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; rtqs[i].ph_link = rtqs[i].ph_rlink = (struct proc *)&rtqs[i]; idqs[i].ph_link = idqs[i].ph_rlink = (struct proc *)&idqs[i]; } } /* * Change process state to be runnable, * placing it on the run queue if it is in memory, * and awakening the swapper if it isn't in memory. */ void setrunnable(p) register struct proc *p; { register int s; s = splhigh(); switch (p->p_stat) { case 0: case SRUN: case SZOMB: default: panic("setrunnable"); case SSTOP: case SSLEEP: unsleep(p); /* e.g. when sending signals */ break; case SIDL: break; } p->p_stat = SRUN; if (p->p_flag & P_INMEM) setrunqueue(p); splx(s); if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; if ((p->p_flag & P_INMEM) == 0) { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } else maybe_resched(p); } /* * Compute the priority of a process when running in user mode. * Arrange to reschedule if the resulting priority is better * than that of the current process. */ void resetpriority(p) register struct proc *p; { register unsigned int newpriority; if (p->p_rtprio.type == RTP_PRIO_NORMAL) { newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; newpriority = min(newpriority, MAXPRI); p->p_usrpri = newpriority; } maybe_resched(p); } /* ARGSUSED */ static void sched_setup __P((void *dummy)); static void sched_setup(dummy) void *dummy; { /* Kick off timeout driven events by calling them for the first time.
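 * (The self-rearming pattern both callbacks use is sketched below.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision: the self-rearming
 * timeout pattern used by roundrobin() and schedcpu() above.
 * example_tick is a hypothetical name.
 */
static void
example_tick(void *arg)
{
	/* ... periodic work goes here ... */
	timeout(example_tick, arg, hz);		/* re-arm, one second out */
}
/* primed once at boot, e.g. example_tick(NULL); self-arming thereafter */
#endif
/*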
*/ roundrobin(NULL); schedcpu(NULL); } SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) Index: head/sys/kern/vfs_bio.c =================================================================== --- head/sys/kern/vfs_bio.c (revision 34265) +++ head/sys/kern/vfs_bio.c (revision 34266) @@ -1,2314 +1,2390 @@ /* * Copyright (c) 1994,1997 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * - * $Id: vfs_bio.c,v 1.153 1998/03/04 03:17:30 dyson Exp $ + * $Id: vfs_bio.c,v 1.154 1998/03/07 21:35:24 dyson Exp $ */ /* * this file contains a new buffer I/O scheme implementing a coherent * VM object and buffer cache scheme. Pains have been taken to make * sure that the performance degradation associated with schemes such * as this is not realized. * * Author: John S. Dyson * Significant help during the development and debugging phases * had been provided by David Greenman, also of the FreeBSD core team. */ #include "opt_bounce.h" #define VMIO #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer"); +struct bio_ops bioops; /* I/O operation notification */ + +#if 0 /* replaced by sched_sync */ static void vfs_update __P((void)); static struct proc *updateproc; static struct kproc_desc up_kp = { "update", vfs_update, &updateproc }; SYSINIT_KT(update, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) +#endif struct buf *buf; /* buffer header pool */ struct swqueue bswlist; static int count_lock_queue __P((void)); static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vfs_buf_set_valid(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m); static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m); static void vfs_clean_pages(struct buf * bp); static void vfs_setdirty(struct buf *bp); static void vfs_vmio_release(struct buf *bp); static void flushdirtybuffers(int slpflag, int slptimeo); int needsbuffer; /* * Internal update daemon, process 3 * The variable vfs_update_wakeup allows for internal syncs. */ int vfs_update_wakeup; /* * buffers base kva */ /* * bogus page -- for I/O to/from partially complete buffers * this is a temporary solution to the problem, but it is not * really that bad. it would be better to split the buffer * for input in the case of buffers partially already in memory, * but the code is intricate enough already.
*/ vm_page_t bogus_page; static vm_offset_t bogus_offset; static int bufspace, maxbufspace, vmiospace, maxvmiobufspace, bufmallocspace, maxbufmallocspace; int numdirtybuffers; static int lodirtybuffers, hidirtybuffers; static int numfreebuffers, lofreebuffers, hifreebuffers; static int kvafreespace; SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW, &maxbufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxvmiobufspace, CTLFLAG_RW, &maxvmiobufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, vmiospace, CTLFLAG_RD, &vmiospace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, kvafreespace, CTLFLAG_RD, &kvafreespace, 0, ""); static LIST_HEAD(bufhashhdr, buf) bufhashtbl[BUFHSZ], invalhash; struct bqueues bufqueues[BUFFER_QUEUES] = {0}; extern int vm_swap_size; #define BUF_MAXUSE 24 #define VFS_BIO_NEED_ANY 1 #define VFS_BIO_NEED_LOWLIMIT 2 #define VFS_BIO_NEED_FREE 4 /* * Initialize buffer headers and related structures. */ void bufinit() { struct buf *bp; int i; TAILQ_INIT(&bswlist); LIST_INIT(&invalhash); /* first, make a null hash table */ for (i = 0; i < BUFHSZ; i++) LIST_INIT(&bufhashtbl[i]); /* next, make a null set of free lists */ for (i = 0; i < BUFFER_QUEUES; i++) TAILQ_INIT(&bufqueues[i]); /* finally, initialize each buffer header and stick on empty q */ for (i = 0; i < nbuf; i++) { bp = &buf[i]; bzero(bp, sizeof *bp); bp->b_flags = B_INVAL; /* we're just an empty header */ bp->b_dev = NODEV; bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_EMPTY; bp->b_vnbufs.le_next = NOLIST; bp->b_generation = 0; + LIST_INIT(&bp->b_dep); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_INSERT_HEAD(&invalhash, bp, b_hash); } /* * maxbufspace is currently calculated to support all filesystem blocks * to be 8K. If you happen to use a 16K filesystem, the size of the buffer * cache is still the same as it would be for 8K filesystems. This * keeps the size of the buffer cache "in check" for big block filesystems. */ maxbufspace = (nbuf + 8) * DFLTBSIZE; /* * reserve 1/3 of the buffers for metadata (VDIR) which might not be VMIO'ed */ maxvmiobufspace = 2 * maxbufspace / 3; /* * Limit the amount of malloc memory since it is wired permanently into * the kernel space. Even though this is accounted for in the buffer * allocation, we don't want the malloced region to grow uncontrolled. * The malloc scheme improves memory utilization significantly on average * (small) directories. */ maxbufmallocspace = maxbufspace / 20; /* * Remove the probability of deadlock conditions by limiting the * number of dirty buffers. 
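 *
 * Editor's worked example (illustrative): with nbuf = 1024, the
 * defaults just below come out to hidirtybuffers = 148 and
 * lodirtybuffers = 74 -- flushing starts once roughly 14% of the
 * headers are dirty -- and lofreebuffers = 61, hifreebuffers = 122,
 * so allocators begin to wait when under about 6% remain free.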
*/ hidirtybuffers = nbuf / 8 + 20; lodirtybuffers = nbuf / 16 + 10; numdirtybuffers = 0; lofreebuffers = nbuf / 18 + 5; hifreebuffers = 2 * lofreebuffers; numfreebuffers = nbuf; kvafreespace = 0; bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE); bogus_page = vm_page_alloc(kernel_object, ((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); } /* * Free the kva allocation for a buffer * Must be called only at splbio or higher, * as this is the only locking for buffer_map. */ static void bfreekva(struct buf * bp) { if (bp->b_kvasize == 0) return; vm_map_delete(buffer_map, (vm_offset_t) bp->b_kvabase, (vm_offset_t) bp->b_kvabase + bp->b_kvasize); bp->b_kvasize = 0; } /* * remove the buffer from the appropriate free list */ void bremfree(struct buf * bp) { int s = splbio(); if (bp->b_qindex != QUEUE_NONE) { if (bp->b_qindex == QUEUE_EMPTY) { kvafreespace -= bp->b_kvasize; } TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); bp->b_qindex = QUEUE_NONE; } else { #if !defined(MAX_PERF) panic("bremfree: removing a buffer when not on a queue"); #endif } if ((bp->b_flags & B_INVAL) || (bp->b_flags & (B_DELWRI|B_LOCKED)) == 0) --numfreebuffers; splx(s); } /* * Get a buffer with the specified data. Look in the cache first. */ int bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred, struct buf ** bpp) { struct buf *bp; bp = getblk(vp, blkno, size, 0, 0); *bpp = bp; /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); return (biowait(bp)); } return (0); } /* * Operates like bread, but also starts asynchronous I/O on * read-ahead blocks. */ int breadn(struct vnode * vp, daddr_t blkno, int size, daddr_t * rablkno, int *rabsize, int cnt, struct ucred * cred, struct buf ** bpp) { struct buf *bp, *rabp; int i; int rv = 0, readwait = 0; *bpp = bp = getblk(vp, blkno, size, 0, 0); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); ++readwait; } for (i = 0; i < cnt; i++, rablkno++, rabsize++) { if (inmem(vp, *rablkno)) continue; rabp = getblk(vp, *rablkno, *rabsize, 0, 0); if ((rabp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; rabp->b_flags |= B_READ | B_ASYNC; rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (rabp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); rabp->b_rcred = cred; } vfs_busy_pages(rabp, 0); VOP_STRATEGY(rabp); } else { brelse(rabp); } } if (readwait) { rv = biowait(bp); } return (rv); } /* * Write, release buffer on completion. (Done by iodone * if async.) 
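 *
 * (Before the write path, an editor's usage sketch for the read
 * routines defined above is interposed here.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision: the conventional
 * bread()/brelse() pattern for the routines above.  example_get is a
 * hypothetical name.
 */
static int
example_get(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred)
{
	struct buf *bp;
	int error;

	error = bread(vp, blkno, size, cred, &bp);
	if (error) {
		brelse(bp);	/* a buffer is returned even on error */
		return (error);
	}
	/* ... consume bp->b_data ... */
	brelse(bp);		/* done; let the buffer be recycled */
	return (0);
}
#endif
/*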
*/ int bwrite(struct buf * bp) { int oldflags = bp->b_flags; + struct vnode *vp; + struct mount *mp; + if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } #if !defined(MAX_PERF) if (!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); #endif bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); bp->b_flags |= B_WRITEINPROG; if ((oldflags & B_DELWRI) == B_DELWRI) { --numdirtybuffers; reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; vfs_busy_pages(bp, 1); if (curproc != NULL) curproc->p_stats->p_ru.ru_oublock++; VOP_STRATEGY(bp); + /* + * Collect statistics on synchronous and asynchronous writes. + * Writes to block devices are charged to their associated + * filesystem (if any). + */ + if ((vp = bp->b_vp) != NULL) { + if (vp->v_type == VBLK) + mp = vp->v_specmountpoint; + else + mp = vp->v_mount; + if (mp != NULL) + if ((oldflags & B_ASYNC) == 0) + mp->mnt_stat.f_syncwrites++; + else + mp->mnt_stat.f_asyncwrites++; + } + if ((oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { reassignbuf(bp, bp->b_vp); } brelse(bp); return (rtval); } return (0); } inline void vfs_bio_need_satisfy(void) { ++numfreebuffers; if (!needsbuffer) return; if (numdirtybuffers < lodirtybuffers) { needsbuffer &= ~(VFS_BIO_NEED_ANY | VFS_BIO_NEED_LOWLIMIT); } else { needsbuffer &= ~VFS_BIO_NEED_ANY; } if (numfreebuffers >= hifreebuffers) { needsbuffer &= ~VFS_BIO_NEED_FREE; } wakeup(&needsbuffer); } /* * Delayed write. (Buffer is marked dirty). */ void bdwrite(struct buf * bp) { + int s; + struct vnode *vp; #if !defined(MAX_PERF) if ((bp->b_flags & B_BUSY) == 0) { panic("bdwrite: buffer is not busy"); } #endif if (bp->b_flags & B_INVAL) { brelse(bp); return; } if (bp->b_flags & B_TAPE) { bawrite(bp); return; } bp->b_flags &= ~(B_READ|B_RELBUF); if ((bp->b_flags & B_DELWRI) == 0) { bp->b_flags |= B_DONE | B_DELWRI; + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); ++numdirtybuffers; } /* * This bmap keeps the system from needing to do the bmap later, * perhaps when the system is attempting to do a sync. Since it * is likely that the indirect block -- or whatever other datastructure * that the filesystem needs is still in memory now, it is a good * thing to do this. Note also, that if the pageout daemon is * requesting a sync -- there might not be enough memory to do * the bmap then... So, this is important to do. */ if (bp->b_lblkno == bp->b_blkno) { VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } /* * Set the *dirty* buffer range based upon the VM system dirty pages. */ vfs_setdirty(bp); /* * We need to do this here to satisfy the vnode_pager and the * pageout daemon, so that it thinks that the pages have been * "cleaned". Note that since the pages are in a delayed write * buffer -- the VFS layer "will" see that the pages get written * out on the next sync, or perhaps the cluster will be completed. */ vfs_clean_pages(bp); bqrelse(bp); + /* + * XXX The soft dependency code is not prepared to + * have I/O done when a bdwrite is requested. For + * now we just let the write be delayed if it is + * requested by the soft dependency code. + */ + if ((vp = bp->b_vp) && + (vp->v_type == VBLK && vp->v_specmountpoint && + (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) || + (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))) + return; + if (numdirtybuffers >= hidirtybuffers) flushdirtybuffers(0, 0); return; } + /* + * Same as first half of bdwrite, mark buffer dirty, but do not release it. 
+ * Check how this compares with vfs_setdirty(); XXX [JRE] + */ +void +bdirty(bp) + struct buf *bp; +{ + int s; + + bp->b_flags &= ~(B_READ|B_RELBUF); /* XXX ??? check this */ + if ((bp->b_flags & B_DELWRI) == 0) { + bp->b_flags |= B_DONE | B_DELWRI; /* why done? XXX JRE */ + s = splbio(); + reassignbuf(bp, bp->b_vp); + splx(s); + ++numdirtybuffers; + } +} + +/* * Asynchronous write. * Start output on a buffer, but do not wait for it to complete. * The buffer is released when the output completes. */ void bawrite(struct buf * bp) { bp->b_flags |= B_ASYNC; (void) VOP_BWRITE(bp); } /* * Ordered write. * Start output on a buffer, but only wait for it to complete if the * output device cannot guarantee ordering in some other way. Devices * that can perform asynchronous ordered writes will set the B_ASYNC * flag in their strategy routine. * The buffer is released when the output completes. */ int bowrite(struct buf * bp) { /* * XXX Add in B_ASYNC once the SCSI * layer can deal with ordered * writes properly. */ bp->b_flags |= B_ORDERED; return (VOP_BWRITE(bp)); } /* * Release a buffer. */ void brelse(struct buf * bp) { int s; if (bp->b_flags & B_CLUSTER) { relpbuf(bp); return; } s = splbio(); /* anyone need this block? */ if (bp->b_flags & B_WANTED) { bp->b_flags &= ~(B_WANTED | B_AGE); wakeup(bp); } if (bp->b_flags & B_LOCKED) bp->b_flags &= ~B_ERROR; if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || (bp->b_bufsize <= 0)) { bp->b_flags |= B_INVAL; + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) + (*bioops.io_deallocate)(bp); if (bp->b_flags & B_DELWRI) --numdirtybuffers; bp->b_flags &= ~(B_DELWRI | B_CACHE); if ((bp->b_flags & B_VMIO) == 0) { if (bp->b_bufsize) allocbuf(bp, 0); if (bp->b_vp) brelvp(bp); } } /* * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer * constituted, so the B_INVAL flag is used to *invalidate* the buffer, * but the VM object is kept around. The B_NOCACHE flag is used to * invalidate the pages in the VM object. * * If the buffer is a partially filled NFS buffer, keep it * since invalidating it now will lose information. The valid * flags in the vm_pages have only DEV_BSIZE resolution but * the b_validoff, b_validend fields have byte resolution. * This can avoid unnecessary re-reads of the buffer. * XXX this seems to cause performance problems.
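 *
 * (An editor's sketch contrasting the write disciplines defined above
 * -- bwrite, bdwrite, bawrite -- is interposed below.)
 */
#if 0
	/*
	 * Editor's illustration, not part of this revision.  bp is a
	 * busy, dirty buffer owned by the caller and error is a local
	 * int; exactly one of the three disciplines would be chosen.
	 */
	error = bwrite(bp);	/* synchronous: start I/O, wait, release */
	bdwrite(bp);		/* delayed: mark B_DELWRI and keep it
				 * cached; written out later, e.g. at sync */
	bawrite(bp);		/* asynchronous: start I/O now; released
				 * by iodone when the write completes */
#endif
/*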
*/ if ((bp->b_flags & B_VMIO) && !(bp->b_vp->v_tag == VT_NFS && bp->b_vp->v_type != VBLK && (bp->b_flags & B_DELWRI) != 0) #ifdef notdef && (bp->b_vp->v_tag != VT_NFS || bp->b_vp->v_type == VBLK || (bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || bp->b_validend == 0 || (bp->b_validoff == 0 && bp->b_validend == bp->b_bufsize)) #endif ) { int i, j, resid; vm_page_t m; off_t foff; vm_pindex_t poff; vm_object_t obj; struct vnode *vp; int blksize; vp = bp->b_vp; if (vp->v_type == VBLK) blksize = DEV_BSIZE; else blksize = vp->v_mount->mnt_stat.f_iosize; resid = bp->b_bufsize; foff = -1LL; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if (m == bogus_page) { obj = (vm_object_t) vp->v_object; foff = (off_t) bp->b_lblkno * blksize; poff = OFF_TO_IDX(foff); for (j = i; j < bp->b_npages; j++) { m = bp->b_pages[j]; if (m == bogus_page) { m = vm_page_lookup(obj, poff + j); #if !defined(MAX_PERF) if (!m) { panic("brelse: page missing\n"); } #endif bp->b_pages[j] = m; } } if ((bp->b_flags & B_INVAL) == 0) { pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } break; } if (bp->b_flags & (B_NOCACHE|B_ERROR)) { if ((blksize & PAGE_MASK) == 0) { vm_page_set_invalid(m, 0, resid); } else { if (foff == -1LL) foff = (off_t) bp->b_lblkno * blksize; vm_page_set_invalid(m, (vm_offset_t) foff, resid); } } resid -= PAGE_SIZE; } if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } else if (bp->b_flags & B_VMIO) { if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_NONE) panic("brelse: free buffer onto another queue???"); #endif /* enqueue */ /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_flags |= B_INVAL; bp->b_qindex = QUEUE_EMPTY; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; kvafreespace += bp->b_kvasize; bp->b_generation++; /* buffers with junk contents */ } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) { bp->b_flags |= B_INVAL; bp->b_qindex = QUEUE_AGE; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; bp->b_generation++; /* buffers that are locked */ } else if (bp->b_flags & B_LOCKED) { bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); /* buffers with stale but valid contents */ } else if (bp->b_flags & B_AGE) { bp->b_qindex = QUEUE_AGE; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist); /* buffers with valid and quite potentially reusable contents */ } else { bp->b_qindex = QUEUE_LRU; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } if ((bp->b_flags & B_INVAL) || (bp->b_flags & (B_LOCKED|B_DELWRI)) == 0) { if (bp->b_flags & B_DELWRI) { --numdirtybuffers; bp->b_flags &= ~B_DELWRI; } vfs_bio_need_satisfy(); } /* unlock */ bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); splx(s); } /* * Release a buffer. */ void bqrelse(struct buf * bp) { int s; s = splbio(); /* anyone need this block?
*/ if (bp->b_flags & B_WANTED) { bp->b_flags &= ~(B_WANTED | B_AGE); wakeup(bp); } #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_NONE) panic("bqrelse: free buffer onto another queue???"); #endif if (bp->b_flags & B_LOCKED) { bp->b_flags &= ~B_ERROR; bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); /* buffers with stale but valid contents */ } else { bp->b_qindex = QUEUE_LRU; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } if ((bp->b_flags & (B_LOCKED|B_DELWRI)) == 0) { vfs_bio_need_satisfy(); } /* unlock */ bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); splx(s); } static void vfs_vmio_release(bp) struct buf *bp; { int i; vm_page_t m; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; vm_page_unwire(m); /* * We don't mess with busy pages, it is * the responsibility of the process that * busied the pages to deal with them. */ if ((m->flags & PG_BUSY) || (m->busy != 0)) continue; if (m->wire_count == 0) { /* * If this is an async free -- we cannot place * pages onto the cache queue. If it is an * async free, then we don't modify any queues. * This is probably in error (for perf reasons), * and we will eventually need to build * a more complete infrastructure to support I/O * rundown. */ if ((bp->b_flags & B_ASYNC) == 0) { /* * In the case of sync buffer frees, we can do pretty much * anything to any of the memory queues. Specifically, * the cache queue is okay to be modified. */ if (m->valid) { if(m->dirty == 0) vm_page_test_dirty(m); /* * this keeps pressure off of the process memory */ if (m->dirty == 0 && m->hold_count == 0) vm_page_cache(m); else vm_page_deactivate(m); } else if (m->hold_count == 0) { m->flags |= PG_BUSY; vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); } } else { /* * If async, then at least we clear the * act_count. */ m->act_count = 0; } } } bufspace -= bp->b_bufsize; vmiospace -= bp->b_bufsize; pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); bp->b_npages = 0; bp->b_bufsize = 0; bp->b_flags &= ~B_VMIO; if (bp->b_vp) brelvp(bp); } /* * Check to see if a block is currently memory resident. */ struct buf * gbincore(struct vnode * vp, daddr_t blkno) { struct buf *bp; struct bufhashhdr *bh; bh = BUFHASH(vp, blkno); bp = bh->lh_first; /* Search hash chain */ while (bp != NULL) { /* hit */ if (bp->b_vp == vp && bp->b_lblkno == blkno && (bp->b_flags & B_INVAL) == 0) { break; } bp = bp->b_hash.le_next; } return (bp); } /* * this routine implements clustered async writes for * clearing out B_DELWRI buffers... This is much better * than the old way of writing only one buffer at a time. 
*/ int vfs_bio_awrite(struct buf * bp) { int i; daddr_t lblkno = bp->b_lblkno; struct vnode *vp = bp->b_vp; int s; int ncl; struct buf *bpa; int nwritten; int size; int maxcl; s = splbio(); /* * right now we support clustered writing only to regular files */ if ((vp->v_type == VREG) && (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; for (i = 1; i < maxcl; i++) { if ((bpa = gbincore(vp, lblkno + i)) && ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) == (B_DELWRI | B_CLUSTEROK)) && (bpa->b_bufsize == size)) { if ((bpa->b_blkno == bpa->b_lblkno) || (bpa->b_blkno != bp->b_blkno + ((i * size) >> DEV_BSHIFT))) break; } else { break; } } ncl = i; /* * this is a possible cluster write */ if (ncl != 1) { nwritten = cluster_wbuild(vp, size, lblkno, ncl); splx(s); return nwritten; } } bremfree(bp); splx(s); /* * default (old) behavior, writing out only one block */ bp->b_flags |= B_BUSY | B_ASYNC; nwritten = bp->b_bufsize; (void) VOP_BWRITE(bp); return nwritten; } /* * Find a buffer header which is available for use. */ static struct buf * getnewbuf(struct vnode *vp, daddr_t blkno, int slpflag, int slptimeo, int size, int maxsize) { struct buf *bp, *bp1; int nbyteswritten = 0; vm_offset_t addr; static int writerecursion = 0; start: if (bufspace >= maxbufspace) goto trytofreespace; /* can we constitute a new buffer? */ if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]))) { #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_EMPTY) panic("getnewbuf: inconsistent EMPTY queue, qindex=%d", bp->b_qindex); #endif bp->b_flags |= B_BUSY; bremfree(bp); goto fillbuf; } trytofreespace: /* * We keep the file I/O from hogging metadata I/O * This is desirable because file data is cached in the * VM/Buffer cache even if a buffer is freed. */ if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]))) { #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_AGE) panic("getnewbuf: inconsistent AGE queue, qindex=%d", bp->b_qindex); #endif } else if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) { #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_LRU) panic("getnewbuf: inconsistent LRU queue, qindex=%d", bp->b_qindex); #endif } if (!bp) { /* wait for a free buffer of any kind */ needsbuffer |= VFS_BIO_NEED_ANY; do tsleep(&needsbuffer, (PRIBIO + 1) | slpflag, "newbuf", slptimeo); while (needsbuffer & VFS_BIO_NEED_ANY); return (0); } #if defined(DIAGNOSTIC) if (bp->b_flags & B_BUSY) { panic("getnewbuf: busy buffer on free list\n"); } #endif /* * We are fairly aggressive about freeing VMIO buffers, but since * the buffering is intact without buffer headers, there is not * much loss. We gain by maintaining non-VMIOed metadata in buffers. */ if ((bp->b_qindex == QUEUE_LRU) && (bp->b_usecount > 0)) { if ((bp->b_flags & B_VMIO) == 0 || (vmiospace < maxvmiobufspace)) { --bp->b_usecount; TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist); if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) { TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); goto start; } TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } } /* if we are a delayed write, convert to an async write */ if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { /* * If our delayed write is likely to be used soon, then * recycle back onto the LRU queue. 
*/ if (vp && (bp->b_vp == vp) && (bp->b_qindex == QUEUE_LRU) && (bp->b_lblkno >= blkno) && (maxsize > 0)) { if (bp->b_usecount > 0) { if (bp->b_lblkno < blkno + (MAXPHYS / maxsize)) { TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist); if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) { TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); bp->b_usecount--; goto start; } TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } } } /* * Certain layered filesystems can recursively re-enter the vfs_bio * code, due to delayed writes. This helps keep the system from * deadlocking. */ if (writerecursion > 0) { bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]); while (bp) { if ((bp->b_flags & B_DELWRI) == 0) break; bp = TAILQ_NEXT(bp, b_freelist); } if (bp == NULL) { bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]); while (bp) { if ((bp->b_flags & B_DELWRI) == 0) break; bp = TAILQ_NEXT(bp, b_freelist); } } if (bp == NULL) panic("getnewbuf: cannot get buffer, infinite recursion failure"); } else { ++writerecursion; nbyteswritten += vfs_bio_awrite(bp); --writerecursion; if (!slpflag && !slptimeo) { return (0); } goto start; } } if (bp->b_flags & B_WANTED) { bp->b_flags &= ~B_WANTED; wakeup(bp); } bremfree(bp); bp->b_flags |= B_BUSY; if (bp->b_flags & B_VMIO) { bp->b_flags &= ~B_ASYNC; vfs_vmio_release(bp); } if (bp->b_vp) brelvp(bp); fillbuf: bp->b_generation++; /* we are not free, nor do we contain interesting data */ if (bp->b_rcred != NOCRED) { crfree(bp->b_rcred); bp->b_rcred = NOCRED; } if (bp->b_wcred != NOCRED) { crfree(bp->b_wcred); bp->b_wcred = NOCRED; } + if (LIST_FIRST(&bp->b_dep) != NULL && + bioops.io_deallocate) + (*bioops.io_deallocate)(bp); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); if (bp->b_bufsize) { allocbuf(bp, 0); } bp->b_flags = B_BUSY; bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; bp->b_npages = 0; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_validoff = bp->b_validend = 0; bp->b_usecount = 5; + /* Here, not kern_physio.c, is where this should be done*/ + LIST_INIT(&bp->b_dep); maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK; /* * we assume that buffer_map is not at address 0 */ addr = 0; if (maxsize != bp->b_kvasize) { bfreekva(bp); findkvaspace: /* * See if we have buffer kva space */ if (vm_map_findspace(buffer_map, vm_map_min(buffer_map), maxsize, &addr)) { if (kvafreespace > 0) { int totfree = 0, freed; do { freed = 0; for (bp1 = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); bp1 != NULL; bp1 = TAILQ_NEXT(bp1, b_freelist)) { if (bp1->b_kvasize != 0) { totfree += bp1->b_kvasize; freed = bp1->b_kvasize; bremfree(bp1); bfreekva(bp1); brelse(bp1); break; } } } while (freed); /* * if we found free space, then retry with the same buffer. */ if (totfree) goto findkvaspace; } bp->b_flags |= B_INVAL; brelse(bp); goto trytofreespace; } } /* * See if we are below our allocated minimum */ if (bufspace >= (maxbufspace + nbyteswritten)) { bp->b_flags |= B_INVAL; brelse(bp); goto trytofreespace; } /* * create a map entry for the buffer -- in essence * reserving the kva space.
*/ if (addr) { vm_map_insert(buffer_map, NULL, 0, addr, addr + maxsize, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); bp->b_kvabase = (caddr_t) addr; bp->b_kvasize = maxsize; } bp->b_data = bp->b_kvabase; return (bp); } static void waitfreebuffers(int slpflag, int slptimeo) { while (numfreebuffers < hifreebuffers) { flushdirtybuffers(slpflag, slptimeo); if (numfreebuffers < hifreebuffers) break; needsbuffer |= VFS_BIO_NEED_FREE; if (tsleep(&needsbuffer, PRIBIO|slpflag, "biofre", slptimeo)) break; } } static void flushdirtybuffers(int slpflag, int slptimeo) { int s; static pid_t flushing = 0; s = splbio(); if (flushing) { if (flushing == curproc->p_pid) { splx(s); return; } while (flushing) { if (tsleep(&flushing, PRIBIO|slpflag, "biofls", slptimeo)) { splx(s); return; } } } flushing = curproc->p_pid; while (numdirtybuffers > lodirtybuffers) { struct buf *bp; needsbuffer |= VFS_BIO_NEED_LOWLIMIT; bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]); if (bp == NULL) bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]); while (bp && ((bp->b_flags & B_DELWRI) == 0)) { bp = TAILQ_NEXT(bp, b_freelist); } if (bp) { vfs_bio_awrite(bp); continue; } break; } flushing = 0; wakeup(&flushing); splx(s); } /* * Check to see if a block is currently memory resident. */ struct buf * incore(struct vnode * vp, daddr_t blkno) { struct buf *bp; int s = splbio(); bp = gbincore(vp, blkno); splx(s); return (bp); } /* * Returns true if no I/O is needed to access the * associated VM object. This is like incore except * it also hunts around in the VM system for the data. */ int inmem(struct vnode * vp, daddr_t blkno) { vm_object_t obj; vm_offset_t toff, tinc; vm_page_t m; vm_ooffset_t off; if (incore(vp, blkno)) return 1; if (vp->v_mount == NULL) return 0; if ((vp->v_object == NULL) || (vp->v_flag & VOBJBUF) == 0) return 0; obj = vp->v_object; tinc = PAGE_SIZE; if (tinc > vp->v_mount->mnt_stat.f_iosize) tinc = vp->v_mount->mnt_stat.f_iosize; off = blkno * vp->v_mount->mnt_stat.f_iosize; for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { m = vm_page_lookup(obj, OFF_TO_IDX(off + toff)); if (!m) return 0; if (vm_page_is_valid(m, (vm_offset_t) (toff + off), tinc) == 0) return 0; } return 1; } /* * now we set the dirty range for the buffer -- * for NFS -- if the file is mapped and pages have * been written to, let it know. We want the * entire range of the buffer to be marked dirty if * any of the pages have been written to for consistency * with the b_validoff, b_validend set in the nfs write * code, and used by the nfs read code. */ static void vfs_setdirty(struct buf *bp) { int i; vm_object_t object; vm_offset_t boffset, offset; /* * We qualify the scan for modified pages on whether the * object has been flushed yet. The OBJ_WRITEABLE flag * is not cleared simply by protecting pages off. */ if ((bp->b_flags & B_VMIO) && ((object = bp->b_pages[0]->object)->flags & (OBJ_WRITEABLE|OBJ_CLEANING))) { /* * test the pages to see if they have been modified directly * by users through the VM system.
*/ for (i = 0; i < bp->b_npages; i++) vm_page_test_dirty(bp->b_pages[i]); /* * scan forwards for the first page modified */ for (i = 0; i < bp->b_npages; i++) { if (bp->b_pages[i]->dirty) { break; } } boffset = (i << PAGE_SHIFT); if (boffset < bp->b_dirtyoff) { bp->b_dirtyoff = boffset; } /* * scan backwards for the last page modified */ for (i = bp->b_npages - 1; i >= 0; --i) { if (bp->b_pages[i]->dirty) { break; } } boffset = (i + 1); offset = boffset + bp->b_pages[0]->pindex; if (offset >= object->size) boffset = object->size - bp->b_pages[0]->pindex; if (bp->b_dirtyend < (boffset << PAGE_SHIFT)) bp->b_dirtyend = (boffset << PAGE_SHIFT); } } /* * Get a block given a specified block and offset into a file/device. */ struct buf * getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo) { struct buf *bp; int i, s; struct bufhashhdr *bh; int maxsize; int generation; if (vp->v_mount) { maxsize = vp->v_mount->mnt_stat.f_iosize; /* * This happens on mount points. */ if (maxsize < size) maxsize = size; } else { maxsize = size; } #if !defined(MAX_PERF) if (size > MAXBSIZE) panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE); #endif s = splbio(); loop: if (numfreebuffers < lofreebuffers) { waitfreebuffers(slpflag, slptimeo); } if ((bp = gbincore(vp, blkno))) { loop1: generation = bp->b_generation; if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; if (bp->b_usecount < BUF_MAXUSE) ++bp->b_usecount; if (!tsleep(bp, (PRIBIO + 1) | slpflag, "getblk", slptimeo)) { if (bp->b_generation != generation) goto loop; goto loop1; } else { splx(s); return (struct buf *) NULL; } } bp->b_flags |= B_BUSY | B_CACHE; bremfree(bp); /* * check for size inconsistencies (note that they shouldn't * happen but do when filesystems don't handle the size changes * correctly.) We are conservative on metadata and don't just * extend the buffer but write and re-constitute it. */ if (bp->b_bcount != size) { bp->b_generation++; if ((bp->b_flags & B_VMIO) && (size <= bp->b_kvasize)) { allocbuf(bp, size); } else { bp->b_flags |= B_NOCACHE; VOP_BWRITE(bp); goto loop; } } if (bp->b_usecount < BUF_MAXUSE) ++bp->b_usecount; splx(s); return (bp); } else { vm_object_t obj; if ((bp = getnewbuf(vp, blkno, slpflag, slptimeo, size, maxsize)) == 0) { if (slpflag || slptimeo) { splx(s); return NULL; } goto loop; } /* * This code is used to make sure that a buffer is not * created while the getnewbuf routine is blocked. * Normally the vnode is locked so this isn't a problem. * VBLK type I/O requests, however, don't lock the vnode. */ if (!VOP_ISLOCKED(vp) && gbincore(vp, blkno)) { bp->b_flags |= B_INVAL; brelse(bp); goto loop; } /* * Insert the buffer into the hash, so that it can * be found by incore. */ bp->b_blkno = bp->b_lblkno = blkno; bgetvp(vp, bp); LIST_REMOVE(bp, b_hash); bh = BUFHASH(vp, blkno); LIST_INSERT_HEAD(bh, bp, b_hash); if ((obj = vp->v_object) && (vp->v_flag & VOBJBUF)) { bp->b_flags |= (B_VMIO | B_CACHE); #if defined(VFS_BIO_DEBUG) if (vp->v_type != VREG && vp->v_type != VBLK) printf("getblk: vmioing file type %d???\n", vp->v_type); #endif } else { bp->b_flags &= ~B_VMIO; } allocbuf(bp, size); splx(s); #ifdef PC98 /* * 1024byte/sector support */ #define B_XXX2 0x8000000 if (vp->v_flag & 0x10000) bp->b_flags |= B_XXX2; #endif return (bp); } } /* * Get an empty, disassociated buffer of given size.
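 * The buffer comes back marked B_INVAL, so a later brelse() discards * it rather than caching its (meaningless) contents.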
*/ struct buf * geteblk(int size) { struct buf *bp; int s; s = splbio(); while ((bp = getnewbuf(0, (daddr_t) 0, 0, 0, size, MAXBSIZE)) == 0); splx(s); allocbuf(bp, size); bp->b_flags |= B_INVAL; return (bp); } /* * This code constitutes the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated * VM object (in the case of VMIO operations). * * Note that this code is tricky, and has many complications to resolve * deadlock or inconsistent data situations. Tread lightly!!! * * Modify the length of a buffer's underlying buffer storage without * destroying information (unless, of course the buffer is shrinking). */ int allocbuf(struct buf * bp, int size) { int s; int newbsize, mbsize; int i; #if !defined(MAX_PERF) if (!(bp->b_flags & B_BUSY)) panic("allocbuf: buffer not busy"); if (bp->b_kvasize < size) panic("allocbuf: buffer too small"); #endif if ((bp->b_flags & B_VMIO) == 0) { caddr_t origbuf; int origbufsize; /* * Just get anonymous memory from the kernel */ mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); #if !defined(NO_B_MALLOC) if (bp->b_flags & B_MALLOC) newbsize = mbsize; else #endif newbsize = round_page(size); if (newbsize < bp->b_bufsize) { #if !defined(NO_B_MALLOC) /* * malloced buffers are not shrunk */ if (bp->b_flags & B_MALLOC) { if (newbsize) { bp->b_bcount = size; } else { free(bp->b_data, M_BIOBUF); bufspace -= bp->b_bufsize; bufmallocspace -= bp->b_bufsize; bp->b_data = bp->b_kvabase; bp->b_bufsize = 0; bp->b_bcount = 0; bp->b_flags &= ~B_MALLOC; } return 1; } #endif vm_hold_free_pages( bp, (vm_offset_t) bp->b_data + newbsize, (vm_offset_t) bp->b_data + bp->b_bufsize); } else if (newbsize > bp->b_bufsize) { #if !defined(NO_B_MALLOC) /* * We only use malloced memory on the first allocation, * and revert to page-allocated memory when the buffer grows. */ if ( (bufmallocspace < maxbufmallocspace) && (bp->b_bufsize == 0) && (mbsize <= PAGE_SIZE/2)) { bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK); bp->b_bufsize = mbsize; bp->b_bcount = size; bp->b_flags |= B_MALLOC; bufspace += mbsize; bufmallocspace += mbsize; return 1; } #endif origbuf = NULL; origbufsize = 0; #if !defined(NO_B_MALLOC) /* * If the buffer is growing on its other-than-first allocation, * then we revert to the page-allocation scheme.
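 * For example (assuming 4K pages): a buffer first allocated at 1K lives * in malloced memory, since 1024 <= PAGE_SIZE/2 and malloc space permits; * growing it to 8K takes this path, which switches to wired pages and * bcopy()s the old 1K of data over.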
*/ if (bp->b_flags & B_MALLOC) { origbuf = bp->b_data; origbufsize = bp->b_bufsize; bp->b_data = bp->b_kvabase; bufspace -= bp->b_bufsize; bufmallocspace -= bp->b_bufsize; bp->b_bufsize = 0; bp->b_flags &= ~B_MALLOC; newbsize = round_page(newbsize); } #endif vm_hold_load_pages( bp, (vm_offset_t) bp->b_data + bp->b_bufsize, (vm_offset_t) bp->b_data + newbsize); #if !defined(NO_B_MALLOC) if (origbuf) { bcopy(origbuf, bp->b_data, origbufsize); free(origbuf, M_BIOBUF); } #endif } } else { vm_page_t m; int desiredpages; newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); desiredpages = (round_page(newbsize) >> PAGE_SHIFT); #if !defined(NO_B_MALLOC) if (bp->b_flags & B_MALLOC) panic("allocbuf: VMIO buffer can't be malloced"); #endif if (newbsize < bp->b_bufsize) { if (desiredpages < bp->b_npages) { for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it * is the responsibility of vnode_pager_setsize */ m = bp->b_pages[i]; #if defined(DIAGNOSTIC) if (m == bogus_page) panic("allocbuf: bogus page found"); #endif vm_page_sleep(m, "biodep", &m->busy); bp->b_pages[i] = NULL; vm_page_unwire(m); } pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); bp->b_npages = desiredpages; } } else if (newbsize > bp->b_bufsize) { vm_object_t obj; vm_offset_t tinc, toff; vm_ooffset_t off; vm_pindex_t objoff; int pageindex, curbpnpages; struct vnode *vp; int bsize; int orig_validoff = bp->b_validoff; int orig_validend = bp->b_validend; vp = bp->b_vp; if (vp->v_type == VBLK) bsize = DEV_BSIZE; else bsize = vp->v_mount->mnt_stat.f_iosize; if (bp->b_npages < desiredpages) { obj = vp->v_object; tinc = PAGE_SIZE; if (tinc > bsize) tinc = bsize; off = (vm_ooffset_t) bp->b_lblkno * bsize; curbpnpages = bp->b_npages; doretry: bp->b_validoff = orig_validoff; bp->b_validend = orig_validend; bp->b_flags |= B_CACHE; for (toff = 0; toff < newbsize; toff += tinc) { int bytesinpage; pageindex = toff >> PAGE_SHIFT; objoff = OFF_TO_IDX(off + toff); if (pageindex < curbpnpages) { m = bp->b_pages[pageindex]; #ifdef VFS_BIO_DIAG if (m->pindex != objoff) panic("allocbuf: page changed offset??!!!?"); #endif bytesinpage = tinc; if (tinc > (newbsize - toff)) bytesinpage = newbsize - toff; if (bp->b_flags & B_CACHE) vfs_buf_set_valid(bp, off, toff, bytesinpage, m); continue; } m = vm_page_lookup(obj, objoff); if (!m) { m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL); if (!m) { VM_WAIT; vm_pageout_deficit += (desiredpages - bp->b_npages); goto doretry; } vm_page_wire(m); m->flags &= ~PG_BUSY; bp->b_flags &= ~B_CACHE; } else if (m->flags & PG_BUSY) { s = splvm(); if (m->flags & PG_BUSY) { m->flags |= PG_WANTED; tsleep(m, PVM, "pgtblk", 0); } splx(s); goto doretry; } else { if ((curproc != pageproc) && ((m->queue - m->pc) == PQ_CACHE) && ((cnt.v_free_count + cnt.v_cache_count) < (cnt.v_free_min + cnt.v_cache_min))) { pagedaemon_wakeup(); } bytesinpage = tinc; if (tinc > (newbsize - toff)) bytesinpage = newbsize - toff; if (bp->b_flags & B_CACHE) vfs_buf_set_valid(bp, off, toff, bytesinpage, m); vm_page_wire(m); } bp->b_pages[pageindex] = m; curbpnpages = pageindex + 1; } if (vp->v_tag == VT_NFS && vp->v_type != VBLK) { if (bp->b_dirtyend > 0) { bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); bp->b_validend = max(bp->b_validend, bp->b_dirtyend); } if (bp->b_validend == 0) bp->b_flags &= ~B_CACHE; } bp->b_data = (caddr_t) trunc_page(bp->b_data); bp->b_npages = curbpnpages; pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages); 
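/* * The wired pages gathered above are now entered into the buffer's kva * mapping, so b_data addresses them directly; the sub-page offset bits * are folded back into b_data immediately below. */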
((vm_offset_t) bp->b_data) |= off & PAGE_MASK; } } } if (bp->b_flags & B_VMIO) vmiospace += (newbsize - bp->b_bufsize); bufspace += (newbsize - bp->b_bufsize); bp->b_bufsize = newbsize; bp->b_bcount = size; return 1; } /* * Wait for buffer I/O completion, returning error status. */ int biowait(register struct buf * bp) { int s; s = splbio(); while ((bp->b_flags & B_DONE) == 0) #if defined(NO_SCHEDULE_MODS) tsleep(bp, PRIBIO, "biowait", 0); #else if (bp->b_flags & B_READ) tsleep(bp, PRIBIO, "biord", 0); else tsleep(bp, curproc->p_usrpri, "biowr", 0); #endif splx(s); if (bp->b_flags & B_EINTR) { bp->b_flags &= ~B_EINTR; return (EINTR); } if (bp->b_flags & B_ERROR) { return (bp->b_error ? bp->b_error : EIO); } else { return (0); } } /* * Finish I/O on a buffer, calling an optional function. * This is usually called from interrupt level, so process blocking * is not *a good idea*. */ void biodone(register struct buf * bp) { int s; s = splbio(); #if !defined(MAX_PERF) if (!(bp->b_flags & B_BUSY)) panic("biodone: buffer not busy"); #endif if (bp->b_flags & B_DONE) { splx(s); #if !defined(MAX_PERF) printf("biodone: buffer already done\n"); #endif return; } bp->b_flags |= B_DONE; if ((bp->b_flags & B_READ) == 0) { vwakeup(bp); } #ifdef BOUNCE_BUFFERS if (bp->b_flags & B_BOUNCE) vm_bounce_free(bp); #endif /* call optional completion function if requested */ if (bp->b_flags & B_CALL) { bp->b_flags &= ~B_CALL; (*bp->b_iodone) (bp); splx(s); return; } + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete) + (*bioops.io_complete)(bp); + if (bp->b_flags & B_VMIO) { int i, resid; vm_ooffset_t foff; vm_page_t m; vm_object_t obj; int iosize; struct vnode *vp = bp->b_vp; obj = vp->v_object; #if defined(VFS_BIO_DEBUG) if (vp->v_usecount == 0) { panic("biodone: zero vnode ref count"); } if (vp->v_object == NULL) { panic("biodone: missing VM object"); } if ((vp->v_flag & VOBJBUF) == 0) { panic("biodone: vnode is not setup for merged cache"); } #endif if (vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; #if !defined(MAX_PERF) if (!obj) { panic("biodone: no object"); } #endif #if defined(VFS_BIO_DEBUG) if (obj->paging_in_progress < bp->b_npages) { printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", obj->paging_in_progress, bp->b_npages); } #endif iosize = bp->b_bufsize; for (i = 0; i < bp->b_npages; i++) { int bogusflag = 0; m = bp->b_pages[i]; if (m == bogus_page) { bogusflag = 1; m = vm_page_lookup(obj, OFF_TO_IDX(foff)); if (!m) { #if defined(VFS_BIO_DEBUG) printf("biodone: page disappeared\n"); #endif --obj->paging_in_progress; continue; } bp->b_pages[i] = m; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } #if defined(VFS_BIO_DEBUG) if (OFF_TO_IDX(foff) != m->pindex) { printf("biodone: foff(%d)/m->pindex(%d) mismatch\n", foff, m->pindex); } #endif resid = IDX_TO_OFF(m->pindex + 1) - foff; if (resid > iosize) resid = iosize; /* * In the write case, the valid and clean bits are * already changed correctly, so we only need to do this * here in the read case. */ if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { vfs_page_set_valid(bp, foff, i, m); } /* * when debugging new filesystems or buffer I/O methods, this * is the most common error that pops up. if you see this, you * have not set the page busy flag correctly!!! 
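 * (The busy count was raised when the I/O was started -- by * vfs_busy_pages, or by the clustering code for B_CLUSTER buffers -- * and PAGE_BWAKEUP below drops it; a zero count here means the start * path never busied the page.)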
*/ if (m->busy == 0) { #if !defined(MAX_PERF) printf("biodone: page busy < 0, " "pindex: %d, foff: 0x(%x,%x), " "resid: %d, index: %d\n", (int) m->pindex, (int)(foff >> 32), (int) foff & 0xffffffff, resid, i); #endif if (vp->v_type != VBLK) #if !defined(MAX_PERF) printf(" iosize: %ld, lblkno: %d, flags: 0x%lx, npages: %d\n", bp->b_vp->v_mount->mnt_stat.f_iosize, (int) bp->b_lblkno, bp->b_flags, bp->b_npages); else printf(" VDEV, lblkno: %d, flags: 0x%lx, npages: %d\n", (int) bp->b_lblkno, bp->b_flags, bp->b_npages); printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n", m->valid, m->dirty, m->wire_count); #endif panic("biodone: page busy < 0\n"); } PAGE_BWAKEUP(m); --obj->paging_in_progress; foff += resid; iosize -= resid; } if (obj && (obj->paging_in_progress == 0) && (obj->flags & OBJ_PIPWNT)) { obj->flags &= ~OBJ_PIPWNT; wakeup(obj); } } /* * For asynchronous completions, release the buffer now. The brelse * checks for B_WANTED and will do the wakeup there if necessary - so * no need to do a wakeup here in the async case. */ if (bp->b_flags & B_ASYNC) { if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_RELBUF)) != 0) brelse(bp); else bqrelse(bp); } else { bp->b_flags &= ~B_WANTED; wakeup(bp); } splx(s); } static int count_lock_queue() { int count; struct buf *bp; count = 0; for (bp = TAILQ_FIRST(&bufqueues[QUEUE_LOCKED]); bp != NULL; bp = TAILQ_NEXT(bp, b_freelist)) count++; return (count); } +#if 0 /* not with kirks code */ static int vfs_update_interval = 30; static void vfs_update() { while (1) { tsleep(&vfs_update_wakeup, PUSER, "update", hz * vfs_update_interval); vfs_update_wakeup = 0; sync(curproc, NULL); } } static int sysctl_kern_updateinterval SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) wakeup(&vfs_update_wakeup); return error; } SYSCTL_PROC(_kern, KERN_UPDATEINTERVAL, update, CTLTYPE_INT|CTLFLAG_RW, &vfs_update_interval, 0, sysctl_kern_updateinterval, "I", ""); + +#endif /* * This routine is called in lieu of iodone in the case of * incomplete I/O. This keeps the busy status for pages * consistent. */ void vfs_unbusy_pages(struct buf * bp) { int i; if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; vm_object_t obj = vp->v_object; vm_ooffset_t foff; foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; if (m == bogus_page) { m = vm_page_lookup(obj, OFF_TO_IDX(foff) + i); #if !defined(MAX_PERF) if (!m) { panic("vfs_unbusy_pages: page missing\n"); } #endif bp->b_pages[i] = m; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } --obj->paging_in_progress; PAGE_BWAKEUP(m); } if (obj->paging_in_progress == 0 && (obj->flags & OBJ_PIPWNT)) { obj->flags &= ~OBJ_PIPWNT; wakeup(obj); } } } /* * Set NFS' b_validoff and b_validend fields from the valid bits * of a page. If the consumer is not NFS, and the page is not * valid for the entire range, clear the B_CACHE flag to force * the consumer to re-read the page. */ static void vfs_buf_set_valid(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m) { if (bp->b_vp->v_tag == VT_NFS && bp->b_vp->v_type != VBLK) { vm_offset_t svalid, evalid; int validbits = m->valid; /* * This only bothers with the first valid range in the * page.
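 * A worked example, assuming DEV_BSIZE is 512: with m->valid == 0x3c * the two clear low bits are skipped, giving svalid = off + 1024 and * evalid = off + 3072; a second valid range, if any, is ignored.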
*/ svalid = off; while (validbits && !(validbits & 1)) { svalid += DEV_BSIZE; validbits >>= 1; } evalid = svalid; while (validbits & 1) { evalid += DEV_BSIZE; validbits >>= 1; } /* * Make sure this range is contiguous with the range * built up from previous pages. If not, then we will * just use the range from the previous pages. */ if (svalid == bp->b_validend) { bp->b_validoff = min(bp->b_validoff, svalid); bp->b_validend = max(bp->b_validend, evalid); } } else if (!vm_page_is_valid(m, (vm_offset_t) ((foff + off) & PAGE_MASK), size)) { bp->b_flags &= ~B_CACHE; } } /* * Set the valid bits in a page, taking care of the b_validoff, * b_validend fields which NFS uses to optimise small reads. Off is * the offset within the file and pageno is the page index within the buf. */ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m) { struct vnode *vp = bp->b_vp; vm_ooffset_t soff, eoff; soff = off; eoff = off + min(PAGE_SIZE, bp->b_bufsize); vm_page_set_invalid(m, (vm_offset_t) (soff & PAGE_MASK), (vm_offset_t) (eoff - soff)); if (vp->v_tag == VT_NFS && vp->v_type != VBLK) { vm_ooffset_t sv, ev; off = off - pageno * PAGE_SIZE; sv = off + ((bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1)); ev = off + ((bp->b_validend + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1)); soff = max(sv, soff); eoff = min(ev, eoff); } if (eoff > soff) vm_page_set_validclean(m, (vm_offset_t) (soff & PAGE_MASK), (vm_offset_t) (eoff - soff)); } /* * This routine is called before a device strategy routine. * It is used to tell the VM system that paging I/O is in * progress, and treat the pages associated with the buffer * almost as being PG_BUSY. Also the object paging_in_progress * flag is handled to make sure that the object doesn't become * inconsistent. */ void vfs_busy_pages(struct buf * bp, int clear_modify) { int i,s; if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; vm_object_t obj = vp->v_object; vm_ooffset_t foff; if (vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; vfs_setdirty(bp); retry: for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; if (vm_page_sleep(m, "vbpage", NULL)) goto retry; } for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) { vm_page_t m = bp->b_pages[i]; if ((bp->b_flags & B_CLUSTER) == 0) { obj->paging_in_progress++; m->busy++; } vm_page_protect(m, VM_PROT_NONE); if (clear_modify) vfs_page_set_valid(bp, foff, i, m); else if (bp->b_bcount >= PAGE_SIZE) { if (m->valid && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } } } } } /* * Tell the VM system that the pages associated with this buffer * are clean. This is used for delayed writes where the data is * going to go to disk eventually without additional VM intervention.
*/ void vfs_clean_pages(struct buf * bp) { int i; if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; vm_ooffset_t foff; if (vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) { vm_page_t m = bp->b_pages[i]; vfs_page_set_valid(bp, foff, i, m); } } } void vfs_bio_clrbuf(struct buf *bp) { int i; if( bp->b_flags & B_VMIO) { if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) { int mask; mask = 0; for(i=0;i<bp->b_bufsize;i+=DEV_BSIZE) mask |= (1 << (i/DEV_BSIZE)); if( bp->b_pages[0]->valid != mask) { bzero(bp->b_data, bp->b_bufsize); } bp->b_pages[0]->valid = mask; bp->b_resid = 0; return; } for(i=0;i<bp->b_npages;i++) { if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL) continue; if( bp->b_pages[i]->valid == 0) { if ((bp->b_pages[i]->flags & PG_ZERO) == 0) { bzero(bp->b_data + (i << PAGE_SHIFT), PAGE_SIZE); } } else { int j; for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) { if( (bp->b_pages[i]->valid & (1<<j)) == 0) bzero(bp->b_data + (i << PAGE_SHIFT) + j * DEV_BSIZE, DEV_BSIZE); } } /* bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; */ } bp->b_resid = 0; } else { clrbuf(bp); } } /* * vm_hold_load_pages and vm_hold_free_pages get pages into * a buffer's address space. The pages are anonymous and are * not associated with a file object. */ void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; to = round_page(to); from = round_page(from); index = (from - trunc_page(bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { tryagain: p = vm_page_alloc(kernel_object, ((pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); if (!p) { vm_pageout_deficit += (to - from) >> PAGE_SHIFT; VM_WAIT; goto tryagain; } vm_page_wire(p); p->valid = VM_PAGE_BITS_ALL; pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); bp->b_pages[index] = p; PAGE_WAKEUP(p); } bp->b_npages = index; } void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index, newnpages; from = round_page(from); to = round_page(to); newnpages = index = (from - trunc_page(bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { p = bp->b_pages[index]; if (p && (index < bp->b_npages)) { #if !defined(MAX_PERF) if (p->busy) { printf("vm_hold_free_pages: blkno: %d, lblkno: %d\n", bp->b_blkno, bp->b_lblkno); } #endif bp->b_pages[index] = NULL; pmap_kremove(pg); p->flags |= PG_BUSY; vm_page_unwire(p); vm_page_free(p); } } bp->b_npages = newnpages; } #include "opt_ddb.h" #ifdef DDB #include <ddb/ddb.h> DB_SHOW_COMMAND(buffer, db_show_buffer) { /* get args */ struct buf *bp = (struct buf *)addr; if (!have_addr) { db_printf("usage: show buffer <addr>\n"); return; } db_printf("b_proc = %p,\nb_flags = 0x%b\n", (void *)bp->b_proc, bp->b_flags, "\20\40bounce\37cluster\36vmio\35ram\34ordered" "\33paging\32xxx\31writeinprog\30wanted\27relbuf\26tape" "\25read\24raw\23phys\22clusterok\21malloc\20nocache" "\17locked\16inval\15gathered\14error\13eintr\12done\11dirty" "\10delwri\7call\6cache\5busy\4bad\3async\2needcommit\1age"); db_printf("b_error = %d, b_bufsize = %ld, b_bcount = %ld, " "b_resid = %ld\nb_dev = 0x%x, b_data = %p, " "b_blkno = %d, b_pblkno = %d\n", bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid, bp->b_dev, bp->b_data, bp->b_blkno, bp->b_pblkno); if (bp->b_npages) { int i; db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages); for (i = 0; i < bp->b_npages; i++) { vm_page_t m; m = bp->b_pages[i]; db_printf("(0x%x, 0x%x, 0x%x)", 
m->object, m->pindex, VM_PAGE_TO_PHYS(m)); if ((i + 1) < bp->b_npages) db_printf(","); } db_printf("\n"); } } #endif /* DDB */ Index: head/sys/kern/vfs_cluster.c =================================================================== --- head/sys/kern/vfs_cluster.c (revision 34265) +++ head/sys/kern/vfs_cluster.c (revision 34266) @@ -1,796 +1,801 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * Modifications/enhancements: * Copyright (c) 1995 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 - * $Id: vfs_cluster.c,v 1.55 1998/02/06 12:13:30 eivind Exp $ + * $Id: vfs_cluster.c,v 1.56 1998/03/07 21:35:28 dyson Exp $ */ #include "opt_debug_cluster.h" #include #include #include #include #include #include #include #include #include #include #include #if defined(CLUSTERDEBUG) #include #include static int rcluster= 0; SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, ""); #endif #ifdef notyet_block_reallocation_enabled static struct cluster_save * cluster_collectbufs __P((struct vnode *vp, struct buf *last_bp)); #endif static struct buf * cluster_rbuild __P((struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, struct buf *fbp)); extern vm_page_t bogus_page; /* * Maximum number of blocks for read-ahead. */ #define MAXRA 32 /* * This replaces bread. */ int cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) struct vnode *vp; u_quad_t filesize; daddr_t lblkno; long size; struct ucred *cred; long totread; int seqcount; struct buf **bpp; { struct buf *bp, *rbp, *reqbp; daddr_t blkno, origblkno; int error, num_ra; int i; int maxra, racluster; long origtotread; error = 0; if (vp->v_maxio == 0) vp->v_maxio = DFLTPHYS; /* * Try to limit the amount of read-ahead by a few * ad-hoc parameters. This needs work!!! 
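 * For example, assuming vp->v_maxio is DFLTPHYS (64K) and 8K blocks: * racluster is 8, so a 64K read yields maxra = 2 * 8 + 8 = 24, further * clipped by MAXRA (32) and by nbuf/8.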
*/ racluster = vp->v_maxio/size; maxra = 2 * racluster + (totread / size); if (maxra > MAXRA) maxra = MAXRA; if (maxra > nbuf/8) maxra = nbuf/8; /* * get the requested block */ *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0); origblkno = lblkno; origtotread = totread; /* * if it is in the cache, then check to see if the reads have been * sequential. If they have, then try some read-ahead, otherwise * back-off on prospective read-aheads. */ if (bp->b_flags & B_CACHE) { if (!seqcount) { return 0; } else if ((bp->b_flags & B_RAM) == 0) { return 0; } else { int s; struct buf *tbp; bp->b_flags &= ~B_RAM; /* * We do the spl here so that there is no window * between the incore and the b_usecount increment * below. We opt to keep the spl out of the loop * for efficiency. */ s = splbio(); for(i=1;i<maxra;i++) { if (!(tbp = incore(vp, lblkno+i))) { break; } /* * Set another read-ahead mark so we know to check * again. */ if (((i % racluster) == (racluster - 1)) || (i == (maxra - 1))) tbp->b_flags |= B_RAM; if ((tbp->b_usecount < 5) && ((tbp->b_flags & B_BUSY) == 0) && (tbp->b_qindex == QUEUE_LRU)) { TAILQ_REMOVE(&bufqueues[QUEUE_LRU], tbp, b_freelist); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], tbp, b_freelist); } } splx(s); if (i >= maxra) { return 0; } lblkno += i; } reqbp = bp = NULL; } else { u_quad_t firstread; firstread = (u_quad_t) lblkno * size; if (firstread + totread > filesize) totread = filesize - firstread; if (totread > size) { int nblks = 0; int ncontigafter; while (totread > 0) { nblks++; totread -= size; } if (nblks == 1) goto single_block_read; if (nblks > racluster) nblks = racluster; error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontigafter, NULL); if (error) goto single_block_read; if (blkno == -1) goto single_block_read; if (ncontigafter == 0) goto single_block_read; if (ncontigafter + 1 < nblks) nblks = ncontigafter + 1; bp = cluster_rbuild(vp, filesize, lblkno, blkno, size, nblks, bp); lblkno += nblks; } else { single_block_read: /* * if it isn't in the cache, then get a chunk from * disk if sequential, otherwise just get the block. */ bp->b_flags |= B_READ | B_RAM; lblkno += 1; } } /* * if we have been doing sequential I/O, then do some read-ahead */ rbp = NULL; if (seqcount && (lblkno < (origblkno + seqcount))) { /* * we now build the read-ahead buffer if it is desirable.
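 * The amount is bounded by what VOP_BMAP reports as contiguous (num_ra) * and by the observed sequentiality: e.g. a 20K original request with 8K * blocks gives nblksread = 3, so seqcount is raised to at least 3 and * ntoread is clipped to seqcount.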
*/ if (((u_quad_t)(lblkno + 1) * size) <= filesize && !(error = VOP_BMAP(vp, lblkno, NULL, &blkno, &num_ra, NULL)) && blkno != -1) { int nblksread; int ntoread = num_ra + 1; nblksread = (origtotread + size - 1) / size; if (seqcount < nblksread) seqcount = nblksread; if (seqcount < ntoread) ntoread = seqcount; if (num_ra) { rbp = cluster_rbuild(vp, filesize, lblkno, blkno, size, ntoread, NULL); } else { rbp = getblk(vp, lblkno, size, 0, 0); rbp->b_flags |= B_READ | B_ASYNC | B_RAM; rbp->b_blkno = blkno; } } } /* * handle the synchronous read */ if (bp) { if (bp->b_flags & (B_DONE | B_DELWRI)) { panic("cluster_read: DONE bp"); } else { #if defined(CLUSTERDEBUG) if (rcluster) printf("S(%d,%d,%d) ", bp->b_lblkno, bp->b_bcount, seqcount); #endif if ((bp->b_flags & B_CLUSTER) == 0) vfs_busy_pages(bp, 0); error = VOP_STRATEGY(bp); curproc->p_stats->p_ru.ru_inblock++; } } /* * and if we have read-aheads, do them too */ if (rbp) { if (error) { rbp->b_flags &= ~(B_ASYNC | B_READ); brelse(rbp); } else if (rbp->b_flags & B_CACHE) { rbp->b_flags &= ~(B_ASYNC | B_READ); bqrelse(rbp); } else { #if defined(CLUSTERDEBUG) if (rcluster) { if (bp) printf("A+(%d,%d,%d,%d) ", rbp->b_lblkno, rbp->b_bcount, rbp->b_lblkno - origblkno, seqcount); else printf("A(%d,%d,%d,%d) ", rbp->b_lblkno, rbp->b_bcount, rbp->b_lblkno - origblkno, seqcount); } #endif if ((rbp->b_flags & B_CLUSTER) == 0) vfs_busy_pages(rbp, 0); (void) VOP_STRATEGY(rbp); curproc->p_stats->p_ru.ru_inblock++; } } if (reqbp) return (biowait(reqbp)); else return (error); } /* * If blocks are contiguous on disk, use this to provide clustered * read ahead. We will read as many blocks as possible sequentially * and then parcel them up into logical blocks in the buffer hash table. */ static struct buf * cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) struct vnode *vp; u_quad_t filesize; daddr_t lbn; daddr_t blkno; long size; int run; struct buf *fbp; { struct buf *bp, *tbp; daddr_t bn; int i, inc, j; #ifdef DIAGNOSTIC if (size != vp->v_mount->mnt_stat.f_iosize) panic("cluster_rbuild: size %d != filesize %d\n", size, vp->v_mount->mnt_stat.f_iosize); #endif /* * avoid a division */ while ((u_quad_t) size * (lbn + run) > filesize) { --run; } if (fbp) { tbp = fbp; tbp->b_flags |= B_READ; } else { tbp = getblk(vp, lbn, size, 0, 0); if (tbp->b_flags & B_CACHE) return tbp; tbp->b_flags |= B_ASYNC | B_READ | B_RAM; } tbp->b_blkno = blkno; if( (tbp->b_flags & B_MALLOC) || ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) ) return tbp; bp = trypbuf(); if (bp == 0) return tbp; (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK; bp->b_flags = B_ASYNC | B_READ | B_CALL | B_BUSY | B_CLUSTER | B_VMIO; bp->b_iodone = cluster_callback; bp->b_blkno = blkno; bp->b_lblkno = lbn; pbgetvp(vp, bp); TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; if (vp->v_maxio == 0) vp->v_maxio = DFLTPHYS; inc = btodb(size); for (bn = blkno, i = 0; i < run; ++i, bn += inc) { if (i != 0) { if ((bp->b_npages * PAGE_SIZE) + round_page(size) > vp->v_maxio) break; if (incore(vp, lbn + i)) break; tbp = getblk(vp, lbn + i, size, 0, 0); if ((tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { bqrelse(tbp); break; } for (j=0;j<tbp->b_npages;j++) { if (tbp->b_pages[j]->valid) { break; } } if (j != tbp->b_npages) { /* * force buffer to be re-constituted later */ tbp->b_flags |= B_RELBUF; brelse(tbp); break; } if ((fbp && (i == 1)) || (i == (run - 1))) tbp->b_flags |= B_RAM; tbp->b_flags |= B_READ | B_ASYNC; if (tbp->b_blkno == 
tbp->b_lblkno) { tbp->b_blkno = bn; } else if (tbp->b_blkno != bn) { brelse(tbp); break; } } + /* check for latent dependencies to be handled */ + if ((LIST_FIRST(&tbp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; ++m->busy; ++m->object->paging_in_progress; if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages-1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) tbp->b_pages[j] = bogus_page; } bp->b_bcount += tbp->b_bcount; bp->b_bufsize += tbp->b_bufsize; } for(j=0;j<bp->b_npages;j++) { if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) bp->b_pages[j] = bogus_page; } if (bp->b_bufsize > bp->b_kvasize) panic("cluster_rbuild: b_bufsize(%d) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); bp->b_kvasize = bp->b_bufsize; pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); return (bp); } /* * Cleanup after a clustered read or write. * This is complicated by the fact that any of the buffers might have * extra memory (if there were no empty buffer headers at allocbuf time) * that we will need to shift around. */ void cluster_callback(bp) struct buf *bp; { struct buf *nbp, *tbp; int error = 0; /* * Must propagate errors to all the components. */ if (bp->b_flags & B_ERROR) error = bp->b_error; pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); /* * Move memory from the large cluster buffer into the component * buffers and mark IO as done on these. */ for (tbp = TAILQ_FIRST(&bp->b_cluster.cluster_head); tbp; tbp = nbp) { nbp = TAILQ_NEXT(&tbp->b_cluster, cluster_entry); if (error) { tbp->b_flags |= B_ERROR; tbp->b_error = error; } else tbp->b_dirtyoff = tbp->b_dirtyend = 0; biodone(tbp); } relpbuf(bp); } /* * Do clustered write for FFS. * * Three cases: * 1. Write is not sequential (write asynchronously) * Write is sequential: * 2. beginning of cluster - begin cluster * 3. middle of a cluster - add to cluster * 4. end of a cluster - asynchronously write cluster */ void cluster_write(bp, filesize) struct buf *bp; u_quad_t filesize; { struct vnode *vp; daddr_t lbn; int maxclen, cursize; int lblocksize; int async; vp = bp->b_vp; if (vp->v_maxio == 0) vp->v_maxio = DFLTPHYS; if (vp->v_type == VREG) { async = vp->v_mount->mnt_flag & MNT_ASYNC; lblocksize = vp->v_mount->mnt_stat.f_iosize; } else { async = 0; lblocksize = bp->b_bufsize; } lbn = bp->b_lblkno; /* Initialize vnode to beginning of file. */ if (lbn == 0) vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) { maxclen = vp->v_maxio / lblocksize - 1; if (vp->v_clen != 0) { /* * Next block is not sequential. * * If we are not writing at end of file, the process * sought to another point in the file since its last * write, or we have reached our maximum cluster size, * then push the previous cluster. Otherwise try * reallocating to make it sequential.
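 * Sequentiality is judged both logically and physically: the block must * be v_lastw + 1 and its disk address must follow the previous one by * btodb(lblocksize) sectors -- 16 for an 8K block with 512 byte sectors.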
*/ cursize = vp->v_lastw - vp->v_cstart + 1; #ifndef notyet_block_reallocation_enabled if (((u_quad_t)(lbn + 1) * lblocksize) != filesize || lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { if (!async) cluster_wbuild(vp, lblocksize, vp->v_cstart, cursize); } #else if ((lbn + 1) * lblocksize != filesize || lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { if (!async) cluster_wbuild(vp, lblocksize, vp->v_cstart, cursize); } else { struct buf **bpp, **endbp; struct cluster_save *buflist; buflist = cluster_collectbufs(vp, bp); endbp = &buflist->bs_children [buflist->bs_nchildren - 1]; if (VOP_REALLOCBLKS(vp, buflist)) { /* * Failed, push the previous cluster. */ for (bpp = buflist->bs_children; bpp < endbp; bpp++) brelse(*bpp); free(buflist, M_SEGMENT); cluster_wbuild(vp, lblocksize, vp->v_cstart, cursize); } else { /* * Succeeded, keep building cluster. */ for (bpp = buflist->bs_children; bpp <= endbp; bpp++) bdwrite(*bpp); free(buflist, M_SEGMENT); vp->v_lastw = lbn; vp->v_lasta = bp->b_blkno; return; } } #endif /* notyet_block_reallocation_enabled */ } /* * Consider beginning a cluster. If at end of file, make * cluster as large as possible, otherwise find size of * existing cluster. */ if ((vp->v_type == VREG) && ((u_quad_t) (lbn + 1) * lblocksize) != filesize && (bp->b_blkno == bp->b_lblkno) && (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) || bp->b_blkno == -1)) { bawrite(bp); vp->v_clen = 0; vp->v_lasta = bp->b_blkno; vp->v_cstart = lbn + 1; vp->v_lastw = lbn; return; } vp->v_clen = maxclen; if (!async && maxclen == 0) { /* I/O not contiguous */ vp->v_cstart = lbn + 1; bawrite(bp); } else { /* Wait for rest of cluster */ vp->v_cstart = lbn; bdwrite(bp); } } else if (lbn == vp->v_cstart + vp->v_clen) { /* * At end of cluster, write it out. */ bdwrite(bp); cluster_wbuild(vp, lblocksize, vp->v_cstart, vp->v_clen + 1); vp->v_clen = 0; vp->v_cstart = lbn + 1; } else /* * In the middle of a cluster, so just delay the I/O for now. */ bdwrite(bp); vp->v_lastw = lbn; vp->v_lasta = bp->b_blkno; } /* * This is an awful lot like cluster_rbuild...wish they could be combined. * The last lbn argument is the current block on which I/O is being * performed. Check to see that it doesn't fall in the middle of * the current block (if last_bp == NULL). */ int cluster_wbuild(vp, size, start_lbn, len) struct vnode *vp; long size; daddr_t start_lbn; int len; { struct buf *bp, *tbp; int i, j, s; int totalwritten = 0; int dbsize = btodb(size); while (len > 0) { s = splbio(); if (((tbp = gbincore(vp, start_lbn)) == NULL) || ((tbp->b_flags & (B_INVAL|B_BUSY|B_DELWRI)) != B_DELWRI)) { ++start_lbn; --len; splx(s); continue; } bremfree(tbp); tbp->b_flags |= B_BUSY; tbp->b_flags &= ~B_DONE; splx(s); /* * Extra memory in the buffer, punt on this buffer. XXX we could * handle this in most cases, but we would have to push the extra * memory down to after our max possible cluster size and then * potentially pull it back up if the cluster was terminated * prematurely--too much hassle. 
*/ if (((tbp->b_flags & (B_CLUSTEROK|B_MALLOC)) != B_CLUSTEROK) || (tbp->b_bcount != tbp->b_bufsize) || (tbp->b_bcount != size) || len == 1) { totalwritten += tbp->b_bufsize; bawrite(tbp); ++start_lbn; --len; continue; } bp = trypbuf(); if (bp == NULL) { totalwritten += tbp->b_bufsize; bawrite(tbp); ++start_lbn; --len; continue; } TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; if (tbp->b_wcred != NOCRED) { bp->b_wcred = tbp->b_wcred; crhold(bp->b_wcred); } bp->b_blkno = tbp->b_blkno; bp->b_lblkno = tbp->b_lblkno; (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK; bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER | (tbp->b_flags & (B_VMIO|B_NEEDCOMMIT)); bp->b_iodone = cluster_callback; pbgetvp(vp, bp); - for (i = 0; i < len; ++i, ++start_lbn) { if (i != 0) { s = splbio(); if ((tbp = gbincore(vp, start_lbn)) == NULL) { splx(s); break; } if ((tbp->b_flags & (B_VMIO|B_CLUSTEROK|B_INVAL|B_BUSY|B_DELWRI|B_NEEDCOMMIT)) != (B_DELWRI|B_CLUSTEROK|(bp->b_flags & (B_VMIO|B_NEEDCOMMIT)))) { splx(s); break; } if (tbp->b_wcred != bp->b_wcred) { splx(s); break; } if ((tbp->b_bcount != size) || ((bp->b_blkno + dbsize * i) != tbp->b_blkno) || ((tbp->b_npages + bp->b_npages) > (vp->v_maxio / PAGE_SIZE))) { splx(s); break; } bremfree(tbp); tbp->b_flags |= B_BUSY; tbp->b_flags &= ~B_DONE; splx(s); } - + /* check for latent dependencies to be handled */ + if ((LIST_FIRST(&tbp->b_dep)) != NULL && + bioops.io_start) + (*bioops.io_start)(tbp); if (tbp->b_flags & B_VMIO) { vm_page_t m; if (i != 0) { for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; if (m->flags & PG_BUSY) goto finishcluster; } } for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; ++m->busy; ++m->object->paging_in_progress; if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages - 1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } } } bp->b_bcount += size; bp->b_bufsize += size; --numdirtybuffers; tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); tbp->b_flags |= B_ASYNC; s = splbio(); reassignbuf(tbp, tbp->b_vp); /* put on clean list */ ++tbp->b_vp->v_numoutput; splx(s); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); } finishcluster: pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *) bp->b_pages, bp->b_npages); if (bp->b_bufsize > bp->b_kvasize) panic("cluster_wbuild: b_bufsize(%d) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); bp->b_kvasize = bp->b_bufsize; totalwritten += bp->b_bufsize; bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bufsize; bawrite(bp); len -= i; } return totalwritten; } #ifdef notyet_block_reallocation_enabled /* * Collect together all the buffers in a cluster. * Plus add one additional buffer. 
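 * The resulting list has len + 1 entries: the len delayed-write buffers * from v_cstart through v_lastw, plus last_bp itself at the end.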
*/ static struct cluster_save * cluster_collectbufs(vp, last_bp) struct vnode *vp; struct buf *last_bp; { struct cluster_save *buflist; daddr_t lbn; int i, len; len = vp->v_lastw - vp->v_cstart + 1; buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), M_SEGMENT, M_WAITOK); buflist->bs_nchildren = 0; buflist->bs_children = (struct buf **) (buflist + 1); for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) (void) bread(vp, lbn, last_bp->b_bcount, NOCRED, &buflist->bs_children[i]); buflist->bs_children[i] = last_bp; buflist->bs_nchildren = i + 1; return (buflist); } #endif /* notyet_block_reallocation_enabled */ Index: head/sys/kern/vfs_export.c =================================================================== --- head/sys/kern/vfs_export.c (revision 34265) +++ head/sys/kern/vfs_export.c (revision 34266) @@ -1,2328 +1,2674 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.136 1998/03/01 23:07:45 dyson Exp $ + * $Id: vfs_subr.c,v 1.137 1998/03/07 21:35:35 dyson Exp $ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); static void insmntque __P((struct vnode *vp, struct mount *mp)); #ifdef DDB static void printlockedvnodes __P((void)); #endif static void vclean __P((struct vnode *vp, int flags, struct proc *p)); static void vfree __P((struct vnode *)); static void vgonel __P((struct vnode *vp, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; /* * Insq/Remq for the vnode usage lists. */ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) #define bufremvn(bp) { \ LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ struct tobefreelist vnode_tobefree_list; /* vnode free list */ static u_long wantfreevnodes = 25; SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); int vfs_ioopt = 0; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); struct mntlist mountlist; /* mounted filesystem list */ struct simplelock mountlist_slock; static struct simplelock mntid_slock; struct simplelock mntvnode_slock; static struct simplelock vnode_free_list_slock; static struct simplelock spechash_slock; struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; +/* + * The workitem queue. + */ +#define SYNCER_MAXDELAY 32 +int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ +time_t syncdelay = 30; +int rushjob; /* number of slots to run ASAP */ + +static int syncer_delayno = 0; +static long syncer_mask; +LIST_HEAD(synclist, vnode); +static struct synclist *syncer_workitem_pending; + int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + cnt.v_page_count / 4; simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&vnode_tobefree_list); simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); + /* + * Initialize the filesystem syncer. + */ + syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_maxdelay = syncer_mask + 1; } /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. 
*/ int vfs_busy(mp, flags, interlkp, p) struct mount *mp; int flags; struct simplelock *interlkp; struct proc *p; { int lkflags; if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { simple_unlock(interlkp); } /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { simple_lock(interlkp); } return (ENOENT); } lkflags = LK_SHARED | LK_NOPAUSE; if (interlkp) lkflags |= LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(mp, p) struct mount *mp; struct proc *p; { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. */ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct proc *p = curproc; /* XXX */ struct vfsconf *vfsp; struct mount *mp; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) return (ENODEV); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); mp->mnt_vfc = vfsp; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_flag = MNT_RDONLY; mp->mnt_vnodecovered = NULLVP; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_stat.f_mntonname[0] = '/'; mp->mnt_stat.f_mntonname[1] = 0; (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); *mpp = mp; return (0); } /* * Find an appropriate filesystem to use for the root. If a filesystem * has not been preselected, walk through the list of known filesystems * trying those that have mountroot routines, and try them until one * works or we have tried them all. */ #ifdef notdef /* XXX JH */ int lite2_vfs_mountroot() { struct vfsconf *vfsp; extern int (*lite2_mountroot) __P((void)); int error; if (lite2_mountroot != NULL) return ((*lite2_mountroot)()); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { if (vfsp->vfc_mountroot == NULL) continue; if ((error = (*vfsp->vfc_mountroot)()) == 0) return (0); printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } return (ENODEV); } #endif /* * Lookup a mount point by filesystem identifier. 
*/ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); return (mp); } } simple_unlock(&mountlist_slock); return ((struct mount *) 0); } /* * Get a new unique fsid */ void vfs_getnewfsid(mp) struct mount *mp; { static u_short xxxfs_mntid; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; simple_unlock(&mntid_slock); } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = vap->va_fsid = vap->va_fileid = vap->va_blocksize = vap->va_rdev = vap->va_atime.tv_sec = vap->va_atime.tv_nsec = vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = vap->va_flags = vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { int s; struct proc *p = curproc; /* XXX */ struct vnode *vp, *tvp, *nvp; vm_object_t object; TAILQ_HEAD(freelst, vnode) vnode_tmp_list; /* * We take the least recently used vnode from the freelist * if we can get it and it has no cached pages, and no * namecache entries are relative to it. 
* Otherwise we allocate a new vnode */ s = splbio(); simple_lock(&vnode_free_list_slock); TAILQ_INIT(&vnode_tmp_list); for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } vp->v_flag &= ~(VTBFREE|VAGE); vp->v_flag |= VFREE; if (vp->v_usecount) panic("tobe free vnode isn't"); freevnodes++; } if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { /* * XXX: this is only here to be backwards compatible */ vp = NULL; } else { for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); if (!simple_lock_try(&vp->v_interlock)) continue; if (vp->v_usecount) panic("free vnode isn't"); object = vp->v_object; if (object && (object->resident_page_count || object->ref_count)) { printf("object inconsistant state: RPC: %d, RC: %d\n", object->resident_page_count, object->ref_count); /* Don't recycle if it's caching some pages */ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); continue; } else if (LIST_FIRST(&vp->v_cache_src)) { /* Don't recycle if active in the namecache */ simple_unlock(&vp->v_interlock); continue; } else { break; } } } for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { nvp = TAILQ_NEXT(tvp, v_freelist); TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); simple_unlock(&tvp->v_interlock); } if (vp) { vp->v_flag |= VDOOMED; TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; simple_unlock(&vnode_free_list_slock); cache_purge(vp); vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, p); } else { simple_unlock(&vp->v_interlock); } #ifdef DIAGNOSTIC { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastr = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ vp->v_maxio = 0; } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); numvnodes++; } vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; splx(s); vfs_object_create(vp, p, p->p_ucred, TRUE); return (0); } /* * Move a vnode from one mount queue to another. */ static void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { simple_unlock(&mntvnode_slock); return; } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); simple_unlock(&mntvnode_slock); } /* * Update outstanding I/O count and do wakeup if requested. 
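 * Sleepers such as vinvalbuf set VBWAIT and tsleep on &vp->v_numoutput; * the wakeup below releases them once the last write completes.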
*/ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; - if (flags & V_SAVE) { + if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) panic("vinvalbuf: dirty bufs"); } s = splbio(); for (;;) { if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) continue; if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; error = tsleep((caddr_t) bp, slpflag | (PRIBIO + 1), "vinvalbuf", slptimeo); if (error) { splx(s); return (error); } break; } bremfree(bp); bp->b_flags |= B_BUSY; /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { if (bp->b_vp == vp) { if (bp->b_flags & B_CLUSTEROK) { vfs_bio_awrite(bp); } else { bp->b_flags |= B_ASYNC; VOP_BWRITE(bp); } } else { (void) VOP_BWRITE(bp); } break; } bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); brelse(bp); } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { if (flags & V_SAVEMETA) vm_object_page_remove(object, 0, object->size, (flags & V_SAVE) ? TRUE : FALSE); else vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } simple_unlock(&vp->v_interlock); if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; #if defined(DIAGNOSTIC) if (bp->b_vp) panic("bgetvp: not free"); #endif vhold(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; /* * Insert onto list for new vnode. */ s = splbio(); bufinsvn(bp, &vp->v_cleanblkhd); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; int s; #if defined(DIAGNOSTIC) if (bp->b_vp == (struct vnode *) 0) panic("brelvp: NULL"); #endif /* * Delete from old vnode list, if on one. 
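 * This is also where a vnode leaves the syncer worklist: once its last * dirty buffer is gone, VONWORKLST is cleared and the vnode is removed * from its v_synclist slot.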
*/ + vp = bp->b_vp; s = splbio(); if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); + if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) { + vp->v_flag &= ~VONWORKLST; + LIST_REMOVE(vp, v_synclist); + } splx(s); - - vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; vdrop(vp); } /* + * The workitem queue. + * + * It is useful to delay writes of file data and filesystem metadata + * for tens of seconds so that quickly created and deleted files need + * not waste disk bandwidth being created and removed. To realize this, + * we append vnodes to a "workitem" queue. When running with a soft + * updates implementation, most pending metadata dependencies should + * not wait for more than a few seconds. Thus, filesystem metadata + * written through block device vnodes is delayed only about half the + * time that file data is delayed. + * Similarly, directory updates are more critical, so they are only delayed + * about a third of the time that file data is delayed. Thus, there are + * SYNCER_MAXDELAY queues that are processed round-robin at a rate of + * one each second (driven off the filesystem syncer process). The + * syncer_delayno variable indicates the next queue that is to be processed. + * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] + * + */ + +/* + * Add an item to the syncer work queue. + */ +void +vn_syncer_add_to_worklist(vp, delay) + struct vnode *vp; + int delay; +{ + int s, slot; + + s = splbio(); + + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + } + + if (delay > syncer_maxdelay - 2) + delay = syncer_maxdelay - 2; + slot = (syncer_delayno + delay) & syncer_mask; + + LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + vp->v_flag |= VONWORKLST; + splx(s); +} + +static void sched_sync __P((void)); +static struct proc *updateproc; +static struct kproc_desc up_kp = { + "syncer", + sched_sync, + &updateproc +}; +SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) + +/* + * System filesystem synchronizer daemon. + */ +void +sched_sync(void) +{ + struct synclist *slp; + struct vnode *vp; + long starttime; + int s; + struct proc *p = updateproc; + + for (;;) { + starttime = time.tv_sec; + + /* + * Push files whose dirty time has expired. + */ + s = splbio(); + slp = &syncer_workitem_pending[syncer_delayno]; + syncer_delayno += 1; + if (syncer_delayno == syncer_maxdelay) + syncer_delayno = 0; + splx(s); + + while ((vp = LIST_FIRST(slp)) != NULL) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); + VOP_UNLOCK(vp, 0, p); + if (LIST_FIRST(slp) == vp) { + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + vp->v_type != VBLK) + panic("sched_sync: fsync failed"); + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(vp, v_synclist); + vn_syncer_add_to_worklist(vp, syncdelay); + } + } + + /* + * Do soft update processing. + */ + if (bioops.io_sync) + (*bioops.io_sync)(NULL); + + /* + * The variable rushjob allows the kernel to speed up the + * processing of the filesystem syncer process. A rushjob + * value of N tells the filesystem syncer to process the next + * N seconds worth of work on its queue ASAP.
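 */

/*
 * Illustration (not part of this change): the slot computation in
 * vn_syncer_add_to_worklist() above hashes a delay into a circular
 * array of queues, which only works because the queue count is a
 * power of two.  A minimal model with hypothetical names; 32 queues
 * are assumed here.
 */
#include <stdio.h>

#define WHEELSIZE	32		/* must be a power of two for the mask */

static int delayno;			/* model of syncer_delayno */

static int
worklist_slot(int delay)
{
	/* Clamp, then wrap around the circular queue array. */
	if (delay > WHEELSIZE - 2)
		delay = WHEELSIZE - 2;
	return ((delayno + delay) & (WHEELSIZE - 1));
}

int
main(void)
{
	delayno = 30;
	printf("%d\n", worklist_slot(15));	/* (30 + 15) & 31 == 13 */
	return (0);
}

/*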
Currently rushjob + * is used by the soft update code to speed up the filesystem + * syncer process when the incore state is getting so far + * ahead of the disk that the kernel memory pool is being + * threatened with exhaustion. + */ + if (rushjob > 0) { + rushjob -= 1; + continue; + } + /* + * If it has taken us less than a second to process the + * current work, then wait. Otherwise start right over + * again. We can still lose time if any single round + * takes more than two seconds, but it does not really + * matter as we are just trying to generally pace the + * filesystem activity. + */ + if (time.tv_sec == starttime) + tsleep(&lbolt, PPAUSE, "syncer", 0); + } +} + +/* * Associate a p-buffer with a vnode. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { #if defined(DIAGNOSTIC) if (bp->b_vp) panic("pbgetvp: not free"); #endif bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { #if defined(DIAGNOSTIC) if (bp->b_vp == (struct vnode *) 0) panic("pbrelvp: NULL"); #endif bp->b_vp = (struct vnode *) 0; } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { + struct buflists *listheadp; + int delay; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_vnbufs.le_next != NOLIST) { bufremvn(bp); vdrop(bp->b_vp); } /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; - tbp = newvp->v_dirtyblkhd.lh_first; + listheadp = &newvp->v_dirtyblkhd; + if ((newvp->v_flag & VONWORKLST) == 0) { + switch (newvp->v_type) { + case VDIR: + delay = syncdelay / 3; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = syncdelay / 2; + break; + } + /* fall through */ + default: + delay = syncdelay; + } + vn_syncer_add_to_worklist(newvp, delay); + } + tbp = listheadp->lh_first; if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { - bufinsvn(bp, &newvp->v_dirtyblkhd); + bufinsvn(bp, listheadp); } else { while (tbp->b_vnbufs.le_next && - (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); } } else { bufinsvn(bp, &newvp->v_cleanblkhd); + if ((newvp->v_flag & VONWORKLST) && + LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { + newvp->v_flag &= ~VONWORKLST; + LIST_REMOVE(newvp, v_synclist); + } } bp->b_vp = newvp; vhold(bp->b_vp); splx(s); } #ifndef DEVFS_ROOT /* * Create a vnode for a block device. * Used for mounting the root file system. */ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) return (0); error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); if (error) { *vpp = 0; return (error); } vp = nvp; vp->v_type = VBLK; if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { vput(vp); vp = nvp; } *vpp = vp; return (0); } #endif /* !DEVFS_ROOT */ /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of * bdevvp() or because of a different vnode representing * the same block device). 
If such an alias exists, deallocate * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ struct vnode * checkalias(nvp, nvp_rdev, mp) register struct vnode *nvp; dev_t nvp_rdev; struct mount *mp; { struct proc *p = curproc; /* XXX */ struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { simple_unlock(&spechash_slock); vgonel(vp, p); goto loop; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { simple_unlock(&spechash_slock); goto loop; } break; } if (vp == NULL || vp->v_tag != VT_NON) { MALLOC(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), M_VNODE, M_WAITOK); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; simple_unlock(&spechash_slock); *vpp = nvp; if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } simple_unlock(&spechash_slock); VOP_UNLOCK(vp, 0, p); simple_lock(&vp->v_interlock); vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); return (vp); } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, flags, p) register struct vnode *vp; int flags; struct proc *p; { int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { simple_lock(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } vp->v_usecount++; if (VSHOULDBUSY(vp)) vbusy(vp); - if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* * must expand vrele here because we do not want * to call VOP_INACTIVE if the reference count * drops back to zero since it was never really * active. We must remove it from the free list * before sleeping so that multiple processes do * not try to recycle it. */ simple_lock(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } return (error); } simple_unlock(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { simple_lock(&vp->v_interlock); vp->v_usecount++; simple_unlock(&vp->v_interlock); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist.
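 */

/*
 * Illustration (not part of this change): the locking contract described
 * above, modeled in userland with hypothetical names.  m_get/m_put/m_rele
 * mirror vget()/vput()/vrele(): vput() expects the caller to hold the
 * lock and drops lock and reference together, while vrele() drops only
 * the reference of an unlocked vnode.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct mnode {
	pthread_mutex_t lock;
	int usecount;
};

static void
m_get(struct mnode *np, int dolock)
{
	np->usecount++;			/* always take a reference */
	if (dolock)
		pthread_mutex_lock(&np->lock);
}

static void
m_put(struct mnode *np)
{
	/* Caller holds the lock: release reference and lock together. */
	np->usecount--;
	pthread_mutex_unlock(&np->lock);
}

static void
m_rele(struct mnode *np)
{
	/* Caller does not hold the lock: drop the reference only. */
	np->usecount--;
}

int
main(void)
{
	struct mnode n = { PTHREAD_MUTEX_INITIALIZER, 0 };

	m_get(&n, 1);
	m_put(&n);
	m_get(&n, 0);
	m_rele(&n);
	assert(n.usecount == 0);
	printf("balanced\n");
	return (0);
}

/*
 * The routines below add the VOP_INACTIVE step that this model omits.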
*/ void vrele(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; simple_unlock(&vp->v_interlock); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { VOP_INACTIVE(vp, p); } } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } } void vput(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vput: null vp"); #endif simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } else { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } } /* * Somebody doesn't want the vnode recycled. */ void vhold(vp) register struct vnode *vp; { simple_lock(&vp->v_interlock); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); simple_unlock(&vp->v_interlock); } /* * One less who cares about this vnode. */ void vdrop(vp) register struct vnode *vp; { simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) - panic("holdrele: holdcnt"); + panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { struct proc *p = curproc; /* XXX */ struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; simple_lock(&vp->v_interlock); /* * Skip over vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { simple_unlock(&vp->v_interlock); continue; } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { simple_unlock(&vp->v_interlock); continue; } /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { simple_unlock(&mntvnode_slock); vgonel(vp, p); simple_lock(&mntvnode_slock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode.
For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, p); } else { vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif simple_unlock(&vp->v_interlock); busy++; } simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ static void vclean(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int active; vm_object_t obj; /* * Check to see if the vnode is in use. If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK * ensures that the VOP_INACTIVE routine is done with its work. * For active vnodes, it ensures that no other activity can * occur while the underlying object is being cleaned out. */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* * Clean out any buffers associated with the vnode. */ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); if (obj = vp->v_object) { if (obj->ref_count == 0) { /* * This is a normal way of shutting down the object/vnode * association. */ vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); } } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); cache_purge(vp); if (vp->v_vnlock) { #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ #ifdef DIAGNOSTIC if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) vprint("vclean: lock not drained", vp); #endif #endif FREE(vp->v_vnlock, M_VNODE); vp->v_vnlock = NULL; } if (VSHOULDFREE(vp)) vfree(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vn_pollgone(vp); vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ int vop_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp, *vq; struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if ((ap->a_flags & REVOKEALL) == 0) panic("vop_revoke"); #endif vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. 
*/ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ vp->v_flag |= VXLOCK; simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; simple_unlock(&spechash_slock); vgone(vq); break; } if (vq == NULLVP) { simple_unlock(&spechash_slock); } } /* * Remove the lock so that vgone below will * really eliminate the vnode after which time * vgone will awaken any sleepers. */ simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup(vp); } } vgonel(vp, p); return (0); } /* * Recycle an unused vnode to the front of the free list. * Release the passed interlock if the vnode will be recycled. */ int vrecycle(vp, inter_lkp, p) struct vnode *vp; struct simplelock *inter_lkp; struct proc *p; { simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { simple_unlock(inter_lkp); } vgonel(vp, p); return (1); } simple_unlock(&vp->v_interlock); return (0); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { struct proc *p = curproc; /* XXX */ simple_lock(&vp->v_interlock); vgonel(vp, p); } /* * vgone, with the vp interlock held. */ static void vgonel(vp, p) struct vnode *vp; struct proc *p; { int s; struct vnode *vq; struct vnode *vx; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE, p); simple_lock(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_specnext != vp) continue; vq->v_specnext = vp->v_specnext; break; } if (vq == NULL) panic("missing bdev"); } if (vp->v_flag & VALIASED) { vx = NULL; for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; if (vx) break; vx = vq; } if (vx == NULL) panic("missing alias"); if (vq == NULL) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, * move it to the head of the list. The test of the back * pointer and the reference count of zero is because * it will be removed from the free list by getnewvnode, * but will not have its reference count incremented until * after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to * close the previous instance of the underlying object. 
*/ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VFREE) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); } else if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; freevnodes++; } else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); splx(s); } vp->v_type = VBAD; simple_unlock(&vp->v_interlock); } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { register struct vnode *vp; int rc = 0; simple_lock(&spechash_slock); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; rc = 1; break; } simple_unlock(&spechash_slock); return (rc); } /* * Calculate the total number of references to a special device. */ int vcount(vp) register struct vnode *vp; { struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); simple_lock(&spechash_slock); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { simple_unlock(&spechash_slock); vgone(vq); goto loop; } count += vq->v_usecount; } simple_unlock(&spechash_slock); return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; register struct vnode *vp; { char buf[64]; if (label != NULL) printf("%s: %x: ", label, vp); else printf("%x: ", vp); printf("type %s, usecount %d, writecount %d, refcount %ld,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VALIASED) strcat(buf, "|VALIASED"); if (vp->v_flag & VDOOMED) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); if (vp->v_flag & VOBJBUF) strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ static void printlockedvnodes() { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *vp; printf("Locked vnodes\n"); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { if (VOP_ISLOCKED(vp)) vprint((char *)0, vp); } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); } #endif /* * Top level filesystem related information gathering. 
*/ static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); static int vfs_sysctl SYSCTL_HANDLER_ARGS { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; #ifndef NO_COMPAT_PRELITE2 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif #ifdef notyet /* all sysctl names at this level are at least name and field */ if (namelen < 2) return (ENOTDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) return (EOPNOTSUPP); return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, p)); } #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); } return (EOPNOTSUPP); } SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, "Generic filesystem"); #ifndef NO_COMPAT_PRELITE2 static int sysctl_ovfs_conf SYSCTL_HANDLER_ARGS { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* !NO_COMPAT_PRELITE2 */ static volatile int kinfo_vdebug = 1; #if 0 #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int sysctl_vnode SYSCTL_HANDLER_ARGS { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *nvp, *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } again: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); if (kinfo_vdebug) printf("kinfo: vp changed\n"); goto again; } nvp = vp->v_mntvnodes.le_next; simple_unlock(&mntvnode_slock); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); return (0); } #endif /* * XXX * Exporting the vnode list on large systems causes them to crash. * Exporting the vnode list on medium systems causes sysctl to coredump. */ #if 0 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); #endif /* * Check to see if a filesystem is mounted on a block device. 
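 */

/*
 * Illustration (not part of this change): the vfs_sysctl() handler above
 * serves the vfs.generic MIB.  A userland probe of the VFS_MAXTYPENUM
 * leaf, assuming a 4.4BSD-derived <sys/mount.h> and <sys/sysctl.h>.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int mib[3], maxtype;
	size_t len;

	mib[0] = CTL_VFS;
	mib[1] = VFS_GENERIC;
	mib[2] = VFS_MAXTYPENUM;	/* handled by the switch above */
	len = sizeof(maxtype);
	if (sysctl(mib, 3, &maxtype, &len, NULL, 0) == -1) {
		perror("sysctl");
		return (1);
	}
	printf("max filesystem type number: %d\n", maxtype);
	return (0);
}

/*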
*/ int vfs_mountedon(vp) struct vnode *vp; { struct vnode *vq; int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) { + if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } } simple_unlock(&spechash_slock); } return (error); } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall() { struct mount *mp, *nmp; struct proc *p = initproc; /* XXX XXX should this be proc0? */ int error; /* * Since this only runs when rebooting, it is not interlocked. */ for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_prev; error = dounmount(mp, MNT_FORCE, p); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ static int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. 
*/ static void vfs_free_addrlist(nep) struct netexport *nep; { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); mp->mnt_flag &= ~MNT_EXPUBLIC; } vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) return (error); mp->mnt_flag |= MNT_EXPUBLIC; } if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. */ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { FREE(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, &rvp))) return (error); if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { FREE(nfs_pub.np_index, M_TEMP); return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct sockaddr *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. 
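 */

/*
 * Illustration (not part of this change): vfs_export_lookup() above tries
 * a per-address radix-tree match first and falls back to the default
 * export.  A minimal model of that lookup order, with a flat table and
 * hypothetical names standing in for the radix tree.
 */
#include <stdio.h>
#include <string.h>

struct cred { const char *addr; int flags; };

static struct cred table[] = {
	{ "10.0.0.5", 1 },
	{ "10.0.0.9", 2 },
};
static struct cred defexport = { "default", 0 };

static struct cred *
export_lookup(const char *addr)
{
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (strcmp(table[i].addr, addr) == 0)
			return (&table[i]);	/* specific match wins */
	return (&defexport);			/* like ne_defexported */
}

int
main(void)
{
	printf("%d\n", export_lookup("10.0.0.9")->flags);	/* 2 */
	printf("%d\n", export_lookup("10.0.0.7")->flags);	/* 0: default */
	return (0);
}

/*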
*/ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; int anyio, tries; tries = 5; loop: anyio = 0; for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { nvp = vp->v_mntvnodes.le_next; if (vp->v_mount != mp) { goto loop; } if ((vp->v_flag & VXLOCK) || (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) { continue; } simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, TRUE); anyio = 1; } vput(vp); } } else { simple_unlock(&vp->v_interlock); } } if (anyio && (--tries > 0)) goto loop; } /* * Create the VM object needed for VMIO and mmap support. This * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. * * If !waslocked, must be called with interlock. */ int vfs_object_create(vp, p, cred, waslocked) struct vnode *vp; struct proc *p; struct ucred *cred; int waslocked; { struct vattr vat; vm_object_t object; int error = 0; if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { if (!waslocked) simple_unlock(&vp->v_interlock); return 0; } if (!waslocked) vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; object = vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); } else if (major(vp->v_rdev) < nblkdev) { /* * This simply allocates the biggest object possible * for a VBLK vnode. This should be fixed, but doesn't * cause any problems (yet). */ object = vnode_pager_alloc(vp, INT_MAX, 0, 0); } object->ref_count--; vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } } if (vp->v_object) { vp->v_flag |= VOBJBUF; } retn: if (!waslocked) { simple_lock(&vp->v_interlock); VOP_UNLOCK(vp, LK_INTERLOCK, p); } return error; } static void vfree(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); } void vbusy(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; } simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~(VFREE|VAGE); splx(s); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) 
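 */

/*
 * Illustration (not part of this change): the check-or-record pattern
 * described above, modeled in userland.  The single lock makes "is an
 * event already pending?" and "record my interest" one atomic step, so
 * no event can slip in between them.  All names here are hypothetical.
 */
#include <pthread.h>
#include <stdio.h>

struct pollst {
	pthread_mutex_t lock;
	short events;			/* interest recorded for later wakeups */
	short revents;			/* events that have already fired */
};

static short
poll_record(struct pollst *ps, short want)
{
	short got;

	pthread_mutex_lock(&ps->lock);
	got = ps->revents & want;
	if (got != 0)
		ps->revents &= ~got;	/* consume only what was asked for */
	else
		ps->events |= want;	/* nothing pending: register interest */
	pthread_mutex_unlock(&ps->lock);
	return (got);
}

int
main(void)
{
	struct pollst ps = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	printf("%d\n", poll_record(&ps, 1));	/* 0: interest recorded */
	ps.revents = 1;				/* an event fires */
	printf("%d\n", poll_record(&ps, 1));	/* 1: pending event consumed */
	return (0);
}

/*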
*/ int vn_pollrecord(vp, p, events) struct vnode *vp; struct proc *p; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; simple_unlock(&vp->v_pollinfo.vpi_lock); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(p, &vp->v_pollinfo.vpi_selinfo); simple_unlock(&vp->v_pollinfo.vpi_lock); return 0; } /* * Note the occurrence of an event. If the VN_POLLEVENT macro is used, * it is possible for us to miss an event due to race conditions, but * that condition is expected to be rare, so for the moment it is the * preferred interface. */ void vn_pollevent(vp, events) struct vnode *vp; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't * call selwakeup() twice if two events are * posted before the polling process(es) is * awakened. This also ensures that we take at * most one selwakeup() if the polling process * is no longer interested. However, it does * mean that only one event can be noticed at * a time. (Perhaps we should only clear those * event bits which we note?) XXX */ vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Wake up anyone polling on vp because it is being revoked. * This depends on dead_poll() returning POLLHUP for correct * behavior. */ void vn_pollgone(vp) struct vnode *vp; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); +} + + + +/* + * Routine to create and manage a filesystem syncer vnode. + */ +#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) +int sync_fsync __P((struct vop_fsync_args *)); +int sync_inactive __P((struct vop_inactive_args *)); +int sync_reclaim __P((struct vop_reclaim_args *)); +#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) +#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) +int sync_print __P((struct vop_print_args *)); +#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) + +vop_t **sync_vnodeop_p; +struct vnodeopv_entry_desc sync_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_eopnotsupp }, + { &vop_close_desc, (vop_t *) sync_close }, /* close */ + { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ + { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ + { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ + { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ + { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ + { &vop_print_desc, (vop_t *) sync_print }, /* print */ + { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ + { NULL, NULL } +}; +struct vnodeopv_desc sync_vnodeop_opv_desc = + { &sync_vnodeop_p, sync_vnodeop_entries }; + +VNODEOP_SET(sync_vnodeop_opv_desc); + +/* + * Create a new filesystem syncer vnode for the specified mount point.
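 */

/*
 * Illustration (not part of this change): the sync_vnodeop_entries table
 * above pairs operation descriptors with handlers and lets a default
 * entry cover everything unimplemented.  A minimal model of that
 * default-plus-overrides dispatch; the names here are hypothetical, and
 * the real table is resolved at boot by the VNODEOP_SET machinery.
 */
#include <stdio.h>

typedef int (*vop_fn)(void);

static int op_default(void) { return (-1); }	/* like vop_eopnotsupp */
static int op_fsync(void)   { return (0); }

enum { OP_FSYNC, OP_RECLAIM, OP_MAX };

int
main(void)
{
	vop_fn ops[OP_MAX];
	int i;

	/* Start every slot at the default, then override what we implement. */
	for (i = 0; i < OP_MAX; i++)
		ops[i] = op_default;
	ops[OP_FSYNC] = op_fsync;

	printf("fsync=%d reclaim=%d\n",
	    ops[OP_FSYNC](), ops[OP_RECLAIM]());	/* 0 and -1 */
	return (0);
}

/*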
+ */ +int +vfs_allocate_syncvnode(mp) + struct mount *mp; +{ + struct vnode *vp; + static long start, incr, next; + int error; + + /* Allocate a new vnode */ + if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { + mp->mnt_syncer = NULL; + return (error); + } + vp->v_type = VNON; + /* + * Place the vnode onto the syncer worklist. We attempt to + * scatter them about on the list so that they will go off + * at evenly distributed times even if all the filesystems + * are mounted at once. + */ + next += incr; + if (next == 0 || next > syncer_maxdelay) { + start /= 2; + incr /= 2; + if (start == 0) { + start = syncer_maxdelay / 2; + incr = syncer_maxdelay; + } + next = start; + } + vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); + mp->mnt_syncer = vp; + return (0); +} + +/* + * Do a lazy sync of the filesystem. + */ +int +sync_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + struct vnode *syncvp = ap->a_vp; + struct mount *mp = syncvp->v_mount; + struct proc *p = ap->a_p; + int asyncflag; + + /* + * We only need to do something if this is a lazy evaluation. + */ + if (ap->a_waitfor != MNT_LAZY) + return (0); + + /* + * Move ourselves to the back of the sync list. + */ + vn_syncer_add_to_worklist(syncvp, syncdelay); + + /* + * Walk the list of vnodes pushing all that are dirty and + * not already on the sync list. + */ + simple_lock(&mountlist_slock); + if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) + return (0); + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + vfs_unbusy(mp, p); + return (0); +} + +/* + * The syncer vnode is no longer referenced. + */ +int +sync_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + + vgone(ap->a_vp); + return (0); +} + +/* + * The syncer vnode is no longer needed and is being decommissioned. + */ +int +sync_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + vp->v_mount->mnt_syncer = NULL; + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + vp->v_flag &= ~VONWORKLST; + } + + return (0); +} + +/* + * Print out a syncer vnode. + */ +int +sync_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + printf("syncer vnode"); + if (vp->v_vnlock != NULL) + lockmgr_printinfo(vp->v_vnlock); + printf("\n"); + return (0); } Index: head/sys/kern/vfs_extattr.c =================================================================== --- head/sys/kern/vfs_extattr.c (revision 34265) +++ head/sys/kern/vfs_extattr.c (revision 34266) @@ -1,2826 +1,2841 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.93 1998/02/15 04:17:09 dyson Exp $ + * $Id: vfs_syscalls.c,v 1.94 1998/03/07 21:35:39 dyson Exp $ */ /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" /* * XXX - The following is required because of some magic done * in getdirentries() below which is only done if the translucent * filesystem `UNION' is compiled into the kernel. This is broken, * but I don't have time to study the code deeply enough to understand * what's going on and determine an appropriate fix. -GAW */ #include "opt_union.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UNION #include #endif #include #include #include #include #include static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); static int usermount = 0; /* if 1, non-root can mount fs. */ SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); /* * Virtual File System System Calls */ /* * Mount a file system. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(p, uap) struct proc *p; register struct mount_args /* { syscallarg(char *) type; syscallarg(char *) path; syscallarg(int) flags; syscallarg(caddr_t) data; } */ *uap; { struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; int error, flag = 0, flag2 = 0; struct vattr va; u_long fstypenum; struct nameidata nd; char fstypename[MFSNAMELEN]; if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag))) return (error); /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (SCARG(uap, flags) & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; flag = mp->mnt_flag; flag2 = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. 
*/ if ((SCARG(uap, flags) & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } mp->mnt_flag |= SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); /* * Only root, or the user that did the original mount is * permitted to update it. */ if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (vfs_busy(mp, LK_NOWAIT, 0, p)) { vput(vp); return (EBUSY); } VOP_UNLOCK(vp, 0, p); goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || (va.va_uid != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag)))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) return (error); if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } #ifdef COMPAT_43 /* * Historically filesystem types were identified by number. If we * get an integer for the filesystem type instead of a string, we * check to see if it matches one of the historic filesystem types. */ fstypenum = (u_long)SCARG(uap, type); if (fstypenum < maxvfsconf) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == fstypenum) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); } else #endif /* COMPAT_43 */ if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { vput(vp); return (error); } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } if (vp->v_mountedhere != NULL) { vput(vp); return (EBUSY); } /* * Allocate and initialize the filesystem. */ mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = p->p_ucred->cr_uid; update: /* * Set the mount level flags. */ if (SCARG(uap, flags) & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); /* * Mount the filesystem. 
*/ error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); if (mp->mnt_flag & MNT_UPDATE) { vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = flag2; } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } vfs_unbusy(mp, p); return (error); } /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); simple_unlock(&mountlist_slock); checkdirs(vp); VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, p); if (error = VFS_START(mp, 0, p)) vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); } return (error); } /* * Scan all active processes to see if any of them have a current * or root directory onto which the new filesystem has just been * mounted. If so, replace them with the new mount point. */ static void checkdirs(olddp) struct vnode *olddp; { struct filedesc *fdp; struct vnode *newdp; struct proc *p; if (olddp->v_usecount == 1) return; if (VFS_ROOT(olddp->v_mountedhere, &newdp)) panic("mount: lost mount"); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { fdp = p->p_fd; if (fdp->fd_cdir == olddp) { vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; } if (fdp->fd_rdir == olddp) { vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; } } if (rootvnode == olddp) { vrele(rootvnode); VREF(newdp); rootvnode = newdp; } vput(newdp); } /* * Unmount a file system. * * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(p, uap) struct proc *p; register struct unmount_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; mp = vp->v_mount; /* * Only root, or the user that did the original mount is * permitted to unmount this filesystem. */ if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Don't allow unmounting the root file system. */ if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } vput(vp); return (dounmount(mp, SCARG(uap, flags), p)); } /* * Do the actual file system unmount. 
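 */

/*
 * Illustration (not part of this change): the userland view of the
 * unmount path handled below.  MNT_FORCE makes dounmount() detach even
 * active vnodes, as described in vflush(); "/mnt" is just an example
 * path.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

int
main(void)
{
	if (unmount("/mnt", MNT_FORCE) == -1) {
		perror("unmount");
		return (1);
	}
	return (0);
}

/*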
*/ int dounmount(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { struct vnode *coveredvp; int error; simple_lock(&mountlist_slock); mp->mnt_kern_flag |= MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); simple_lock(&mountlist_slock); if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, &mountlist_slock, p); return (error); } CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { coveredvp->v_mountedhere = (struct mount *)0; vrele(coveredvp); } mp->mnt_vfc->vfc_refcount--; if (mp->mnt_vnodelist.lh_first != NULL) panic("unmount: dangling vnode"); lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); free((caddr_t)mp, M_MOUNT); return (0); } /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(p, uap) struct proc *p; struct sync_args *uap; { register struct mount *mp, *nmp; int asyncflag; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); - VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? p->p_ucred : NOCRED, p); - if (asyncflag) - mp->mnt_flag |= MNT_ASYNC; + VFS_SYNC(mp, MNT_NOWAIT, + ((p != NULL) ? p->p_ucred : NOCRED), p); + mp->mnt_flag |= asyncflag; } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(p, uap) struct proc *p; register struct quotactl_args /* { syscallarg(char *) path; syscallarg(int) cmd; syscallarg(int) uid; syscallarg(caddr_t) arg; } */ *uap; { register struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), SCARG(uap, arg), p)); } /* * Get filesystem statistics. 
*/ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(p, uap) struct proc *p; register struct statfs_args /* { syscallarg(char *) path; syscallarg(struct statfs *) buf; } */ *uap; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; vrele(nd.ni_vp); error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(p, uap) struct proc *p; register struct fstatfs_args /* { syscallarg(int) fd; syscallarg(struct statfs *) buf; } */ *uap; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; struct statfs sb; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(p, uap) struct proc *p; register struct getfsstat_args /* { syscallarg(struct statfs *) buf; syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); count = 0; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache; MNT_WAIT overrides + * MNT_NOWAIT and MNT_LAZY. */ - if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, p); return (error); } sfsp += sizeof(*sp); } count++; simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) p->p_retval[0] = maxcount; else p->p_retval[0] = count; return (0); } /* * Change current working directory to a given file descriptor. 
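From userland, the semantics above are visible through getfsstat(2): a null buffer returns only the count of mounted filesystems, and MNT_NOWAIT asks for the cached statistics that the new MNT_LAZY test also protects. A minimal illustrative sketch (not part of the patch):

        #include <sys/param.h>
        #include <sys/ucred.h>
        #include <sys/mount.h>
        #include <err.h>
        #include <stdio.h>
        #include <stdlib.h>

        int
        main(void)
        {
                struct statfs *sf;
                int i, n;

                /* NULL buffer: just count the mounted filesystems. */
                if ((n = getfsstat(NULL, 0, MNT_NOWAIT)) < 0)
                        err(1, "getfsstat");
                if ((sf = malloc(n * sizeof(*sf))) == NULL)
                        err(1, "malloc");
                /* MNT_NOWAIT: use cached statistics, skip VFS_STATFS. */
                if ((n = getfsstat(sf, n * sizeof(*sf), MNT_NOWAIT)) < 0)
                        err(1, "getfsstat");
                for (i = 0; i < n; i++)
                        printf("%s on %s (%s)\n", sf[i].f_mntfromname,
                            sf[i].f_mntonname, sf[i].f_fstypename);
                return (0);
        }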
*/ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(p, uap) struct proc *p; struct fchdir_args /* { syscallarg(int) fd; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct vnode *vp, *tdp; struct mount *mp; struct file *fp; int error; if (error = getvnode(fdp, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, p)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, p); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(p, uap) struct proc *p; struct chdir_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_cdir); fdp->fd_cdir = nd.ni_vp; return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(p, uap) struct proc *p; struct chroot_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_rdir); fdp->fd_rdir = nd.ni_vp; return (0); } /* * Common routine for chroot and chdir. */ static int change_dir(ndp, p) register struct nameidata *ndp; struct proc *p; { struct vnode *vp; int error; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); if (error) vput(vp); else VOP_UNLOCK(vp, 0, p); return (error); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. 
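fchdir() above is careful to walk down through any filesystems mounted over the target directory (the v_mountedhere loop) before installing it as fd_cdir. From userland, an open directory descriptor is the cheap way to save and restore the working directory without a second path lookup. Illustrative sketch:

        #include <sys/types.h>
        #include <err.h>
        #include <fcntl.h>
        #include <unistd.h>

        int
        main(void)
        {
                int dirfd;

                if ((dirfd = open(".", O_RDONLY)) < 0)
                        err(1, "open .");
                if (chdir("/tmp") < 0)
                        err(1, "chdir");
                /* ... work in /tmp ... */
                if (fchdir(dirfd) < 0) /* straight back, no path lookup */
                        err(1, "fchdir");
                close(dirfd);
                return (0);
        }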
*/ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(p, uap) struct proc *p; register struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register struct vnode *vp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; oflags = SCARG(uap, flags); if ((oflags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(oflags); error = falloc(p, &nfp, &indx); if (error) return (error); fp = nfp; cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ error = vn_open(&nd, flags, cmode); if (error) { ffree(fp); if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } if (error == ERESTART) error = EINTR; fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; vp = nd.ni_vp; fp->f_flag = flags & FMASK; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); ffree(fp); fdp->fd_ofiles[indx] = NULL; return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (vp->v_object == NULL)) vfs_object_create(vp, p, p->p_ucred, TRUE); VOP_UNLOCK(vp, 0, p); p->p_retval[0] = indx; return (0); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(p, uap) struct proc *p; register struct ocreat_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, mode) = SCARG(uap, mode); SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; return (open(p, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. 
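Within open() above, O_EXLOCK and O_SHLOCK take an flock()-style advisory lock atomically with the open itself, closing the usual open-then-lock race; FHASLOCK arranges for the last close to drop the lock. A small usage sketch:

        #include <err.h>
        #include <fcntl.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                /*
                 * FNONBLOCK is not set, so the kernel uses F_WAIT and
                 * the open blocks until the exclusive lock is granted.
                 */
                fd = open("/tmp/lockfile", O_RDWR | O_CREAT | O_EXLOCK, 0644);
                if (fd < 0)
                        err(1, "open");
                /* ... critical section; lock drops on last close ... */
                close(fd);
                return (0);
        }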
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(p, uap) struct proc *p; register struct mknod_args /* { syscallarg(char *) path; syscallarg(int) mode; syscallarg(int) dev; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; int whiteout; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) error = EEXIST; else { VATTR_NULL(&vattr); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; vattr.va_rdev = SCARG(uap, dev); whiteout = 0; switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (whiteout) { error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); if (error) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); } else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp) vrele(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(p, uap) struct proc *p; register struct mkfifo_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(p, uap) struct proc *p; register struct link_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { register struct vnode *vp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); error = EEXIST; } else { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } } } vrele(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. 
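mknod() above is restricted to the superuser, but the same VOP_MKNOD path is reachable by unprivileged code through mkfifo(). Illustrative sketch:

        #include <sys/types.h>
        #include <sys/stat.h>
        #include <err.h>
        #include <errno.h>
        #include <fcntl.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                /* The requested mode is reduced by the process umask. */
                if (mkfifo("/tmp/fifo", 0666) < 0 && errno != EEXIST)
                        err(1, "mkfifo");
                /* O_NONBLOCK lets the open succeed with no writer present. */
                if ((fd = open("/tmp/fifo", O_RDONLY | O_NONBLOCK)) < 0)
                        err(1, "open");
                close(fd);
                return (0);
        }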
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(p, uap) struct proc *p; register struct symlink_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { struct vattr vattr; char *path; int error; struct nameidata nd; path = zalloc(namei_zone); if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if (error = namei(&nd)) goto out; if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: zfree(namei_zone, path); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(p, uap) struct proc *p; register struct undelete_args /* { syscallarg(char *) path; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(p, uap) struct proc *p; struct unlink_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_flag & VROOT) error = EBUSY; } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp != NULLVP) vput(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. 
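unlink() above rejects directories with EPERM (POSIX reserves directory removal for rmdir()) and the root of a mount with EBUSY. A sketch showing the directory case:

        #include <sys/stat.h>
        #include <err.h>
        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int
        main(void)
        {
                if (mkdir("/tmp/d", 0755) < 0 && errno != EEXIST)
                        err(1, "mkdir");
                if (unlink("/tmp/d") < 0)       /* expect EPERM */
                        printf("unlink on a directory: %s\n",
                            strerror(errno));
                if (rmdir("/tmp/d") < 0)
                        err(1, "rmdir");
                return (0);
        }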
*/ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(p, uap) struct proc *p; register struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ *uap; { struct ucred *cred = p->p_ucred; register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vattr vattr; int error; if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (ESPIPE); switch (SCARG(uap, whence)) { case L_INCR: fp->f_offset += SCARG(uap, offset); break; case L_XTND: error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); if (error) return (error); fp->f_offset = SCARG(uap, offset) + vattr.va_size; break; case L_SET: fp->f_offset = SCARG(uap, offset); break; default: return (EINVAL); } *(off_t *)(p->p_retval) = fp->f_offset; return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(p, uap) struct proc *p; register struct olseek_args /* { syscallarg(int) fd; syscallarg(long) offset; syscallarg(int) whence; } */ *uap; { struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ nuap; int error; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, offset) = SCARG(uap, offset); SCARG(&nuap, whence) = SCARG(uap, whence); error = lseek(p, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(p, uap) struct proc *p; register struct access_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct ucred *cred = p->p_ucred; register struct vnode *vp; int error, flags, t_gid, t_uid; struct nameidata nd; t_uid = cred->cr_uid; t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) goto out1; vp = nd.ni_vp; /* Flags == 0 means only check for existence. */ if (SCARG(uap, flags)) { flags = 0; if (SCARG(uap, flags) & R_OK) flags |= VREAD; if (SCARG(uap, flags) & W_OK) flags |= VWRITE; if (SCARG(uap, flags) & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, p); } vput(vp); out1: cred->cr_uid = t_uid; cred->cr_groups[0] = t_gid; return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(p, uap) struct proc *p; register struct ostat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. 
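access() above swaps the real uid/gid into the credential for the duration of the check, so a set-uid program can ask whether the invoking user, rather than the effective user, may touch a path. Sketch:

        #include <err.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(int argc, char **argv)
        {
                if (argc != 2)
                        errx(1, "usage: canwrite path");
                /*
                 * Even in a set-uid binary, the check is made against
                 * the invoking user's real credentials.
                 */
                if (access(argv[1], W_OK) == 0)
                        printf("%s: writable by the real uid\n", argv[1]);
                else
                        printf("%s: not writable by the real uid\n", argv[1]);
                return (0);
        }

As always, the answer can go stale between access() and a later open(), so it is advisory only.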
*/ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(p, uap) struct proc *p; register struct olstat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); if (vp->v_type == VLNK) sb.st_mode |= S_IFLNK | ACCESSPERMS; /* 0777 */ vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(p, uap) struct proc *p; register struct stat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(p, uap) struct proc *p; register struct lstat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); if (vp->v_type == VLNK) sb.st_mode |= S_IFLNK | ACCESSPERMS; /* 0777 */ vput(vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(p, uap) struct proc *p; register struct pathconf_args /* { syscallarg(char *) path; syscallarg(int) name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. 
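lstat() above does not ask the filesystem for a symlink's mode at all; it forges S_IFLNK | ACCESSPERMS (0777), while stat() follows the link and reports on the target. A sketch contrasting the two:

        #include <sys/types.h>
        #include <sys/stat.h>
        #include <err.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                struct stat sb;

                (void)unlink("/tmp/sl");
                if (symlink("/etc/motd", "/tmp/sl") < 0)
                        err(1, "symlink");
                if (stat("/tmp/sl", &sb) < 0)   /* follows the link */
                        err(1, "stat");
                printf("stat:  mode %o\n", (unsigned)sb.st_mode);
                if (lstat("/tmp/sl", &sb) < 0)  /* the link itself */
                        err(1, "lstat");
                printf("lstat: mode %o (S_ISLNK=%d)\n",
                    (unsigned)sb.st_mode, S_ISLNK(sb.st_mode));
                return (0);
        }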
*/ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(p, uap) struct proc *p; register struct readlink_args /* { syscallarg(char *) path; syscallarg(char *) buf; syscallarg(int) count; } */ *uap; { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, p->p_ucred); } vput(vp); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(p, uap) struct proc *p; register struct chflags_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(p, uap) struct proc *p; register struct fchflags_args /* { syscallarg(int) fd; syscallarg(int) flags; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(p, uap) struct proc *p; register struct chmod_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change mode of a file given a file descriptor. 
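readlink() above copies the link target into the user's buffer via uiomove and reports the byte count through p_retval; nothing NUL-terminates the result, and a non-link fails with EINVAL. Sketch with the termination done by hand:

        #include <err.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                char buf[1024];
                int n;

                (void)unlink("/tmp/sl");
                if (symlink("/etc/motd", "/tmp/sl") < 0)
                        err(1, "symlink");
                /* readlink(2) does not NUL-terminate: do it by hand. */
                if ((n = readlink("/tmp/sl", buf, sizeof(buf) - 1)) < 0)
                        err(1, "readlink");     /* EINVAL if not a symlink */
                buf[n] = '\0';
                printf("/tmp/sl -> %s\n", buf);
                return (0);
        }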
*/ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(p, uap) struct proc *p; register struct fchmod_args /* { syscallarg(int) fd; syscallarg(int) mode; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(p, uap) struct proc *p; register struct chown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(p, uap) struct proc *p; register struct lchown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(p, uap) struct proc *p; register struct fchown_args /* { syscallarg(int) fd; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set the access and modification times of a file. 
*/ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(p, uap) struct proc *p; register struct utimes_args /* { syscallarg(char *) path; syscallarg(struct timeval *) tptr; } */ *uap; { register struct vnode *vp; struct timeval tv[2]; struct vattr vattr; int error; struct nameidata nd; VATTR_NULL(&vattr); if (SCARG(uap, tptr) == NULL) { microtime(&tv[0]); tv[1] = tv[0]; vattr.va_vaflags |= VA_UTIMES_NULL; } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, sizeof (tv))) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); vattr.va_atime.tv_sec = tv[0].tv_sec; vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; vattr.va_mtime.tv_sec = tv[1].tv_sec; vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(p, uap) struct proc *p; register struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (uap->length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(p, uap) struct proc *p; register struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } VOP_UNLOCK(vp, 0, p); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(p, uap) struct proc *p; register struct otruncate_args /* { syscallarg(char *) path; syscallarg(long) length; } */ *uap; { struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, length) = SCARG(uap, length); return (truncate(p, &nuap)); } /* * Truncate a file given a file descriptor. 
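The otherwise odd int pad member in the truncate()/ftruncate() argument structures above exists only to keep the 64-bit off_t aligned in the syscall argument block; libc hides it entirely. Typical use:

        #include <sys/types.h>
        #include <err.h>
        #include <fcntl.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                if ((fd = open("/tmp/grow", O_RDWR | O_CREAT, 0644)) < 0)
                        err(1, "open");
                /* Extending and shrinking both go through VOP_SETATTR. */
                if (ftruncate(fd, (off_t)1048576) < 0)
                        err(1, "ftruncate");
                if (ftruncate(fd, (off_t)0) < 0)
                        err(1, "ftruncate");
                close(fd);
                return (0);
        }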
*/ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(p, uap) struct proc *p; register struct oftruncate_args /* { syscallarg(int) fd; syscallarg(long) length; } */ *uap; { struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(p, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(p, uap) struct proc *p; struct fsync_args /* { syscallarg(int) fd; } */ *uap; { register struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) == NULL) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, FALSE); } error = VOP_FSYNC(vp, fp->f_cred, (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC)) ? MNT_NOWAIT : MNT_WAIT, p); VOP_UNLOCK(vp, 0, p); } return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(p, uap) struct proc *p; register struct rename_args /* { syscallarg(char *) from; syscallarg(char *) to; } */ *uap; { register struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, SCARG(uap, from), p); if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&tond)) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If source is the same as the destination (that is the * same inode number with the same name in the same directory), * then there is nothing to do. 
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(p, uap) struct proc *p; register struct mkdir_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (!error) vput(nd.ni_vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(p, uap) struct proc *p; struct rmdir_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a file system independent format. 
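The error = -1 trick above makes renaming a file onto itself (same vnode, same parent, same name) a successful no-op, as POSIX requires: out1 maps -1 back to 0 after the usual cleanup. Sketch:

        #include <err.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                if ((fd = open("/tmp/self", O_RDWR | O_CREAT, 0644)) < 0)
                        err(1, "open");
                close(fd);
                /* Renaming a file to itself is a successful no-op. */
                if (rename("/tmp/self", "/tmp/self") < 0)
                        err(1, "rename");
                printf("rename to self: ok\n");
                return (0);
        }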
*/ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(p, uap) struct proc *p; register struct ogetdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = SCARG(uap, count); MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. 
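The byte swapping above exists because the old directory-entry format spent sixteen bits on d_namlen, while the 4.4BSD format splits that space into eight-bit d_type and d_namlen fields; on a little-endian machine the new d_type occupies the byte that used to hold the low half of the old d_namlen. The two layouts, sketched with hypothetical struct names and this era's field widths:

        #include <sys/types.h>

        struct dirent_new {             /* 4.4BSD format */
                u_int32_t d_fileno;
                u_int16_t d_reclen;
                u_int8_t  d_type;       /* DT_REG, DT_DIR, ... */
                u_int8_t  d_namlen;
                char      d_name[255 + 1];
        };

        struct dirent_old {             /* old format, no d_type */
                u_int32_t d_fileno;
                u_int16_t d_reclen;
                u_int16_t d_namlen;     /* high byte must be zero */
                char      d_name[255 + 1];
        };

So on little-endian hardware the fix-up is just dp->d_type = dp->d_namlen; dp->d_namlen = 0;, while big-endian machines only need to clear d_type, which is exactly what the two #if branches above do.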
*/ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(p, uap) struct proc *p; register struct getdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); /* vn_lock(vp, LK_SHARED | LK_RETRY, p); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Set the mode mask for creation of filesystem nodes. */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(p, uap) struct proc *p; struct umask_args /* { syscallarg(int) newmask; } */ *uap; { register struct filedesc *fdp; fdp = p->p_fd; p->p_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(p, uap) struct proc *p; register struct revoke_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. 
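getdirentries() above hands userland a buffer of variable-length records that must be walked by d_reclen, exactly as opendir(3)/readdir(3) do internally. A minimal illustrative walker:

        #include <sys/types.h>
        #include <dirent.h>
        #include <err.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                char buf[4096], *cp;
                struct dirent *dp;
                long base;
                int fd, n;

                if ((fd = open("/tmp", O_RDONLY)) < 0)
                        err(1, "open");
                while ((n = getdirentries(fd, buf, sizeof(buf), &base)) > 0) {
                        /* Records are variable length: step by d_reclen. */
                        for (cp = buf; cp < buf + n; cp += dp->d_reclen) {
                                dp = (struct dirent *)cp;
                                if (dp->d_fileno != 0)  /* skip empty slots */
                                        printf("%.*s\n", (int)dp->d_namlen,
                                            dp->d_name);
                        }
                }
                if (n < 0)
                        err(1, "getdirentries");
                close(fd);
                return (0);
        }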
*/ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { struct file *fp; if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) return (EINVAL); *fpp = fp; return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getcwd_args { u_char *buf; u_int buflen; }; #endif #define STATNODE(mode, name, var) \ SYSCTL_INT(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); int __getcwd(p, uap) struct proc *p; struct __getcwd_args *uap; { char *bp, *buf; int error, i, slash_prefixed; struct filedesc *fdp; struct namecache *ncp; struct vnode *vp; numcwdcalls++; if (disablecwd) return (ENODEV); if (uap->buflen < 2) return (EINVAL); if (uap->buflen > MAXPATHLEN) uap->buflen = MAXPATHLEN; buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); bp += uap->buflen - 1; *bp = '\0'; fdp = p->p_fd; slash_prefixed = 0; for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { vp = vp->v_mount->mnt_vnodecovered; continue; } if (vp->v_dd->v_id != vp->v_ddid) { numcwdfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { numcwdfail2++; free(buf, M_TEMP); return (ENOENT); } if (ncp->nc_dvp != vp->v_dd) { numcwdfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = ncp->nc_name[i]; } if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; slash_prefixed = 1; vp = vp->v_dd; } if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; } numcwdfound++; error = copyout(bp, uap->buf, strlen(bp) + 1); free(buf, M_TEMP); return (error); } Index: head/sys/kern/vfs_subr.c =================================================================== --- head/sys/kern/vfs_subr.c (revision 34265) +++ head/sys/kern/vfs_subr.c (revision 34266) @@ -1,2328 +1,2674 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.136 1998/03/01 23:07:45 dyson Exp $ + * $Id: vfs_subr.c,v 1.137 1998/03/07 21:35:35 dyson Exp $ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); static void insmntque __P((struct vnode *vp, struct mount *mp)); #ifdef DDB static void printlockedvnodes __P((void)); #endif static void vclean __P((struct vnode *vp, int flags, struct proc *p)); static void vfree __P((struct vnode *)); static void vgonel __P((struct vnode *vp, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; /* * Insq/Remq for the vnode usage lists. */ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) #define bufremvn(bp) { \ LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ struct tobefreelist vnode_tobefree_list; /* vnode free list */ static u_long wantfreevnodes = 25; SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); int vfs_ioopt = 0; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); struct mntlist mountlist; /* mounted filesystem list */ struct simplelock mountlist_slock; static struct simplelock mntid_slock; struct simplelock mntvnode_slock; static struct simplelock vnode_free_list_slock; static struct simplelock spechash_slock; struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; +/* + * The workitem queue. 
+ */ +#define SYNCER_MAXDELAY 32 +int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ +time_t syncdelay = 30; +int rushjob; /* number of slots to run ASAP */ + +static int syncer_delayno = 0; +static long syncer_mask; +LIST_HEAD(synclist, vnode); +static struct synclist *syncer_workitem_pending; + int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + cnt.v_page_count / 4; simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&vnode_tobefree_list); simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); + /* + * Initialize the filesystem syncer. + */ + syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_maxdelay = syncer_mask + 1; } /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. */ int vfs_busy(mp, flags, interlkp, p) struct mount *mp; int flags; struct simplelock *interlkp; struct proc *p; { int lkflags; if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { simple_unlock(interlkp); } /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { simple_lock(interlkp); } return (ENOENT); } lkflags = LK_SHARED | LK_NOPAUSE; if (interlkp) lkflags |= LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(mp, p) struct mount *mp; struct proc *p; { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. */ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct proc *p = curproc; /* XXX */ struct vfsconf *vfsp; struct mount *mp; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) return (ENODEV); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); mp->mnt_vfc = vfsp; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_flag = MNT_RDONLY; mp->mnt_vnodecovered = NULLVP; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_stat.f_mntonname[0] = '/'; mp->mnt_stat.f_mntonname[1] = 0; (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); *mpp = mp; return (0); } /* * Find an appropriate filesystem to use for the root. 
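vntblinit() above sizes the syncer wheel with hashinit(), which returns a table whose size is the largest power of two not exceeding the requested count and stores size - 1 in syncer_mask; syncer_maxdelay is then rebiased to syncer_mask + 1. That is why a slot can be computed with a mask instead of a modulus. A standalone sketch of the wheel arithmetic, with the kernel types replaced by plain ints:

        #include <stdio.h>

        #define SYNCER_MAXDELAY 32      /* must be a power of two here */

        static int syncer_delayno;      /* slot the syncer drains next */
        static int syncer_mask = SYNCER_MAXDELAY - 1;

        /* Which slot does a request with the given delay land in? */
        static int
        syncer_slot(int delay)
        {
                if (delay > SYNCER_MAXDELAY - 2)
                        delay = SYNCER_MAXDELAY - 2;
                return ((syncer_delayno + delay) & syncer_mask);
        }

        int
        main(void)
        {
                int second;

                /* One queue is drained per second, round-robin. */
                for (second = 0; second < 3; second++) {
                        printf("t=%d: drain slot %d; delay 15 -> slot %d\n",
                            second, syncer_delayno, syncer_slot(15));
                        syncer_delayno = (syncer_delayno + 1) & syncer_mask;
                }
                return (0);
        }

The clamp to SYNCER_MAXDELAY - 2 keeps a long delay from wrapping all the way around the ring onto the slot that is just about to be drained.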
If a filesystem * has not been preselected, walk through the list of known filesystems * trying those that have mountroot routines, and try them until one * works or we have tried them all. */ #ifdef notdef /* XXX JH */ int lite2_vfs_mountroot() { struct vfsconf *vfsp; extern int (*lite2_mountroot) __P((void)); int error; if (lite2_mountroot != NULL) return ((*lite2_mountroot)()); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { if (vfsp->vfc_mountroot == NULL) continue; if ((error = (*vfsp->vfc_mountroot)()) == 0) return (0); printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } return (ENODEV); } #endif /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); return (mp); } } simple_unlock(&mountlist_slock); return ((struct mount *) 0); } /* * Get a new unique fsid */ void vfs_getnewfsid(mp) struct mount *mp; { static u_short xxxfs_mntid; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; simple_unlock(&mntid_slock); } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = vap->va_fsid = vap->va_fileid = vap->va_blocksize = vap->va_rdev = vap->va_atime.tv_sec = vap->va_atime.tv_nsec = vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = vap->va_flags = vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { int s; struct proc *p = curproc; /* XXX */ struct vnode *vp, *tvp, *nvp; vm_object_t object; TAILQ_HEAD(freelst, vnode) vnode_tmp_list; /* * We take the least recently used vnode from the freelist * if we can get it and it has no cached pages, and no * namecache entries are relative to it. 
* Otherwise we allocate a new vnode */ s = splbio(); simple_lock(&vnode_free_list_slock); TAILQ_INIT(&vnode_tmp_list); for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } vp->v_flag &= ~(VTBFREE|VAGE); vp->v_flag |= VFREE; if (vp->v_usecount) panic("tobe free vnode isn't"); freevnodes++; } if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { /* * XXX: this is only here to be backwards compatible */ vp = NULL; } else { for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); if (!simple_lock_try(&vp->v_interlock)) continue; if (vp->v_usecount) panic("free vnode isn't"); object = vp->v_object; if (object && (object->resident_page_count || object->ref_count)) { printf("object inconsistent state: RPC: %d, RC: %d\n", object->resident_page_count, object->ref_count); /* Don't recycle if it's caching some pages */ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); continue; } else if (LIST_FIRST(&vp->v_cache_src)) { /* Don't recycle if active in the namecache */ simple_unlock(&vp->v_interlock); continue; } else { break; } } } for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { nvp = TAILQ_NEXT(tvp, v_freelist); TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); simple_unlock(&tvp->v_interlock); } if (vp) { vp->v_flag |= VDOOMED; TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; simple_unlock(&vnode_free_list_slock); cache_purge(vp); vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, p); } else { simple_unlock(&vp->v_interlock); } #ifdef DIAGNOSTIC { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastr = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ vp->v_maxio = 0; } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); numvnodes++; } vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; splx(s); vfs_object_create(vp, p, p->p_ucred, TRUE); return (0); } /* * Move a vnode from one mount queue to another. */ static void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { simple_unlock(&mntvnode_slock); return; } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); simple_unlock(&mntvnode_slock); } /* * Update outstanding I/O count and do wakeup if requested. 
*/ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; - if (flags & V_SAVE) { + if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) panic("vinvalbuf: dirty bufs"); } s = splbio(); for (;;) { if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) continue; if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; error = tsleep((caddr_t) bp, slpflag | (PRIBIO + 1), "vinvalbuf", slptimeo); if (error) { splx(s); return (error); } break; } bremfree(bp); bp->b_flags |= B_BUSY; /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { if (bp->b_vp == vp) { if (bp->b_flags & B_CLUSTEROK) { vfs_bio_awrite(bp); } else { bp->b_flags |= B_ASYNC; VOP_BWRITE(bp); } } else { (void) VOP_BWRITE(bp); } break; } bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); brelse(bp); } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { if (flags & V_SAVEMETA) vm_object_page_remove(object, 0, object->size, (flags & V_SAVE) ? TRUE : FALSE); else vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } simple_unlock(&vp->v_interlock); if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; #if defined(DIAGNOSTIC) if (bp->b_vp) panic("bgetvp: not free"); #endif vhold(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; /* * Insert onto list for new vnode. */ s = splbio(); bufinsvn(bp, &vp->v_cleanblkhd); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; int s; #if defined(DIAGNOSTIC) if (bp->b_vp == (struct vnode *) 0) panic("brelvp: NULL"); #endif /* * Delete from old vnode list, if on one. 
*/ + vp = bp->b_vp; s = splbio(); if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); + if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) { + vp->v_flag &= ~VONWORKLST; + LIST_REMOVE(vp, v_synclist); + } splx(s); - - vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; vdrop(vp); }
/* + * The workitem queue. + * + * It is useful to delay writes of file data and filesystem metadata + * for tens of seconds so that quickly created and deleted files need + * not waste disk bandwidth being created and removed. To realize this, + * we append vnodes to a "workitem" queue. When running with a soft + * updates implementation, most pending metadata dependencies should + * not wait for more than a few seconds. Thus, metadata on mounted + * block devices is delayed only about half the time that file data is + * delayed. Similarly, directory updates are more critical, so they are + * delayed only about a third of the time that file data is delayed. + * Thus, there are SYNCER_MAXDELAY queues that are processed round-robin + * at a rate of one each second (driven off the filesystem syncer + * process). The syncer_delayno variable indicates the next queue that + * is to be processed. + * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] + * + */
+ +/* + * Add an item to the syncer work queue. + */ +void +vn_syncer_add_to_worklist(vp, delay) + struct vnode *vp; + int delay; +{ + int s, slot; + + s = splbio(); + + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + } + + if (delay > syncer_maxdelay - 2) + delay = syncer_maxdelay - 2; + slot = (syncer_delayno + delay) & syncer_mask; + + LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + vp->v_flag |= VONWORKLST; + splx(s); +}
+ +static void sched_sync __P((void)); +static struct proc *updateproc; +static struct kproc_desc up_kp = { + "syncer", + sched_sync, + &updateproc +}; +SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
+ +/* + * System filesystem synchronizer daemon. + */ +void +sched_sync(void) +{ + struct synclist *slp; + struct vnode *vp; + long starttime; + int s; + struct proc *p = updateproc; + + for (;;) { + starttime = time.tv_sec; + + /* + * Push files whose dirty time has expired. + */ + s = splbio(); + slp = &syncer_workitem_pending[syncer_delayno]; + syncer_delayno += 1; + if (syncer_delayno == syncer_maxdelay) + syncer_delayno = 0; + splx(s); + + while ((vp = LIST_FIRST(slp)) != NULL) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); + VOP_UNLOCK(vp, 0, p); + if (LIST_FIRST(slp) == vp) { + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + vp->v_type != VBLK) + panic("sched_sync: fsync failed"); + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(vp, v_synclist); + vn_syncer_add_to_worklist(vp, syncdelay); + } + } + + /* + * Do soft update processing. + */ + if (bioops.io_sync) + (*bioops.io_sync)(NULL); + + /* + * The variable rushjob allows the kernel to speed up the + * processing of the filesystem syncer process. A rushjob + * value of N tells the filesystem syncer to process the next + * N seconds worth of work on its queue ASAP.
Currently rushjob + * is used by the soft update code to speed up the filesystem + * syncer process when the incore state is getting so far + * ahead of the disk that the kernel memory pool is being + * threatened with exhaustion. + */ + if (rushjob > 0) { + rushjob -= 1; + continue; + } + /* + * If it has taken us less than a second to process the + * current work, then wait. Otherwise start right over + * again. We can still lose time if any single round + * takes more than two seconds, but it does not really + * matter as we are just trying to generally pace the + * filesystem activity. + */ + if (time.tv_sec == starttime) + tsleep(&lbolt, PPAUSE, "syncer", 0); + } +} + +/* * Associate a p-buffer with a vnode. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { #if defined(DIAGNOSTIC) if (bp->b_vp) panic("pbgetvp: not free"); #endif bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { #if defined(DIAGNOSTIC) if (bp->b_vp == (struct vnode *) 0) panic("pbrelvp: NULL"); #endif bp->b_vp = (struct vnode *) 0; } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { + struct buflists *listheadp; + int delay; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_vnbufs.le_next != NOLIST) { bufremvn(bp); vdrop(bp->b_vp); } /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; - tbp = newvp->v_dirtyblkhd.lh_first; + listheadp = &newvp->v_dirtyblkhd; + if ((newvp->v_flag & VONWORKLST) == 0) { + switch (newvp->v_type) { + case VDIR: + delay = syncdelay / 3; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = syncdelay / 2; + break; + } + /* fall through */ + default: + delay = syncdelay; + } + vn_syncer_add_to_worklist(newvp, delay); + } + tbp = listheadp->lh_first; if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { - bufinsvn(bp, &newvp->v_dirtyblkhd); + bufinsvn(bp, listheadp); } else { while (tbp->b_vnbufs.le_next && - (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); } } else { bufinsvn(bp, &newvp->v_cleanblkhd); + if ((newvp->v_flag & VONWORKLST) && + LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { + newvp->v_flag &= ~VONWORKLST; + LIST_REMOVE(newvp, v_synclist); + } } bp->b_vp = newvp; vhold(bp->b_vp); splx(s); } #ifndef DEVFS_ROOT /* * Create a vnode for a block device. * Used for mounting the root file system. */ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) return (0); error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); if (error) { *vpp = 0; return (error); } vp = nvp; vp->v_type = VBLK; if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { vput(vp); vp = nvp; } *vpp = vp; return (0); } #endif /* !DEVFS_ROOT */ /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of * bdevvp() or because of a different vnode representing * the same block device). 
If such an alias exists, deallocate * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ struct vnode * checkalias(nvp, nvp_rdev, mp) register struct vnode *nvp; dev_t nvp_rdev; struct mount *mp; { struct proc *p = curproc; /* XXX */ struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { simple_unlock(&spechash_slock); vgonel(vp, p); goto loop; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { simple_unlock(&spechash_slock); goto loop; } break; } if (vp == NULL || vp->v_tag != VT_NON) { MALLOC(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), M_VNODE, M_WAITOK); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; simple_unlock(&spechash_slock); *vpp = nvp; if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } simple_unlock(&spechash_slock); VOP_UNLOCK(vp, 0, p); simple_lock(&vp->v_interlock); vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); return (vp); }
/* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, flags, p) register struct vnode *vp; int flags; struct proc *p; { int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { simple_lock(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } vp->v_usecount++; if (VSHOULDBUSY(vp)) vbusy(vp); - if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* * must expand vrele here because we do not want * to call VOP_INACTIVE if the reference count * drops back to zero since it was never really * active. We must remove it from the free list * before sleeping so that multiple processes do * not try to recycle it. */ simple_lock(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } return (error); } simple_unlock(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { simple_lock(&vp->v_interlock); vp->v_usecount++; simple_unlock(&vp->v_interlock); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist.
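 *
 * A typical reference cycle, sketched for illustration (error handling
 * elided; vget/vput when the vnode lock is wanted, VREF/vrele when not):
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
 *		... use the locked vnode ...
 *		vput(vp);		(unlock and drop the reference)
 *	}
 *
 *	VREF(vp);			(take another reference)
 *	... use the vnode without its lock ...
 *	vrele(vp);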
*/ void vrele(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; simple_unlock(&vp->v_interlock); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { VOP_INACTIVE(vp, p); } } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } } void vput(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vput: null vp"); #endif simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } else { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } } /* * Somebody doesn't want the vnode recycled. */ void vhold(vp) register struct vnode *vp; { simple_lock(&vp->v_interlock); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); simple_unlock(&vp->v_interlock); } /* * One less who cares about this vnode. */ void vdrop(vp) register struct vnode *vp; { simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) - panic("holdrele: holdcnt"); + panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { struct proc *p = curproc; /* XXX */ struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { simple_unlock(&vp->v_interlock); continue; } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { simple_unlock(&vp->v_interlock); continue; } /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { simple_unlock(&mntvnode_slock); vgonel(vp, p); simple_lock(&mntvnode_slock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. 
For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, p); } else { vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif simple_unlock(&vp->v_interlock); busy++; } simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ static void vclean(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int active; vm_object_t obj; /* * Check to see if the vnode is in use. If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK * ensures that the VOP_INACTIVE routine is done with its work. * For active vnodes, it ensures that no other activity can * occur while the underlying object is being cleaned out. */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* * Clean out any buffers associated with the vnode. */ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); if (obj = vp->v_object) { if (obj->ref_count == 0) { /* * This is a normal way of shutting down the object/vnode * association. */ vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); } } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); cache_purge(vp); if (vp->v_vnlock) { #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ #ifdef DIAGNOSTIC if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) vprint("vclean: lock not drained", vp); #endif #endif FREE(vp->v_vnlock, M_VNODE); vp->v_vnlock = NULL; } if (VSHOULDFREE(vp)) vfree(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vn_pollgone(vp); vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ int vop_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp, *vq; struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if ((ap->a_flags & REVOKEALL) == 0) panic("vop_revoke"); #endif vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. 
*/ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ vp->v_flag |= VXLOCK; simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; simple_unlock(&spechash_slock); vgone(vq); break; } if (vq == NULLVP) { simple_unlock(&spechash_slock); } } /* * Remove the lock so that vgone below will * really eliminate the vnode after which time * vgone will awaken any sleepers. */ simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup(vp); } } vgonel(vp, p); return (0); } /* * Recycle an unused vnode to the front of the free list. * Release the passed interlock if the vnode will be recycled. */ int vrecycle(vp, inter_lkp, p) struct vnode *vp; struct simplelock *inter_lkp; struct proc *p; { simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { simple_unlock(inter_lkp); } vgonel(vp, p); return (1); } simple_unlock(&vp->v_interlock); return (0); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { struct proc *p = curproc; /* XXX */ simple_lock(&vp->v_interlock); vgonel(vp, p); } /* * vgone, with the vp interlock held. */ static void vgonel(vp, p) struct vnode *vp; struct proc *p; { int s; struct vnode *vq; struct vnode *vx; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE, p); simple_lock(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_specnext != vp) continue; vq->v_specnext = vp->v_specnext; break; } if (vq == NULL) panic("missing bdev"); } if (vp->v_flag & VALIASED) { vx = NULL; for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; if (vx) break; vx = vq; } if (vx == NULL) panic("missing alias"); if (vq == NULL) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, * move it to the head of the list. The test of the back * pointer and the reference count of zero is because * it will be removed from the free list by getnewvnode, * but will not have its reference count incremented until * after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to * close the previous instance of the underlying object. 
*/ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VFREE) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); } else if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; freevnodes++; } else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); splx(s); } vp->v_type = VBAD; simple_unlock(&vp->v_interlock); } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { register struct vnode *vp; int rc = 0; simple_lock(&spechash_slock); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; rc = 1; break; } simple_unlock(&spechash_slock); return (rc); } /* * Calculate the total number of references to a special device. */ int vcount(vp) register struct vnode *vp; { struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); simple_lock(&spechash_slock); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { simple_unlock(&spechash_slock); vgone(vq); goto loop; } count += vq->v_usecount; } simple_unlock(&spechash_slock); return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; register struct vnode *vp; { char buf[64]; if (label != NULL) printf("%s: %x: ", label, vp); else printf("%x: ", vp); printf("type %s, usecount %d, writecount %d, refcount %ld,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VALIASED) strcat(buf, "|VALIASED"); if (vp->v_flag & VDOOMED) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); if (vp->v_flag & VOBJBUF) strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ static void printlockedvnodes() { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *vp; printf("Locked vnodes\n"); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { if (VOP_ISLOCKED(vp)) vprint((char *)0, vp); } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); } #endif /* * Top level filesystem related information gathering. 
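 *
 * From user space these are reached with sysctl(3); for instance, a
 * sketch of fetching the vfsconf record for one filesystem type (this
 * mirrors what getvfsbyname(3) does; "typenum" is a caller-supplied
 * type number):
 *
 *	int mib[4] = { CTL_VFS, VFS_GENERIC, VFS_CONF, typenum };
 *	struct vfsconf vfc;
 *	size_t len = sizeof(vfc);
 *
 *	if (sysctl(mib, 4, &vfc, &len, NULL, 0) < 0)
 *		err(1, "vfs.generic.conf");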
*/ static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); static int vfs_sysctl SYSCTL_HANDLER_ARGS { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; #ifndef NO_COMPAT_PRELITE2 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif #ifdef notyet /* all sysctl names at this level are at least name and field */ if (namelen < 2) return (ENOTDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) return (EOPNOTSUPP); return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, p)); } #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); } return (EOPNOTSUPP); } SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, "Generic filesystem"); #ifndef NO_COMPAT_PRELITE2 static int sysctl_ovfs_conf SYSCTL_HANDLER_ARGS { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* !NO_COMPAT_PRELITE2 */ static volatile int kinfo_vdebug = 1; #if 0 #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int sysctl_vnode SYSCTL_HANDLER_ARGS { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *nvp, *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } again: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); if (kinfo_vdebug) printf("kinfo: vp changed\n"); goto again; } nvp = vp->v_mntvnodes.le_next; simple_unlock(&mntvnode_slock); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); return (0); } #endif /* * XXX * Exporting the vnode list on large systems causes them to crash. * Exporting the vnode list on medium systems causes sysctl to coredump. */ #if 0 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); #endif /* * Check to see if a filesystem is mounted on a block device. 
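 *
 * Filesystem mount routines typically call this on the device vnode
 * before claiming the device, along the lines of (a sketch; "devvp" is
 * the block device vnode being mounted):
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return (error);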
*/ int vfs_mountedon(vp) struct vnode *vp; { struct vnode *vq; int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) { + if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } } simple_unlock(&spechash_slock); } return (error); } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall() { struct mount *mp, *nmp; struct proc *p = initproc; /* XXX XXX should this be proc0? */ int error; /* * Since this only runs when rebooting, it is not interlocked. */ for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_prev; error = dounmount(mp, MNT_FORCE, p); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ static int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. 
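 *
 * Called when an export is being torn down; vfs_export() below does,
 * in effect:
 *
 *	if (argp->ex_flags & MNT_DELEXPORT) {
 *		vfs_free_addrlist(nep);
 *		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 *	}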
*/ static void vfs_free_addrlist(nep) struct netexport *nep; { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); mp->mnt_flag &= ~MNT_EXPUBLIC; } vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) return (error); mp->mnt_flag |= MNT_EXPUBLIC; } if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. */ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { FREE(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, &rvp))) return (error); if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { FREE(nfs_pub.np_index, M_TEMP); return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct sockaddr *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. 
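 *
 * MNT_WAIT asks for a synchronous pass, MNT_NOWAIT for best effort;
 * e.g. dounmount() (below in this change) pushes dirty pages out
 * before unmounting with:
 *
 *	vfs_msync(mp, MNT_WAIT);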
*/ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; int anyio, tries; tries = 5; loop: anyio = 0; for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { nvp = vp->v_mntvnodes.le_next; if (vp->v_mount != mp) { goto loop; } if ((vp->v_flag & VXLOCK) || (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) { continue; } simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, TRUE); anyio = 1; } vput(vp); } } else { simple_unlock(&vp->v_interlock); } } if (anyio && (--tries > 0)) goto loop; } /* * Create the VM object needed for VMIO and mmap support. This * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. * * If !waslocked, must be called with interlock. */ int vfs_object_create(vp, p, cred, waslocked) struct vnode *vp; struct proc *p; struct ucred *cred; int waslocked; { struct vattr vat; vm_object_t object; int error = 0; if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { if (!waslocked) simple_unlock(&vp->v_interlock); return 0; } if (!waslocked) vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; object = vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); } else if (major(vp->v_rdev) < nblkdev) { /* * This simply allocates the biggest object possible * for a VBLK vnode. This should be fixed, but doesn't * cause any problems (yet). */ object = vnode_pager_alloc(vp, INT_MAX, 0, 0); } object->ref_count--; vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } } if (vp->v_object) { vp->v_flag |= VOBJBUF; } retn: if (!waslocked) { simple_lock(&vp->v_interlock); VOP_UNLOCK(vp, LK_INTERLOCK, p); } return error; } static void vfree(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); } void vbusy(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; } simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~(VFREE|VAGE); splx(s); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) 
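 *
 * A filesystem's VOP_POLL routine would use it roughly as follows
 * (a sketch only; the vop_poll_args layout is assumed):
 *
 *	int
 *	fs_poll(ap)
 *		struct vop_poll_args *ap;
 *	{
 *		return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
 *	}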
*/ int vn_pollrecord(vp, p, events) struct vnode *vp; struct proc *p; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; simple_unlock(&vp->v_pollinfo.vpi_lock); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(p, &vp->v_pollinfo.vpi_selinfo); simple_unlock(&vp->v_pollinfo.vpi_lock); return 0; }
/* * Note the occurrence of an event. If the VN_POLLEVENT macro is used, * it is possible for us to miss an event due to race conditions, but * that condition is expected to be rare, so for the moment it is the * preferred interface. */ void vn_pollevent(vp, events) struct vnode *vp; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't * call selwakeup() twice if two events are * posted before the polling process(es) is * awakened. This also ensures that we take at * most one selwakeup() if the polling process * is no longer interested. However, it does * mean that only one event can be noticed at * a time. (Perhaps we should only clear those * event bits which we note?) XXX */ vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); }
/* * Wake up anyone polling on vp because it is being revoked. * This depends on dead_poll() returning POLLHUP for correct * behavior. */ void vn_pollgone(vp) struct vnode *vp; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); +} + + + +/* + * Routine to create and manage a filesystem syncer vnode. + */ +#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) +int sync_fsync __P((struct vop_fsync_args *)); +int sync_inactive __P((struct vop_inactive_args *)); +int sync_reclaim __P((struct vop_reclaim_args *)); +#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) +#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) +int sync_print __P((struct vop_print_args *)); +#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
+ +vop_t **sync_vnodeop_p; +struct vnodeopv_entry_desc sync_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_eopnotsupp }, + { &vop_close_desc, (vop_t *) sync_close }, /* close */ + { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ + { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ + { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ + { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ + { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ + { &vop_print_desc, (vop_t *) sync_print }, /* print */ + { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ + { NULL, NULL } +}; +struct vnodeopv_desc sync_vnodeop_opv_desc = + { &sync_vnodeop_p, sync_vnodeop_entries }; + +VNODEOP_SET(sync_vnodeop_opv_desc); + +/* + * Create a new filesystem syncer vnode for the specified mount point.
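+ *
+ * A mount point gets one of these whenever it is mounted (or updated)
+ * read-write; the mount(2) and dounmount() changes below do, in effect:
+ *
+ *	if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ *		error = vfs_allocate_syncvnode(mp);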
+ */ +int +vfs_allocate_syncvnode(mp) + struct mount *mp; +{ + struct vnode *vp; + static long start, incr, next; + int error; + + /* Allocate a new vnode */ + if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { + mp->mnt_syncer = NULL; + return (error); + } + vp->v_type = VNON; + /* + * Place the vnode onto the syncer worklist. We attempt to + * scatter them about on the list so that they will go off + * at evenly distributed times even if all the filesystems + * are mounted at once. + */ + next += incr; + if (next == 0 || next > syncer_maxdelay) { + start /= 2; + incr /= 2; + if (start == 0) { + start = syncer_maxdelay / 2; + incr = syncer_maxdelay; + } + next = start; + } + vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); + mp->mnt_syncer = vp; + return (0); +}
+ +/* + * Do a lazy sync of the filesystem. + */ +int +sync_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + struct vnode *syncvp = ap->a_vp; + struct mount *mp = syncvp->v_mount; + struct proc *p = ap->a_p; + int asyncflag; + + /* + * We only need to do something if this is a lazy evaluation. + */ + if (ap->a_waitfor != MNT_LAZY) + return (0); + + /* + * Move ourselves to the back of the sync list. + */ + vn_syncer_add_to_worklist(syncvp, syncdelay); + + /* + * Walk the list of vnodes pushing all that are dirty and + * not already on the sync list. + */ + simple_lock(&mountlist_slock); + if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) + return (0); + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + vfs_unbusy(mp, p); + return (0); +}
+ +/* + * The syncer vnode is no longer referenced. + */ +int +sync_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + + vgone(ap->a_vp); + return (0); +}
+ +/* + * The syncer vnode is no longer needed and is being decommissioned. + */ +int +sync_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + vp->v_mount->mnt_syncer = NULL; + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + vp->v_flag &= ~VONWORKLST; + } + + return (0); +}
+ +/* + * Print out a syncer vnode. + */ +int +sync_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + printf("syncer vnode"); + if (vp->v_vnlock != NULL) + lockmgr_printinfo(vp->v_vnlock); + printf("\n"); + return (0); }
Index: head/sys/kern/vfs_syscalls.c =================================================================== --- head/sys/kern/vfs_syscalls.c (revision 34265) +++ head/sys/kern/vfs_syscalls.c (revision 34266) @@ -1,2826 +1,2841 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.93 1998/02/15 04:17:09 dyson Exp $ + * $Id: vfs_syscalls.c,v 1.94 1998/03/07 21:35:39 dyson Exp $ */ /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" /* * XXX - The following is required because of some magic done * in getdirentries() below which is only done if the translucent * filesystem `UNION' is compiled into the kernel. This is broken, * but I don't have time to study the code deeply enough to understand * what's going on and determine an appropriate fix. -GAW */ #include "opt_union.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UNION #include #endif #include #include #include #include #include static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); static int usermount = 0; /* if 1, non-root can mount fs. */ SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); /* * Virtual File System System Calls */ /* * Mount a file system. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(p, uap) struct proc *p; register struct mount_args /* { syscallarg(char *) type; syscallarg(char *) path; syscallarg(int) flags; syscallarg(caddr_t) data; } */ *uap; { struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; int error, flag = 0, flag2 = 0; struct vattr va; u_long fstypenum; struct nameidata nd; char fstypename[MFSNAMELEN]; if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag))) return (error); /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (SCARG(uap, flags) & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; flag = mp->mnt_flag; flag2 = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. 
*/ if ((SCARG(uap, flags) & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } mp->mnt_flag |= SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); /* * Only root, or the user that did the original mount is * permitted to update it. */ if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (vfs_busy(mp, LK_NOWAIT, 0, p)) { vput(vp); return (EBUSY); } VOP_UNLOCK(vp, 0, p); goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || (va.va_uid != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag)))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) return (error); if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } #ifdef COMPAT_43 /* * Historically filesystem types were identified by number. If we * get an integer for the filesystem type instead of a string, we * check to see if it matches one of the historic filesystem types. */ fstypenum = (u_long)SCARG(uap, type); if (fstypenum < maxvfsconf) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == fstypenum) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); } else #endif /* COMPAT_43 */ if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { vput(vp); return (error); } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } if (vp->v_mountedhere != NULL) { vput(vp); return (EBUSY); } /* * Allocate and initialize the filesystem. */ mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = p->p_ucred->cr_uid; update: /* * Set the mount level flags. */ if (SCARG(uap, flags) & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); /* * Mount the filesystem. 
*/ error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); if (mp->mnt_flag & MNT_UPDATE) { vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = flag2; } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } vfs_unbusy(mp, p); return (error); } /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); simple_unlock(&mountlist_slock); checkdirs(vp); VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, p); if (error = VFS_START(mp, 0, p)) vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); } return (error); } /* * Scan all active processes to see if any of them have a current * or root directory onto which the new filesystem has just been * mounted. If so, replace them with the new mount point. */ static void checkdirs(olddp) struct vnode *olddp; { struct filedesc *fdp; struct vnode *newdp; struct proc *p; if (olddp->v_usecount == 1) return; if (VFS_ROOT(olddp->v_mountedhere, &newdp)) panic("mount: lost mount"); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { fdp = p->p_fd; if (fdp->fd_cdir == olddp) { vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; } if (fdp->fd_rdir == olddp) { vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; } } if (rootvnode == olddp) { vrele(rootvnode); VREF(newdp); rootvnode = newdp; } vput(newdp); } /* * Unmount a file system. * * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(p, uap) struct proc *p; register struct unmount_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; mp = vp->v_mount; /* * Only root, or the user that did the original mount is * permitted to unmount this filesystem. */ if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Don't allow unmounting the root file system. */ if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } vput(vp); return (dounmount(mp, SCARG(uap, flags), p)); } /* * Do the actual file system unmount. 
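 *
 * unmount(2) above reduces to this once its permission and root-vnode
 * checks pass:
 *
 *	return (dounmount(mp, SCARG(uap, flags), p));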
*/ int dounmount(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { struct vnode *coveredvp; int error; simple_lock(&mountlist_slock); mp->mnt_kern_flag |= MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); simple_lock(&mountlist_slock); if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, &mountlist_slock, p); return (error); } CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { coveredvp->v_mountedhere = (struct mount *)0; vrele(coveredvp); } mp->mnt_vfc->vfc_refcount--; if (mp->mnt_vnodelist.lh_first != NULL) panic("unmount: dangling vnode"); lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); free((caddr_t)mp, M_MOUNT); return (0); } /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(p, uap) struct proc *p; struct sync_args *uap; { register struct mount *mp, *nmp; int asyncflag; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); - VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? p->p_ucred : NOCRED, p); - if (asyncflag) - mp->mnt_flag |= MNT_ASYNC; + VFS_SYNC(mp, MNT_NOWAIT, + ((p != NULL) ? p->p_ucred : NOCRED), p); + mp->mnt_flag |= asyncflag; } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(p, uap) struct proc *p; register struct quotactl_args /* { syscallarg(char *) path; syscallarg(int) cmd; syscallarg(int) uid; syscallarg(caddr_t) arg; } */ *uap; { register struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), SCARG(uap, arg), p)); } /* * Get filesystem statistics. 
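 *
 * (An editor's sketch illustrating dounmount() above is interposed
 * here; the statfs() family follows it.)
 */

/*
 * Editor's sketch, not part of this commit: a standalone userland
 * fragment.  MNT_FORCE makes dounmount() proceed to VFS_UNMOUNT()
 * even when VFS_SYNC() fails; without it, a busy filesystem hands
 * the error straight back to the caller.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

int
force_unmount(const char *dir)
{
	if (unmount(dir, MNT_FORCE) < 0) {
		perror("unmount");
		return (-1);
	}
	return (0);
}

/*
 * (End of editor's sketch; the original comment resumes.)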
*/ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(p, uap) struct proc *p; register struct statfs_args /* { syscallarg(char *) path; syscallarg(struct statfs *) buf; } */ *uap; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; vrele(nd.ni_vp); error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(p, uap) struct proc *p; register struct fstatfs_args /* { syscallarg(int) fd; syscallarg(struct statfs *) buf; } */ *uap; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; struct statfs sb; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(p, uap) struct proc *p; register struct getfsstat_args /* { syscallarg(struct statfs *) buf; syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); count = 0; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. */ - if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, p); return (error); } sfsp += sizeof(*sp); } count++; simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) p->p_retval[0] = maxcount; else p->p_retval[0] = count; return (0); } /* * Change current working directory to a given file descriptor. 
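 *
 * (An editor's sketch illustrating getfsstat() above is interposed
 * here; fchdir() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: standalone userland use
 * of getfsstat().  MNT_NOWAIT returns the cached statfs data without
 * calling into each filesystem, so an unresponsive NFS server cannot
 * stall the listing.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>
#include <stdlib.h>

void
list_mounts(void)
{
	struct statfs *sf;
	int i, n;

	n = getfsstat(NULL, 0, MNT_NOWAIT);	/* count mounts only */
	if (n <= 0)
		return;
	sf = malloc(n * sizeof(*sf));
	if (sf == NULL)
		return;
	n = getfsstat(sf, n * sizeof(*sf), MNT_NOWAIT);
	for (i = 0; i < n; i++)
		printf("%s on %s (%s)\n", sf[i].f_mntfromname,
		    sf[i].f_mntonname, sf[i].f_fstypename);
	free(sf);
}

/*
 * (End of editor's sketch; the original comment resumes.)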
*/ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(p, uap) struct proc *p; struct fchdir_args /* { syscallarg(int) fd; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct vnode *vp, *tdp; struct mount *mp; struct file *fp; int error; if (error = getvnode(fdp, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, p)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, p); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(p, uap) struct proc *p; struct chdir_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_cdir); fdp->fd_cdir = nd.ni_vp; return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(p, uap) struct proc *p; struct chroot_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_rdir); fdp->fd_rdir = nd.ni_vp; return (0); } /* * Common routine for chroot and chdir. */ static int change_dir(ndp, p) register struct nameidata *ndp; struct proc *p; { struct vnode *vp; int error; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); if (error) vput(vp); else VOP_UNLOCK(vp, 0, p); return (error); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. 
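 *
 * (An editor's sketch illustrating fchdir() above is interposed here;
 * open() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: the save/restore-cwd
 * idiom built on fchdir().  A directory opened O_RDONLY passes the
 * getvnode()/VDIR/VOP_ACCESS checks above.
 */
#include <fcntl.h>
#include <unistd.h>

int
with_directory(const char *dir, int (*fn)(void))
{
	int error, savefd;

	savefd = open(".", O_RDONLY);
	if (savefd < 0)
		return (-1);
	if (chdir(dir) < 0) {
		(void)close(savefd);
		return (-1);
	}
	error = (*fn)();
	(void)fchdir(savefd);	/* best effort: restore the old cwd */
	(void)close(savefd);
	return (error);
}

/*
 * (End of editor's sketch; the original comment resumes.)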
*/ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(p, uap) struct proc *p; register struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register struct vnode *vp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; oflags = SCARG(uap, flags); if ((oflags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(oflags); error = falloc(p, &nfp, &indx); if (error) return (error); fp = nfp; cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ error = vn_open(&nd, flags, cmode); if (error) { ffree(fp); if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } if (error == ERESTART) error = EINTR; fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; vp = nd.ni_vp; fp->f_flag = flags & FMASK; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); ffree(fp); fdp->fd_ofiles[indx] = NULL; return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (vp->v_object == NULL)) vfs_object_create(vp, p, p->p_ucred, TRUE); VOP_UNLOCK(vp, 0, p); p->p_retval[0] = indx; return (0); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(p, uap) struct proc *p; register struct ocreat_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, mode) = SCARG(uap, mode); SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; return (open(p, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. 
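 *
 * (An editor's sketch illustrating the open()-time locking above is
 * interposed here; mknod() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: O_EXLOCK takes an
 * exclusive flock-style lock atomically with the open (the
 * VOP_ADVLOCK call above); O_NONBLOCK clears F_WAIT, so a lock held
 * elsewhere fails immediately instead of sleeping.
 */
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

int
open_locked(const char *path)
{
	int fd;

	fd = open(path, O_RDWR | O_EXLOCK | O_NONBLOCK);
	if (fd < 0 && errno == EWOULDBLOCK)
		fprintf(stderr, "%s: locked by another process\n", path);
	return (fd);
}

/*
 * (End of editor's sketch; the original comment resumes.)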
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(p, uap) struct proc *p; register struct mknod_args /* { syscallarg(char *) path; syscallarg(int) mode; syscallarg(int) dev; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; int whiteout; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) error = EEXIST; else { VATTR_NULL(&vattr); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; vattr.va_rdev = SCARG(uap, dev); whiteout = 0; switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (whiteout) { error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); if (error) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); } else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp) vrele(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(p, uap) struct proc *p; register struct mkfifo_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(p, uap) struct proc *p; register struct link_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { register struct vnode *vp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); error = EEXIST; } else { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } } } vrele(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. 
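 *
 * (An editor's sketch illustrating mkfifo() above is interposed here;
 * symlink() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: creating a named pipe.
 * Unlike mknod() above, which is root-only and reserved for device
 * nodes and whiteouts, mkfifo() needs no privilege.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>

int
ensure_fifo(const char *path)
{
	/*
	 * Tolerate an already existing fifo; note this does not verify
	 * that an existing file actually is a fifo.
	 */
	if (mkfifo(path, 0666) < 0 && errno != EEXIST)
		return (-1);
	return (0);
}

/*
 * (End of editor's sketch; the original comment resumes.)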
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(p, uap) struct proc *p; register struct symlink_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { struct vattr vattr; char *path; int error; struct nameidata nd; path = zalloc(namei_zone); if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if (error = namei(&nd)) goto out; if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: zfree(namei_zone, path); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(p, uap) struct proc *p; register struct undelete_args /* { syscallarg(char *) path; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(p, uap) struct proc *p; struct unlink_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_flag & VROOT) error = EBUSY; } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp != NULLVP) vput(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. 
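 *
 * (An editor's sketch illustrating symlink()/unlink() above is
 * interposed here; lseek() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: symlink() refuses to
 * overwrite (the nd.ni_vp check above returns EEXIST), so repointing
 * a link atomically is done by creating a temporary name and
 * rename()ing it over the old one.
 */
#include <stdio.h>
#include <unistd.h>

int
replace_symlink(const char *target, const char *linkpath)
{
	char tmp[1024];

	(void)snprintf(tmp, sizeof(tmp), "%s.tmp", linkpath);
	(void)unlink(tmp);
	if (symlink(target, tmp) < 0)
		return (-1);
	if (rename(tmp, linkpath) < 0) {
		(void)unlink(tmp);
		return (-1);
	}
	return (0);
}

/*
 * (End of editor's sketch; the original comment resumes.)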
*/ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(p, uap) struct proc *p; register struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ *uap; { struct ucred *cred = p->p_ucred; register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vattr vattr; int error; if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (ESPIPE); switch (SCARG(uap, whence)) { case L_INCR: fp->f_offset += SCARG(uap, offset); break; case L_XTND: error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); if (error) return (error); fp->f_offset = SCARG(uap, offset) + vattr.va_size; break; case L_SET: fp->f_offset = SCARG(uap, offset); break; default: return (EINVAL); } *(off_t *)(p->p_retval) = fp->f_offset; return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(p, uap) struct proc *p; register struct olseek_args /* { syscallarg(int) fd; syscallarg(long) offset; syscallarg(int) whence; } */ *uap; { struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ nuap; int error; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, offset) = SCARG(uap, offset); SCARG(&nuap, whence) = SCARG(uap, whence); error = lseek(p, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(p, uap) struct proc *p; register struct access_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct ucred *cred = p->p_ucred; register struct vnode *vp; int error, flags, t_gid, t_uid; struct nameidata nd; t_uid = cred->cr_uid; t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) goto out1; vp = nd.ni_vp; /* Flags == 0 means only check for existence. */ if (SCARG(uap, flags)) { flags = 0; if (SCARG(uap, flags) & R_OK) flags |= VREAD; if (SCARG(uap, flags) & W_OK) flags |= VWRITE; if (SCARG(uap, flags) & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, p); } vput(vp); out1: cred->cr_uid = t_uid; cred->cr_groups[0] = t_gid; return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(p, uap) struct proc *p; register struct ostat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. 
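 *
 * (An editor's sketch illustrating access() above is interposed here;
 * the stat() family follows it.)
 */

/*
 * Editor's sketch, not part of this commit: access() checks against
 * the real uid/gid (the code above temporarily swaps them into the
 * credential), which is what a set-uid program wants before touching
 * a user-supplied path.  The check and any later open() are not
 * atomic, so the answer is advisory only.
 */
#include <unistd.h>

int
user_may_write(const char *path)
{
	return (access(path, W_OK) == 0);
}

/*
 * (End of editor's sketch; the original comment resumes.)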
*/ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(p, uap) struct proc *p; register struct olstat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); if (vp->v_type == VLNK) sb.st_mode |= S_IFLNK | ACCESSPERMS; /* 0777 */ vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(p, uap) struct proc *p; register struct stat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(p, uap) struct proc *p; register struct lstat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); if (vp->v_type == VLNK) sb.st_mode |= S_IFLNK | ACCESSPERMS; /* 0777 */ vput(vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(p, uap) struct proc *p; register struct pathconf_args /* { syscallarg(char *) path; syscallarg(int) name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. 
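 *
 * (An editor's sketch illustrating lstat() above is interposed here;
 * readlink() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: stat() follows the final
 * symlink while lstat() does not; for a link, the kernel above forces
 * the reported mode to S_IFLNK | 0777 regardless of what is on disk.
 */
#include <sys/types.h>
#include <sys/stat.h>

int
is_symlink(const char *path)
{
	struct stat sb;

	if (lstat(path, &sb) < 0)
		return (0);
	return (S_ISLNK(sb.st_mode));
}

/*
 * (End of editor's sketch; the original comment resumes.)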
*/ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(p, uap) struct proc *p; register struct readlink_args /* { syscallarg(char *) path; syscallarg(char *) buf; syscallarg(int) count; } */ *uap; { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, p->p_ucred); } vput(vp); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(p, uap) struct proc *p; register struct chflags_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(p, uap) struct proc *p; register struct fchflags_args /* { syscallarg(int) fd; syscallarg(int) flags; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(p, uap) struct proc *p; register struct chmod_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change mode of a file given a file descriptor. 
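 *
 * (An editor's sketch illustrating readlink() above is interposed
 * here; fchmod() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: readlink() does not
 * NUL-terminate -- the kernel above just fills the iovec and returns
 * count minus the residual -- so callers must terminate the buffer
 * themselves.
 */
#include <unistd.h>

int
readlink_string(const char *path, char *buf, int bufsiz)
{
	int n;

	n = readlink(path, buf, bufsiz - 1);
	if (n >= 0)
		buf[n] = '\0';
	return (n);
}

/*
 * (End of editor's sketch; the original comment resumes.)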
*/ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(p, uap) struct proc *p; register struct fchmod_args /* { syscallarg(int) fd; syscallarg(int) mode; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(p, uap) struct proc *p; register struct chown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(p, uap) struct proc *p; register struct lchown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(p, uap) struct proc *p; register struct fchown_args /* { syscallarg(int) fd; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set the access and modification times of a file. 
*/ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(p, uap) struct proc *p; register struct utimes_args /* { syscallarg(char *) path; syscallarg(struct timeval *) tptr; } */ *uap; { register struct vnode *vp; struct timeval tv[2]; struct vattr vattr; int error; struct nameidata nd; VATTR_NULL(&vattr); if (SCARG(uap, tptr) == NULL) { microtime(&tv[0]); tv[1] = tv[0]; vattr.va_vaflags |= VA_UTIMES_NULL; } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, sizeof (tv))) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); vattr.va_atime.tv_sec = tv[0].tv_sec; vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; vattr.va_mtime.tv_sec = tv[1].tv_sec; vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(p, uap) struct proc *p; register struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (uap->length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(p, uap) struct proc *p; register struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } VOP_UNLOCK(vp, 0, p); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(p, uap) struct proc *p; register struct otruncate_args /* { syscallarg(char *) path; syscallarg(long) length; } */ *uap; { struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, length) = SCARG(uap, length); return (truncate(p, &nuap)); } /* * Truncate a file given a file descriptor. 
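 *
 * (An editor's sketch illustrating utimes() above is interposed here;
 * oftruncate() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: a NULL timeval pointer
 * means "now" (the VA_UTIMES_NULL path above), which the filesystem
 * permits to anyone with write access; explicit times require
 * ownership.
 */
#include <sys/time.h>
#include <stdio.h>

int
touch(const char *path)
{
	if (utimes(path, NULL) < 0) {
		perror(path);
		return (-1);
	}
	return (0);
}

/*
 * (End of editor's sketch; the original comment resumes.)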
*/ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(p, uap) struct proc *p; register struct oftruncate_args /* { syscallarg(int) fd; syscallarg(long) length; } */ *uap; { struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(p, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(p, uap) struct proc *p; struct fsync_args /* { syscallarg(int) fd; } */ *uap; { register struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) == NULL) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, FALSE); } error = VOP_FSYNC(vp, fp->f_cred, (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC)) ? MNT_NOWAIT : MNT_WAIT, p); VOP_UNLOCK(vp, 0, p); } return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(p, uap) struct proc *p; register struct rename_args /* { syscallarg(char *) from; syscallarg(char *) to; } */ *uap; { register struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, SCARG(uap, from), p); if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&tond)) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If source is the same as the destination (that is the * same inode number with the same name in the same directory), * then there is nothing to do. 
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(p, uap) struct proc *p; register struct mkdir_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (!error) vput(nd.ni_vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(p, uap) struct proc *p; struct rmdir_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a file system independent format. 
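 *
 * (An editor's sketch illustrating the rename() identity case above
 * is interposed here; ogetdirentries() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: renaming a name to itself
 * is a successful no-op.  The identity test above sets error = -1,
 * which the epilogue maps to a return of 0, and no VOP_RENAME is ever
 * issued.
 */
#include <stdio.h>

int
rename_self_is_noop(const char *path)
{
	/* Returns 1 for any existing path. */
	return (rename(path, path) == 0);
}

/*
 * (End of editor's sketch; the original comment resumes.)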
*/ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(p, uap) struct proc *p; register struct ogetdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = SCARG(uap, count); MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. 
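 *
 * (An editor's sketch illustrating raw directory reading above is
 * interposed here; getdirentries() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: consuming a raw
 * getdirentries() buffer from userland.  Entries are advanced
 * strictly by d_reclen, and a zero reclen aborts the walk -- the same
 * guard the compatibility loop above enforces with EIO.
 */
#include <sys/types.h>
#include <sys/dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

void
dump_names(int fd)	/* fd: a directory opened O_RDONLY */
{
	struct dirent *dp;
	char buf[4096], *cp;
	long base;
	int n;

	while ((n = getdirentries(fd, buf, sizeof(buf), &base)) > 0) {
		for (cp = buf; cp < buf + n; cp += dp->d_reclen) {
			dp = (struct dirent *)cp;
			if (dp->d_reclen == 0)
				return;		/* corrupt entry: stop */
			if (dp->d_fileno != 0)	/* skip deleted slots */
				printf("%s\n", dp->d_name);
		}
	}
}

/*
 * (End of editor's sketch; the original comment resumes.)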
*/ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(p, uap) struct proc *p; register struct getdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); /* vn_lock(vp, LK_SHARED | LK_RETRY, p); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Set the mode mask for creation of filesystem nodes. */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(p, uap) struct proc *p; struct umask_args /* { syscallarg(int) newmask; } */ *uap; { register struct filedesc *fdp; fdp = p->p_fd; p->p_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(p, uap) struct proc *p; register struct revoke_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. 
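 *
 * (An editor's sketch illustrating umask() above is interposed here;
 * getvnode() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: umask() cannot fail and
 * always returns the previous mask (the kernel above just swaps
 * fd_cmask), so merely reading it takes two calls.
 */
#include <sys/types.h>
#include <sys/stat.h>

mode_t
get_umask(void)
{
	mode_t m;

	m = umask(0);
	(void)umask(m);		/* put it back */
	return (m);
}

/*
 * (End of editor's sketch; the original comment resumes.)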
*/ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { struct file *fp; if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) return (EINVAL); *fpp = fp; return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getcwd_args { u_char *buf; u_int buflen; }; #endif #define STATNODE(mode, name, var) \ SYSCTL_INT(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); int __getcwd(p, uap) struct proc *p; struct __getcwd_args *uap; { char *bp, *buf; int error, i, slash_prefixed; struct filedesc *fdp; struct namecache *ncp; struct vnode *vp; numcwdcalls++; if (disablecwd) return (ENODEV); if (uap->buflen < 2) return (EINVAL); if (uap->buflen > MAXPATHLEN) uap->buflen = MAXPATHLEN; buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); bp += uap->buflen - 1; *bp = '\0'; fdp = p->p_fd; slash_prefixed = 0; for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { vp = vp->v_mount->mnt_vnodecovered; continue; } if (vp->v_dd->v_id != vp->v_ddid) { numcwdfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { numcwdfail2++; free(buf, M_TEMP); return (ENOENT); } if (ncp->nc_dvp != vp->v_dd) { numcwdfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = ncp->nc_name[i]; } if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; slash_prefixed = 1; vp = vp->v_dd; } if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; } numcwdfound++; error = copyout(bp, uap->buf, strlen(bp) + 1); free(buf, M_TEMP); return (error); } Index: head/sys/kern/vnode_if.src =================================================================== --- head/sys/kern/vnode_if.src (revision 34265) +++ head/sys/kern/vnode_if.src (revision 34266) @@ -1,461 +1,473 @@ # # Copyright (c) 1992, 1993 # The Regents of the University of California. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. All advertising materials mentioning features or use of this software # must display the following acknowledgement: # This product includes software developed by the University of # California, Berkeley and its contributors. # 4. 
Neither the name of the University nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#	@(#)vnode_if.src	8.12 (Berkeley) 5/14/95
-# $Id: vnode_if.src,v 1.14 1997/10/16 10:48:00 phk Exp $
+# $Id: vnode_if.src,v 1.15 1997/10/16 20:32:23 phk Exp $
#
#
# Above each of the vop descriptors is a specification of the locking
# protocol used by each vop call.  The first column is the name of
# the variable, the remaining three columns are in, out and error
# respectively.  The "in" column defines the lock state on input,
# the "out" column defines the state on successful return, and the
# "error" column defines the locking state on error exit.
#
# The locking value can take the following values:
# L: locked.
# U: unlocked.
# -: not applicable.  vnode does not yet (or no longer) exist.
# =: the same on input and output, may be either L or U.
# X: locked if not nil.
#
#
#% lookup	dvp	L ? ?
#% lookup	vpp	- L -
#
# XXX - the lookup locking protocol defies simple description and depends
#	on the flags and operation fields in the (cnp) structure.  Note
#	especially that *vpp may equal dvp and both may be locked.
#
vop_lookup {
	IN struct vnode *dvp;
	INOUT struct vnode **vpp;
	IN struct componentname *cnp;
};

#
#% cachedlookup	dvp	L ? ?
#% cachedlookup	vpp	- L -
#
# This must be an exact copy of lookup.  See kern/vfs_cache.c for details.
# vop_cachedlookup { IN struct vnode *dvp; INOUT struct vnode **vpp; IN struct componentname *cnp; }; # #% create dvp L U U #% create vpp - L - # vop_create { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # #% whiteout dvp L L L #% whiteout cnp - - - #% whiteout flag - - - # vop_whiteout { IN WILLRELE struct vnode *dvp; IN struct componentname *cnp; IN int flags; }; # #% mknod dvp L U U #% mknod vpp - X - # vop_mknod { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # #% open vp L L L # vop_open { IN struct vnode *vp; IN int mode; IN struct ucred *cred; IN struct proc *p; }; # #% close vp U U U # vop_close { IN struct vnode *vp; IN int fflag; IN struct ucred *cred; IN struct proc *p; }; # #% access vp L L L # vop_access { IN struct vnode *vp; IN int mode; IN struct ucred *cred; IN struct proc *p; }; # #% getattr vp = = = # vop_getattr { IN struct vnode *vp; IN struct vattr *vap; IN struct ucred *cred; IN struct proc *p; }; # #% setattr vp L L L # vop_setattr { IN struct vnode *vp; IN struct vattr *vap; IN struct ucred *cred; IN struct proc *p; }; # #% read vp L L L # vop_read { IN struct vnode *vp; INOUT struct uio *uio; IN int ioflag; IN struct ucred *cred; }; # #% write vp L L L # vop_write { IN struct vnode *vp; INOUT struct uio *uio; IN int ioflag; IN struct ucred *cred; }; # #% lease vp = = = # vop_lease { IN struct vnode *vp; IN struct proc *p; IN struct ucred *cred; IN int flag; }; # #% ioctl vp U U U # vop_ioctl { IN struct vnode *vp; IN u_long command; IN caddr_t data; IN int fflag; IN struct ucred *cred; IN struct proc *p; }; # #% poll vp U U U # vop_poll { IN struct vnode *vp; IN int events; IN struct ucred *cred; IN struct proc *p; }; # #% revoke vp U U U # vop_revoke { IN struct vnode *vp; IN int flags; }; # # XXX - not used # vop_mmap { IN struct vnode *vp; IN int fflags; IN struct ucred *cred; IN struct proc *p; }; # #% fsync vp L L L # vop_fsync { IN struct vnode *vp; IN struct ucred *cred; IN int waitfor; IN struct proc *p; }; # #% remove dvp L U U #% remove vp L U U # vop_remove { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; }; # #% link vp U U U #% link tdvp L U U # vop_link { IN WILLRELE struct vnode *tdvp; IN struct vnode *vp; IN struct componentname *cnp; }; # #% rename fdvp U U U #% rename fvp U U U #% rename tdvp L U U #% rename tvp X U U # vop_rename { IN WILLRELE struct vnode *fdvp; IN WILLRELE struct vnode *fvp; IN struct componentname *fcnp; IN WILLRELE struct vnode *tdvp; IN WILLRELE struct vnode *tvp; IN struct componentname *tcnp; }; # #% mkdir dvp L U U #% mkdir vpp - L - # vop_mkdir { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # #% rmdir dvp L U U #% rmdir vp L U U # vop_rmdir { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; }; # #% symlink dvp L U U #% symlink vpp - U - # # XXX - note that the return vnode has already been VRELE'ed # by the filesystem layer. To use it you must use vget, # possibly with a further namei. 
# vop_symlink { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; IN char *target; }; # #% readdir vp L L L # vop_readdir { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; INOUT int *eofflag; OUT int *ncookies; INOUT u_long **cookies; }; # #% readlink vp L L L # vop_readlink { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; }; # #% abortop dvp = = = # vop_abortop { IN struct vnode *dvp; IN struct componentname *cnp; }; # #% inactive vp L U U # vop_inactive { IN struct vnode *vp; IN struct proc *p; }; # #% reclaim vp U U U # vop_reclaim { IN struct vnode *vp; IN struct proc *p; }; # #% lock vp U L U # vop_lock { IN struct vnode *vp; IN int flags; IN struct proc *p; }; # #% unlock vp L U L # vop_unlock { IN struct vnode *vp; IN int flags; IN struct proc *p; }; # #% bmap vp L L L #% bmap vpp - U - # vop_bmap { IN struct vnode *vp; IN daddr_t bn; OUT struct vnode **vpp; IN daddr_t *bnp; OUT int *runp; OUT int *runb; }; # # Needs work: no vp? # #vop_strategy { # IN struct buf *bp; #}; # #% print vp = = = # vop_print { IN struct vnode *vp; }; # #% islocked vp = = = # vop_islocked { IN struct vnode *vp; }; # #% pathconf vp L L L # vop_pathconf { IN struct vnode *vp; IN int name; OUT register_t *retval; }; # #% advlock vp U U U # vop_advlock { IN struct vnode *vp; IN caddr_t id; IN int op; IN struct flock *fl; IN int flags; +}; + +# +#% balloc vp L L L +# +vop_balloc { + IN struct vnode *vp; + IN off_t startoffset; + IN int size; + IN struct ucred *cred; + IN int flags; + OUT struct buf **bpp; }; # #% reallocblks vp L L L # vop_reallocblks { IN struct vnode *vp; IN struct cluster_save *buflist; }; vop_getpages { IN struct vnode *vp; IN vm_page_t *m; IN int count; IN int reqpage; IN vm_ooffset_t offset; }; vop_putpages { IN struct vnode *vp; IN vm_page_t *m; IN int count; IN int sync; IN int *rtvals; IN vm_ooffset_t offset; }; # # Needs work: no vp? # #vop_bwrite { # IN struct buf *bp; #}; Index: head/sys/miscfs/specfs/spec_vnops.c =================================================================== --- head/sys/miscfs/specfs/spec_vnops.c (revision 34265) +++ head/sys/miscfs/specfs/spec_vnops.c (revision 34266) @@ -1,911 +1,917 @@ /* * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.58 1998/03/07 21:35:52 dyson Exp $ + * $Id: spec_vnops.c,v 1.59 1998/03/08 08:46:18 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int spec_getattr __P((struct vop_getattr_args *)); static int spec_badop __P((void)); static int spec_strategy __P((struct vop_strategy_args *)); static int spec_print __P((struct vop_print_args *)); static int spec_lookup __P((struct vop_lookup_args *)); static int spec_open __P((struct vop_open_args *)); static int spec_close __P((struct vop_close_args *)); static int spec_read __P((struct vop_read_args *)); static int spec_write __P((struct vop_write_args *)); static int spec_ioctl __P((struct vop_ioctl_args *)); static int spec_poll __P((struct vop_poll_args *)); static int spec_inactive __P((struct vop_inactive_args *)); static int spec_fsync __P((struct vop_fsync_args *)); static int spec_bmap __P((struct vop_bmap_args *)); static int spec_advlock __P((struct vop_advlock_args *)); static int spec_getpages __P((struct vop_getpages_args *)); struct vnode *speclisth[SPECHSZ]; vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) vop_ebadf }, { &vop_advlock_desc, (vop_t *) spec_advlock }, { &vop_bmap_desc, (vop_t *) spec_bmap }, { &vop_close_desc, (vop_t *) spec_close }, { &vop_create_desc, (vop_t *) spec_badop }, { &vop_fsync_desc, (vop_t *) spec_fsync }, { &vop_getattr_desc, (vop_t *) spec_getattr }, { &vop_getpages_desc, (vop_t *) spec_getpages }, { &vop_inactive_desc, (vop_t *) spec_inactive }, { &vop_ioctl_desc, (vop_t *) spec_ioctl }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) spec_badop }, { &vop_lookup_desc, (vop_t *) spec_lookup }, { &vop_mkdir_desc, (vop_t *) spec_badop }, { &vop_mknod_desc, (vop_t *) spec_badop }, { &vop_open_desc, (vop_t *) spec_open }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_poll_desc, (vop_t *) spec_poll }, { &vop_print_desc, (vop_t *) spec_print }, { &vop_read_desc, (vop_t *) spec_read }, { &vop_readdir_desc, (vop_t *) spec_badop }, { &vop_readlink_desc, (vop_t *) spec_badop }, { &vop_reallocblks_desc, (vop_t *) spec_badop }, { &vop_reclaim_desc, (vop_t *) vop_null }, { &vop_remove_desc, (vop_t *) spec_badop }, { &vop_rename_desc, (vop_t *) spec_badop }, { &vop_rmdir_desc, (vop_t *) spec_badop }, { &vop_setattr_desc, (vop_t *) vop_ebadf }, { &vop_strategy_desc, (vop_t *) spec_strategy }, { &vop_symlink_desc, (vop_t *) spec_badop }, { &vop_write_desc, (vop_t *) spec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); int spec_vnoperate(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(spec_vnodeop_p, 
ap->a_desc->vdesc_offset, ap)); } static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. */ static int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. */ /* ARGSUSED */ static int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; int maj = major(dev); int error; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: if ((u_int)maj >= nchrdev) return (ENXIO); if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && cdevsw[maj]->d_bdev && (cdevsw[maj]->d_bdev->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } #if 0 /* * Lite2 stuff. We will almost certainly do this * differently with devfs. The only use of this flag * is in dead_read to make ttys return EOF instead of * EIO when they are dead. Pre-lite2 FreeBSD returns * EOF for all character devices. */ if (cdevsw[maj]->d_type == D_TTY) vp->v_flag |= VISTTY; #endif VOP_UNLOCK(vp, 0, p); error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if ((u_int)maj >= nblkdev) return (ENXIO); if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. 
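 * (As of this revision the "mounted on" state lives in the device
 * vnode's v_specmountpoint pointer rather than the old SI_MOUNTEDON
 * flag -- see the specdev.h hunk below -- so vfs_mountedon()
 * presumably just checks that pointer, on this vnode and on any
 * vnodes aliasing the same device, returning EBUSY if one is set.)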
*/ error = vfs_mountedon(vp); if (error) return (error); return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p)); } return (0); } /* * Vnode op for read */ /* ARGSUSED */ static int spec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; int n, on, majordev; d_ioctl_t *ioctl; int error = 0; dev_t dev; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_read) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; dev = vp->v_rdev; if ((majordev = major(dev)) < nblkdev && (ioctl = bdevsw[majordev]->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = btodb(bsize); do { bn = btodb(uio->uio_offset) & ~(bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ } /* * Vnode op for write */ /* ARGSUSED */ static int spec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_write) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = btodb(bsize) - 1; do { bn = btodb(uio->uio_offset) & ~blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) bawrite(bp); else bdwrite(bp); } while (error == 0 && uio->uio_resid 
> 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. */ /* ARGSUSED */ static int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) if ((bdevsw[major(dev)]->d_flags & D_TYPEMASK) == D_TAPE) return (0); else return (1); return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* ARGSUSED */ static int spec_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { case VCHR: dev = ap->a_vp->v_rdev; return (*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p); default: return (vop_defaultop((struct vop_generic_args *)ap)); } } /* * Synch buffers associated with a block device */ /* ARGSUSED */ static int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. */ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { vfs_bio_awrite(bp); splx(s); } else { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); bawrite(bp); } goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } static int spec_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } /* * Just call the device strategy routine */ static int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { + struct buf *bp; - (*bdevsw[major(ap->a_bp->b_dev)]->d_strategy)(ap->a_bp); + bp = ap->a_bp; + if ((LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + (*bdevsw[major(bp->b_dev)]->d_strategy)(bp); return (0); } /* * This is a noop, simply returning what one has been given. 
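 * For a device vnode there is no file mapping to undo: the logical
 * block number already is the device block number, so vp and bn map
 * to themselves and no read-ahead runs are reported (*a_runp and
 * *a_runb are zeroed).  Note also the change to spec_strategy()
 * above: when a buffer carries dependencies (b_dep non-empty) and a
 * bioops.io_start handler is registered, that hook -- evidently for
 * dependency-tracking code such as soft updates -- now runs before
 * the driver's d_strategy routine sees the buffer.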
*/ static int spec_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Device close routine */ /* ARGSUSED */ static int spec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; dev_t dev = vp->v_rdev; d_close_t *devclose; int mode, error; switch (vp->v_type) { case VCHR: /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (vcount(vp) == 2 && ap->a_p && (vp->v_flag & VXLOCK) == 0 && vp == ap->a_p->p_session->s_ttyvp) { vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; } /* * If the vnode is locked, then we are in the midst * of forcibly closing the device, otherwise we only * close on last reference. */ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) return (0); devclose = cdevsw[major(dev)]->d_close; mode = S_IFCHR; break; case VBLK: /* * On last close of a block device (that isn't mounted) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0) return (0); devclose = bdevsw[major(dev)]->d_close; mode = S_IFBLK; break; default: panic("spec_close: not special"); } return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); } /* * Print out the contents of a special device vnode. */ static int spec_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), minor(ap->a_vp->v_rdev)); return (0); } /* * Special device advisory byte-level locks. */ /* ARGSUSED */ static int spec_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } /* * Special device bad operation */ static int spec_badop() { panic("spec_badop called"); /* NOTREACHED */ } static void spec_getpages_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; wakeup(bp); } static int spec_getpages(ap) struct vop_getpages_args *ap; { vm_offset_t kva; int error; int i, pcount, size, s; daddr_t blkno; struct buf *bp; vm_page_t m; vm_ooffset_t offset; int toff, nextoff, nread; struct vnode *vp = ap->a_vp; int blksiz; int gotreqpage; error = 0; pcount = round_page(ap->a_count) / PAGE_SIZE; /* * Calculate the offset of the transfer.
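 * That is, offset = IDX_TO_OFF(m[0]->pindex) + a_offset: the byte
 * address of the first page (pindex * PAGE_SIZE) plus the caller's
 * offset within the run; btodb() then reduces it to the DEV_BSIZE
 * block number stored in b_blkno below.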
*/ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; /* XXX sanity check before we go into details. */ /* XXX limits should be defined elsewhere. */ #define DADDR_T_BIT 32 #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) if (offset < 0 || offset > OFFSET_MAX) { /* XXX still no %q in kernel. */ printf("spec_getpages: preposterous offset 0x%x%08x\n", (u_int)((u_quad_t)offset >> 32), (u_int)(offset & 0xffffffff)); return (VM_PAGER_ERROR); } blkno = btodb(offset); /* * Round up physical size for real devices, use the * fundamental blocksize of the fs if possible. */ if (vp && vp->v_mount) blksiz = vp->v_mount->mnt_stat.f_bsize; else blksiz = DEV_BSIZE; size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); bp = getpbuf(); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; bp->b_resid = 0; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. */ VOP_STRATEGY(bp); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "spread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) { if (bp->b_error) error = bp->b_error; else error = EIO; } nread = size - bp->b_resid; if (nread < ap->a_count) { bzero((caddr_t)kva + nread, ap->a_count - nread); } pmap_qremove(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= nread) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else if (toff < nread) { int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } else { m->valid = 0; m->dirty = 0; } if (i != ap->a_reqpage) { /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error || (m->valid == VM_PAGE_BITS_ALL)) { if (m->valid) { if (m->flags & PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } PAGE_WAKEUP(m); } else { vm_page_free(m); } } else { vm_page_free(m); } } else if (m->valid) { gotreqpage = 1; } } if (!gotreqpage) { m = ap->a_m[ap->a_reqpage]; #ifndef MAX_PERF printf("spec_getpages: I/O read failure: (error code=%d)\n", error); printf(" size: %d, resid: %d, a_count: %d, valid: 0x%x\n", size, bp->b_resid, ap->a_count, m->valid); printf(" nread: %d, reqpage: %d, pindex: %d, pcount: %d\n", nread, ap->a_reqpage, m->pindex, pcount); #endif /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp); return VM_PAGER_ERROR; } /* * Free the buffer header back to the swap buffer pool. 
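 * (pbufs come from a small dedicated pool, so every getpbuf() must
 * be paired with a relpbuf() on every exit path, as the error
 * return just above also does.)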
*/ relpbuf(bp); return VM_PAGER_OK; } /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = dbtob(dpart.disklab->d_partitions [minor(vp->v_rdev)].p_size); vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/miscfs/specfs/specdev.h =================================================================== --- head/sys/miscfs/specfs/specdev.h (revision 34265) +++ head/sys/miscfs/specfs/specdev.h (revision 34266) @@ -1,84 +1,79 @@ /* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)specdev.h 8.6 (Berkeley) 5/21/95 - * $Id: specdev.h,v 1.12 1997/09/14 02:58:03 peter Exp $ + * $Id: specdev.h,v 1.13 1997/10/15 13:23:21 phk Exp $ */ /* * This structure defines the information maintained about * special devices. It is allocated in checkalias and freed * in vgone. 
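 * si_mountpoint, new in this revision, points to the filesystem
 * mounted on the device (or is NULL).  It replaces the old si_flags
 * word, whose only use was the SI_MOUNTEDON bit: callers that used
 * to test the flag can now find the mount itself.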
*/ struct specinfo { struct vnode **si_hashchain; struct vnode *si_specnext; - long si_flags; + struct mount *si_mountpoint; dev_t si_rdev; }; /* * Exported shorthand */ #define v_rdev v_specinfo->si_rdev #define v_hashchain v_specinfo->si_hashchain #define v_specnext v_specinfo->si_specnext -#define v_specflags v_specinfo->si_flags - -/* - * Flags for specinfo - */ -#define SI_MOUNTEDON 0x0001 /* block special device is mounted on */ +#define v_specmountpoint v_specinfo->si_mountpoint /* * Special device management */ #define SPECHSZ 64 #if ((SPECHSZ&(SPECHSZ-1)) == 0) #define SPECHASH(rdev) (((rdev>>5)+(rdev))&(SPECHSZ-1)) #else #define SPECHASH(rdev) (((unsigned)((rdev>>5)+(rdev)))%SPECHSZ) #endif extern struct vnode *speclisth[SPECHSZ]; /* * Prototypes for special file operations on vnodes. */ extern vop_t **spec_vnodeop_p; struct nameidata; struct componentname; struct ucred; struct flock; struct buf; struct uio; int spec_vnoperate __P((struct vop_generic_args *)); Index: head/sys/msdosfs/msdosfs_vfsops.c =================================================================== --- head/sys/msdosfs/msdosfs_vfsops.c (revision 34265) +++ head/sys/msdosfs/msdosfs_vfsops.c (revision 34266) @@ -1,1051 +1,1054 @@ -/* $Id: msdosfs_vfsops.c,v 1.28 1998/02/23 16:44:32 ache Exp $ */ +/* $Id: msdosfs_vfsops.c,v 1.29 1998/03/01 22:46:27 msmith Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". 
* * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include #include #include #include /* XXX */ /* defines v_rdev */ #include #include #include #include #include /* defines ALLPERMS */ #include #include #include #include #include #include MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure"); static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table"); static int update_mp __P((struct mount *mp, struct msdosfs_args *argp)); static int mountmsdosfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct msdosfs_args *argp)); static int msdosfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int msdosfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int msdosfs_root __P((struct mount *, struct vnode **)); static int msdosfs_start __P((struct mount *, int, struct proc *)); static int msdosfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int msdosfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int msdosfs_unmount __P((struct mount *, int, struct proc *)); static int msdosfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int msdosfs_vptofh __P((struct vnode *, struct fid *)); static int update_mp(mp, argp) struct mount *mp; struct msdosfs_args *argp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error; pmp->pm_gid = argp->gid; pmp->pm_uid = argp->uid; pmp->pm_mask = argp->mask & ALLPERMS; pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) { bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w)); bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u)); bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d)); } if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) { bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul)); bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu)); } #ifndef __FreeBSD__ /* * GEMDOS knows nothing (yet) about win95 */ if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS) pmp->pm_flags |= MSDOSFSMNT_NOWIN95; #endif if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; else if (!(pmp->pm_flags & (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))) { struct vnode *rootvp; /* * Try to divine whether to support Win'95 long filenames */ if (FAT32(pmp)) pmp->pm_flags |= MSDOSFSMNT_LONGNAME; else { if ((error = msdosfs_root(mp, &rootvp)) != 0) return error; pmp->pm_flags |= findwin95(VTODE(rootvp)) ? MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME; vput(rootvp); } } return 0; } #ifndef __FreeBSD__ int msdosfs_mountroot() { register struct mount *mp; struct proc *p = curproc; /* XXX */ size_t size; int error; struct msdosfs_args args; if (root_device->dv_class != DV_DISK) return (ENODEV); /* * Get vnodes for swapdev and rootdev. 
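 * (bdevvp() below manufactures a VBLK vnode for rootdev, since at
 * mountroot time no filesystem exists yet through which a device
 * vnode could be looked up.)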
*/ if (bdevvp(rootdev, &rootvp)) panic("msdosfs_mountroot: can't setup rootvp"); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &msdosfs_vfsops; mp->mnt_flag = 0; LIST_INIT(&mp->mnt_vnodelist); args.flags = 0; args.uid = 0; args.gid = 0; args.mask = 0777; if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) { free(mp, M_MOUNT); return (error); } if ((error = update_mp(mp, &args)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } if ((error = vfs_lock(mp)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); mp->mnt_vnodecovered = NULLVP; (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)msdosfs_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); return (0); } #endif /* * mp - path - addr in user space of mount point (ie /usr or whatever) * data - addr in user space of mount params including the name of the block * special file to treat as a filesystem. */ static int msdosfs_mount(mp, path, data, ndp, p) struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; /* vnode for blk device to mount */ struct msdosfs_args args; /* will hold data from mount request */ /* msdosfs specific mount control block */ struct msdosfsmount *pmp = NULL; size_t size; int error, flags; mode_t accessmode; error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args)); if (error) return (error); if (args.magic != MSDOSFS_ARGSMAGIC) { printf("Old mount_msdosfs, flags=%d\n", args.flags); args.flags = 0; } /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { pmp = VFSTOMSDOSFS(mp); error = 0; if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); } if (!error && (mp->mnt_flag & MNT_RELOAD)) /* not yet implemented */ error = EOPNOTSUPP; if (error) return (error); if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { devvp = pmp->pm_devvp; vn_lock(devvp, LK_EXCLUSIVE, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } pmp->pm_flags &= ~MSDOSFSMNT_RONLY; } if (args.fspec == 0) { #ifdef __notyet__ /* doesn't work correctly with current mountd XXX */ if (args.flags & MSDOSFSMNT_MNTOPT) { pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT; pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; } #endif /* * Process export requests. */ return (vfs_export(mp, &pmp->pm_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
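 * "Sensible" here means: namei() can resolve it, it is of type VBLK,
 * its major number falls within bdevsw[], and a non-root caller has
 * the needed read (and, for read-write mounts, write) access to the
 * device node.
 */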
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); error = namei(ndp); if (error) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = mountmsdosfs(devvp, mp, p, &args); #ifdef MSDOSFS_DEBUG /* only needed for the printf below */ pmp = VFSTOMSDOSFS(mp); #endif } else { if (devvp != pmp->pm_devvp) error = EINVAL; /* XXX needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } error = update_mp(mp, &args); if (error) { msdosfs_unmount(mp, MNT_FORCE, p); return error; } (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) msdosfs_statfs(mp, &mp->mnt_stat, p); #ifdef MSDOSFS_DEBUG printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); #endif return (0); } static int mountmsdosfs(devvp, mp, p, argp) struct vnode *devvp; struct mount *mp; struct proc *p; struct msdosfs_args *argp; { struct msdosfsmount *pmp; struct buf *bp; dev_t dev = devvp->v_rdev; #ifndef __FreeBSD__ struct partinfo dpart; #endif union bootsector *bsp; struct byte_bpb33 *b33; struct byte_bpb50 *b50; #ifdef PC98 u_int pc98_wrk; u_int Phy_Sector_Size; #endif struct byte_bpb710 *b710; u_int8_t SecPerClust; int ronly, error; int bsize = 0, dtype = 0, tmp; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); bp = NULL; /* both used in error_exit */ pmp = NULL; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { /* * We need the disklabel to calculate the size of a FAT entry * later on. Also make sure the partition contains a filesystem * of type FS_MSDOS. This doesn't work for floppies, so we have * to check for them too. * * At least some parts of the msdos fs driver seem to assume * that the size of a disk block will always be 512 bytes. * Let's check it... */ error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p); if (error) goto error_exit; tmp = dpart.part->p_fstype; dtype = dpart.disklab->d_type; bsize = dpart.disklab->d_secsize; if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) { error = EINVAL; goto error_exit; } } #endif /* * Read the boot sector of the filesystem, and then check the * boot signature. If not a dos boot sector then error out. 
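 * (The check compares the last two bytes of the sector against
 * BOOTSIG0/BOOTSIG1 -- the conventional 0x55 0xaa marker -- with the
 * PC98 cases below additionally accepting the signatures written by
 * several Japanese DOS variants.)
 */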
*/ #ifdef PC98 devvp->v_flag &= 0xffff; error = bread(devvp, 0, 1024, NOCRED, &bp); #else error = bread(devvp, 0, 512, NOCRED, &bp); #endif if (error) goto error_exit; bp->b_flags |= B_AGE; bsp = (union bootsector *)bp->b_data; b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif #ifdef PC98 if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) && (bsp->bs50.bsBootSectSig0 != 0 /* PC98 DOS 3.3x */ || bsp->bs50.bsBootSectSig1 != 0) && (bsp->bs50.bsBootSectSig0 != 0x90 /* PC98 DOS 5.0 */ || bsp->bs50.bsBootSectSig1 != 0x3d) && (bsp->bs50.bsBootSectSig0 != 0x46 /* PC98 DOS 3.3B */ || bsp->bs50.bsBootSectSig1 != 0xfa)) { #else if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK); bzero((caddr_t)pmp, sizeof *pmp); pmp->pm_mountp = mp; /* * Compute several useful quantities from the bpb in the * bootsector. Copy in the dos 5 variant of the bpb then fix up * the fields that are different between dos 5 and dos 3.3. */ SecPerClust = b50->bpbSecPerClust; pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); pmp->pm_ResSectors = getushort(b50->bpbResSectors); pmp->pm_FATs = b50->bpbFATs; pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); pmp->pm_Sectors = getushort(b50->bpbSectors); pmp->pm_FATsecs = getushort(b50->bpbFATsecs); pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); pmp->pm_Heads = getushort(b50->bpbHeads); pmp->pm_Media = b50->bpbMedia; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif /* XXX - We should probably check more values here */ if (!pmp->pm_BytesPerSec || !SecPerClust || !pmp->pm_Heads || pmp->pm_Heads > 255 #ifdef PC98 || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) { #else || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif if (pmp->pm_Sectors == 0) { pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); } else { pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); pmp->pm_HugeSectors = pmp->pm_Sectors; } #ifdef PC98 /* for PC98 added Satoshi Yasuda */ Phy_Sector_Size = 512; if ((devvp->v_rdev>>8) == 2) { /* floppy check */ if (((devvp->v_rdev&077) == 2) && (pmp->pm_HugeSectors == 1232)) { Phy_Sector_Size = 1024; /* 2HD */ /* * 1024byte/sector support */ devvp->v_flag |= 0x10000; } else { if ((((devvp->v_rdev&077) == 3) /* 2DD 8 or 9 sector */ && (pmp->pm_HugeSectors == 1440)) /* 9 sector */ || (((devvp->v_rdev&077) == 4) && (pmp->pm_HugeSectors == 1280)) /* 8 sector */ || (((devvp->v_rdev&077) == 5) && (pmp->pm_HugeSectors == 2880))) { /* 1.44M */ Phy_Sector_Size = 512; } else { if (((devvp->v_rdev&077) != 1) && ((devvp->v_rdev&077) != 0)) { /* 2HC */ error = EINVAL; goto error_exit; } } } } pc98_wrk = pmp->pm_BytesPerSec / Phy_Sector_Size; pmp->pm_BytesPerSec = Phy_Sector_Size; SecPerClust = SecPerClust * pc98_wrk; pmp->pm_HugeSectors = pmp->pm_HugeSectors * pc98_wrk; pmp->pm_ResSectors = pmp->pm_ResSectors * pc98_wrk; pmp->pm_FATsecs = pmp->pm_FATsecs * pc98_wrk; pmp->pm_SecPerTrack = pmp->pm_SecPerTrack * pc98_wrk; pmp->pm_HiddenSects = pmp->pm_HiddenSects * pc98_wrk; #endif /* */ if (pmp->pm_HugeSectors > 0xffffffff / pmp->pm_BytesPerSec + 1) { /* * We cannot deal currently with this 
size of disk * due to fileid limitations (see msdosfs_getattr and * msdosfs_readdir) */ error = EINVAL; goto error_exit; } if (pmp->pm_RootDirEnts == 0) { if (bsp->bs710.bsBootSectSig2 != BOOTSIG2 || bsp->bs710.bsBootSectSig3 != BOOTSIG3 || pmp->pm_Sectors || pmp->pm_FATsecs || getushort(b710->bpbFSVers)) { error = EINVAL; goto error_exit; } pmp->pm_fatmask = FAT32_MASK; pmp->pm_fatmult = 4; pmp->pm_fatdiv = 1; pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); if (getushort(b710->bpbExtFlags) & FATMIRROR) pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM; else pmp->pm_flags |= MSDOSFS_FATMIRROR; } else pmp->pm_flags |= MSDOSFS_FATMIRROR; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (FAT32(pmp)) { /* * GEMDOS doesn't know fat32. */ error = EINVAL; goto error_exit; } /* * Check a few values (could do some more): * - logical sector size: power of 2, >= block size * - sectors per cluster: power of 2, >= 1 * - number of sectors: >= 1, <= size of partition */ if ( (SecPerClust == 0) || (SecPerClust & (SecPerClust - 1)) || (pmp->pm_BytesPerSec < bsize) || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1)) || (pmp->pm_HugeSectors == 0) || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize) > dpart.part->p_size) ) { error = EINVAL; goto error_exit; } /* * XXX - Many parts of the msdos fs driver seem to assume that * the number of bytes per logical sector (BytesPerSec) will * always be the same as the number of bytes per disk block * Let's pretend it is. */ tmp = pmp->pm_BytesPerSec / bsize; pmp->pm_BytesPerSec = bsize; pmp->pm_HugeSectors *= tmp; pmp->pm_HiddenSects *= tmp; pmp->pm_ResSectors *= tmp; pmp->pm_Sectors *= tmp; pmp->pm_FATsecs *= tmp; SecPerClust *= tmp; } #endif pmp->pm_fatblk = pmp->pm_ResSectors; if (FAT32(pmp)) { pmp->pm_rootdirblk = getulong(b710->bpbRootClust); pmp->pm_firstcluster = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_fsinfo = getushort(b710->bpbFSInfo); } else { pmp->pm_rootdirblk = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry) + pmp->pm_BytesPerSec - 1) / pmp->pm_BytesPerSec;/* in sectors */ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust; pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1; pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if ((pmp->pm_nmbrofclusters <= (0xff0 - 2)) && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE) && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2)))) ) { pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } else #endif if (pmp->pm_fatmask == 0) { if (pmp->pm_maxcluster <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { /* * This will usually be a floppy disk. This size makes * sure that one fat entry will not be split across * multiple blocks. */ pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } if (FAT12(pmp)) pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec; else pmp->pm_fatblocksize = MAXBSIZE; pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec; pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1; /* * Compute mask and shift value for isolating cluster relative byte * offsets and cluster numbers from a file offset. 
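 * For example, 8 sectors of 512 bytes per cluster gives
 * pm_bpcluster = 4096, pm_crbomask = 0xfff and pm_cnshift = 12, so
 * for a file offset o the cluster number is o >> pm_cnshift and the
 * byte offset within the cluster is o & pm_crbomask.  This depends
 * on pm_bpcluster being a power of two, which is what the "valid
 * cluster size" check below enforces.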
*/ pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec; pmp->pm_crbomask = pmp->pm_bpcluster - 1; pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; /* * Check for valid cluster size * must be a power of 2 */ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { error = EINVAL; goto error_exit; } /* * Release the bootsector buffer. */ brelse(bp); bp = NULL; /* * Check FSInfo. */ if (pmp->pm_fsinfo) { struct fsinfo *fp; if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0) goto error_exit; fp = (struct fsinfo *)bp->b_data; if (!bcmp(fp->fsisig1, "RRaA", 4) && !bcmp(fp->fsisig2, "rrAa", 4) && !bcmp(fp->fsisig3, "\0\0\125\252", 4) && !bcmp(fp->fsisig4, "\0\0\125\252", 4)) pmp->pm_nxtfree = getulong(fp->fsinxtfree); else pmp->pm_fsinfo = 0; brelse(bp); bp = NULL; } /* * Check and validate (or perhaps invalidate?) the fsinfo structure? XXX */ /* * Allocate memory for the bitmap of allocated clusters, and then * fill it in. */ pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1) / N_INUSEBITS) * sizeof(*pmp->pm_inusemap), M_MSDOSFSFAT, M_WAITOK); /* * fillinusemap() needs pm_devvp. */ pmp->pm_dev = dev; pmp->pm_devvp = devvp; /* * Have the inuse map filled in. */ if ((error = fillinusemap(pmp)) != 0) goto error_exit; /* * If they want fat updates to be synchronous then let them suffer * the performance degradation in exchange for the on disk copy of * the fat being correct just about all the time. I suppose this * would be a good thing to turn on if the kernel is still flakey. */ if (mp->mnt_flag & MNT_SYNCHRONOUS) pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; /* * Finish up. */ if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else pmp->pm_fmod = 1; mp->mnt_data = (qaddr_t) pmp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_flag |= MNT_LOCAL; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; return 0; error_exit: if (bp) brelse(bp); (void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p); if (pmp) { if (pmp->pm_inusemap) free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } static int msdosfs_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return (0); } /* * Unmount the filesystem described by mp. */ static int msdosfs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { struct msdosfsmount *pmp; int error, flags; flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); if (error) return error; pmp = VFSTOMSDOSFS(mp); - pmp->pm_devvp->v_specflags &= ~SI_MOUNTEDON; + pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; printf("msdosfs_umount(): just before calling VOP_CLOSE()\n"); printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n", vp->v_flag, vp->v_usecount, vp->v_writecount, vp->v_holdcnt); printf("lastr %d, id %lu, mount %p, op %p\n", vp->v_lastr, vp->v_id, vp->v_mount, vp->v_op); printf("freef %p, freeb %p, mount %p\n", vp->v_freelist.tqe_next, vp->v_freelist.tqe_prev, vp->v_mount); printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n", vp->v_cleanblkhd.lh_first, vp->v_dirtyblkhd.lh_first, vp->v_numoutput, vp->v_type); printf("union %p, tag %d, data[0] %08x, data[1] %08x\n", vp->v_socket, vp->v_tag, ((u_int *)vp->v_data)[0], ((u_int *)vp->v_data)[1]); } #endif - error = VOP_CLOSE(pmp->pm_devvp, (pmp->pm_flags&MSDOSFSMNT_RONLY) ? 
FREAD : FREAD | FWRITE, - NOCRED, p); + error = VOP_CLOSE(pmp->pm_devvp, + (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE, + NOCRED, p); vrele(pmp->pm_devvp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } static int msdosfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct denode *ndep; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp); #endif error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep); if (error) return (error); *vpp = DETOV(ndep); return (0); } static int msdosfs_quotactl(mp, cmds, uid, arg, p) struct mount *mp; int cmds; uid_t uid; caddr_t arg; struct proc *p; { return EOPNOTSUPP; } static int msdosfs_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; struct proc *p; { struct msdosfsmount *pmp; pmp = VFSTOMSDOSFS(mp); sbp->f_bsize = pmp->pm_bpcluster; sbp->f_iosize = pmp->pm_bpcluster; sbp->f_blocks = pmp->pm_nmbrofclusters; sbp->f_bfree = pmp->pm_freeclustercount; sbp->f_bavail = pmp->pm_freeclustercount; sbp->f_files = pmp->pm_RootDirEnts; /* XXX */ sbp->f_ffree = 0; /* what to put in here? */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } static int msdosfs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *vp, *nvp; struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; /* * If we ever switch to not updating all of the fats all the time, * this would be the place to update them from the first one. */ if (pmp->pm_fmod != 0) if (pmp->pm_flags & MSDOSFSMNT_RONLY) panic("msdosfs_sync: rofs mod"); else { /* update fats here */ } /* * Write back each (modified) denode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; dep = VTODE(vp); - if (vp->v_type == VNON || ((dep->de_flag & - (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) - && vp->v_dirtyblkhd.lh_first == NULL) { + if (vp->v_type == VNON + || (waitfor == MNT_LAZY) /* can this happen with msdosfs? */ + || (((dep->de_flag & + (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) + && (vp->v_dirtyblkhd.lh_first == NULL))) { simple_unlock(&vp->v_interlock); continue; } simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); /* done with this one */ simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); /* * Flush filesystem control info.
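 * (That is, fsync the device vnode itself; in msdosfs the FAT and
 * directory blocks are buffered on pm_devvp rather than on any
 * denode, so this pass is what pushes them out.)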
*/ error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p); if (error) allerror = error; return (allerror); } static int msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct defid *defhp = (struct defid *) fhp; struct denode *dep; struct netcred *np; int error; np = vfs_export_lookup(mp, &pmp->pm_export, nam); if (np == NULL) return (EACCES); error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep); if (error) { *vpp = NULLVP; return (error); } *vpp = DETOV(dep); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } static int msdosfs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { struct denode *dep; struct defid *defhp; dep = VTODE(vp); defhp = (struct defid *)fhp; defhp->defid_len = sizeof(struct defid); defhp->defid_dirclust = dep->de_dirclust; defhp->defid_dirofs = dep->de_diroffset; /* defhp->defid_gen = dep->de_gen; */ return (0); } static int msdosfs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { return EOPNOTSUPP; } static struct vfsops msdosfs_vfsops = { msdosfs_mount, msdosfs_start, msdosfs_unmount, msdosfs_root, msdosfs_quotactl, msdosfs_statfs, msdosfs_sync, msdosfs_vget, vfs_vrele, msdosfs_fhtovp, msdosfs_vptofh, msdosfs_init }; VFS_SET(msdosfs_vfsops, msdos, MOUNT_MSDOS, 0); Index: head/sys/nfs/nfs_bio.c =================================================================== --- head/sys/nfs/nfs_bio.c (revision 34265) +++ head/sys/nfs/nfs_bio.c (revision 34266) @@ -1,1234 +1,1238 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.51 1998/03/06 09:46:43 msmith Exp $ + * $Id: nfs_bio.c,v 1.52 1998/03/07 21:36:01 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size, struct proc *p)); static void nfs_prot_buf __P((struct buf *bp, int off, int n)); extern int nfs_numasync; extern struct nfsstats nfsstats; /* * Vnode op for VM getpages. */ int nfs_getpages(ap) struct vop_getpages_args *ap; { int i, error, nextoff, size, toff, npages; struct uio uio; struct iovec iov; vm_page_t m; vm_offset_t kva; struct buf *bp; if ((ap->a_vp->v_object) == NULL) { printf("nfs_getpages: called with non-merged cache vnode??\n"); return EOPNOTSUPP; } /* * We use only the kva address for the buffer, but this is extremely * convenient and fast. */ bp = getpbuf(); npages = btoc(ap->a_count); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, ap->a_m, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = ap->a_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(ap->a_m[0]->pindex); uio.uio_resid = ap->a_count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_procp = curproc; error = nfs_readrpc(ap->a_vp, &uio, curproc->p_ucred); pmap_qremove(kva, npages); relpbuf(bp); if (error && (uio.uio_resid == ap->a_count)) return VM_PAGER_ERROR; size = ap->a_count - uio.uio_resid; for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= size) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else { int nvalid = ((size + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } if (i != ap->a_reqpage) { /* * Whether or not to leave the page activated is up in * the air, but we should put the page on a page queue * somewhere (it already is in the object). Result: * It appears that empirical results show that * deactivating pages is best. */ /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error) { if (m->flags & PG_WANTED) vm_page_activate(m); else vm_page_deactivate(m); PAGE_WAKEUP(m); } else { vnode_pager_freepage(m); } } } return 0; } /* * Vnode op for VM putpages. */ int nfs_putpages(ap) struct vop_putpages_args *ap; { struct uio uio; struct iovec iov; vm_page_t m; vm_offset_t kva; struct buf *bp; int iomode, must_commit, i, error, npages; int *rtvals; rtvals = ap->a_rtvals; npages = btoc(ap->a_count); for (i = 0; i < npages; i++) { rtvals[i] = VM_PAGER_AGAIN; } /* * We use only the kva address for the buffer, but this is extremely * convenient and fast.
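 * What the pbuf buys us: pmap_qenter() maps the caller's loose pages
 * contiguously at the pbuf's kva, so a single iovec spanning
 * a_count bytes can be handed to nfs_writerpc() below, rather than
 * building a separate transfer per page.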
*/ bp = getpbuf(); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, ap->a_m, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = ap->a_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(ap->a_m[0]->pindex); uio.uio_resid = ap->a_count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_procp = curproc; if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(ap->a_vp, &uio, curproc->p_ucred, &iomode, &must_commit); pmap_qremove(kva, npages); relpbuf(bp); if (!error) { int nwritten = round_page(ap->a_count - uio.uio_resid) / PAGE_SIZE; for (i = 0; i < nwritten; i++) { rtvals[i] = VM_PAGER_OK; ap->a_m[i]->dirty = 0; } if (must_commit) nfs_clearcommit(ap->a_vp->v_mount); } return ap->a_rtvals[0]; } /* * Vnode op for read using bio * Any similarity to readip() is purely coincidental */ int nfs_bioread(vp, uio, ioflag, cred, getpages) register struct vnode *vp; register struct uio *uio; int ioflag; struct ucred *cred; int getpages; { register struct nfsnode *np = VTONFS(vp); register int biosize, diff, i; struct buf *bp = 0, *rabp; struct vattr vattr; struct proc *p; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn, rabn; int bufsize; int nra, error = 0, n = 0, on = 0, not_readin; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("nfs_read mode"); #endif if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); p = uio->uio_procp; if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); biosize = vp->v_mount->mnt_stat.f_iosize; /* * For nfs, cache consistency can only be maintained approximately. * Although RFC1094 does not specify the criteria, the following is * believed to be compatible with the reference port. * For nqnfs, full cache consistency is maintained within the loop. * For nfs: * If the file's modify time on the server has changed since the * last read rpc or you have written to the file, * you may have lost data cache consistency with the * server, so flush all of the file's data out of the cache. * Then force a getattr rpc to ensure that you have up to date * attributes. * NB: This implies that cache data can be read when up to * NFS_ATTRTIMEO seconds out of date. If you find that you need current * attributes this could be forced by setting n_attrstamp to 0 before * the VOP_GETATTR() call. */ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { if (np->n_flag & NMODIFIED) { if (vp->v_type != VREG) { if (vp->v_type != VDIR) panic("nfs: bioread, not dir"); nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } np->n_attrstamp = 0; error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } } } do { /* * Get a valid lease. If cached data is stale, flush it. 
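 * (NQNFS only: a lease is the server's promise that our cached data
 * stays current for its term.  The loop below renews it while it
 * keeps coming back expired; if the server's revision count n_lrev
 * has moved past the revision our cache was built from, n_brev, the
 * cached blocks are invalidated before reading.)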
*/ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, cred, p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE) || ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; } } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } } if (np->n_flag & NQNFSNONCACHE) { switch (vp->v_type) { case VREG: return (nfs_readrpc(vp, uio, cred)); case VLNK: return (nfs_readlinkrpc(vp, uio, cred)); case VDIR: break; default: printf(" NQNFSNONCACHE: type %x unexpected\n", vp->v_type); }; } switch (vp->v_type) { case VREG: nfsstats.biocache_reads++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); not_readin = 1; /* * Start the read ahead(s), as required. */ if (nfs_numasync > 0 && nmp->nm_readahead > 0) { for (nra = 0; nra < nmp->nm_readahead && (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { rabn = lbn + 1 + nra; if (!incore(vp, rabn)) { rabp = nfs_getcacheblk(vp, rabn, biosize, p); if (!rabp) return (EINTR); if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else brelse(rabp); } } } /* * If the block is in the cache and has the required data * in a valid region, just copy it out. * Otherwise, get the block and write back/read in, * as required. */ again: bufsize = biosize; if ((off_t)(lbn + 1) * biosize > np->n_size && (off_t)(lbn + 1) * biosize - np->n_size < biosize) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } bp = nfs_getcacheblk(vp, lbn, bufsize, p); if (!bp) return (EINTR); /* * If we are being called from nfs_getpages, we must * make sure the buffer is a vmio buffer. The vp will * already be setup for vmio but there may be some old * non-vmio buffers attached to it. */ if (getpages && !(bp->b_flags & B_VMIO)) { #ifdef DIAGNOSTIC printf("nfs_bioread: non vmio buf found, discarding\n"); #endif bp->b_flags |= B_NOCACHE; bp->b_flags |= B_INVAFTERWRITE; if (bp->b_dirtyend > 0) { if ((bp->b_flags & B_DELWRI) == 0) panic("nfsbioread"); if (VOP_BWRITE(bp) == EINTR) return (EINTR); } else brelse(bp); goto again; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); not_readin = 0; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); return (error); } } if (bufsize > on) { n = min((unsigned)(bufsize - on), uio->uio_resid); } else { n = 0; } diff = np->n_size - uio->uio_offset; if (diff < n) n = diff; if (not_readin && n > 0) { if (on < bp->b_validoff || (on + n) > bp->b_validend) { bp->b_flags |= B_NOCACHE; bp->b_flags |= B_INVAFTERWRITE; if (bp->b_dirtyend > 0) { if ((bp->b_flags & B_DELWRI) == 0) panic("nfsbioread"); if (VOP_BWRITE(bp) == EINTR) return (EINTR); } else brelse(bp); goto again; } } vp->v_lastr = lbn; diff = (on >= bp->b_validend) ? 
0 : (bp->b_validend - on); if (diff < n) n = diff; break; case VLNK: nfsstats.biocache_readlinks++; bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); if (!bp) return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { bp->b_flags |= B_ERROR; brelse(bp); return (error); } } n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); on = 0; break; case VDIR: nfsstats.biocache_readdirs++; if (np->n_direofoffset && uio->uio_offset >= np->n_direofoffset) { return (0); } lbn = uio->uio_offset / NFS_DIRBLKSIZ; on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); } while (error == NFSERR_BAD_COOKIE) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, 0, cred, p, 1); /* * Yuck! The directory has been modified on the * server. The only way to get the block is by * reading from the beginning to get all the * offset cookies. */ for (i = 0; i <= lbn && !error; i++) { if (np->n_direofoffset && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) return (0); bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); } else if (i < lbn) { brelse(bp); } } } } if (error) return (error); } /* * If not eof and read aheads are enabled, start one. * (You need the current block first, so that you have the * directory offset cookie of the next block.) */ if (nfs_numasync > 0 && nmp->nm_readahead > 0 && (np->n_direofoffset == 0 || (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && !(np->n_flag & NQNFSNONCACHE) && !incore(vp, lbn + 1)) { rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p); if (rabp) { if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else { brelse(rabp); } } } /* * Make sure we use a signed variant of min() since * the second term may be negative. 
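* Taking NFS_DIRBLKSIZ as 4096 for illustration: if the readdir RPC
* came up short (say b_resid = 2048) and on = 3072, then
*
*	NFS_DIRBLKSIZ - bp->b_resid - on = 4096 - 2048 - 3072 = -1024
*
* An unsigned min() would wrap that to a huge value and pick
* uio_resid, copying past the valid data; lmin() keeps the negative
* result, n <= 0, and the uiomove() below is skipped.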
*/ n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); break; }; if (n > 0) { error = uiomove(bp->b_data + on, (int)n, uio); } switch (vp->v_type) { case VREG: break; case VLNK: n = 0; break; case VDIR: if (np->n_flag & NQNFSNONCACHE) bp->b_flags |= B_INVAL; break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); } brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n > 0); return (error); } static void nfs_prot_buf(bp, off, n) struct buf *bp; int off; int n; { int pindex, boff, end; if ((bp->b_flags & B_VMIO) == 0) return; end = round_page(off + n); for (boff = trunc_page(off); boff < end; boff += PAGE_SIZE) { pindex = boff >> PAGE_SHIFT; vm_page_protect(bp->b_pages[pindex], VM_PROT_NONE); } } /* * Vnode op for write using bio */ int nfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register int biosize; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); register struct ucred *cred = ap->a_cred; int ioflag = ap->a_ioflag; struct buf *bp; struct vattr vattr; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn; int bufsize; int n, on, error = 0, iomode, must_commit; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("nfs_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("nfs_write proc"); #endif if (vp->v_type != VREG) return (EIO); if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; return (np->n_error); } if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { np->n_attrstamp = 0; error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } if (ioflag & IO_APPEND) { np->n_attrstamp = 0; error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); uio->uio_offset = np->n_size; } } if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, i don't think it matters */ if (p && uio->uio_offset + uio->uio_resid > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { psignal(p, SIGXFSZ); return (EFBIG); } /* * I use nm_rsize, not nm_wsize so that all buffer cache blocks * will be the same size within a filesystem. nfs_writerpc will * still use nm_wsize when sizing the rpc's. */ biosize = vp->v_mount->mnt_stat.f_iosize; do { /* * Check for a valid write lease. 
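* (NQNFS mounts only; plain NFS mounts fall straight through to the
* buffer cache code below.)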
*/ if ((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_WRITE)) { do { error = nqnfs_getlease(vp, ND_WRITE, cred, p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; } } if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); if (must_commit) nfs_clearcommit(vp->v_mount); return (error); } nfsstats.biocache_writes++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); n = min((unsigned)(biosize - on), uio->uio_resid); again: if (uio->uio_offset + n > np->n_size) { np->n_size = uio->uio_offset + n; np->n_flag |= NMODIFIED; vnode_pager_setsize(vp, (u_long)np->n_size); } bufsize = biosize; if ((lbn + 1) * biosize > np->n_size) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } bp = nfs_getcacheblk(vp, lbn, bufsize, p); if (!bp) return (EINTR); if (bp->b_wcred == NOCRED) { crhold(cred); bp->b_wcred = cred; } np->n_flag |= NMODIFIED; if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) { bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); } /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { bp->b_proc = p; if (VOP_BWRITE(bp) == EINTR) return (EINTR); goto again; } /* * Check for valid write lease and get one as required. * In case getblk() and/or bwrite() delayed us. */ if ((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_WRITE)) { do { error = nqnfs_getlease(vp, ND_WRITE, cred, p); } while (error == NQNFS_EXPIRED); if (error) { brelse(bp); return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { brelse(bp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; goto again; } } error = uiomove((char *)bp->b_data + on, n, uio); if (error) { bp->b_flags |= B_ERROR; brelse(bp); return (error); } /* * This will keep the buffer and mmaped regions more coherent. */ nfs_prot_buf(bp, on, n); if (bp->b_dirtyend > 0) { bp->b_dirtyoff = min(on, bp->b_dirtyoff); bp->b_dirtyend = max((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || bp->b_validoff > bp->b_dirtyend) { bp->b_validoff = bp->b_dirtyoff; bp->b_validend = bp->b_dirtyend; } else { bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); bp->b_validend = max(bp->b_validend, bp->b_dirtyend); } /* * Since this block is being modified, it must be written * again and not just committed. */ bp->b_flags &= ~B_NEEDCOMMIT; /* * If the lease is non-cachable or IO_SYNC do bwrite(). */ if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { bp->b_proc = p; if (ioflag & IO_INVAL) bp->b_flags |= B_INVAL; error = VOP_BWRITE(bp); if (error) return (error); if (np->n_flag & NQNFSNONCACHE) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } } else if ((n + on) == biosize && (nmp->nm_flag & NFSMNT_NQNFS) == 0) { bp->b_proc = (struct proc *)0; bp->b_flags |= B_ASYNC; (void)nfs_writebp(bp, 0); } else bdwrite(bp); } while (uio->uio_resid > 0 && n > 0); return (0); } /* * Get an nfs cache block. 
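* (Essentially a wrapper around getblk() that honors interruptible
* mounts.)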
* Allocate a new one if the block isn't currently in the cache * and return the block marked busy. If the calling process is * interrupted by a signal for an interruptible mount point, return * NULL. */ static struct buf * nfs_getcacheblk(vp, bn, size, p) struct vnode *vp; daddr_t bn; int size; struct proc *p; { register struct buf *bp; struct mount *mp; struct nfsmount *nmp; mp = vp->v_mount; nmp = VFSTONFS(mp); if (nmp->nm_flag & NFSMNT_INT) { bp = getblk(vp, bn, size, PCATCH, 0); while (bp == (struct buf *)0) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) return ((struct buf *)0); bp = getblk(vp, bn, size, 0, 2 * hz); } } else bp = getblk(vp, bn, size, 0, 0); if( vp->v_type == VREG) { int biosize; biosize = mp->mnt_stat.f_iosize; bp->b_blkno = (bn * biosize) / DEV_BSIZE; } return (bp); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int nfs_vinvalbuf(vp, flags, cred, p, intrflg) struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int intrflg; { register struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slpflag, slptimeo; if (vp->v_flag & VXLOCK) { return (0); } if ((nmp->nm_flag & NFSMNT_INT) == 0) intrflg = 0; if (intrflg) { slpflag = PCATCH; slptimeo = 2 * hz; } else { slpflag = 0; slptimeo = 0; } /* * First wait for any other process doing a flush to complete. */ while (np->n_flag & NFLUSHINPROG) { np->n_flag |= NFLUSHWANT; error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo); if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) return (EINTR); } /* * Now, flush as required. */ np->n_flag |= NFLUSHINPROG; error = vinvalbuf(vp, flags, cred, p, slpflag, 0); while (error) { if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { np->n_flag &= ~NFLUSHINPROG; if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } return (EINTR); } error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); } np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } return (0); } /* * Initiate asynchronous I/O. Return an error if no nfsiods are available. * This is mainly to avoid queueing async I/O requests when the nfsiods * are all hung on a dead server. */ int nfs_asyncio(bp, cred) register struct buf *bp; struct ucred *cred; { struct nfsmount *nmp; int i; int gotiod; int slpflag = 0; int slptimeo = 0; int error; if (nfs_numasync == 0) return (EIO); nmp = VFSTONFS(bp->b_vp->v_mount); again: if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; gotiod = FALSE; /* * Find a free iod to process this request. */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (nfs_iodwant[i]) { /* * Found one, so wake it up and tell it which * mount to process. */ NFS_DPF(ASYNCIO, ("nfs_asyncio: waking iod %d for mount %p\n", i, nmp)); nfs_iodwant[i] = (struct proc *)0; nfs_iodmount[i] = nmp; nmp->nm_bufqiods++; wakeup((caddr_t)&nfs_iodwant[i]); gotiod = TRUE; break; } /* * If none are free, we may already have an iod working on this mount * point. If so, it will process our request. */ if (!gotiod) { if (nmp->nm_bufqiods > 0) { NFS_DPF(ASYNCIO, ("nfs_asyncio: %d iods are already processing mount %p\n", nmp->nm_bufqiods, nmp)); gotiod = TRUE; } } /* * If we have an iod which can process the request, then queue * the buffer. */ if (gotiod) { /* * Ensure that the queue never grows too large. 
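* Bounding the queue at twice the number of iods keeps a dead or
* slow server from soaking up every buffer in the system.  On
* interruptible mounts the tsleep() below can be broken by a signal;
* after a first interrupted sleep the wait falls back to a plain
* 2 * hz timeout.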
*/ while (nmp->nm_bufqlen >= 2*nfs_numasync) { NFS_DPF(ASYNCIO, ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); nmp->nm_bufqwant = TRUE; error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO, "nfsaio", slptimeo); if (error) { if (nfs_sigintr(nmp, NULL, bp->b_proc)) return (EINTR); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } /* * We might have lost our iod while sleeping, * so check and loop if nescessary. */ if (nmp->nm_bufqiods == 0) { NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); goto again; } } if (bp->b_flags & B_READ) { if (bp->b_rcred == NOCRED && cred != NOCRED) { crhold(cred); bp->b_rcred = cred; } } else { bp->b_flags |= B_WRITEINPROG; if (bp->b_wcred == NOCRED && cred != NOCRED) { crhold(cred); bp->b_wcred = cred; } } TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist); nmp->nm_bufqlen++; return (0); } /* * All the iods are busy on other mounts, so return EIO to * force the caller to process the i/o synchronously. */ NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n")); return (EIO); } /* * Do an I/O operation to/from a cache block. This may be called * synchronously or from an nfsiod. */ int nfs_doio(bp, cr, p) register struct buf *bp; struct ucred *cr; struct proc *p; { register struct uio *uiop; register struct vnode *vp; struct nfsnode *np; struct nfsmount *nmp; int error = 0, diff, len, iomode, must_commit = 0; struct uio uio; struct iovec io; vp = bp->b_vp; np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = p; /* * Historically, paging was done with physio, but no more. */ if (bp->b_flags & B_PHYS) { /* * ...though reading /dev/drum still gets us here. */ io.iov_len = uiop->uio_resid = bp->b_bcount; /* mapping was done by vmapbuf() */ io.iov_base = bp->b_data; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; if (bp->b_flags & B_READ) { uiop->uio_rw = UIO_READ; nfsstats.read_physios++; error = nfs_readrpc(vp, uiop, cr); } else { int com; iomode = NFSV3WRITE_DATASYNC; uiop->uio_rw = UIO_WRITE; nfsstats.write_physios++; error = nfs_writerpc(vp, uiop, cr, &iomode, &com); } if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else if (bp->b_flags & B_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; nfsstats.read_bios++; error = nfs_readrpc(vp, uiop, cr); if (!error) { bp->b_validoff = 0; if (uiop->uio_resid) { /* * If len > 0, there is a hole in the file and * no writes after the hole have been pushed to * the server yet. * Just zero fill the rest of the valid area. 
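* Illustrative numbers: with b_bcount = 8192 and a read that came up
* 4096 bytes short, diff below is 4096; if the file size says 2048
* of the missing bytes really exist (a hole), those are zeroed and
* b_validend becomes 6144, otherwise b_validend is left at 4096.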
*/ diff = bp->b_bcount - uiop->uio_resid; len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE + diff); if (len > 0) { len = min(len, uiop->uio_resid); bzero((char *)bp->b_data + diff, len); bp->b_validend = diff + len; } else bp->b_validend = diff; } else bp->b_validend = bp->b_bcount; } if (p && (vp->v_flag & VTEXT) && (((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_READ) && np->n_lrev != np->n_brev) || (!(nmp->nm_flag & NFSMNT_NQNFS) && np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { uprintf("Process killed due to text file modification\n"); psignal(p, SIGKILL); p->p_flag |= P_NOSWAP; } break; case VLNK: uiop->uio_offset = (off_t)0; nfsstats.readlink_bios++; error = nfs_readlinkrpc(vp, uiop, cr); break; case VDIR: nfsstats.readdir_bios++; uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; if (nmp->nm_flag & NFSMNT_RDIRPLUS) { error = nfs_readdirplusrpc(vp, uiop, cr); if (error == NFSERR_NOTSUPP) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) error = nfs_readdirrpc(vp, uiop, cr); break; default: printf("nfs_doio: type %x unexpected\n",vp->v_type); break; }; if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else { if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size) bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; nfsstats.write_bios++; if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; bp->b_flags |= B_WRITEINPROG; error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); if (!error && iomode == NFSV3WRITE_UNSTABLE) { bp->b_flags |= B_NEEDCOMMIT; if (bp->b_dirtyoff == 0 && bp->b_dirtyend == bp->b_bufsize) bp->b_flags |= B_CLUSTEROK; } else bp->b_flags &= ~B_NEEDCOMMIT; bp->b_flags &= ~B_WRITEINPROG; /* * For an interrupted write, the buffer is still valid * and the write hasn't been pushed to the server yet, * so we can't set B_ERROR and report the interruption * by setting B_EINTR. For the B_ASYNC case, B_EINTR * is not relevant, so the rpc attempt is essentially * a noop. For the case of a V3 write rpc not being * committed to stable storage, the block is still * dirty and requires either a commit rpc or another * write rpc with iomode == NFSV3WRITE_FILESYNC before * the block is reused. This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + int s; + bp->b_flags &= ~(B_INVAL|B_NOCACHE); ++numdirtybuffers; bp->b_flags |= B_DELWRI; + s = splbio(); reassignbuf(bp, vp); + splx(s); if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_flags |= B_ERROR; bp->b_error = np->n_error = error; np->n_flag |= NWRITEERR; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; biodone(bp); return (0); } } bp->b_resid = uiop->uio_resid; if (must_commit) nfs_clearcommit(vp->v_mount); biodone(bp); return (error); } Index: head/sys/nfs/nfs_vnops.c =================================================================== --- head/sys/nfs/nfs_vnops.c (revision 34265) +++ head/sys/nfs/nfs_vnops.c (revision 34266) @@ -1,3296 +1,3303 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.79 1998/03/06 09:46:48 msmith Exp $ + * $Id: nfs_vnops.c,v 1.80 1998/03/07 21:36:06 dyson Exp $ */ /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. 
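* (Without B_VMIO the macro below simply compiles the calls away.)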
*/ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static int nfsspec_read __P((struct vop_read_args *)); static int nfsspec_write __P((struct vop_write_args *)); static int nfsfifo_read __P((struct vop_read_args *)); static int nfsfifo_write __P((struct vop_write_args *)); static int nfsspec_close __P((struct vop_close_args *)); static int nfsfifo_close __P((struct vop_close_args *)); #define nfs_poll vop_nopoll static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); static int nfs_lookup __P((struct vop_lookup_args *)); static int nfs_create __P((struct vop_create_args *)); static int nfs_mknod __P((struct vop_mknod_args *)); static int nfs_open __P((struct vop_open_args *)); static int nfs_close __P((struct vop_close_args *)); static int nfs_access __P((struct vop_access_args *)); static int nfs_getattr __P((struct vop_getattr_args *)); static int nfs_setattr __P((struct vop_setattr_args *)); static int nfs_read __P((struct vop_read_args *)); static int nfs_mmap __P((struct vop_mmap_args *)); static int nfs_fsync __P((struct vop_fsync_args *)); static int nfs_remove __P((struct vop_remove_args *)); static int nfs_link __P((struct vop_link_args *)); static int nfs_rename __P((struct vop_rename_args *)); static int nfs_mkdir __P((struct vop_mkdir_args *)); static int nfs_rmdir __P((struct vop_rmdir_args *)); static int nfs_symlink __P((struct vop_symlink_args *)); static int nfs_readdir __P((struct vop_readdir_args *)); static int nfs_bmap __P((struct vop_bmap_args *)); static int nfs_strategy __P((struct vop_strategy_args *)); static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **)); static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); static int nfsspec_access __P((struct vop_access_args *)); static int nfs_readlink __P((struct vop_readlink_args *)); static int nfs_print __P((struct vop_print_args *)); static int nfs_advlock __P((struct vop_advlock_args *)); static int nfs_bwrite __P((struct vop_bwrite_args *)); /* * Global vfs data structures for nfs */ vop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) nfs_abortop }, { &vop_access_desc, (vop_t *) nfs_access }, { &vop_advlock_desc, (vop_t *) nfs_advlock }, { &vop_bmap_desc, (vop_t *) nfs_bmap }, { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, { &vop_close_desc, (vop_t *) nfs_close }, { &vop_create_desc, (vop_t *) nfs_create }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_getpages_desc, (vop_t *) nfs_getpages }, { &vop_putpages_desc, (vop_t *) nfs_putpages }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) nfs_link }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_lookup_desc, (vop_t *) nfs_lookup }, { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, { &vop_mknod_desc, (vop_t *) nfs_mknod }, { &vop_mmap_desc, (vop_t *) nfs_mmap }, { &vop_open_desc, (vop_t *) nfs_open }, { &vop_poll_desc, (vop_t *) nfs_poll }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfs_read }, { &vop_readdir_desc, (vop_t *) nfs_readdir }, { &vop_readlink_desc, (vop_t *) nfs_readlink }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_remove_desc, (vop_t *) nfs_remove }, { &vop_rename_desc, (vop_t *) nfs_rename }, 
{ &vop_rmdir_desc, (vop_t *) nfs_rmdir }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_strategy_desc, (vop_t *) nfs_strategy }, { &vop_symlink_desc, (vop_t *) nfs_symlink }, { &vop_write_desc, (vop_t *) nfs_write }, { NULL, NULL } }; static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsspec_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsspec_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; VNODEOP_SET(spec_nfsv2nodeop_opv_desc); vop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsfifo_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsfifo_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsfifo_write }, { NULL, NULL } }; static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct proc *procp)); static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)); static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen, struct ucred *cred, struct proc *proc)); static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr, int fnamelen, struct vnode *tdvp, char *tnameptr, int tnamelen, struct ucred *cred, struct proc *proc)); static int nfs_renameit __P((struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp)); /* * Global variables */ extern u_long nfs_true, nfs_false; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. 
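* (The ACCESS reply is treated as a superset test: every mode bit
* that was asked for must come back granted, otherwise EACCES.)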
*/ static int nfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register u_long *tl; register caddr_t cp; register int t1, t2; caddr_t bpos, dpos, cp2; int error = 0, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long mode, rmode; int v3 = NFS_ISV3(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * For nfs v3, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about, but * this is better than just returning anything that is lying about * in the cache. */ if (v3) { nfsstats.rpccnt[NFSPROC_ACCESS]++; nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (ap->a_mode & VREAD) mode = NFSV3ACCESS_READ; else mode = 0; if (vp->v_type == VDIR) { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } else { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } *tl = txdr_unsigned(mode); nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred); nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_long, *tl); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if ((rmode & mode) != mode) error = EACCES; } nfsm_reqdone; return (error); } else { if (error = nfsspec_access(ap)) return (error); /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = ap->a_p; if (vp->v_type == VREG) error = nfs_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = nfs_readdirrpc(vp, &auio, ap->a_cred); free(bp, M_TEMP); } else if (vp->v_type = VLNK) error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. 
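* Note that for non-lease mounts the attribute cache is discarded
* again at the end of open, so the first getattr after an open
* always consults the server; this is what provides the usual NFS
* open/close consistency.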
*/ /* ARGSUSED */ static int nfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct vattr vattr; int error; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { printf("open eacces vtyp=%d\n",vp->v_type); return (EACCES); } /* * Get a valid lease. If cached data is stale, flush it. */ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_cred, ap->a_p); } while (error == NQNFS_EXPIRED); if (error) { return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_brev = np->n_lrev; } } } else { if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) np->n_direofoffset = 0; if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } } } if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) np->n_attrstamp = 0; /* For Open/Close consistency */ return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NQNFS - do nothing now, since 2 is dealt with via leases and * 1 should be dealt with via an fsync() system call for * cases where write errors are important. 
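* Whichever path is taken, a write error recorded earlier on the
* nfsnode (NWRITEERR) is reported from this close and then cleared.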
*/ /* ARGSUSED */ static int nfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); np->n_flag &= ~NMODIFIED; } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register caddr_t cp; register u_long *tl; register int t1, t2; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); if (!error) nfsm_loadattr(vp, ap->a_vap); nfsm_reqdone; return (error); } /* * nfs setattr call. */ static int nfs_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (u_short)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); if (error) return (error); } tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, (u_long)np->n_size); } return (error); } /* * Do an nfs setattr rpc. 
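* For v3 each settable attribute goes out as a discriminated union:
* a boolean "value follows" word and then the value itself, roughly
* (illustrative layout only):
*
*	TRUE, mode		field is to be set
*	FALSE			field is left alone
*
* The two times use a three-way discriminant instead: DONTCHANGE,
* TOSERVER when the caller effectively asked for "now", or TOCLIENT
* followed by an explicit nfsv3time.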
*/ static int nfs_setattrrpc(vp, vap, cred, procp) register struct vnode *vp; register struct vattr *vap; struct ucred *cred; struct proc *procp; { register struct nfsv2_sattr *sp; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; u_long *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); nfsm_fhtom(vp, v3); if (v3) { if (vap->va_mode != (u_short)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_mode); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_uid != (uid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_uid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_gid != (gid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_gid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_size != VNOVAL) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&vap->va_size, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if (vap->va_atime.tv_sec != time.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_mtime.tv_sec != time.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (u_short)VNOVAL) sp->sa_mode = VNOVAL; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = VNOVAL; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = VNOVAL; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, procp, cred); if (v3) { nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_reqdone; return (error); } /* * nfs lookup call, one step at a time... 
* First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct componentname *cnp = ap->a_cnp; register struct vnode *dvp = ap->a_dvp; register struct vnode **vpp = ap->a_vpp; register int flags = cnp->cn_flags; register struct vnode *newvp; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsmount *nmp; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); *vpp = NULLVP; if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if (error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) { *vpp = NULLVP; return (error); } newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = vget(newvp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) error = vn_lock(dvp, LK_EXCLUSIVE, p); } else { error = vget(newvp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } error = vn_lock(dvp, LK_EXCLUSIVE, p); *vpp = NULLVP; if (error) return (error); } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); if (error) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
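* (A RENAME-target lookup hands the node back with SAVENAME set and
* must never resolve to the directory itself, hence the EISDIR on a
* filehandle match below.)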
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dvp, 0, p); return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p); return (error); } newvp = NFSTOV(np); if (lockparent && (flags & ISLASTCN) && (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { vput(newvp); return (error); } } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; nfsm_reqdone; if (error) { if (newvp != NULLVP) { vrele(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (!lockparent) VOP_UNLOCK(dvp, 0, p); if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VREG) return (EPERM); return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0)); } /* * nfs readlink call */ static int nfs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EPERM); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. 
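* (v3 replies carry post-op attributes even on failure, so those are
* picked off before the error is checked.)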
*/ int nfs_readlinkrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); nfsm_mtouio(uiop, len); } nfsm_reqdone; return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(&uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(vp, uiop, cred, iomode, must_commit) register struct vnode *vp; register struct uio *uiop; struct ucred *cred; int *iomode, *must_commit; { register u_long *tl; register caddr_t cp; register int t1, t2, backup; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); txdr_hyper(&uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); } else { nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *++tl = txdr_unsigned(uiop->uio_offset); tl += 2; } *tl = txdr_unsigned(len); nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_flag |= NFSMNT_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } } } else nfsm_loadattr(vp, (struct vattr *)0); if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); tsiz -= len; } nfsmout: if (vp->v_mount->mnt_flag & MNT_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. 
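* So a v2 mknod of a character device goes out as an ordinary CREATE
* with sa_mode carrying the IFCHR bits and sa_size carrying the
* packed major/minor from va_rdev; v3 has a real MKNOD with an
* explicit type word and, for devices, a separate major/minor pair.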
*/ static int nfs_mknodrpc(dvp, vpp, cnp, vap) register struct vnode *dvp; register struct vnode **vpp; register struct componentname *cnp; register struct vattr *vap; { register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np = (struct nfsnode *)0; struct vattr vattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else { VOP_ABORTOP(dvp, cnp); vput(dvp); return (EOPNOTSUPP); } if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } nfsstats.rpccnt[NFSPROC_MKNOD]++; nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR); *tl++ = vtonfsv3_type(vap->va_type); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(major(vap->va_rdev)); *tl = txdr_unsigned(minor(vap->va_rdev)); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. 
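* (The freshly created vnode is vput() right away rather than handed
* back; callers of VOP_MKNOD do not expect the new vnode.)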
*/ /* ARGSUSED */ static int nfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *newvp; int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); if (!error) vput(newvp); return (error); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); nfsm_build(tl, u_long *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&in_ifaddrhead)) *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_build(tl, u_long *, NFSX_V3SRVSATTR); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
* - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(ap) struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs_remove: no name"); if (vp->v_usecount < 1) panic("nfs_remove: bad v_usecount"); #endif if (vp->v_usecount == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); /* Do the rpc */ if (error != EINTR) error = nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); zfree(namei_zone, cnp->cn_pnbuf); np->n_attrstamp = 0; vput(dvp); if (vp == dvp) vrele(vp); else vput(vp); return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs_removeit(sp) register struct sillyrename *sp; { return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, (struct proc *)0)); } /* * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 
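 */

/*
 * Aside: REMOVE is not idempotent, so a retransmitted request whose
 * first reply was lost finds the name already gone and draws ENOENT.
 * The callers above fold that into success; a hedged restatement:
 */
#include <errno.h>

static int
example_deidempotent(int error)
{
	/* Treat ENOENT from a presumed retransmission as success. */
	return (error == ENOENT ? 0 : error);
}

/*
 * nfs_removerpc() follows: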
*/ static int nfs_removerpc(dvp, name, namelen, cred, proc) register struct vnode *dvp; char *name; int namelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file rename call */ static int nfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { register struct vnode *fvp = ap->a_fvp; register struct vnode *tvp = ap->a_tvp; register struct vnode *fdvp = ap->a_fdvp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_proc); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(sdvp, scnp, sp) struct vnode *sdvp; struct componentname *scnp; register struct sillyrename *sp; { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
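 */

/*
 * Aside: request buffers are sized before marshalling, with every XDR
 * string rounded up to a 4-byte boundary.  nfsm_rndup() is assumed to
 * be the usual mask-based round-up:
 */
#define	EXAMPLE_RNDUP(n)	(((n) + 3) & ~3)

/*
 * so the RENAME request below reserves two file handles, two length
 * words, and both (padded) names:
 *
 *	(NFSX_FH(v3) + NFSX_UNSIGNED) * 2 +
 *	    EXAMPLE_RNDUP(fnamelen) + EXAMPLE_RNDUP(tnamelen)
 *
 * nfs_renamerpc() follows: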
*/ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) register struct vnode *fdvp; char *fnameptr; int fnamelen; register struct vnode *tdvp; char *tnameptr; int tnamelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(vp, cnp); if (tdvp == vp) vrele(tdvp); else vput(tdvp); return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); nfsstats.rpccnt[NFSPROC_LINK]++; nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; vput(tdvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, cnp->cn_cred->cr_gid); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); if (v3) { if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } nfsm_reqdone; if (newvp) vput(newvp); zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs make dir call */ static int nfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; register int len; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { vrele(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vrele(newvp); } else *ap->a_vpp = newvp; zfree(namei_zone, cnp->cn_pnbuf); vput(dvp); return (error); } /* * nfs remove directory call */ static int nfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); vput(vp); vput(dvp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred, 0); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) || (uiop->uio_resid & (NFS_DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
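 *
 * Aside: each decoded NFSv3 entry carries an 8-byte fileid, the
 * XDR-padded name, and an 8-byte cookie (v2 uses 4-byte fileids and
 * cookies); the loop below re-packs these as 4.4BSD struct dirent
 * records and pads the last record of every DIRBLKSIZ block so that
 * d_reclen always fills the block exactly.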
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; } else { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { m_freem(mrep); goto nfsmout; } } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
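 */

/*
 * Aside: the block-padding step shared by both readdir RPCs, restated
 * as a self-contained helper (hypothetical name).  "used" is the byte
 * count already packed into the current directory block:
 */
static int
example_pad_dirblk(int reclen, int used, int dirblksiz)
{
	/* Widen the final record's d_reclen to fill out the block. */
	if (used > 0)
		reclen += dirblksiz - used;
	return (reclen);
}

/*
 * nfs_readdirplusrpc() follows: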
*/ int nfs_readdirplusrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register struct vnode *newvp; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * come back and get 
them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { if (error = nfs_nget(vp->v_mount, fhp, fhsize, &np)) doit = 0; else newvp = NFSTOV(np); } } if (doit) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, (struct vattr *)0); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; cnp->cn_hash = 0; for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) cnp->cn_hash += (unsigned char)*cp * i; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); } if (newvp != NULLVP) { vrele(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(dvp, vp, cnp) struct vnode *dvp, *vp; struct componentname *cnp; { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crdup(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* Fudge together a funny name */ pid = cnp->cn_proc->p_pid; sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, (struct nfsnode **)0) == 0) { sp->s_name[4]++; if (sp->s_name[4] > 'z') { error = EINVAL; goto bad; } } if (error = nfs_renameit(dvp, cnp, sp)) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. 
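 */

/*
 * Aside: the "funny name" minted by nfs_sillyrename() above is
 * ".nfsA" + pid in hex + "4.4", with the fifth byte bumped through
 * 'A'..'z' on collisions.  An illustrative restatement:
 */
#include <stdio.h>

static int
example_sillyname(char *buf, unsigned int pid)
{
	/* Caller retries with buf[4]++ while the name already exists. */
	return (sprintf(buf, ".nfsA%04x4.4", pid & 0xffff));
}

/*
 * nfs_lookitup() contract: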
* npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) register struct vnode *dvp; char *name; int len; struct ucred *cred; struct proc *procp; struct nfsnode **npp; { register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, (struct vattr *)0); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { if (newvp) if (newvp == dvp) vrele(newvp); else vput(newvp); } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ static int nfs_commit(vp, offset, cnt, cred, procp) register struct vnode *vp; u_quad_t offset; int cnt; struct ucred *cred; struct proc *procp; { register caddr_t cp; register u_long *tl; register int t1, t2; register struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); txdr_hyper(&offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, procp, cred); nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } nfsm_reqdone; return (error); } /* * Kludge City.. * - make nfs_bmap() essentially a no-op that does no translation * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc * (Maybe I could use the process's page mapping, but I was concerned that * Kernel Write might not be enabled and also figured copyout() would do * a lot more work than bcopy() and also it currently happens in the * context of the swapper process (2). 
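 */

/*
 * Aside: nfs_commit() above detects a server reboot by comparing the
 * write verifier in each COMMIT reply with the copy cached in the
 * mount point; a mismatch means uncommitted data may have been lost
 * and must be rewritten.  A hedged restatement (8-byte verifier, per
 * NFSX_V3WRITEVERF):
 */
#include <string.h>

static int
example_check_verf(unsigned char cached[8], const unsigned char reply[8])
{
	if (memcmp(cached, reply, 8) != 0) {
		memcpy(cached, reply, 8);
		return (1);	/* stale: caller rewrites, not commits */
	}
	return (0);
}

/*
 * nfs_bmap() follows: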
*/ static int nfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just all nfs_doio() to do the * request. */ static int nfs_strategy(ap) struct vop_strategy_args *ap; { register struct buf *bp = ap->a_bp; struct ucred *cr; struct proc *p; int error = 0; if (bp->b_flags & B_PHYS) panic("nfs physio"); if (bp->b_flags & B_ASYNC) p = (struct proc *)0; else p = curproc; /* XXX */ if (bp->b_flags & B_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, NOCRED)) error = nfs_doio(bp, cr, p); return (error); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ static int nfs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct proc * a_p; } */ *ap; { return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ static int nfs_flush(vp, cred, waitfor, p, commit) register struct vnode *vp; struct ucred *cred; int waitfor; struct proc *p; int commit; { register struct nfsnode *np = VTONFS(vp); register struct buf *bp; register int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but nas not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. 
*/ if (bveccount > NFS_COMMITBVECSIZ) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) continue; bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; bp->b_flags |= (B_BUSY | B_WRITEINPROG); vfs_busy_pages(bp, 1); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } splx(s); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, p); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { vfs_unbusy_pages(bp); brelse(bp); } else { vp->v_numoutput++; bp->b_flags |= B_ASYNC; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) { vfs_bio_need_satisfy(); } } + s = splbio(); /* XXX check this positionning */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); + splx(s); biodone(bp); } } } /* * Start/do any write(s) that are required. 
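 *
 * Aside: when committing, this loop runs twice.  The first pass
 * (passone) skips B_NEEDCOMMIT buffers and pushes everything else;
 * the "again" pass then batches the commit RPCs above and writes
 * whatever still remains, with B_WRITEINPROG|B_NEEDCOMMIT set.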
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bp->b_flags & B_BUSY) { if (waitfor != MNT_WAIT || passone) continue; bp->b_flags |= B_WANTED; error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); splx(s); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) continue; bremfree(bp); if (passone || !commit) bp->b_flags |= (B_BUSY|B_ASYNC); else bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp); goto loop; } splx(s); if (passone) { passone = 0; goto again; } if (waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } } if (vp->v_dirtyblkhd.lh_first && commit) { goto loop; } } if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. * Currently unsupported. */ static int nfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * The following kludge is to allow diskless support to work * until a real NFS lockd is implemented. Basically, just pretend * that this is a local lock. */ return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } /* * Print out the contents of an nfsnode. */ static int nfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); printf("tag VT_NFS, fileid %ld fsid 0x%lx", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Just call nfs_writebp() with the force argument set to 1. */ static int nfs_bwrite(ap) struct vop_bwrite_args /* { struct vnode *a_bp; } */ *ap; { return (nfs_writebp(ap->a_bp, 1)); } /* * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless * the force flag is one and it also handles the B_NEEDCOMMIT flag. */ int nfs_writebp(bp, force) register struct buf *bp; int force; { + int s; register int oldflags = bp->b_flags, retv = 1; off_t off; if(!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); if (bp->b_flags & B_INVAL) bp->b_flags |= B_INVAL | B_NOCACHE; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) vfs_bio_need_satisfy(); } + s = splbio(); /* XXX check if needed */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; + splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not * an actual write will have to be scheduled via. VOP_STRATEGY(). * If B_WRITEINPROG is already set, then push it with a write anyhow. 
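 *
 * Aside: the splbio()/splx() pairs added in this revision appear
 * intended to keep biodone(), which runs at interrupt time and walks
 * the same vnode buffer lists, from racing the flag updates and
 * reassignbuf() calls here; the in-line XXX notes mark them as still
 * under review.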
*/ vfs_busy_pages(bp, 1); if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; bp->b_flags |= B_WRITEINPROG; retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, bp->b_proc); bp->b_flags &= ~B_WRITEINPROG; if (!retv) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); } else if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(bp->b_vp->v_mount); } if (retv) { if (force) bp->b_flags |= B_WRITEINPROG; VOP_STRATEGY(bp); } if( (oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vattr *vap; register gid_t *gp; register struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; register int i; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If you're the super-user, * you always get access. */ if (cred->cr_uid == 0) return (0); vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_p); if (error) return (error); /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != vap->va_uid) { mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) if (vap->va_gid == *gp) goto found; mode >>= 3; found: ; } error = (vap->va_mode & mode) == mode ? 0 : EACCES; return (error); } /* * Read wrapper for special devices. */ static int nfsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; gettime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; gettime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. 
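 */

/*
 * Aside: the owner/group/other test in nfsspec_access() above,
 * restated compactly (hypothetical helper; the superuser short cut
 * is omitted).  The requested bits shift right three places for each
 * class the credential fails to match:
 */
#include <errno.h>
#include <sys/types.h>

static int
example_access(mode_t fmode, uid_t fuid, gid_t fgid, mode_t want,
    uid_t uid, const gid_t *groups, int ngroups)
{
	int i;

	if (uid != fuid) {
		want >>= 3;			/* not owner: group bits */
		for (i = 0; i < ngroups; i++)
			if (groups[i] == fgid)
				goto found;
		want >>= 3;			/* not in group: other bits */
found:		;
	}
	return ((fmode & want) == want ? 0 : EACCES);
}

/*
 * nfsspec_close() follows: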
*/ static int nfsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; gettime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; gettime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct timeval tv; struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { gettime(&tv); if (np->n_flag & NACC) { np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; } if (np->n_flag & NUPD) { np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; } np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } Index: head/sys/nfsclient/nfs_bio.c =================================================================== --- head/sys/nfsclient/nfs_bio.c (revision 34265) +++ head/sys/nfsclient/nfs_bio.c (revision 34266) @@ -1,1234 +1,1238 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. 
* 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.51 1998/03/06 09:46:43 msmith Exp $ + * $Id: nfs_bio.c,v 1.52 1998/03/07 21:36:01 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size, struct proc *p)); static void nfs_prot_buf __P((struct buf *bp, int off, int n)); extern int nfs_numasync; extern struct nfsstats nfsstats; /* * Vnode op for VM getpages. */ int nfs_getpages(ap) struct vop_getpages_args *ap; { int i, error, nextoff, size, toff, npages; struct uio uio; struct iovec iov; vm_page_t m; vm_offset_t kva; struct buf *bp; if ((ap->a_vp->v_object) == NULL) { printf("nfs_getpages: called with non-merged cache vnode??\n"); return EOPNOTSUPP; } /* * We use only the kva address for the buffer, but this is extremely * convienient and fast. */ bp = getpbuf(); npages = btoc(ap->a_count); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, ap->a_m, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = ap->a_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(ap->a_m[0]->pindex); uio.uio_resid = ap->a_count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_procp = curproc; error = nfs_readrpc(ap->a_vp, &uio, curproc->p_ucred); pmap_qremove(kva, npages); relpbuf(bp); if (error && (uio.uio_resid == ap->a_count)) return VM_PAGER_ERROR; size = ap->a_count - uio.uio_resid; for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= size) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else { int nvalid = ((size + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } if (i != ap->a_reqpage) { /* * Whether or not to leave the page activated is up in * the air, but we should put the page on a page queue * somewhere (it already is in the object). Result: * It appears that emperical results show that * deactivating pages is best. */ /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error) { if (m->flags & PG_WANTED) vm_page_activate(m); else vm_page_deactivate(m); PAGE_WAKEUP(m); } else { vnode_pager_freepage(m); } } } return 0; } /* * Vnode op for VM putpages. 
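 */

/*
 * Aside: when nfs_getpages() above gets a short read, only the
 * DEV_BSIZE-aligned prefix of the last page is marked valid.  The
 * arithmetic, restated as a hypothetical helper:
 */
static int
example_valid_bytes(int size, int toff, int dev_bsize)
{
	/* Bytes of the page at offset toff covered by a read of size. */
	return (((size + dev_bsize - 1) - toff) & ~(dev_bsize - 1));
}

/*
 * nfs_putpages() follows: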
*/ int nfs_putpages(ap) struct vop_putpages_args *ap; { struct uio uio; struct iovec iov; vm_page_t m; vm_offset_t kva; struct buf *bp; int iomode, must_commit, i, error, npages; int *rtvals; rtvals = ap->a_rtvals; npages = btoc(ap->a_count); for (i = 0; i < npages; i++) { rtvals[i] = VM_PAGER_AGAIN; } /* * We use only the kva address for the buffer, but this is extremely * convienient and fast. */ bp = getpbuf(); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, ap->a_m, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = ap->a_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(ap->a_m[0]->pindex); uio.uio_resid = ap->a_count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_procp = curproc; if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(ap->a_vp, &uio, curproc->p_ucred, &iomode, &must_commit); pmap_qremove(kva, npages); relpbuf(bp); if (!error) { int nwritten = round_page(ap->a_count - uio.uio_resid) / PAGE_SIZE; for (i = 0; i < nwritten; i++) { rtvals[i] = VM_PAGER_OK; ap->a_m[i]->dirty = 0; } if (must_commit) nfs_clearcommit(ap->a_vp->v_mount); } return ap->a_rtvals[0]; } /* * Vnode op for read using bio * Any similarity to readip() is purely coincidental */ int nfs_bioread(vp, uio, ioflag, cred, getpages) register struct vnode *vp; register struct uio *uio; int ioflag; struct ucred *cred; int getpages; { register struct nfsnode *np = VTONFS(vp); register int biosize, diff, i; struct buf *bp = 0, *rabp; struct vattr vattr; struct proc *p; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn, rabn; int bufsize; int nra, error = 0, n = 0, on = 0, not_readin; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("nfs_read mode"); #endif if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); p = uio->uio_procp; if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); biosize = vp->v_mount->mnt_stat.f_iosize; /* * For nfs, cache consistency can only be maintained approximately. * Although RFC1094 does not specify the criteria, the following is * believed to be compatible with the reference port. * For nqnfs, full cache consistency is maintained within the loop. * For nfs: * If the file's modify time on the server has changed since the * last read rpc or you have written to the file, * you may have lost data cache consistency with the * server, so flush all of the file's data out of the cache. * Then force a getattr rpc to ensure that you have up to date * attributes. * NB: This implies that cache data can be read when up to * NFS_ATTRTIMEO seconds out of date. If you find that you need current * attributes this could be forced by setting n_attrstamp to 0 before * the VOP_GETATTR() call. */ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { if (np->n_flag & NMODIFIED) { if (vp->v_type != VREG) { if (vp->v_type != VDIR) panic("nfs: bioread, not dir"); nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } np->n_attrstamp = 0; error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } } } do { /* * Get a valid lease. If cached data is stale, flush it. 
*/ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, cred, p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE) || ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; } } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } } if (np->n_flag & NQNFSNONCACHE) { switch (vp->v_type) { case VREG: return (nfs_readrpc(vp, uio, cred)); case VLNK: return (nfs_readlinkrpc(vp, uio, cred)); case VDIR: break; default: printf(" NQNFSNONCACHE: type %x unexpected\n", vp->v_type); }; } switch (vp->v_type) { case VREG: nfsstats.biocache_reads++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); not_readin = 1; /* * Start the read ahead(s), as required. */ if (nfs_numasync > 0 && nmp->nm_readahead > 0) { for (nra = 0; nra < nmp->nm_readahead && (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { rabn = lbn + 1 + nra; if (!incore(vp, rabn)) { rabp = nfs_getcacheblk(vp, rabn, biosize, p); if (!rabp) return (EINTR); if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else brelse(rabp); } } } /* * If the block is in the cache and has the required data * in a valid region, just copy it out. * Otherwise, get the block and write back/read in, * as required. */ again: bufsize = biosize; if ((off_t)(lbn + 1) * biosize > np->n_size && (off_t)(lbn + 1) * biosize - np->n_size < biosize) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } bp = nfs_getcacheblk(vp, lbn, bufsize, p); if (!bp) return (EINTR); /* * If we are being called from nfs_getpages, we must * make sure the buffer is a vmio buffer. The vp will * already be setup for vmio but there may be some old * non-vmio buffers attached to it. */ if (getpages && !(bp->b_flags & B_VMIO)) { #ifdef DIAGNOSTIC printf("nfs_bioread: non vmio buf found, discarding\n"); #endif bp->b_flags |= B_NOCACHE; bp->b_flags |= B_INVAFTERWRITE; if (bp->b_dirtyend > 0) { if ((bp->b_flags & B_DELWRI) == 0) panic("nfsbioread"); if (VOP_BWRITE(bp) == EINTR) return (EINTR); } else brelse(bp); goto again; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); not_readin = 0; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); return (error); } } if (bufsize > on) { n = min((unsigned)(bufsize - on), uio->uio_resid); } else { n = 0; } diff = np->n_size - uio->uio_offset; if (diff < n) n = diff; if (not_readin && n > 0) { if (on < bp->b_validoff || (on + n) > bp->b_validend) { bp->b_flags |= B_NOCACHE; bp->b_flags |= B_INVAFTERWRITE; if (bp->b_dirtyend > 0) { if ((bp->b_flags & B_DELWRI) == 0) panic("nfsbioread"); if (VOP_BWRITE(bp) == EINTR) return (EINTR); } else brelse(bp); goto again; } } vp->v_lastr = lbn; diff = (on >= bp->b_validend) ? 
0 : (bp->b_validend - on); if (diff < n) n = diff; break; case VLNK: nfsstats.biocache_readlinks++; bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); if (!bp) return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { bp->b_flags |= B_ERROR; brelse(bp); return (error); } } n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); on = 0; break; case VDIR: nfsstats.biocache_readdirs++; if (np->n_direofoffset && uio->uio_offset >= np->n_direofoffset) { return (0); } lbn = uio->uio_offset / NFS_DIRBLKSIZ; on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); } while (error == NFSERR_BAD_COOKIE) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, 0, cred, p, 1); /* * Yuck! The directory has been modified on the * server. The only way to get the block is by * reading from the beginning to get all the * offset cookies. */ for (i = 0; i <= lbn && !error; i++) { if (np->n_direofoffset && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) return (0); bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); } else if (i < lbn) { brelse(bp); } } } } if (error) return (error); } /* * If not eof and read aheads are enabled, start one. * (You need the current block first, so that you have the * directory offset cookie of the next block.) */ if (nfs_numasync > 0 && nmp->nm_readahead > 0 && (np->n_direofoffset == 0 || (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && !(np->n_flag & NQNFSNONCACHE) && !incore(vp, lbn + 1)) { rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p); if (rabp) { if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else { brelse(rabp); } } } /* * Make sure we use a signed variant of min() since * the second term may be negative. 
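 *
 * Aside: NFS_DIRBLKSIZ - bp->b_resid - on goes negative whenever the
 * server returned fewer than "on" bytes of the block, so an unsigned
 * min() would wrap around and copy out garbage.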
*/ n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); break; }; if (n > 0) { error = uiomove(bp->b_data + on, (int)n, uio); } switch (vp->v_type) { case VREG: break; case VLNK: n = 0; break; case VDIR: if (np->n_flag & NQNFSNONCACHE) bp->b_flags |= B_INVAL; break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); } brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n > 0); return (error); } static void nfs_prot_buf(bp, off, n) struct buf *bp; int off; int n; { int pindex, boff, end; if ((bp->b_flags & B_VMIO) == 0) return; end = round_page(off + n); for (boff = trunc_page(off); boff < end; boff += PAGE_SIZE) { pindex = boff >> PAGE_SHIFT; vm_page_protect(bp->b_pages[pindex], VM_PROT_NONE); } } /* * Vnode op for write using bio */ int nfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register int biosize; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); register struct ucred *cred = ap->a_cred; int ioflag = ap->a_ioflag; struct buf *bp; struct vattr vattr; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn; int bufsize; int n, on, error = 0, iomode, must_commit; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("nfs_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("nfs_write proc"); #endif if (vp->v_type != VREG) return (EIO); if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; return (np->n_error); } if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { np->n_attrstamp = 0; error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } if (ioflag & IO_APPEND) { np->n_attrstamp = 0; error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); uio->uio_offset = np->n_size; } } if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, i don't think it matters */ if (p && uio->uio_offset + uio->uio_resid > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { psignal(p, SIGXFSZ); return (EFBIG); } /* * I use nm_rsize, not nm_wsize so that all buffer cache blocks * will be the same size within a filesystem. nfs_writerpc will * still use nm_wsize when sizing the rpc's. */ biosize = vp->v_mount->mnt_stat.f_iosize; do { /* * Check for a valid write lease. 
*/ if ((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_WRITE)) { do { error = nqnfs_getlease(vp, ND_WRITE, cred, p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; } } if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); if (must_commit) nfs_clearcommit(vp->v_mount); return (error); } nfsstats.biocache_writes++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); n = min((unsigned)(biosize - on), uio->uio_resid); again: if (uio->uio_offset + n > np->n_size) { np->n_size = uio->uio_offset + n; np->n_flag |= NMODIFIED; vnode_pager_setsize(vp, (u_long)np->n_size); } bufsize = biosize; if ((lbn + 1) * biosize > np->n_size) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } bp = nfs_getcacheblk(vp, lbn, bufsize, p); if (!bp) return (EINTR); if (bp->b_wcred == NOCRED) { crhold(cred); bp->b_wcred = cred; } np->n_flag |= NMODIFIED; if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) { bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); } /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { bp->b_proc = p; if (VOP_BWRITE(bp) == EINTR) return (EINTR); goto again; } /* * Check for valid write lease and get one as required. * In case getblk() and/or bwrite() delayed us. */ if ((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_WRITE)) { do { error = nqnfs_getlease(vp, ND_WRITE, cred, p); } while (error == NQNFS_EXPIRED); if (error) { brelse(bp); return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { brelse(bp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; goto again; } } error = uiomove((char *)bp->b_data + on, n, uio); if (error) { bp->b_flags |= B_ERROR; brelse(bp); return (error); } /* * This will keep the buffer and mmaped regions more coherent. */ nfs_prot_buf(bp, on, n); if (bp->b_dirtyend > 0) { bp->b_dirtyoff = min(on, bp->b_dirtyoff); bp->b_dirtyend = max((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || bp->b_validoff > bp->b_dirtyend) { bp->b_validoff = bp->b_dirtyoff; bp->b_validend = bp->b_dirtyend; } else { bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); bp->b_validend = max(bp->b_validend, bp->b_dirtyend); } /* * Since this block is being modified, it must be written * again and not just committed. */ bp->b_flags &= ~B_NEEDCOMMIT; /* * If the lease is non-cachable or IO_SYNC do bwrite(). */ if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { bp->b_proc = p; if (ioflag & IO_INVAL) bp->b_flags |= B_INVAL; error = VOP_BWRITE(bp); if (error) return (error); if (np->n_flag & NQNFSNONCACHE) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } } else if ((n + on) == biosize && (nmp->nm_flag & NFSMNT_NQNFS) == 0) { bp->b_proc = (struct proc *)0; bp->b_flags |= B_ASYNC; (void)nfs_writebp(bp, 0); } else bdwrite(bp); } while (uio->uio_resid > 0 && n > 0); return (0); } /* * Get an nfs cache block. 
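The write path above keeps exactly one dirty extent per buffer in (b_dirtyoff, b_dirtyend): a new write either overlaps or abuts the extent and is folded in with min()/max(), or the old extent has to be pushed out with VOP_BWRITE() before the bookkeeping can restart. That decision, modeled self-contained (struct and helper names are the example's own):

#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

struct dirty { int off, end; };		/* one extent, end exclusive */

/*
 * Return 0 when the write [on, on + n) is disjoint from the extent
 * (the "on > b_dirtyend || on + n < b_dirtyoff" test, forcing a flush);
 * otherwise widen the extent to cover the new write.
 */
static int
merge_dirty(struct dirty *d, int on, int n)
{
	if (d->end > 0 && (on > d->end || on + n < d->off))
		return (0);
	if (d->end > 0) {
		d->off = MIN(on, d->off);
		d->end = MAX(on + n, d->end);
	} else {
		d->off = on;
		d->end = on + n;
	}
	return (1);
}

int
main(void)
{
	struct dirty d = { 0, 0 };

	merge_dirty(&d, 100, 50);	/* first write: [100,150) */
	merge_dirty(&d, 140, 60);	/* overlaps: grows to [100,200) */
	printf("dirty [%d,%d), disjoint merge ok=%d\n",
	    d.off, d.end, merge_dirty(&d, 300, 10));
	return (0);
}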
* Allocate a new one if the block isn't currently in the cache * and return the block marked busy. If the calling process is * interrupted by a signal for an interruptible mount point, return * NULL. */ static struct buf * nfs_getcacheblk(vp, bn, size, p) struct vnode *vp; daddr_t bn; int size; struct proc *p; { register struct buf *bp; struct mount *mp; struct nfsmount *nmp; mp = vp->v_mount; nmp = VFSTONFS(mp); if (nmp->nm_flag & NFSMNT_INT) { bp = getblk(vp, bn, size, PCATCH, 0); while (bp == (struct buf *)0) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) return ((struct buf *)0); bp = getblk(vp, bn, size, 0, 2 * hz); } } else bp = getblk(vp, bn, size, 0, 0); if( vp->v_type == VREG) { int biosize; biosize = mp->mnt_stat.f_iosize; bp->b_blkno = (bn * biosize) / DEV_BSIZE; } return (bp); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int nfs_vinvalbuf(vp, flags, cred, p, intrflg) struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int intrflg; { register struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slpflag, slptimeo; if (vp->v_flag & VXLOCK) { return (0); } if ((nmp->nm_flag & NFSMNT_INT) == 0) intrflg = 0; if (intrflg) { slpflag = PCATCH; slptimeo = 2 * hz; } else { slpflag = 0; slptimeo = 0; } /* * First wait for any other process doing a flush to complete. */ while (np->n_flag & NFLUSHINPROG) { np->n_flag |= NFLUSHWANT; error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo); if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) return (EINTR); } /* * Now, flush as required. */ np->n_flag |= NFLUSHINPROG; error = vinvalbuf(vp, flags, cred, p, slpflag, 0); while (error) { if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { np->n_flag &= ~NFLUSHINPROG; if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } return (EINTR); } error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); } np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } return (0); } /* * Initiate asynchronous I/O. Return an error if no nfsiods are available. * This is mainly to avoid queueing async I/O requests when the nfsiods * are all hung on a dead server. */ int nfs_asyncio(bp, cred) register struct buf *bp; struct ucred *cred; { struct nfsmount *nmp; int i; int gotiod; int slpflag = 0; int slptimeo = 0; int error; if (nfs_numasync == 0) return (EIO); nmp = VFSTONFS(bp->b_vp->v_mount); again: if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; gotiod = FALSE; /* * Find a free iod to process this request. */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (nfs_iodwant[i]) { /* * Found one, so wake it up and tell it which * mount to process. */ NFS_DPF(ASYNCIO, ("nfs_asyncio: waking iod %d for mount %p\n", i, nmp)); nfs_iodwant[i] = (struct proc *)0; nfs_iodmount[i] = nmp; nmp->nm_bufqiods++; wakeup((caddr_t)&nfs_iodwant[i]); gotiod = TRUE; break; } /* * If none are free, we may already have an iod working on this mount * point. If so, it will process our request. */ if (!gotiod) { if (nmp->nm_bufqiods > 0) { NFS_DPF(ASYNCIO, ("nfs_asyncio: %d iods are already processing mount %p\n", nmp->nm_bufqiods, nmp)); gotiod = TRUE; } } /* * If we have an iod which can process the request, then queue * the buffer. */ if (gotiod) { /* * Ensure that the queue never grows too large. 
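nfs_vinvalbuf() above serializes concurrent flushers with two flag bits and tsleep()/wakeup() on the flag word: latecomers set NFLUSHWANT and sleep until the owner clears NFLUSHINPROG and wakes them. The same single-flusher gate, translated roughly into userland pthreads (every name below is the sketch's own, not the kernel's):

#include <pthread.h>

#define FLUSHINPROG	0x1	/* a flush is running */
#define FLUSHWANT	0x2	/* someone is waiting for it */

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int nflag;

static void
flush_buffers(void)
{
	pthread_mutex_lock(&lk);
	while (nflag & FLUSHINPROG) {	/* another flusher owns the node */
		nflag |= FLUSHWANT;
		pthread_cond_wait(&cv, &lk);	/* tsleep(&np->n_flag, ...) */
	}
	nflag |= FLUSHINPROG;
	pthread_mutex_unlock(&lk);

	/* ... invalidate the buffers here, without holding the lock ... */

	pthread_mutex_lock(&lk);
	nflag &= ~FLUSHINPROG;
	if (nflag & FLUSHWANT) {	/* wakeup(&np->n_flag) */
		nflag &= ~FLUSHWANT;
		pthread_cond_broadcast(&cv);
	}
	pthread_mutex_unlock(&lk);
}

int
main(void)
{
	flush_buffers();	/* single-threaded smoke test */
	return (0);
}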
*/ while (nmp->nm_bufqlen >= 2*nfs_numasync) { NFS_DPF(ASYNCIO, ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); nmp->nm_bufqwant = TRUE; error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO, "nfsaio", slptimeo); if (error) { if (nfs_sigintr(nmp, NULL, bp->b_proc)) return (EINTR); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } /* * We might have lost our iod while sleeping, * so check and loop if nescessary. */ if (nmp->nm_bufqiods == 0) { NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); goto again; } } if (bp->b_flags & B_READ) { if (bp->b_rcred == NOCRED && cred != NOCRED) { crhold(cred); bp->b_rcred = cred; } } else { bp->b_flags |= B_WRITEINPROG; if (bp->b_wcred == NOCRED && cred != NOCRED) { crhold(cred); bp->b_wcred = cred; } } TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist); nmp->nm_bufqlen++; return (0); } /* * All the iods are busy on other mounts, so return EIO to * force the caller to process the i/o synchronously. */ NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n")); return (EIO); } /* * Do an I/O operation to/from a cache block. This may be called * synchronously or from an nfsiod. */ int nfs_doio(bp, cr, p) register struct buf *bp; struct ucred *cr; struct proc *p; { register struct uio *uiop; register struct vnode *vp; struct nfsnode *np; struct nfsmount *nmp; int error = 0, diff, len, iomode, must_commit = 0; struct uio uio; struct iovec io; vp = bp->b_vp; np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = p; /* * Historically, paging was done with physio, but no more. */ if (bp->b_flags & B_PHYS) { /* * ...though reading /dev/drum still gets us here. */ io.iov_len = uiop->uio_resid = bp->b_bcount; /* mapping was done by vmapbuf() */ io.iov_base = bp->b_data; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; if (bp->b_flags & B_READ) { uiop->uio_rw = UIO_READ; nfsstats.read_physios++; error = nfs_readrpc(vp, uiop, cr); } else { int com; iomode = NFSV3WRITE_DATASYNC; uiop->uio_rw = UIO_WRITE; nfsstats.write_physios++; error = nfs_writerpc(vp, uiop, cr, &iomode, &com); } if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else if (bp->b_flags & B_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; nfsstats.read_bios++; error = nfs_readrpc(vp, uiop, cr); if (!error) { bp->b_validoff = 0; if (uiop->uio_resid) { /* * If len > 0, there is a hole in the file and * no writes after the hole have been pushed to * the server yet. * Just zero fill the rest of the valid area. 
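The `+' lines in the next hunk are the substantive change of this commit: nfs_doio() now calls reassignbuf() bracketed by splbio()/splx(). reassignbuf() moves the buffer between the vnode's clean and dirty lists, and those lists are presumably also touched from bio interrupt context (biodone() and friends), so the move must run with bio interrupts blocked. The save/raise/restore discipline in a toy userland model, with stand-in spl functions:

#include <stdio.h>

static int ipl;			/* stand-in for the interrupt priority level */

static int splbio(void) { int s = ipl; ipl = 1; return (s); }	/* raise */
static void splx(int s) { ipl = s; }				/* restore */

static void
reassignbuf_model(void)
{
	/* queue surgery that must not race interrupt-time completion */
	printf("moving buf between clean/dirty queues at ipl %d\n", ipl);
}

int
main(void)
{
	int s;

	s = splbio();		/* block bio interrupts, remember old level */
	reassignbuf_model();
	splx(s);		/* drop back to the saved level */
	return (0);
}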
*/ diff = bp->b_bcount - uiop->uio_resid; len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE + diff); if (len > 0) { len = min(len, uiop->uio_resid); bzero((char *)bp->b_data + diff, len); bp->b_validend = diff + len; } else bp->b_validend = diff; } else bp->b_validend = bp->b_bcount; } if (p && (vp->v_flag & VTEXT) && (((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_READ) && np->n_lrev != np->n_brev) || (!(nmp->nm_flag & NFSMNT_NQNFS) && np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { uprintf("Process killed due to text file modification\n"); psignal(p, SIGKILL); p->p_flag |= P_NOSWAP; } break; case VLNK: uiop->uio_offset = (off_t)0; nfsstats.readlink_bios++; error = nfs_readlinkrpc(vp, uiop, cr); break; case VDIR: nfsstats.readdir_bios++; uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; if (nmp->nm_flag & NFSMNT_RDIRPLUS) { error = nfs_readdirplusrpc(vp, uiop, cr); if (error == NFSERR_NOTSUPP) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) error = nfs_readdirrpc(vp, uiop, cr); break; default: printf("nfs_doio: type %x unexpected\n",vp->v_type); break; }; if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else { if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size) bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; nfsstats.write_bios++; if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; bp->b_flags |= B_WRITEINPROG; error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); if (!error && iomode == NFSV3WRITE_UNSTABLE) { bp->b_flags |= B_NEEDCOMMIT; if (bp->b_dirtyoff == 0 && bp->b_dirtyend == bp->b_bufsize) bp->b_flags |= B_CLUSTEROK; } else bp->b_flags &= ~B_NEEDCOMMIT; bp->b_flags &= ~B_WRITEINPROG; /* * For an interrupted write, the buffer is still valid * and the write hasn't been pushed to the server yet, * so we can't set B_ERROR and report the interruption * by setting B_EINTR. For the B_ASYNC case, B_EINTR * is not relevant, so the rpc attempt is essentially * a noop. For the case of a V3 write rpc not being * committed to stable storage, the block is still * dirty and requires either a commit rpc or another * write rpc with iomode == NFSV3WRITE_FILESYNC before * the block is reused. This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + int s; + bp->b_flags &= ~(B_INVAL|B_NOCACHE); ++numdirtybuffers; bp->b_flags |= B_DELWRI; + s = splbio(); reassignbuf(bp, vp); + splx(s); if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_flags |= B_ERROR; bp->b_error = np->n_error = error; np->n_flag |= NWRITEERR; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; biodone(bp); return (0); } } bp->b_resid = uiop->uio_resid; if (must_commit) nfs_clearcommit(vp->v_mount); biodone(bp); return (error); } Index: head/sys/nfsclient/nfs_vnops.c =================================================================== --- head/sys/nfsclient/nfs_vnops.c (revision 34265) +++ head/sys/nfsclient/nfs_vnops.c (revision 34266) @@ -1,3296 +1,3303 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.79 1998/03/06 09:46:48 msmith Exp $ + * $Id: nfs_vnops.c,v 1.80 1998/03/07 21:36:06 dyson Exp $ */ /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. 
*/ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static int nfsspec_read __P((struct vop_read_args *)); static int nfsspec_write __P((struct vop_write_args *)); static int nfsfifo_read __P((struct vop_read_args *)); static int nfsfifo_write __P((struct vop_write_args *)); static int nfsspec_close __P((struct vop_close_args *)); static int nfsfifo_close __P((struct vop_close_args *)); #define nfs_poll vop_nopoll static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); static int nfs_lookup __P((struct vop_lookup_args *)); static int nfs_create __P((struct vop_create_args *)); static int nfs_mknod __P((struct vop_mknod_args *)); static int nfs_open __P((struct vop_open_args *)); static int nfs_close __P((struct vop_close_args *)); static int nfs_access __P((struct vop_access_args *)); static int nfs_getattr __P((struct vop_getattr_args *)); static int nfs_setattr __P((struct vop_setattr_args *)); static int nfs_read __P((struct vop_read_args *)); static int nfs_mmap __P((struct vop_mmap_args *)); static int nfs_fsync __P((struct vop_fsync_args *)); static int nfs_remove __P((struct vop_remove_args *)); static int nfs_link __P((struct vop_link_args *)); static int nfs_rename __P((struct vop_rename_args *)); static int nfs_mkdir __P((struct vop_mkdir_args *)); static int nfs_rmdir __P((struct vop_rmdir_args *)); static int nfs_symlink __P((struct vop_symlink_args *)); static int nfs_readdir __P((struct vop_readdir_args *)); static int nfs_bmap __P((struct vop_bmap_args *)); static int nfs_strategy __P((struct vop_strategy_args *)); static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **)); static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); static int nfsspec_access __P((struct vop_access_args *)); static int nfs_readlink __P((struct vop_readlink_args *)); static int nfs_print __P((struct vop_print_args *)); static int nfs_advlock __P((struct vop_advlock_args *)); static int nfs_bwrite __P((struct vop_bwrite_args *)); /* * Global vfs data structures for nfs */ vop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) nfs_abortop }, { &vop_access_desc, (vop_t *) nfs_access }, { &vop_advlock_desc, (vop_t *) nfs_advlock }, { &vop_bmap_desc, (vop_t *) nfs_bmap }, { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, { &vop_close_desc, (vop_t *) nfs_close }, { &vop_create_desc, (vop_t *) nfs_create }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_getpages_desc, (vop_t *) nfs_getpages }, { &vop_putpages_desc, (vop_t *) nfs_putpages }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) nfs_link }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_lookup_desc, (vop_t *) nfs_lookup }, { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, { &vop_mknod_desc, (vop_t *) nfs_mknod }, { &vop_mmap_desc, (vop_t *) nfs_mmap }, { &vop_open_desc, (vop_t *) nfs_open }, { &vop_poll_desc, (vop_t *) nfs_poll }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfs_read }, { &vop_readdir_desc, (vop_t *) nfs_readdir }, { &vop_readlink_desc, (vop_t *) nfs_readlink }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_remove_desc, (vop_t *) nfs_remove }, { &vop_rename_desc, (vop_t *) nfs_rename }, 
{ &vop_rmdir_desc, (vop_t *) nfs_rmdir }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_strategy_desc, (vop_t *) nfs_strategy }, { &vop_symlink_desc, (vop_t *) nfs_symlink }, { &vop_write_desc, (vop_t *) nfs_write }, { NULL, NULL } }; static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsspec_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsspec_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; VNODEOP_SET(spec_nfsv2nodeop_opv_desc); vop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsfifo_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsfifo_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsfifo_write }, { NULL, NULL } }; static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct proc *procp)); static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)); static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen, struct ucred *cred, struct proc *proc)); static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr, int fnamelen, struct vnode *tdvp, char *tnameptr, int tnamelen, struct ucred *cred, struct proc *proc)); static int nfs_renameit __P((struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp)); /* * Global variables */ extern u_long nfs_true, nfs_false; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. 
*/
static int
nfs_access(ap)
	struct vop_access_args /* {
		struct vnode *a_vp;
		int  a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register u_long *tl;
	register caddr_t cp;
	register int t1, t2;
	caddr_t bpos, dpos, cp2;
	int error = 0, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
	u_long mode, rmode;
	int v3 = NFS_ISV3(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG: case VDIR: case VLNK:
			return (EROFS);
		}
	}
	/*
	 * For nfs v3, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about, but
	 * this is better than just returning anything that is lying about
	 * in the cache.
	 */
	if (v3) {
		nfsstats.rpccnt[NFSPROC_ACCESS]++;
		nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
		nfsm_fhtom(vp, v3);
		nfsm_build(tl, u_long *, NFSX_UNSIGNED);
		if (ap->a_mode & VREAD)
			mode = NFSV3ACCESS_READ;
		else
			mode = 0;
		if (vp->v_type == VDIR) {
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY |
				    NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_LOOKUP;
		} else {
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY |
				    NFSV3ACCESS_EXTEND);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_EXECUTE;
		}
		*tl = txdr_unsigned(mode);
		nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
		nfsm_postop_attr(vp, attrflag);
		if (!error) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			rmode = fxdr_unsigned(u_long, *tl);
			/*
			 * The NFS V3 spec does not clarify whether or not
			 * the returned access bits can be a superset of
			 * the ones requested, so...
			 */
			if ((rmode & mode) != mode)
				error = EACCES;
		}
		nfsm_reqdone;
		return (error);
	} else {
		if ((error = nfsspec_access(ap)) != 0)
			return (error);

		/*
		 * Attempt to prevent a mapped root from accessing a file
		 * which it shouldn't.  We try to read a byte from the file
		 * if the user is root and the file is not zero length.
		 * After calling nfsspec_access, we should have the correct
		 * file size cached.
		 */
		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
		    && VTONFS(vp)->n_size > 0) {
			struct iovec aiov;
			struct uio auio;
			char buf[1];

			aiov.iov_base = buf;
			aiov.iov_len = 1;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = 0;
			auio.uio_resid = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_procp = ap->a_p;

			if (vp->v_type == VREG)
				error = nfs_readrpc(vp, &auio, ap->a_cred);
			else if (vp->v_type == VDIR) {
				char* bp;
				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
				aiov.iov_base = bp;
				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
				free(bp, M_TEMP);
			} else if (vp->v_type == VLNK)
				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
			else
				error = EACCES;
		}
		return (error);
	}
}

/*
 * nfs open vnode op
 * Check to see if the type is ok
 * and that deletion is not in progress.
 * For paged in text files, you will need to flush the page cache
 * if consistency is lost.
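nfs_open() below implements the usual close-to-open style check for the non-lease case: fetch fresh attributes, and if the server's mtime no longer matches the cached n_mtime, dump the cached buffers before trusting them. The shape of that check, reduced to a sketch whose types and names are the example's own:

#include <stdio.h>
#include <time.h>

struct ncache {
	time_t	mtime;		/* server mtime when the cache was filled */
	int	valid;
};

/* On open: compare a freshly fetched mtime against the cached one. */
static void
revalidate(struct ncache *nc, time_t server_mtime)
{
	if (nc->valid && nc->mtime != server_mtime) {
		printf("mtime moved: invalidating cached blocks\n");
		nc->valid = 0;	/* stands in for nfs_vinvalbuf(vp, V_SAVE) */
	}
	nc->mtime = server_mtime;
	nc->valid = 1;
}

int
main(void)
{
	struct ncache nc = { 100, 1 };

	revalidate(&nc, 100);	/* unchanged: cache kept */
	revalidate(&nc, 200);	/* changed: cache dropped, restamped */
	return (0);
}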
*/ /* ARGSUSED */ static int nfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct vattr vattr; int error; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { printf("open eacces vtyp=%d\n",vp->v_type); return (EACCES); } /* * Get a valid lease. If cached data is stale, flush it. */ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_cred, ap->a_p); } while (error == NQNFS_EXPIRED); if (error) { return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_brev = np->n_lrev; } } } else { if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) np->n_direofoffset = 0; if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } } } if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) np->n_attrstamp = 0; /* For Open/Close consistency */ return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NQNFS - do nothing now, since 2 is dealt with via leases and * 1 should be dealt with via an fsync() system call for * cases where write errors are important. 
*/ /* ARGSUSED */ static int nfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); np->n_flag &= ~NMODIFIED; } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register caddr_t cp; register u_long *tl; register int t1, t2; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); if (!error) nfsm_loadattr(vp, ap->a_vap); nfsm_reqdone; return (error); } /* * nfs setattr call. */ static int nfs_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (u_short)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); if (error) return (error); } tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, (u_long)np->n_size); } return (error); } /* * Do an nfs setattr rpc. 
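When truncating, nfs_setattr() above resizes the local node (np->n_size plus vnode_pager_setsize()) before the RPC goes out, then rolls back to the saved tsize if the RPC fails. The save/attempt/restore pattern in isolation, with a hypothetical always-failing RPC stub:

#include <stdio.h>

static unsigned long n_size = 8192;	/* cached file size */

/* Hypothetical RPC: 0 on success, an errno on failure. */
static int
setattr_rpc(unsigned long newsize)
{
	(void)newsize;
	return (5);		/* EIO, say */
}

static int
set_size(unsigned long newsize)
{
	unsigned long tsize = n_size;	/* remember for rollback */
	int error;

	n_size = newsize;		/* optimistic local update */
	error = setattr_rpc(newsize);
	if (error)
		n_size = tsize;		/* server refused: undo */
	return (error);
}

int
main(void)
{
	int error = set_size(0);

	printf("error=%d size=%lu\n", error, n_size);
	return (0);
}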
*/ static int nfs_setattrrpc(vp, vap, cred, procp) register struct vnode *vp; register struct vattr *vap; struct ucred *cred; struct proc *procp; { register struct nfsv2_sattr *sp; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; u_long *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); nfsm_fhtom(vp, v3); if (v3) { if (vap->va_mode != (u_short)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_mode); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_uid != (uid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_uid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_gid != (gid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_gid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_size != VNOVAL) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&vap->va_size, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if (vap->va_atime.tv_sec != time.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_mtime.tv_sec != time.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (u_short)VNOVAL) sp->sa_mode = VNOVAL; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = VNOVAL; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = VNOVAL; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, procp, cred); if (v3) { nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_reqdone; return (error); } /* * nfs lookup call, one step at a time... 
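The v3 branch of nfs_setattrrpc() above encodes every settable attribute as an XDR discriminated option: one boolean word (nfs_true/nfs_false), followed by the value only when the boolean is true. A toy encoder for one such field; the buffer handling and helper are the example's own:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define VNOVAL	(-1)			/* "field not set", as above */

static uint32_t *
put_mode(uint32_t *p, long mode)
{
	if (mode != VNOVAL) {
		*p++ = htonl(1);		/* nfs_true */
		*p++ = htonl((uint32_t)mode);	/* txdr_unsigned(mode) */
	} else
		*p++ = htonl(0);	/* nfs_false: no value follows */
	return (p);
}

int
main(void)
{
	uint32_t buf[4], *end;

	end = put_mode(put_mode(buf, 0644), VNOVAL);
	printf("encoded %ld words\n", (long)(end - buf));	/* 3 */
	return (0);
}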
* First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct componentname *cnp = ap->a_cnp; register struct vnode *dvp = ap->a_dvp; register struct vnode **vpp = ap->a_vpp; register int flags = cnp->cn_flags; register struct vnode *newvp; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsmount *nmp; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); *vpp = NULLVP; if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if (error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) { *vpp = NULLVP; return (error); } newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = vget(newvp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) error = vn_lock(dvp, LK_EXCLUSIVE, p); } else { error = vget(newvp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } error = vn_lock(dvp, LK_EXCLUSIVE, p); *vpp = NULLVP; if (error) return (error); } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); if (error) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
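The cache path in nfs_lookup() above captures newvp->v_id before the blocking vget()/vn_lock() calls and trusts the hit only if that generation number is unchanged afterward (and the cached n_ctime still matches freshly fetched attributes). A compact model of the generation check, under the assumption that v_id is bumped whenever a vnode gets recycled:

#include <stdio.h>

struct vn { int v_id; };	/* generation, bumped on vnode reuse */

/* Hypothetical blocking step that may recycle the vnode. */
static void
maybe_recycle(struct vn *vp, int recycle)
{
	if (recycle)
		vp->v_id++;
}

static int
hit_still_valid(struct vn *vp, int recycle)
{
	int vpid = vp->v_id;		/* capture before sleeping */

	maybe_recycle(vp, recycle);	/* vget()/vn_lock() may block */
	return (vpid == vp->v_id);	/* stale if identity changed */
}

int
main(void)
{
	struct vn v = { 42 };

	printf("%d %d\n", hit_still_valid(&v, 0), hit_still_valid(&v, 1));
	return (0);
}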
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dvp, 0, p); return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p); return (error); } newvp = NFSTOV(np); if (lockparent && (flags & ISLASTCN) && (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { vput(newvp); return (error); } } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; nfsm_reqdone; if (error) { if (newvp != NULLVP) { vrele(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (!lockparent) VOP_UNLOCK(dvp, 0, p); if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VREG) return (EPERM); return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0)); } /* * nfs readlink call */ static int nfs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EPERM); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. 
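nfs_readrpc() below splits a request into nm_rsize pieces and stops early at end of file: v3 replies carry an explicit eof flag, while v2 infers eof from a short read (retlen < len). The loop skeleton with the RPC stubbed out and arbitrary sizes:

#include <stdio.h>

#define RSIZE	8192		/* stand-in for nmp->nm_rsize */

/* Stub RPC: read up to len bytes at off against a 20000-byte file. */
static int
read_rpc(long off, int len, int *retlen, int *eof)
{
	long filesize = 20000;

	*retlen = (off + len > filesize) ? (int)(filesize - off) : len;
	*eof = (off + *retlen >= filesize);
	return (0);
}

int
main(void)
{
	long off = 0;
	int tsiz = 65536, len, retlen, eof, error;

	while (tsiz > 0) {
		len = (tsiz > RSIZE) ? RSIZE : tsiz;
		if ((error = read_rpc(off, len, &retlen, &eof)) != 0)
			return (error);
		printf("got %d at %ld\n", retlen, off);
		off += retlen;
		tsiz -= retlen;
		if (eof || retlen == 0)	/* the v3 termination test */
			tsiz = 0;
	}
	return (0);
}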
*/ int nfs_readlinkrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); nfsm_mtouio(uiop, len); } nfsm_reqdone; return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(&uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(vp, uiop, cred, iomode, must_commit) register struct vnode *vp; register struct uio *uiop; struct ucred *cred; int *iomode, *must_commit; { register u_long *tl; register caddr_t cp; register int t1, t2, backup; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); txdr_hyper(&uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); } else { nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *++tl = txdr_unsigned(uiop->uio_offset); tl += 2; } *tl = txdr_unsigned(len); nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_flag |= NFSMNT_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } } } else nfsm_loadattr(vp, (struct vattr *)0); if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); tsiz -= len; } nfsmout: if (vp->v_mount->mnt_flag & MNT_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. 
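nfs_writerpc() above also watches the server's write verifier: when a reply carries a verifier different from the one cached in nm_verf, the server has rebooted since earlier UNSTABLE writes, so *must_commit is raised and nfs_clearcommit() forces those buffers to be written again. The comparison in miniature, with the verifier size and helper names chosen for the example:

#include <stdio.h>
#include <string.h>

#define VERFSIZE	8	/* NFSX_V3WRITEVERF is 8 bytes */

static unsigned char saved[VERFSIZE];
static int have_verf;

/* Return 1 when earlier unstable writes must be re-sent. */
static int
check_verf(const unsigned char *reply)
{
	int must_commit = 0;

	if (!have_verf)
		have_verf = 1;			/* first reply: adopt it */
	else if (memcmp(reply, saved, VERFSIZE) != 0)
		must_commit = 1;		/* server reboot detected */
	memcpy(saved, reply, VERFSIZE);
	return (must_commit);
}

int
main(void)
{
	printf("%d\n", check_verf((const unsigned char *)"bootgen1"));
	printf("%d\n", check_verf((const unsigned char *)"bootgen1"));
	printf("%d\n", check_verf((const unsigned char *)"bootgen2"));
	return (0);
}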
*/ static int nfs_mknodrpc(dvp, vpp, cnp, vap) register struct vnode *dvp; register struct vnode **vpp; register struct componentname *cnp; register struct vattr *vap; { register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np = (struct nfsnode *)0; struct vattr vattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else { VOP_ABORTOP(dvp, cnp); vput(dvp); return (EOPNOTSUPP); } if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } nfsstats.rpccnt[NFSPROC_MKNOD]++; nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR); *tl++ = vtonfsv3_type(vap->va_type); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(major(vap->va_rdev)); *tl = txdr_unsigned(minor(vap->va_rdev)); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. 
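nfs_create() below implements NFSv3 exclusive create: the client ships an 8-byte verifier (here the primary interface address plus a bumped counter) so that a retransmitted CREATE of the same file can be told apart from a collision, and it drops back to UNCHECKED mode if the server answers NFSERR_NOTSUPP. Building such a verifier, sketched with a stand-in address:

#include <stdio.h>
#include <stdint.h>

static uint32_t create_verf;	/* bumped on every exclusive create */

/* Fill an 8-byte create verifier: host identity plus a counter. */
static void
make_verf(uint32_t host_ip, uint32_t verf[2])
{
	verf[0] = host_ip ? host_ip : create_verf;
	verf[1] = ++create_verf;
}

int
main(void)
{
	uint32_t v[2];

	make_verf(0xc0a80101, v);	/* 192.168.1.1 as a stand-in */
	printf("%08x %08x\n", v[0], v[1]);
	return (0);
}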
*/ /* ARGSUSED */ static int nfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *newvp; int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); if (!error) vput(newvp); return (error); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); nfsm_build(tl, u_long *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&in_ifaddrhead)) *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_build(tl, u_long *, NFSX_V3SRVSATTR); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
 * - If v_usecount > 1
 *	  If a rename is not already in the works
 *	     call nfs_sillyrename() to set it up
 *	  else
 *	     do the remove rpc
 */
static int
nfs_remove(ap)
	struct vop_remove_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode * a_dvp;
		struct vnode * a_vp;
		struct componentname * a_cnp;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct vnode *dvp = ap->a_dvp;
	register struct componentname *cnp = ap->a_cnp;
	register struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("nfs_remove: no name");
	if (vp->v_usecount < 1)
		panic("nfs_remove: bad v_usecount");
#endif
	if (vp->v_usecount == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but you can get the same
		 * ESTALE if another host removes the file anyway.
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
		/* Do the rpc */
		if (error != EINTR)
			error = nfs_removerpc(dvp, cnp->cn_nameptr,
				cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	zfree(namei_zone, cnp->cn_pnbuf);
	np->n_attrstamp = 0;
	vput(dvp);
	if (vp == dvp)
		vrele(vp);
	else
		vput(vp);
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 */
int
nfs_removeit(sp)
	register struct sillyrename *sp;
{

	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		(struct proc *)0));
}

/*
 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
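nfs_remove() above, and the ENOENT/EEXIST mappings in nfs_rename(), nfs_link(), nfs_symlink() and nfs_rmdir() further down, all paper over the same problem: these RPCs are not idempotent, so when the first reply is lost the retransmission of an already-applied request fails. Treating the expected retry errno as success, isolated:

#include <errno.h>
#include <stdio.h>

/*
 * err_retry is what a duplicate of an already-applied request returns:
 * ENOENT for REMOVE/RENAME/RMDIR, EEXIST for LINK/SYMLINK/MKDIR.
 */
static int
retry_errno(int error, int err_retry)
{
	return (error == err_retry ? 0 : error);
}

int
main(void)
{
	printf("%d\n", retry_errno(ENOENT, ENOENT));	/* treated as done */
	printf("%d\n", retry_errno(EIO, ENOENT));	/* real errors pass */
	return (0);
}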
*/ static int nfs_removerpc(dvp, name, namelen, cred, proc) register struct vnode *dvp; char *name; int namelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file rename call */ static int nfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { register struct vnode *fvp = ap->a_fvp; register struct vnode *tvp = ap->a_tvp; register struct vnode *fdvp = ap->a_fdvp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_proc); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(sdvp, scnp, sp) struct vnode *sdvp; struct componentname *scnp; register struct sillyrename *sp; { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
*/ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) register struct vnode *fdvp; char *fnameptr; int fnamelen; register struct vnode *tdvp; char *tnameptr; int tnamelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(vp, cnp); if (tdvp == vp) vrele(tdvp); else vput(tdvp); return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); nfsstats.rpccnt[NFSPROC_LINK]++; nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; vput(tdvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, cnp->cn_cred->cr_gid); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); if (v3) { if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } nfsm_reqdone; if (newvp) vput(newvp); zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
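* (See the comment in nfs_link() above: a retransmission of a SYMLINK that already succeeded on the server comes back EEXIST.)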
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs make dir call */ static int nfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; register int len; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { vrele(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vrele(newvp); } else *ap->a_vpp = newvp; zfree(namei_zone, cnp->cn_pnbuf); vput(dvp); return (error); } /* * nfs remove directory call */ static int nfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); vput(vp); vput(dvp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
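* (If the first transmission of the RMDIR succeeded but its reply was lost, the retry finds the directory already gone and the server answers ENOENT.)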
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred, 0); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) || (uiop->uio_resid & (NFS_DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
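* Each pass sends the cookie (and, for V3, the cookie verifier) saved from the previous reply; the entries that come back are doctored into struct dirent form directly in the caller's uio.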
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; } else { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { m_freem(mrep); goto nfsmout; } } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
*/ int nfs_readdirplusrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register struct vnode *newvp; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * come back and get 
them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { if (error = nfs_nget(vp->v_mount, fhp, fhsize, &np)) doit = 0; else newvp = NFSTOV(np); } } if (doit) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, (struct vattr *)0); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; cnp->cn_hash = 0; for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) cnp->cn_hash += (unsigned char)*cp * i; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); } if (newvp != NULLVP) { vrele(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(dvp, vp, cnp) struct vnode *dvp, *vp; struct componentname *cnp; { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crdup(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* Fudge together a funny name */ pid = cnp->cn_proc->p_pid; sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, (struct nfsnode **)0) == 0) { sp->s_name[4]++; if (sp->s_name[4] > 'z') { error = EINVAL; goto bad; } } if (error = nfs_renameit(dvp, cnp, sp)) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. 
* npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) register struct vnode *dvp; char *name; int len; struct ucred *cred; struct proc *procp; struct nfsnode **npp; { register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, (struct vattr *)0); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { if (newvp) if (newvp == dvp) vrele(newvp); else vput(newvp); } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ static int nfs_commit(vp, offset, cnt, cred, procp) register struct vnode *vp; u_quad_t offset; int cnt; struct ucred *cred; struct proc *procp; { register caddr_t cp; register u_long *tl; register int t1, t2; register struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); txdr_hyper(&offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, procp, cred); nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } nfsm_reqdone; return (error); } /* * Kludge City.. * - make nfs_bmap() essentially a no-op that does no translation * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc * (Maybe I could use the process's page mapping, but I was concerned that * Kernel Write might not be enabled and also figured copyout() would do * a lot more work than bcopy() and also it currently happens in the * context of the swapper process (2). 
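* nfs_bmap() below therefore just scales the logical block number by the mount's I/O size; there is no on-disk layout to translate against.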
*/ static int nfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just call nfs_doio() to do the * request. */ static int nfs_strategy(ap) struct vop_strategy_args *ap; { register struct buf *bp = ap->a_bp; struct ucred *cr; struct proc *p; int error = 0; if (bp->b_flags & B_PHYS) panic("nfs physio"); if (bp->b_flags & B_ASYNC) p = (struct proc *)0; else p = curproc; /* XXX */ if (bp->b_flags & B_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting, * queue the request, wake it up and wait for completion; * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, NOCRED)) error = nfs_doio(bp, cr, p); return (error); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ static int nfs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct proc * a_p; } */ *ap; { return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ static int nfs_flush(vp, cred, waitfor, p, commit) register struct vnode *vp; struct ucred *cred; int waitfor; struct proc *p; int commit; { register struct nfsnode *np = VTONFS(vp); register struct buf *bp; register int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers are waiting for a commit. */ bveccount = 0; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below.
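* (In that case bvec falls back to the on-stack array and the scan below stops after NFS_COMMITBVECSIZ buffers; any B_NEEDCOMMIT buffers that did not fit are pushed individually through nfs_writebp() on the second pass.)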
*/ if (bveccount > NFS_COMMITBVECSIZ) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) continue; bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; bp->b_flags |= (B_BUSY | B_WRITEINPROG); vfs_busy_pages(bp, 1); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } splx(s); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, p); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { vfs_unbusy_pages(bp); brelse(bp); } else { vp->v_numoutput++; bp->b_flags |= B_ASYNC; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) { vfs_bio_need_satisfy(); } } + s = splbio(); /* XXX check this positionning */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); + splx(s); biodone(bp); } } } /* * Start/do any write(s) that are required. 
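* Busy buffers are skipped (or, with MNT_WAIT once past the first pass, slept on and the scan restarted); buffers still needing a commit are left to the commit logic above on the first pass.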
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bp->b_flags & B_BUSY) { if (waitfor != MNT_WAIT || passone) continue; bp->b_flags |= B_WANTED; error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); splx(s); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) continue; bremfree(bp); if (passone || !commit) bp->b_flags |= (B_BUSY|B_ASYNC); else bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp); goto loop; } splx(s); if (passone) { passone = 0; goto again; } if (waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } } if (vp->v_dirtyblkhd.lh_first && commit) { goto loop; } } if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. * Currently unsupported. */ static int nfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * The following kludge is to allow diskless support to work * until a real NFS lockd is implemented. Basically, just pretend * that this is a local lock. */ return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } /* * Print out the contents of an nfsnode. */ static int nfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); printf("tag VT_NFS, fileid %ld fsid 0x%lx", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Just call nfs_writebp() with the force argument set to 1. */ static int nfs_bwrite(ap) struct vop_bwrite_args /* { struct vnode *a_bp; } */ *ap; { return (nfs_writebp(ap->a_bp, 1)); } /* * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless * the force flag is one and it also handles the B_NEEDCOMMIT flag. */ int nfs_writebp(bp, force) register struct buf *bp; int force; { + int s; register int oldflags = bp->b_flags, retv = 1; off_t off; if(!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); if (bp->b_flags & B_INVAL) bp->b_flags |= B_INVAL | B_NOCACHE; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) vfs_bio_need_satisfy(); } + s = splbio(); /* XXX check if needed */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; + splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not * an actual write will have to be scheduled via. VOP_STRATEGY(). * If B_WRITEINPROG is already set, then push it with a write anyhow. 
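* (A commit that fails with NFSERR_STALEWRITEVERF means the write verifier changed, i.e. the server rebooted and lost the uncommitted data, so nfs_clearcommit() marks everything to be written again.)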
*/ vfs_busy_pages(bp, 1); if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; bp->b_flags |= B_WRITEINPROG; retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, bp->b_proc); bp->b_flags &= ~B_WRITEINPROG; if (!retv) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); } else if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(bp->b_vp->v_mount); } if (retv) { if (force) bp->b_flags |= B_WRITEINPROG; VOP_STRATEGY(bp); } if( (oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vattr *vap; register gid_t *gp; register struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; register int i; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If you're the super-user, * you always get access. */ if (cred->cr_uid == 0) return (0); vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_p); if (error) return (error); /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != vap->va_uid) { mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) if (vap->va_gid == *gp) goto found; mode >>= 3; found: ; } error = (vap->va_mode & mode) == mode ? 0 : EACCES; return (error); } /* * Read wrapper for special devices. */ static int nfsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; gettime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; gettime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. 
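* The times were only accumulated locally (the device I/O itself never reaches the server), so they are pushed out with a SETATTR on the last close of a writable mount.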
*/ static int nfsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; gettime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; gettime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct timeval tv; struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { gettime(&tv); if (np->n_flag & NACC) { np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; } if (np->n_flag & NUPD) { np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; } np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } Index: head/sys/pci/if_de.c =================================================================== --- head/sys/pci/if_de.c (revision 34265) +++ head/sys/pci/if_de.c (revision 34266) @@ -1,5442 +1,5444 @@ +#undef __FreeBSD__ +#define __FreeBSD__ 3 /* $NetBSD: if_de.c,v 1.56 1997/10/20 14:32:46 matt Exp $ */ -/* $Id: if_de.c,v 1.79 1998/02/06 12:14:08 eivind Exp $ */ +/* $Id: if_de.c,v 1.80 1998/02/20 13:11:50 bde Exp $ */ /*- * Copyright (c) 1994-1997 Matt Thomas (matt@3am-software.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Id: if_de.c,v 1.94 1997/07/03 16:55:07 thomas Exp * */ /* * DEC 21040 PCI Ethernet Controller * * Written by Matt Thomas * BPF support code stolen directly from if_ec.c * * This driver supports the DEC DE435 or any other PCI * board which support 21040, 21041, or 21140 (mostly). */ #define TULIP_HDR_DATA #include "opt_inet.h" #include "opt_ipx.h" #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #elif defined(__bsdi__) || defined(__NetBSD__) #include #endif #if defined(__NetBSD__) #include "rnd.h" #if NRND > 0 #include #endif #endif #include #if defined(SIOCSIFMEDIA) && !defined(TULIP_NOIFMEDIA) #include #endif #include #ifdef TULIP_USE_SOFTINTR #include #endif #if defined(__bsdi__) && _BSDI_VERSION >= 199701 #include #include #endif #include "bpfilter.h" #if NBPFILTER > 0 #include #endif #ifdef INET #include #include #endif #ifdef IPX #include #include #endif #ifdef NS #include #include #endif #include #if defined(__FreeBSD__) #include #include #if NPCI > 0 #include #include #define DEVAR_INCLUDE "pci/if_devar.h" #endif #endif /* __FreeBSD__ */ #if defined(__bsdi__) #include #include #include #include #include #include #include #if _BSDI_VERSION < 199510 #include #else #define NEISA 0 #endif #if NEISA > 0 && _BSDI_VERSION >= 199401 #include #define TULIP_EISA #endif #define DEVAR_INCLUDE "i386/pci/if_devar.h" #endif /* __bsdi__ */ #if defined(__NetBSD__) #include #if defined(INET) #include #endif #include #if defined(__alpha__) #include #endif #include #include #include #define DEVAR_INCLUDE "dev/pci/if_devar.h" #endif /* __NetBSD__ */ /* * Intel CPUs should use I/O mapped access. */ #if defined(__i386__) || defined(TULIP_EISA) #define TULIP_IOMAPPED #endif #if 0 /* * This turns on all sort of debugging stuff and make the * driver much larger. */ #define TULIP_DEBUG #endif #if 0 #define TULIP_PERFSTATS #endif #if 0 #define TULIP_USE_SOFTINTR #endif #define TULIP_HZ 10 #include DEVAR_INCLUDE /* * This module supports * the DEC 21040 PCI Ethernet Controller. * the DEC 21041 PCI Ethernet Controller. * the DEC 21140 PCI Fast Ethernet Controller. 
*/ static void tulip_mii_autonegotiate(tulip_softc_t * const sc, const unsigned phyaddr); static tulip_intrfunc_t tulip_intr_shared(void *arg); static tulip_intrfunc_t tulip_intr_normal(void *arg); static void tulip_init(tulip_softc_t * const sc); static void tulip_reset(tulip_softc_t * const sc); static ifnet_ret_t tulip_ifstart_one(struct ifnet *ifp); static ifnet_ret_t tulip_ifstart(struct ifnet *ifp); static struct mbuf *tulip_txput(tulip_softc_t * const sc, struct mbuf *m); static void tulip_txput_setup(tulip_softc_t * const sc); static void tulip_rx_intr(tulip_softc_t * const sc); static void tulip_addr_filter(tulip_softc_t * const sc); static unsigned tulip_mii_readreg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno); static void tulip_mii_writereg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data); static int tulip_mii_map_abilities(tulip_softc_t * const sc, unsigned abilities); static tulip_media_t tulip_mii_phy_readspecific(tulip_softc_t * const sc); static int tulip_srom_decode(tulip_softc_t * const sc); #if defined(IFM_ETHER) static int tulip_ifmedia_change(struct ifnet * const ifp); static void tulip_ifmedia_status(struct ifnet * const ifp, struct ifmediareq *req); #endif /* static void tulip_21140_map_media(tulip_softc_t *sc); */ static void tulip_timeout_callback( void *arg) { tulip_softc_t * const sc = arg; tulip_spl_t s = TULIP_RAISESPL(); TULIP_PERFSTART(timeout) sc->tulip_flags &= ~TULIP_TIMEOUTPENDING; sc->tulip_probe_timeout -= 1000 / TULIP_HZ; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_TIMER); TULIP_PERFEND(timeout); TULIP_RESTORESPL(s); } static void tulip_timeout( tulip_softc_t * const sc) { if (sc->tulip_flags & TULIP_TIMEOUTPENDING) return; sc->tulip_flags |= TULIP_TIMEOUTPENDING; timeout(tulip_timeout_callback, sc, (hz + TULIP_HZ / 2) / TULIP_HZ); } #if defined(TULIP_NEED_FASTTIMEOUT) static void tulip_fasttimeout_callback( void *arg) { tulip_softc_t * const sc = arg; tulip_spl_t s = TULIP_RAISESPL(); sc->tulip_flags &= ~TULIP_FASTTIMEOUTPENDING; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_FASTTIMER); TULIP_RESTORESPL(s); } static void tulip_fasttimeout( tulip_softc_t * const sc) { if (sc->tulip_flags & TULIP_FASTTIMEOUTPENDING) return; sc->tulip_flags |= TULIP_FASTTIMEOUTPENDING; timeout(tulip_fasttimeout_callback, sc, 1); } #endif static int tulip_txprobe( tulip_softc_t * const sc) { struct mbuf *m; /* * Before we are sure this is the right media we need * to send a small packet to make sure there's carrier. * Strangely, BNC and AUI will "see" receive data if * either is connected so the transmit is the only way * to verify the connectivity. */ MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return 0; /* * Construct a LLC TEST message which will point to ourselves. */ bcopy(sc->tulip_enaddr, mtod(m, struct ether_header *)->ether_dhost, 6); bcopy(sc->tulip_enaddr, mtod(m, struct ether_header *)->ether_shost, 6); mtod(m, struct ether_header *)->ether_type = htons(3); mtod(m, unsigned char *)[14] = 0; mtod(m, unsigned char *)[15] = 0; mtod(m, unsigned char *)[16] = 0xE3; /* LLC Class1 TEST (no poll) */ m->m_len = m->m_pkthdr.len = sizeof(struct ether_header) + 3; /* * send it! 
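* (The frame is addressed to ourselves: it is the transmit completion, not a reception, that tells us the selected media has carrier.)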
*/ sc->tulip_cmdmode |= TULIP_CMD_TXRUN; sc->tulip_intrmask |= TULIP_STS_TXINTR; sc->tulip_flags |= TULIP_TXPROBE_ACTIVE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); if ((m = tulip_txput(sc, m)) != NULL) m_freem(m); sc->tulip_probe.probe_txprobes++; return 1; } #ifdef BIG_PACKET #define TULIP_SIAGEN_WATCHDOG (sc->tulip_if.if_mtu > ETHERMTU ? TULIP_WATCHDOG_RXDISABLE|TULIP_WATCHDOG_TXDISABLE : 0) #else #define TULIP_SIAGEN_WATCHDOG 0 #endif static void tulip_media_set( tulip_softc_t * const sc, tulip_media_t media) { const tulip_media_info_t *mi = sc->tulip_mediums[media]; if (mi == NULL) return; /* * If we are switching media, make sure we don't think there's * any stale RX activity */ sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); TULIP_CSR_WRITE(sc, csr_sia_tx_rx, mi->mi_sia_tx_rx); if (sc->tulip_features & TULIP_HAVE_SIAGP) { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_control|mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); DELAY(50); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_data|mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); } else { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); } TULIP_CSR_WRITE(sc, csr_sia_connectivity, mi->mi_sia_connectivity); } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { #define TULIP_GPR_CMDBITS (TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_SCRAMBLER|TULIP_CMD_TXTHRSHLDCTL) /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. */ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, (u_int8_t) mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. 
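* (TULIP_GPR_CMDBITS is the port select, PCS function, scrambler and transmit threshold bits; the code never changes them without going through a reset.)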
*/ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpcontrol); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_MII && sc->tulip_probe_state != TULIP_PROBE_INACTIVE) { int idx; if (sc->tulip_features & TULIP_HAVE_SIAGP) { const u_int8_t *dp; dp = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx = 0; idx < mi->mi_reset_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx = 0; idx < mi->mi_gpr_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } } else { for (idx = 0; idx < mi->mi_reset_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx = 0; idx < mi->mi_gpr_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx]); } } if (sc->tulip_flags & TULIP_TRYNWAY) { tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); } else if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { u_int32_t data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_CONTROL); data &= ~(PHYCTL_SELECT_100MB|PHYCTL_FULL_DUPLEX|PHYCTL_AUTONEG_ENABLE); sc->tulip_flags &= ~TULIP_DIDNWAY; if (TULIP_IS_MEDIA_FD(media)) data |= PHYCTL_FULL_DUPLEX; if (TULIP_IS_MEDIA_100MB(media)) data |= PHYCTL_SELECT_100MB; tulip_mii_writereg(sc, sc->tulip_phyaddr, PHYREG_CONTROL, data); } } } static void tulip_linkup( tulip_softc_t * const sc, tulip_media_t media) { if ((sc->tulip_flags & TULIP_LINKUP) == 0) sc->tulip_flags |= TULIP_PRINTLINKUP; sc->tulip_flags |= TULIP_LINKUP; sc->tulip_if.if_flags &= ~IFF_OACTIVE; #if 0 /* XXX how does this work with ifmedia? */ if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { if (sc->tulip_if.if_flags & IFF_FULLDUPLEX) { if (TULIP_CAN_MEDIA_FD(media) && sc->tulip_mediums[TULIP_FD_MEDIA_OF(media)] != NULL) media = TULIP_FD_MEDIA_OF(media); } else { if (TULIP_IS_MEDIA_FD(media) && sc->tulip_mediums[TULIP_HD_MEDIA_OF(media)] != NULL) media = TULIP_HD_MEDIA_OF(media); } } #endif if (sc->tulip_media != media) { #ifdef TULIP_DEBUG sc->tulip_dbg.dbg_last_media = sc->tulip_media; #endif sc->tulip_media = media; sc->tulip_flags |= TULIP_PRINTMEDIA; if (TULIP_IS_MEDIA_FD(sc->tulip_media)) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; } else if (sc->tulip_chipid != TULIP_21041 || (sc->tulip_flags & TULIP_DIDNWAY) == 0) { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } } /* * We could set probe_timeout to 0 but setting to 3000 puts this * in one central place, and all that matters is that tulip_linkup is * followed by a tulip_timeout. Therefore setting it should not * result in aberrant behaviour. */ sc->tulip_probe_timeout = 3000; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_TRYNWAY); if (sc->tulip_flags & TULIP_INRESET) { tulip_media_set(sc, sc->tulip_media); } else if (sc->tulip_probe_media != sc->tulip_media) { /* * No reason to change media if we have the right media.
*/ tulip_reset(sc); tulip_init(sc); } } static void tulip_media_print( tulip_softc_t * const sc) { if ((sc->tulip_flags & TULIP_LINKUP) == 0) return; if (sc->tulip_flags & TULIP_PRINTMEDIA) { printf(TULIP_PRINTF_FMT ": enabling %s port\n", TULIP_PRINTF_ARGS, tulip_mediums[sc->tulip_media]); sc->tulip_flags &= ~(TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); } else if (sc->tulip_flags & TULIP_PRINTLINKUP) { printf(TULIP_PRINTF_FMT ": link up\n", TULIP_PRINTF_ARGS); sc->tulip_flags &= ~TULIP_PRINTLINKUP; } } #if defined(TULIP_DO_GPR_SENSE) static tulip_media_t tulip_21140_gpr_media_sense( tulip_softc_t * const sc) { tulip_media_t maybe_media = TULIP_MEDIA_UNKNOWN; tulip_media_t last_media = TULIP_MEDIA_UNKNOWN; tulip_media_t media; /* * If one of the media blocks contained a default media flag, * use that. */ for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { const tulip_media_info_t *mi; /* * Media is not supported (or is full-duplex). */ if ((mi = sc->tulip_mediums[media]) == NULL || TULIP_IS_MEDIA_FD(media)) continue; if (mi->mi_type != TULIP_MEDIAINFO_GPR) continue; /* * Remember the media if this is the "default" media. */ if (mi->mi_default && maybe_media == TULIP_MEDIA_UNKNOWN) maybe_media = media; /* * No activity mask? Can't see if it is active if there's no mask. */ if (mi->mi_actmask == 0) continue; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) != mi->mi_actdata) continue; #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": gpr_media_sense: %s: 0x%02x & 0x%02x == 0x%02x\n", TULIP_PRINTF_ARGS, tulip_mediums[media], TULIP_CSR_READ(sc, csr_gp) & 0xFF, mi->mi_actmask, mi->mi_actdata); #endif /* * It does! If this is the first media we detected, then * remember this media. If it isn't the first, then there were * multiple matches, which we equate to no match (since we don't * know which to select, if any). */ if (last_media == TULIP_MEDIA_UNKNOWN) { last_media = media; } else if (last_media != media) { last_media = TULIP_MEDIA_UNKNOWN; } } return (last_media != TULIP_MEDIA_UNKNOWN) ? last_media : maybe_media; } #endif /* TULIP_DO_GPR_SENSE */ static tulip_link_status_t tulip_media_link_monitor( tulip_softc_t * const sc) { const tulip_media_info_t * const mi = sc->tulip_mediums[sc->tulip_media]; tulip_link_status_t linkup = TULIP_LINK_DOWN; if (mi == NULL) { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_link_monitor: %s: botch at line %d\n", tulip_mediums[sc->tulip_media],__LINE__); #endif return TULIP_LINK_UNKNOWN; } /* * Have we seen some packets? If so, the link must be good. */ if ((sc->tulip_flags & (TULIP_RXACT|TULIP_LINKUP)) == (TULIP_RXACT|TULIP_LINKUP)) { sc->tulip_flags &= ~TULIP_RXACT; sc->tulip_probe_timeout = 3000; return TULIP_LINK_UP; } sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_MII) { u_int32_t status; /* * Read the PHY status register. */ status = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if (status & PHYSTS_AUTONEG_DONE) { /* * If the PHY has completed autonegotiation, see if the * remote system's abilities have changed. If so, upgrade or * downgrade as appropriate.
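* (The link partner ability bits are shifted left 6 below so that they line up with the corresponding capability bits of the status register before being compared.)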
*/ u_int32_t abilities = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_AUTONEG_ABILITIES); abilities = (abilities << 6) & status; if (abilities != sc->tulip_abilities) { #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation changed: 0x%04x -> 0x%04x\n", TULIP_PRINTF_ARGS, sc->tulip_phyaddr, sc->tulip_abilities, abilities); #endif if (tulip_mii_map_abilities(sc, abilities)) { tulip_linkup(sc, sc->tulip_probe_media); return TULIP_LINK_UP; } /* * if we had selected media because of autonegotiation, * we need to probe for the new media. */ sc->tulip_probe_state = TULIP_PROBE_INACTIVE; if (sc->tulip_flags & TULIP_DIDNWAY) return TULIP_LINK_DOWN; } } /* * The link is now up. If was down, say its back up. */ if ((status & (PHYSTS_LINK_UP|PHYSTS_REMOTE_FAULT)) == PHYSTS_LINK_UP) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { /* * No activity sensor? Assume all's well. */ if (mi->mi_actmask == 0) return TULIP_LINK_UNKNOWN; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) == mi->mi_actdata) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { /* * Assume non TP ok for now. */ if (!TULIP_IS_MEDIA_TP(sc->tulip_media)) return TULIP_LINK_UNKNOWN; if ((TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) == 0) linkup = TULIP_LINK_UP; #if defined(TULIP_DEBUG) if (sc->tulip_probe_timeout <= 0) printf(TULIP_PRINTF_FMT ": sia status = 0x%08x\n", TULIP_PRINTF_ARGS, TULIP_CSR_READ(sc, csr_sia_status)); #endif } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { return TULIP_LINK_UNKNOWN; } /* * We will wait for 3 seconds until the link goes into suspect mode. */ if (sc->tulip_flags & TULIP_LINKUP) { if (linkup == TULIP_LINK_UP) sc->tulip_probe_timeout = 3000; if (sc->tulip_probe_timeout > 0) return TULIP_LINK_UP; sc->tulip_flags &= ~TULIP_LINKUP; printf(TULIP_PRINTF_FMT ": link down: cable problem?\n", TULIP_PRINTF_ARGS); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_downed++; #endif return TULIP_LINK_DOWN; } static void tulip_media_poll( tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE && event == TULIP_MEDIAPOLL_TIMER) { switch (tulip_media_link_monitor(sc)) { case TULIP_LINK_DOWN: { /* * Link Monitor failed. Probe for new media. */ event = TULIP_MEDIAPOLL_LINKFAIL; break; } case TULIP_LINK_UP: { /* * Check again soon. */ tulip_timeout(sc); return; } case TULIP_LINK_UNKNOWN: { /* * We can't tell so don't bother. */ return; } } } if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) { if (TULIP_DO_AUTOSENSE(sc)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_failures++; #endif sc->tulip_media = TULIP_MEDIA_UNKNOWN; tulip_reset(sc); /* restart probe */ } return; } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_pollintrs++; #endif } if (event == TULIP_MEDIAPOLL_START) { sc->tulip_if.if_flags |= IFF_OACTIVE; if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE) return; sc->tulip_probe_mediamask = 0; sc->tulip_probe_passes = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_media_probes++; #endif /* * If the SROM contained an explicit media to use, use it. */ sc->tulip_cmdmode &= ~(TULIP_CMD_RXRUN|TULIP_CMD_FULLDUPLEX); sc->tulip_flags |= TULIP_TRYNWAY|TULIP_PROBE1STPASS; sc->tulip_flags &= ~(TULIP_DIDNWAY|TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); /* * connidx is defaulted to a media_unknown type. 
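* If the SROM named an explicit connector, that media is selected outright below and the whole probe state machine is skipped.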
*/ sc->tulip_probe_media = tulip_srom_conninfo[sc->tulip_connidx].sc_media; if (sc->tulip_probe_media != TULIP_MEDIA_UNKNOWN) { tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_features & TULIP_HAVE_GPR) { sc->tulip_probe_state = TULIP_PROBE_GPRTEST; sc->tulip_probe_timeout = 2000; } else { sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; } } /* * Ignore txprobe failures or spurious callbacks. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED && sc->tulip_probe_state != TULIP_PROBE_MEDIATEST) { sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * If we really transmitted a packet, then that's the media we'll use. */ if (event == TULIP_MEDIAPOLL_TXPROBE_OK || event == TULIP_MEDIAPOLL_LINKPASS) { if (event == TULIP_MEDIAPOLL_LINKPASS) sc->tulip_probe_media = TULIP_MEDIA_10BASET; #if defined(TULIP_DEBUG) else sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_GPRTEST) { #if defined(TULIP_DO_GPR_SENSE) /* * Check for media via the general purpose register. * * Try to sense the media via the GPR. If the same value * occurs 3 times in a row then just use that. */ if (sc->tulip_probe_timeout > 0) { tulip_media_t new_probe_media = tulip_21140_gpr_media_sense(sc); #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": media_poll: gpr sensing = %s\n", TULIP_PRINTF_ARGS, tulip_mediums[new_probe_media]); #endif if (new_probe_media != TULIP_MEDIA_UNKNOWN) { if (new_probe_media == sc->tulip_probe_media) { if (--sc->tulip_probe_count == 0) tulip_linkup(sc, sc->tulip_probe_media); } else { sc->tulip_probe_count = 10; } } sc->tulip_probe_media = new_probe_media; tulip_timeout(sc); return; } #endif /* TULIP_DO_GPR_SENSE */ /* * Brute force. We cycle through each of the media types * and try to transmit a packet. */ sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; tulip_timeout(sc); return; } if (sc->tulip_probe_state != TULIP_PROBE_MEDIATEST && (sc->tulip_features & TULIP_HAVE_MII)) { tulip_media_t old_media = sc->tulip_probe_media; tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); switch (sc->tulip_probe_state) { case TULIP_PROBE_FAILED: case TULIP_PROBE_MEDIATEST: { /* * Try the next media. */ sc->tulip_probe_mediamask |= sc->tulip_mediums[sc->tulip_probe_media]->mi_mediamask; sc->tulip_probe_timeout = 0; #ifdef notyet if (sc->tulip_probe_state == TULIP_PROBE_FAILED) break; if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) break; sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 300; #endif break; } case TULIP_PROBE_PHYAUTONEG: { return; } case TULIP_PROBE_INACTIVE: { /* * Only probe if we autonegotiated a media that hasn't failed. */ sc->tulip_probe_timeout = 0; if (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) { sc->tulip_probe_media = old_media; break; } tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } default: { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } } if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_failed[sc->tulip_probe_media]++; #endif sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * switch to another media if we tried this one enough. 
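* The candidates are walked downward from TULIP_MEDIA_MAX, skipping full duplex variants and media already marked in probe_mediamask; after three complete passes we give up and blame the cable.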
*/ if (/* event == TULIP_MEDIAPOLL_TXPROBE_FAILED || */ sc->tulip_probe_timeout <= 0) { #if defined(TULIP_DEBUG) if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { printf(TULIP_PRINTF_FMT ": poll media unknown!\n", TULIP_PRINTF_ARGS); sc->tulip_probe_media = TULIP_MEDIA_MAX; } #endif /* * Find the next media type to check for. Full Duplex * types are not allowed. */ do { sc->tulip_probe_media -= 1; if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { if (++sc->tulip_probe_passes == 3) { printf(TULIP_PRINTF_FMT ": autosense failed: cable problem?\n", TULIP_PRINTF_ARGS); if ((sc->tulip_if.if_flags & IFF_UP) == 0) { sc->tulip_if.if_flags &= ~IFF_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } sc->tulip_flags ^= TULIP_TRYNWAY; /* XXX */ sc->tulip_probe_mediamask = 0; sc->tulip_probe_media = TULIP_MEDIA_MAX - 1; } } while (sc->tulip_mediums[sc->tulip_probe_media] == NULL || (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) || TULIP_IS_MEDIA_FD(sc->tulip_probe_media)); #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": %s: probing %s\n", TULIP_PRINTF_ARGS, event == TULIP_MEDIAPOLL_TXPROBE_FAILED ? "txprobe failed" : "timeout", tulip_mediums[sc->tulip_probe_media]); #endif sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 1000; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe.probe_txprobes = 0; tulip_reset(sc); tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; } tulip_timeout(sc); /* * If this is hanging off a phy, we know we are doing NWAY and we have * forced the phy to a specific speed. Wait for link up before * sending a packet. */ switch (sc->tulip_mediums[sc->tulip_probe_media]->mi_type) { case TULIP_MEDIAINFO_MII: { if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) return; break; } case TULIP_MEDIAINFO_SIA: { if (TULIP_IS_MEDIA_TP(sc->tulip_probe_media)) { if (TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) return; tulip_linkup(sc, sc->tulip_probe_media); #ifdef notyet if (sc->tulip_features & TULIP_HAVE_MII) tulip_timeout(sc); #endif return; } break; } case TULIP_MEDIAINFO_RESET: case TULIP_MEDIAINFO_SYM: case TULIP_MEDIAINFO_NONE: case TULIP_MEDIAINFO_GPR: { break; } } /* * Try to send a packet.
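* If the probe transmit completes, tulip_media_poll is re-entered with
* TULIP_MEDIAPOLL_TXPROBE_OK and the current probe media is locked in.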
*/ tulip_txprobe(sc); } static void tulip_media_select( tulip_softc_t * const sc) { if (sc->tulip_features & TULIP_HAVE_GPR) { TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpdata); } /* * If this board has no media, just return */ if (sc->tulip_features & TULIP_HAVE_NOMEDIA) return; if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); (*sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_START); } else { tulip_media_set(sc, sc->tulip_media); } } static void tulip_21040_mediainfo_init( tulip_softc_t * const sc, tulip_media_t media) { sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_THRSHLD160 |TULIP_CMD_BACKOFFCTR; sc->tulip_if.if_baudrate = 10000000; if (media == TULIP_MEDIA_10BASET || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[0], 21040, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[1], 21040, 10BASET_FD); } if (media == TULIP_MEDIA_AUIBNC || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[2], 21040, AUIBNC); } if (media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[3], 21040, EXTSIA); } } static void tulip_21040_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_UNKNOWN); return; } static void tulip_21040_10baset_only_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_10BASET); tulip_media_set(sc, TULIP_MEDIA_10BASET); sc->tulip_media = TULIP_MEDIA_10BASET; } static void tulip_21040_10baset_only_media_select( tulip_softc_t * const sc) { sc->tulip_flags |= TULIP_LINKUP; if (sc->tulip_media == TULIP_MEDIA_10BASET_FD) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags &= ~TULIP_SQETEST; } else { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_SQETEST; } tulip_media_set(sc, sc->tulip_media); } static void tulip_21040_auibnc_only_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_AUIBNC); sc->tulip_flags |= TULIP_SQETEST|TULIP_LINKUP; tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_media = TULIP_MEDIA_AUIBNC; } static void tulip_21040_auibnc_only_media_select( tulip_softc_t * const sc) { tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } static const tulip_boardsw_t tulip_21040_boardsw = { TULIP_21040_GENERIC, tulip_21040_media_probe, tulip_media_select, tulip_media_poll, }; static const tulip_boardsw_t tulip_21040_10baset_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_10baset_only_media_probe, tulip_21040_10baset_only_media_select, NULL, }; static const tulip_boardsw_t tulip_21040_auibnc_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_auibnc_only_media_probe, tulip_21040_auibnc_only_media_select, NULL, }; static void tulip_21041_mediainfo_init( tulip_softc_t * const sc) { tulip_media_info_t * const mi = sc->tulip_mediainfo; #ifdef notyet if (sc->tulip_revinfo >= 0x20) { TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, BNC); return; } #endif TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[2], 21041, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[3], 21041, BNC); } static void tulip_21041_media_probe( tulip_softc_t * const sc) { 
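/* * 21041 static setup: 10Mb/s SIA media only, with LINKPASS interrupts * driving the probe state machine. */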
sc->tulip_if.if_baudrate = 10000000; sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_ENHCAPTEFFCT |TULIP_CMD_THRSHLD160|TULIP_CMD_BACKOFFCTR; sc->tulip_intrmask |= TULIP_STS_LINKPASS; tulip_21041_mediainfo_init(sc); } static void tulip_21041_media_poll( tulip_softc_t * const sc, const tulip_mediapoll_event_t event) { u_int32_t sia_status; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE || !TULIP_DO_AUTOSENSE(sc)) return; sc->tulip_media = TULIP_MEDIA_UNKNOWN; tulip_reset(sc); /* start probe */ return; } /* * If we've been asked to start a poll, or a link change interrupt * arrives, restart the probe (and reset the tulip to a known state). */ if (event == TULIP_MEDIAPOLL_START) { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_RXRUN); #ifdef notyet if (sc->tulip_revinfo >= 0x20) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_DIDNWAY; } #endif TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_10BASET; sc->tulip_probe_timeout = TULIP_21041_PROBE_10BASET_TIMEOUT; tulip_media_set(sc, TULIP_MEDIA_10BASET); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) return; if (event == TULIP_MEDIAPOLL_TXPROBE_OK) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif tulip_linkup(sc, sc->tulip_probe_media); return; } sia_status = TULIP_CSR_READ(sc, csr_sia_status); TULIP_CSR_WRITE(sc, csr_sia_status, sia_status); if ((sia_status & TULIP_SIASTS_LINKFAIL) == 0) { if (sc->tulip_revinfo >= 0x20) { if (sia_status & (PHYSTS_10BASET_FD << (16 - 6))) sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } /* * If the link has passed LinkPass, 10baseT is the * proper media to use. */ tulip_linkup(sc, sc->tulip_probe_media); return; } /* * Wait for up to 2.4 seconds for the link to reach pass state. * Only then start scanning the other media for activity, * choosing media with receive activity over those without. */ if (sc->tulip_probe_media == TULIP_MEDIA_10BASET) { if (event != TULIP_MEDIAPOLL_TIMER) return; if (sc->tulip_probe_timeout > 0 && (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) == 0) { tulip_timeout(sc); return; } sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; sc->tulip_flags |= TULIP_WANTRXACT; if (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } /* * If we failed, clear the txprobe active flag. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; if (event == TULIP_MEDIAPOLL_TIMER) { /* * If we've received something, then that's our link! */ if (sc->tulip_flags & TULIP_RXACT) { tulip_linkup(sc, sc->tulip_probe_media); return; } /* * If no txprobe is active and we either don't require receive * activity or have already seen some, fire off a txprobe. */ if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0 && ((sc->tulip_flags & TULIP_WANTRXACT) == 0 || (sia_status & TULIP_SIASTS_RXACTIVITY))) { sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; tulip_txprobe(sc); tulip_timeout(sc); return; } /* * Take 2 passes through before deciding not to * wait for receive activity. Then take another * two passes before spitting out a warning.
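* The first expiry merely clears TULIP_WANTRXACT and rearms the timeout;
* only the second expiry produces the cable-problem complaint.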
*/ if (sc->tulip_probe_timeout <= 0) { if (sc->tulip_flags & TULIP_WANTRXACT) { sc->tulip_flags &= ~TULIP_WANTRXACT; sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; } else { printf(TULIP_PRINTF_FMT ": autosense failed: cable problem?\n", TULIP_PRINTF_ARGS); if ((sc->tulip_if.if_flags & IFF_UP) == 0) { sc->tulip_if.if_flags &= ~IFF_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } } } /* * Since this media failed to probe, try the other one. */ sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; if (sc->tulip_probe_media == TULIP_MEDIA_AUI) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; tulip_timeout(sc); } static const tulip_boardsw_t tulip_21041_boardsw = { TULIP_21041_GENERIC, tulip_21041_media_probe, tulip_media_select, tulip_21041_media_poll }; static const tulip_phy_attr_t tulip_mii_phy_attrlist[] = { { 0x20005c00, 0, /* 08-00-17 */ { { 0x19, 0x0040, 0x0040 }, /* 10TX */ { 0x19, 0x0040, 0x0000 }, /* 100TX */ }, #if defined(TULIP_DEBUG) "NS DP83840", #endif }, { 0x0281F400, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Seeq 80C240" #endif }, #if 0 { 0x0015F420, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Broadcom BCM5000" #endif }, #endif { 0x0281F400, 0, /* 00-A0-BE */ { { 0x11, 0x8000, 0x0000 }, /* 10T */ { 0x11, 0x8000, 0x8000 }, /* 100TX */ { }, /* 100T4 */ { 0x11, 0x4000, 0x4000 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "ICS 1890" #endif }, { 0 } }; static tulip_media_t tulip_mii_phy_readspecific( tulip_softc_t * const sc) { const tulip_phy_attr_t *attr; u_int16_t data; u_int32_t id; unsigned idx = 0; static const tulip_media_t table[] = { TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET, TULIP_MEDIA_100BASETX, TULIP_MEDIA_100BASET4, TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET_FD, TULIP_MEDIA_100BASETX_FD, TULIP_MEDIA_UNKNOWN }; /* * Don't read phy specific registers if link is not up. 
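* (PHYSTS_EXTENDED_REGS must be set as well, since the mode table below
* indexes vendor-specific registers.)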
*/ data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if ((data & (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) != (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) return TULIP_MEDIA_UNKNOWN; id = (tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDHIGH); for (attr = tulip_mii_phy_attrlist;; attr++) { if (attr->attr_id == 0) return TULIP_MEDIA_UNKNOWN; if ((id & ~0x0F) == attr->attr_id) break; } if (attr->attr_modes[PHY_MODE_100TX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100TX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 2; } if (idx == 0 && attr->attr_modes[PHY_MODE_100T4].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100T4]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 3; } if (idx == 0 && attr->attr_modes[PHY_MODE_10T].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_10T]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 1; } if (idx != 0 && attr->attr_modes[PHY_MODE_FULLDUPLEX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_FULLDUPLEX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); idx += ((data & pm->pm_mask) == pm->pm_value ? 4 : 0); } return table[idx]; } static unsigned tulip_mii_get_phyaddr( tulip_softc_t * const sc, unsigned offset) { unsigned phyaddr; for (phyaddr = 1; phyaddr < 32; phyaddr++) { unsigned status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) continue; if (offset == 0) return phyaddr; offset--; } if (offset == 0) { unsigned status = tulip_mii_readreg(sc, 0, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) return TULIP_MII_NOPHY; return 0; } return TULIP_MII_NOPHY; } static int tulip_mii_map_abilities( tulip_softc_t * const sc, unsigned abilities) { sc->tulip_abilities = abilities; if (abilities & PHYSTS_100BASETX_FD) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX_FD; } else if (abilities & PHYSTS_100BASET4) { sc->tulip_probe_media = TULIP_MEDIA_100BASET4; } else if (abilities & PHYSTS_100BASETX) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX; } else if (abilities & PHYSTS_10BASET_FD) { sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } else if (abilities & PHYSTS_10BASET) { sc->tulip_probe_media = TULIP_MEDIA_10BASET; } else { sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return 0; } sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return 1; } static void tulip_mii_autonegotiate( tulip_softc_t * const sc, const unsigned phyaddr) { switch (sc->tulip_probe_state) { case TULIP_PROBE_MEDIATEST: case TULIP_PROBE_INACTIVE: { sc->tulip_flags |= TULIP_DIDNWAY; tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, PHYCTL_RESET); sc->tulip_probe_timeout = 3000; sc->tulip_intrmask |= TULIP_STS_ABNRMLINTR|TULIP_STS_NORMALINTR; sc->tulip_probe_state = TULIP_PROBE_PHYRESET; /* FALL THROUGH */ } case TULIP_PROBE_PHYRESET: { u_int32_t status; u_int32_t data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); if (data & PHYCTL_RESET) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } printf(TULIP_PRINTF_FMT "(phy%d): error: reset of PHY never completed!\n", TULIP_PRINTF_ARGS, phyaddr); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; sc->tulip_probe_state = TULIP_PROBE_FAILED; 
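/* Mark the interface down so the failure is visible. */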
sc->tulip_if.if_flags &= ~(IFF_UP|IFF_RUNNING); return; } status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if ((status & PHYSTS_CAN_AUTONEG) == 0) { #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation disabled\n", TULIP_PRINTF_ARGS, phyaddr); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } if (tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT) != ((status >> 6) | 0x01)) tulip_mii_writereg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT, (status >> 6) | 0x01); tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, data|PHYCTL_AUTONEG_RESTART|PHYCTL_AUTONEG_ENABLE); data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); #if defined(TULIP_DEBUG) if ((data & PHYCTL_AUTONEG_ENABLE) == 0) loudprintf(TULIP_PRINTF_FMT "(phy%d): oops: enable autonegotiation failed: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); else loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation restarted: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); sc->tulip_dbg.dbg_nway_starts++; #endif sc->tulip_probe_state = TULIP_PROBE_PHYAUTONEG; sc->tulip_probe_timeout = 3000; /* FALL THROUGH */ } case TULIP_PROBE_PHYAUTONEG: { u_int32_t status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); u_int32_t data; if ((status & PHYSTS_AUTONEG_DONE) == 0) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation timeout: sts=0x%04x, ctl=0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, status, tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL)); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } data = tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ABILITIES); #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation complete: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); #endif data = (data << 6) & status; if (!tulip_mii_map_abilities(sc, data)) sc->tulip_flags &= ~TULIP_DIDNWAY; return; } default: { #if defined(DIAGNOSTIC) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation failure: state = %d\n", TULIP_PRINTF_ARGS, phyaddr, sc->tulip_probe_state); sc->tulip_dbg.dbg_nway_failures++; #endif } static void tulip_2114x_media_preset( tulip_softc_t * const sc) { const tulip_media_info_t *mi = NULL; tulip_media_t media = sc->tulip_media; if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) media = sc->tulip_media; else media = sc->tulip_probe_media; sc->tulip_cmdmode &= ~TULIP_CMD_PORTSELECT; sc->tulip_flags &= ~TULIP_SQETEST; if (media != TULIP_MEDIA_UNKNOWN && media != TULIP_MEDIA_MAX) { #if defined(TULIP_DEBUG) if (media < TULIP_MEDIA_MAX && sc->tulip_mediums[media] != NULL) { #endif mi = sc->tulip_mediums[media]; if (mi->mi_type == TULIP_MEDIAINFO_MII) { sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR || mi->mi_type == TULIP_MEDIAINFO_SYM) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); } #if defined(TULIP_DEBUG) } else { printf(TULIP_PRINTF_FMT ": preset: bad media %d!\n", TULIP_PRINTF_ARGS, media); } #endif } switch (media) { case TULIP_MEDIA_BNC: case TULIP_MEDIA_AUI: case TULIP_MEDIA_10BASET: { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_cmdmode |= TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 10000000; sc->tulip_flags |= 
TULIP_SQETEST; break; } case TULIP_MEDIA_10BASET_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 10000000; break; } case TULIP_MEDIA_100BASEFX: case TULIP_MEDIA_100BASET4: case TULIP_MEDIA_100BASETX: { sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL); sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; sc->tulip_if.if_baudrate = 100000000; break; } case TULIP_MEDIA_100BASEFX_FD: case TULIP_MEDIA_100BASETX_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_PORTSELECT; sc->tulip_cmdmode &= ~TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 100000000; break; } default: { break; } } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } /* ******************************************************************** * Start of 21140/21140A support which does not use the MII interface */ static void tulip_null_media_poll( tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif #if defined(DIAGNOSTIC) printf(TULIP_PRINTF_FMT ": botch(media_poll) at line %d\n", TULIP_PRINTF_ARGS, __LINE__); #endif } __inline__ static void tulip_21140_mediainit( tulip_softc_t * const sc, tulip_media_info_t * const mip, tulip_media_t const media, unsigned gpdata, unsigned cmdmode) { sc->tulip_mediums[media] = mip; mip->mi_type = TULIP_MEDIAINFO_GPR; mip->mi_cmdmode = cmdmode; mip->mi_gpdata = gpdata; } static void tulip_21140_evalboard_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); if ((TULIP_CSR_READ(sc, csr_gp) & TULIP_GP_EB_OK100) != 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_eb_boardsw = { TULIP_21140_DEC_EB, tulip_21140_evalboard_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_accton_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; unsigned gpdata; sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); gpdata = TULIP_CSR_READ(sc, csr_gp); if ((gpdata & TULIP_GP_EN1207_UTP_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { if 
((gpdata & TULIP_GP_EN1207_BNC_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_BNC; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_BNC, TULIP_GP_EN1207_BNC_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_accton_boardsw = { TULIP_21140_EN1207, tulip_21140_accton_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_smc9332_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int idx, cnt = 0; TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT|TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles, which at 33MHz comes to under two microseconds, but wait a bit longer anyway) */ TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); sc->tulip_gpinit = TULIP_GP_SMC_9332_PINS; sc->tulip_gpdata = TULIP_GP_SMC_9332_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_INIT); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) == (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) { if (++cnt > 100) break; } else if ((csr & TULIP_GP_SMC_9332_OK10) == 0) { break; } else { cnt = 0; } DELAY(1000); } sc->tulip_media = cnt > 100 ?
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_smc9332_boardsw = { TULIP_21140_SMC_9332, tulip_21140_smc9332_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_cogent_em100_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; u_int32_t cmdmode = TULIP_CSR_READ(sc, csr_command); sc->tulip_gpinit = TULIP_GP_EM100_PINS; sc->tulip_gpdata = TULIP_GP_EM100_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_INIT); cmdmode = TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_MUSTBEONE; cmdmode &= ~(TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_SCRAMBLER); if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { TULIP_CSR_WRITE(sc, csr_command, cmdmode); sc->tulip_media = TULIP_MEDIA_100BASEFX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_FULLDUPLEX); } else { TULIP_CSR_WRITE(sc, csr_command, cmdmode|TULIP_CMD_SCRAMBLER); sc->tulip_media = TULIP_MEDIA_100BASETX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } } static const tulip_boardsw_t tulip_21140_cogent_em100_boardsw = { TULIP_21140_COGENT_EM100, tulip_21140_cogent_em100_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset }; static void tulip_21140_znyx_zx34x_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int cnt10 = 0, cnt100 = 0, idx; sc->tulip_gpinit = TULIP_GP_ZX34X_PINS; sc->tulip_gpdata = TULIP_GP_ZX34X_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) == (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) { if (++cnt100 > 100) break; } else if ((csr & TULIP_GP_ZX34X_LNKFAIL) == 0) { if (++cnt10 > 100) break; } else { cnt10 = 0; cnt100 = 0; } DELAY(1000); } sc->tulip_media = cnt100 > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_ZX34X_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_ZX34X_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_ZX34X_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_ZX34X_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_znyx_zx34x_boardsw = { TULIP_21140_ZNYX_ZX34X, tulip_21140_znyx_zx34x_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_2114x_media_probe( tulip_softc_t * const sc) { sc->tulip_cmdmode |= TULIP_CMD_MUSTBEONE |TULIP_CMD_BACKOFFCTR|TULIP_CMD_THRSHLD72; } static const tulip_boardsw_t tulip_2114x_isv_boardsw = { TULIP_21140_ISV, tulip_2114x_media_probe, tulip_media_select, tulip_media_poll, tulip_2114x_media_preset, }; /* * ******** END of chip-specific handlers. *********** */ /* * Code to read the SROM and MII bit streams (I2C) */ static void tulip_delay_300ns( tulip_softc_t * const sc) { int idx; for (idx = (300 / 33) + 1; idx > 0; idx--) (void) TULIP_CSR_READ(sc, csr_busmode); } #define EMIT do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); tulip_delay_300ns(sc); } while (0) static void tulip_srom_idle( tulip_softc_t * const sc) { unsigned bit, csr; csr = SROMSEL ; EMIT; csr = SROMSEL | SROMRD; EMIT; csr ^= SROMCS; EMIT; csr ^= SROMCLKON; EMIT; /* * Write 25 cycles of 0 which will force the SROM to be idle. */ for (bit = 3 + SROM_BITWIDTH + 16; bit > 0; bit--) { csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ csr ^= SROMCLKON; EMIT; /* clock high; data valid */ } csr ^= SROMCLKOFF; EMIT; csr ^= SROMCS; EMIT; csr = 0; EMIT; } static void tulip_srom_read( tulip_softc_t * const sc) { unsigned idx; const unsigned bitwidth = SROM_BITWIDTH; const unsigned cmdmask = (SROMCMD_RD << bitwidth); const unsigned msb = 1 << (bitwidth + 3 - 1); unsigned lastidx = (1 << bitwidth) - 1; tulip_srom_idle(sc); for (idx = 0; idx <= lastidx; idx++) { unsigned lastbit, data, bits, bit, csr; csr = SROMSEL ; EMIT; csr = SROMSEL | SROMRD; EMIT; csr ^= SROMCSON; EMIT; csr ^= SROMCLKON; EMIT; lastbit = 0; for (bits = idx|cmdmask, bit = bitwidth + 3; bit > 0; bit--, bits <<= 1) { const unsigned thisbit = bits & msb; csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ if (thisbit != lastbit) { csr ^= SROMDOUT; EMIT; /* clock low; invert data */ } else { EMIT; } csr ^= SROMCLKON; EMIT; /* clock high; data valid */ lastbit = thisbit; } csr ^= SROMCLKOFF; EMIT; for (data = 0, bits = 0; bits < 16; bits++) { data <<= 1; csr ^= SROMCLKON; EMIT; /* clock high; data valid */ data |= TULIP_CSR_READ(sc, csr_srom_mii) & SROMDIN ? 1 : 0; csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ } sc->tulip_rombuf[idx*2] = data & 0xFF; sc->tulip_rombuf[idx*2+1] = data >> 8; csr = SROMSEL | SROMRD; EMIT; csr = 0; EMIT; } tulip_srom_idle(sc); } #define MII_EMIT do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); tulip_delay_300ns(sc); } while (0) static void tulip_mii_writebits( tulip_softc_t * const sc, unsigned data, unsigned bits) { unsigned msb = 1 << (bits - 1); unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned lastbit = (csr & MII_DOUT) ?
msb : 0; csr |= MII_WR; MII_EMIT; /* clock low; assert write */ for (; bits > 0; bits--, data <<= 1) { const unsigned thisbit = data & msb; if (thisbit != lastbit) { csr ^= MII_DOUT; MII_EMIT; /* clock low; invert data */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ lastbit = thisbit; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } } static void tulip_mii_turnaround( tulip_softc_t * const sc, unsigned cmd) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); if (cmd == MII_WRCMD) { csr |= MII_DOUT; MII_EMIT; /* clock low; change data */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ csr ^= MII_DOUT; MII_EMIT; /* clock low; change data */ } else { csr |= MII_RD; MII_EMIT; /* clock low; switch to read */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } static unsigned tulip_mii_readbits( tulip_softc_t * const sc) { unsigned data; unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); int idx; for (idx = 0, data = 0; idx < 16; idx++) { data <<= 1; /* this is NOOP on the first pass through */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ if (TULIP_CSR_READ(sc, csr_srom_mii) & MII_DIN) data |= 1; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } csr ^= MII_RD; MII_EMIT; /* clock low; turn off read */ return data; } static unsigned tulip_mii_readreg( tulip_softc_t * const sc, unsigned devaddr, unsigned regno) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned data; csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_RDCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_RDCMD); data = tulip_mii_readbits(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][0] = data; sc->tulip_dbg.dbg_phyregs[regno][1]++; #endif return data; } static void tulip_mii_writereg( tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_WRCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_WRCMD); tulip_mii_writebits(sc, data, 16); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][2] = data; sc->tulip_dbg.dbg_phyregs[regno][3]++; #endif } #define tulip_mchash(mca) (tulip_crc32(mca, 6) & 0x1FF) #define tulip_srom_crcok(databuf) ( \ ((tulip_crc32(databuf, 126) & 0xFFFFU) ^ 0xFFFFU) == \ ((databuf)[126] | ((databuf)[127] << 8))) static unsigned tulip_crc32( const unsigned char *databuf, size_t datalen) { u_int idx, bit, data, crc = 0xFFFFFFFFUL; for (idx = 0; idx < datalen; idx++) for (data = *databuf++, bit = 0; bit < 8; bit++, data >>= 1) crc = (crc >> 1) ^ (((crc ^ data) & 1) ? 
TULIP_CRC32_POLY : 0); return crc; } static void tulip_identify_dec_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "DEC "); #define D0 4 if (sc->tulip_chipid <= TULIP_DE425) return; if (bcmp(sc->tulip_rombuf + 29, "DE500", 5) == 0 || bcmp(sc->tulip_rombuf + 29, "DE450", 5) == 0) { bcopy(sc->tulip_rombuf + 29, &sc->tulip_boardid[D0], 8); sc->tulip_boardid[D0+8] = ' '; } #undef D0 } static void tulip_identify_znyx_nic( tulip_softc_t * const sc) { unsigned id = 0; strcpy(sc->tulip_boardid, "ZNYX ZX3XX "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { unsigned znyx_ptr; sc->tulip_boardid[8] = '4'; znyx_ptr = sc->tulip_rombuf[124] + 256 * sc->tulip_rombuf[125]; if (znyx_ptr < 26 || znyx_ptr > 116) { sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; return; } /* ZX344 = 0010 .. 0013FF */ if (sc->tulip_rombuf[znyx_ptr] == 0x4A && sc->tulip_rombuf[znyx_ptr + 1] == 0x52 && sc->tulip_rombuf[znyx_ptr + 2] == 0x01) { id = sc->tulip_rombuf[znyx_ptr + 5] + 256 * sc->tulip_rombuf[znyx_ptr + 4]; if ((id >> 8) == (TULIP_ZNYX_ID_ZX342 >> 8)) { sc->tulip_boardid[9] = '2'; if (id == TULIP_ZNYX_ID_ZX342B) { sc->tulip_boardid[10] = 'B'; sc->tulip_boardid[11] = ' '; } sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX344) { sc->tulip_boardid[10] = '4'; sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX345) { sc->tulip_boardid[9] = (sc->tulip_rombuf[19] > 1) ? '8' : '5'; } else if (id == TULIP_ZNYX_ID_ZX346) { sc->tulip_boardid[9] = '6'; } else if (id == TULIP_ZNYX_ID_ZX351) { sc->tulip_boardid[8] = '5'; sc->tulip_boardid[9] = '1'; } } if (id == 0) { /* * Assume it's a ZX342... */ sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } return; } sc->tulip_boardid[8] = '1'; if (sc->tulip_chipid == TULIP_21041) { sc->tulip_boardid[10] = '1'; return; } if (sc->tulip_rombuf[32] == 0x4A && sc->tulip_rombuf[33] == 0x52) { id = sc->tulip_rombuf[37] + 256 * sc->tulip_rombuf[36]; if (id == TULIP_ZNYX_ID_ZX312T) { sc->tulip_boardid[9] = '2'; sc->tulip_boardid[10] = 'T'; sc->tulip_boardid[11] = ' '; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; } else if (id == TULIP_ZNYX_ID_ZX314_INTA) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX314) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315_INTA) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_BASEROM; } else { id = 0; } } if (id == 0) { if ((sc->tulip_enaddr[3] & ~3) == 0xF0 && (sc->tulip_enaddr[5] & 2) == 0) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xF4 && (sc->tulip_enaddr[5] & 1) == 0) { sc->tulip_boardid[9] = '5'; sc->tulip_boardsw = &tulip_21040_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xEC) { sc->tulip_boardid[9] = '2'; sc->tulip_boardsw = &tulip_21040_boardsw; } } } static void tulip_identify_smc_nic( tulip_softc_t * const sc) { u_int32_t id1, id2, ei; int auibnc = 0, utp = 0; char *cp; strcpy(sc->tulip_boardid, "SMC "); if (sc->tulip_chipid 
== TULIP_21041) return; if (sc->tulip_chipid != TULIP_21040) { if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) { strcpy(&sc->tulip_boardid[4], "9332DST "); sc->tulip_boardsw = &tulip_21140_smc9332_boardsw; } else if (sc->tulip_features & (TULIP_HAVE_BASEROM|TULIP_HAVE_SLAVEDROM)) { strcpy(&sc->tulip_boardid[4], "9334BDT "); } else { strcpy(&sc->tulip_boardid[4], "9332BDT "); } return; } id1 = sc->tulip_rombuf[0x60] | (sc->tulip_rombuf[0x61] << 8); id2 = sc->tulip_rombuf[0x62] | (sc->tulip_rombuf[0x63] << 8); ei = sc->tulip_rombuf[0x66] | (sc->tulip_rombuf[0x67] << 8); strcpy(&sc->tulip_boardid[4], "8432"); cp = &sc->tulip_boardid[8]; if ((id1 & 1) == 0) *cp++ = 'B', auibnc = 1; if ((id1 & 0xFF) > 0x32) *cp++ = 'T', utp = 1; if ((id1 & 0x4000) == 0) *cp++ = 'A', auibnc = 1; if (id2 == 0x15) { sc->tulip_boardid[7] = '4'; *cp++ = '-'; *cp++ = 'C'; *cp++ = 'H'; *cp++ = (ei ? '2' : '1'); } *cp++ = ' '; *cp = '\0'; if (utp && !auibnc) sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; else if (!utp && auibnc) sc->tulip_boardsw = &tulip_21040_auibnc_only_boardsw; } static void tulip_identify_cogent_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "Cogent "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100TX_ID) { strcat(sc->tulip_boardid, "EM100TX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; } else if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { strcat(sc->tulip_boardid, "EM100FX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; } /* * Magic number (0x24001109U) is the SubVendor (0x2400) and * SubDevId (0x1109) for the ANA6944TX (EM440TX). */ if (*(u_int32_t *) sc->tulip_rombuf == 0x24001109U && (sc->tulip_features & TULIP_HAVE_BASEROM)) { /* * Cogent (Adaptec) is still mapping all INTs to INTA of * first 21140. Dumb! Dumb! */ strcat(sc->tulip_boardid, "EM440TX "); sc->tulip_features |= TULIP_HAVE_SHAREDINTR; } } else if (sc->tulip_chipid == TULIP_21040) { sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } } static void tulip_identify_accton_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "ACCTON "); switch (sc->tulip_chipid) { case TULIP_21140A: strcat(sc->tulip_boardid, "EN1207 "); sc->tulip_boardsw = &tulip_21140_accton_boardsw; break; case TULIP_21140: strcat(sc->tulip_boardid, "EN1207TX "); sc->tulip_boardsw = &tulip_21140_eb_boardsw; break; case TULIP_21040: strcat(sc->tulip_boardid, "EN1203 "); sc->tulip_boardsw = &tulip_21040_boardsw; break; case TULIP_21041: strcat(sc->tulip_boardid, "EN1203 "); sc->tulip_boardsw = &tulip_21041_boardsw; break; default: sc->tulip_boardsw = &tulip_2114x_isv_boardsw; break; } } static void tulip_identify_asante_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "Asante "); if ((sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) && sc->tulip_boardsw != &tulip_2114x_isv_boardsw) { tulip_media_info_t *mi = sc->tulip_mediainfo; int idx; /* * The Asante Fast Ethernet doesn't always ship with a valid * new format SROM. So if it isn't in the new format, we cheat and * set it up as if it were.
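* That means resetting the PHY through the GP pins and then hunting for
* its MII address by hand, as done below.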
*/ sc->tulip_gpinit = TULIP_GP_ASANTE_PINS; sc->tulip_gpdata = 0; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PHYRESET); DELAY(100); TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = 0; mi->mi_gpr_offset = 0; mi->mi_reset_length = 0; mi->mi_reset_offset = 0; mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx = 20; idx > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, 0); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy 0\n", TULIP_PRINTF_ARGS); return; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_advertisement = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_full_duplex = PHYSTS_10BASET_FD|PHYSTS_100BASETX_FD; mi->mi_tx_threshold = PHYSTS_10BASET|PHYSTS_10BASET_FD; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } } static int tulip_srom_decode( tulip_softc_t * const sc) { unsigned idx1, idx2, idx3; const tulip_srom_header_t *shp = (tulip_srom_header_t *) &sc->tulip_rombuf[0]; const tulip_srom_adapter_info_t *saip = (tulip_srom_adapter_info_t *) (shp + 1); tulip_srom_media_t srom_media; tulip_media_info_t *mi = sc->tulip_mediainfo; const u_int8_t *dp; u_int32_t leaf_offset, blocks, data; for (idx1 = 0; idx1 < shp->sh_adapter_count; idx1++, saip++) { if (shp->sh_adapter_count == 1) break; if (saip->sai_device == sc->tulip_pci_devno) break; } /* * Didn't find the right media block for this card. */ if (idx1 == shp->sh_adapter_count) return 0; /* * Save the hardware address. */ bcopy((caddr_t) shp->sh_ieee802_address, (caddr_t) sc->tulip_enaddr, 6); /* * If this is a multiple port card, add the adapter index to the last * byte of the hardware address. (if it isn't multiport, adding 0 * won't hurt.)
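* The 16-bit little-endian leaf offset read next points at this adapter's
* media blocks within the SROM.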
*/ sc->tulip_enaddr[5] += idx1; leaf_offset = saip->sai_leaf_offset_lowbyte + saip->sai_leaf_offset_highbyte * 256; dp = sc->tulip_rombuf + leaf_offset; sc->tulip_conntype = (tulip_srom_connection_t) (dp[0] + dp[1] * 256); dp += 2; for (idx2 = 0;; idx2++) { if (tulip_srom_conninfo[idx2].sc_type == sc->tulip_conntype || tulip_srom_conninfo[idx2].sc_type == TULIP_SROM_CONNTYPE_NOT_USED) break; } sc->tulip_connidx = idx2; if (sc->tulip_chipid == TULIP_21041) { blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { tulip_media_t media; data = *dp++; srom_media = (tulip_srom_media_t) (data & 0x3F); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media != TULIP_MEDIA_UNKNOWN) { if (data & TULIP_SROM_21041_EXTENDED) { mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; mi++; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; break; } default: { break; } } } } if (data & TULIP_SROM_21041_EXTENDED) dp += 6; } #ifdef notdef if (blocks == 0) { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; } #endif } else { unsigned length, type; tulip_media_t gp_media = TULIP_MEDIA_UNKNOWN; if (sc->tulip_features & TULIP_HAVE_GPR) sc->tulip_gpinit = *dp++; blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { const u_int8_t *ep; if ((*dp & 0x80) == 0) { length = 4; type = 0; } else { length = (*dp++ & 0x7f) - 1; type = *dp++ & 0x3f; } ep = dp + length; switch (type & 0x3f) { case 0: { /* 21140[A] GPR block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_GPR; sc->tulip_mediums[media] = mi; mi->mi_gpdata = dp[1]; if (media > gp_media && !TULIP_IS_MEDIA_FD(media)) { sc->tulip_gpdata = mi->mi_gpdata; gp_media = media; } data = dp[2] + dp[3] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { #if 0 mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; #endif mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } case 1: { /* 21140[A] MII block */ const unsigned phyno = *dp++; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += mi->mi_reset_length; /* * Before we probe for a PHY, use the GPR information * to select it. If we don't, it may be inaccessible. 
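* The reset and GP byte sequences recorded in the SROM are replayed one
* byte at a time onto csr_gp before the address search.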
*/ TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpinit|TULIP_GP_PINSET); for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx3]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx3]); } /* * At least write something! */ if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy %d\n", TULIP_PRINTF_ARGS, phyno); break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 2: { /* 2114[23] SIA block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; if (type & 0x40) { mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; dp += 6; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, BNC); break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, AUI); break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET); break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET_FD); break; } default: { goto bad_media; } } } mi->mi_sia_gp_control = (dp[0] + dp[1] * 256) << 16; mi->mi_sia_gp_data = (dp[2] + dp[3] * 256) << 16; mi++; bad_media: break; } case 3: { /* 2114[23] MII PHY block */ const unsigned phyno = *dp++; const u_int8_t *dp0; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; dp0 = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp0 = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_sia_general, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if 
(mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy %d\n", TULIP_PRINTF_ARGS, phyno); break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; mi->mi_mii_interrupt = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 4: { /* 21143 SYM block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SYM; sc->tulip_mediums[media] = mi; mi->mi_gpcontrol = (dp[1] + dp[2] * 256) << 16; mi->mi_gpdata = (dp[3] + dp[4] * 256) << 16; data = dp[5] + dp[6] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } #if 0 case 5: { /* 21143 Reset block */ mi->mi_type = TULIP_MEDIAINFO_RESET; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; mi++; break; } #endif default: { } } dp = ep; } } return mi - sc->tulip_mediainfo; } static const struct { void (*vendor_identify_nic)(tulip_softc_t * const sc); unsigned char vendor_oui[3]; } tulip_vendors[] = { { tulip_identify_dec_nic, { 0x08, 0x00, 0x2B } }, { tulip_identify_dec_nic, { 0x00, 0x00, 0xF8 } }, { tulip_identify_smc_nic, { 0x00, 0x00, 0xC0 } }, { tulip_identify_smc_nic, { 0x00, 0xE0, 0x29 } }, { tulip_identify_znyx_nic, { 0x00, 0xC0, 0x95 } }, { tulip_identify_cogent_nic, { 0x00, 0x00, 0x92 } }, { tulip_identify_asante_nic, { 0x00, 0x00, 0x94 } }, { tulip_identify_accton_nic, { 0x00, 0x00, 0xE8 } }, { NULL } }; /* * This deals with the vagaries of the address roms and the * brain-deadness that various vendors commit in using them. 
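* tulip_read_macaddr returns 0 on success and a distinct negative value
* (-1 through -6) identifying which consistency check failed.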
*/ static int tulip_read_macaddr( tulip_softc_t * const sc) { unsigned cksum, rom_cksum, idx; u_int32_t csr; unsigned char tmpbuf[8]; static const u_char testpat[] = { 0xFF, 0, 0x55, 0xAA, 0xFF, 0, 0x55, 0xAA }; sc->tulip_connidx = TULIP_SROM_LASTCONNIDX; if (sc->tulip_chipid == TULIP_21040) { TULIP_CSR_WRITE(sc, csr_enetrom, 1); for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) { int cnt = 0; while (((csr = TULIP_CSR_READ(sc, csr_enetrom)) & 0x80000000L) && cnt < 10000) cnt++; sc->tulip_rombuf[idx] = csr & 0xFF; } sc->tulip_boardsw = &tulip_21040_boardsw; #if defined(TULIP_EISA) } else if (sc->tulip_chipid == TULIP_DE425) { int cnt; for (idx = 0, cnt = 0; idx < sizeof(testpat) && cnt < 32; cnt++) { tmpbuf[idx] = TULIP_CSR_READBYTE(sc, csr_enetrom); if (tmpbuf[idx] == testpat[idx]) ++idx; else idx = 0; } for (idx = 0; idx < 32; idx++) sc->tulip_rombuf[idx] = TULIP_CSR_READBYTE(sc, csr_enetrom); sc->tulip_boardsw = &tulip_21040_boardsw; #endif /* TULIP_EISA */ } else { if (sc->tulip_chipid == TULIP_21041) { /* * Thankfully all 21041's act the same. */ sc->tulip_boardsw = &tulip_21041_boardsw; } else { /* * Assume all 21140 boards are compatible with the * DEC 10/100 evaluation board. Not really valid but * it's the best we can do until every one switches to * the new SROM format. */ sc->tulip_boardsw = &tulip_21140_eb_boardsw; } tulip_srom_read(sc); if (tulip_srom_crcok(sc->tulip_rombuf)) { /* * SROM CRC is valid, therefore it must be in the * new format. */ sc->tulip_features |= TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM; } else if (sc->tulip_rombuf[126] == 0xff && sc->tulip_rombuf[127] == 0xFF) { /* * No checksum is present. See if the SROM id checks out; * the first 18 bytes should be 0 followed by a 1 followed * by the number of adapters (which we don't deal with yet). */ for (idx = 0; idx < 18; idx++) { if (sc->tulip_rombuf[idx] != 0) break; } if (idx == 18 && sc->tulip_rombuf[18] == 1 && sc->tulip_rombuf[19] != 0) sc->tulip_features |= TULIP_HAVE_ISVSROM; } else if (sc->tulip_chipid >= TULIP_21142) { sc->tulip_features |= TULIP_HAVE_ISVSROM; sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } if ((sc->tulip_features & TULIP_HAVE_ISVSROM) && tulip_srom_decode(sc)) { if (sc->tulip_chipid != TULIP_21041) sc->tulip_boardsw = &tulip_2114x_isv_boardsw; /* * If the SROM specifies more than one adapter, tag this as a * BASE rom. */ if (sc->tulip_rombuf[19] > 1) sc->tulip_features |= TULIP_HAVE_BASEROM; if (sc->tulip_boardsw == NULL) return -6; goto check_oui; } } if (bcmp(&sc->tulip_rombuf[0], &sc->tulip_rombuf[16], 8) != 0) { /* * Some folks don't use the standard ethernet rom format * but instead just put the address in the first 6 bytes * of the rom and let the rest be all 0xffs. (Can we say * ZNYX???) (well sometimes they put in a checksum so we'll * start at 8). */ for (idx = 8; idx < 32; idx++) { if (sc->tulip_rombuf[idx] != 0xFF) return -4; } /* * Make sure the address is not multicast or locally assigned and * that the OUI is not 00-00-00. */ if ((sc->tulip_rombuf[0] & 3) != 0) return -4; if (sc->tulip_rombuf[0] == 0 && sc->tulip_rombuf[1] == 0 && sc->tulip_rombuf[2] == 0) return -4; bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6); sc->tulip_features |= TULIP_HAVE_OKROM; goto check_oui; } else { /* * A number of makers of multiport boards (ZNYX and Cogent) * only put one address ROM on their 21040 boards.
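* (The remaining ports are left with blank ROMs.)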
So * if the ROM is all zeros (or all 0xFFs), look at the * previously configured boards (as long as they are on the same * PCI bus and the bus number is non-zero) until we find the * master board with the address ROM. We then use its address ROM * as the base for this board. (we add our relative board number * to the last byte of its address). */ for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) { if (sc->tulip_rombuf[idx] != 0 && sc->tulip_rombuf[idx] != 0xFF) break; } if (idx == sizeof(sc->tulip_rombuf)) { int root_unit; tulip_softc_t *root_sc = NULL; for (root_unit = sc->tulip_unit - 1; root_unit >= 0; root_unit--) { root_sc = TULIP_UNIT_TO_SOFTC(root_unit); if (root_sc == NULL || (root_sc->tulip_features & (TULIP_HAVE_OKROM|TULIP_HAVE_SLAVEDROM)) == TULIP_HAVE_OKROM) break; root_sc = NULL; } if (root_sc != NULL && (root_sc->tulip_features & TULIP_HAVE_BASEROM) && root_sc->tulip_chipid == sc->tulip_chipid && root_sc->tulip_pci_busno == sc->tulip_pci_busno) { sc->tulip_features |= TULIP_HAVE_SLAVEDROM; sc->tulip_boardsw = root_sc->tulip_boardsw; strcpy(sc->tulip_boardid, root_sc->tulip_boardid); if (sc->tulip_boardsw->bd_type == TULIP_21140_ISV) { bcopy(root_sc->tulip_rombuf, sc->tulip_rombuf, sizeof(sc->tulip_rombuf)); if (!tulip_srom_decode(sc)) return -5; } else { bcopy(root_sc->tulip_enaddr, sc->tulip_enaddr, 6); sc->tulip_enaddr[5] += sc->tulip_unit - root_sc->tulip_unit; } /* * Now for a truly disgusting kludge: all 4 21040s on * the ZX314 share the same INTA line so the mapping * setup by the BIOS on the PCI bridge is worthless. * Rather than reprogramming the value in the config * register, we will handle this internally. */ if (root_sc->tulip_features & TULIP_HAVE_SHAREDINTR) { sc->tulip_slaves = root_sc->tulip_slaves; root_sc->tulip_slaves = sc; sc->tulip_features |= TULIP_HAVE_SLAVEDINTR; } return 0; } } } /* * This is the standard DEC address ROM test. */ if (bcmp(&sc->tulip_rombuf[24], testpat, 8) != 0) return -3; tmpbuf[0] = sc->tulip_rombuf[15]; tmpbuf[1] = sc->tulip_rombuf[14]; tmpbuf[2] = sc->tulip_rombuf[13]; tmpbuf[3] = sc->tulip_rombuf[12]; tmpbuf[4] = sc->tulip_rombuf[11]; tmpbuf[5] = sc->tulip_rombuf[10]; tmpbuf[6] = sc->tulip_rombuf[9]; tmpbuf[7] = sc->tulip_rombuf[8]; if (bcmp(&sc->tulip_rombuf[0], tmpbuf, 8) != 0) return -2; bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6); cksum = *(u_int16_t *) &sc->tulip_enaddr[0]; cksum *= 2; if (cksum > 65535) cksum -= 65535; cksum += *(u_int16_t *) &sc->tulip_enaddr[2]; if (cksum > 65535) cksum -= 65535; cksum *= 2; if (cksum > 65535) cksum -= 65535; cksum += *(u_int16_t *) &sc->tulip_enaddr[4]; if (cksum >= 65535) cksum -= 65535; rom_cksum = *(u_int16_t *) &sc->tulip_rombuf[6]; if (cksum != rom_cksum) return -1; check_oui: /* * Check for various boards based on OUI. Did I say braindead?
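* The first three bytes of the station address (the vendor OUI) select
* the handler from tulip_vendors[] below.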
*/ for (idx = 0; tulip_vendors[idx].vendor_identify_nic != NULL; idx++) { if (bcmp((caddr_t) sc->tulip_enaddr, (caddr_t) tulip_vendors[idx].vendor_oui, 3) == 0) { (*tulip_vendors[idx].vendor_identify_nic)(sc); break; } } sc->tulip_features |= TULIP_HAVE_OKROM; return 0; } #if defined(IFM_ETHER) static void tulip_ifmedia_add( tulip_softc_t * const sc) { tulip_media_t media; int medias = 0; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { ifmedia_add(&sc->tulip_ifmedia, tulip_media_to_ifmedia[media], 0, 0); medias++; } } if (medias == 0) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE); } else if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO); } else { ifmedia_set(&sc->tulip_ifmedia, tulip_media_to_ifmedia[sc->tulip_media]); sc->tulip_flags |= TULIP_PRINTMEDIA; tulip_linkup(sc, sc->tulip_media); } } static int tulip_ifmedia_change( struct ifnet * const ifp) { tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_media = TULIP_MEDIA_UNKNOWN; if (IFM_SUBTYPE(sc->tulip_ifmedia.ifm_media) != IFM_AUTO) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && sc->tulip_ifmedia.ifm_media == tulip_media_to_ifmedia[media]) { sc->tulip_flags |= TULIP_PRINTMEDIA; sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); return 0; } } } sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_WANTRXACT); tulip_reset(sc); tulip_init(sc); return 0; } /* * Media status callback */ static void tulip_ifmedia_status( struct ifnet * const ifp, struct ifmediareq *req) { tulip_softc_t *sc = TULIP_IFP_TO_SOFTC(ifp); #if defined(__bsdi__) if (sc->tulip_mii.mii_instance != 0) { mii_pollstat(&sc->tulip_mii); req->ifm_active = sc->tulip_mii.mii_media_active; req->ifm_status = sc->tulip_mii.mii_media_status; return; } #endif if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) return; req->ifm_status = IFM_AVALID; if (sc->tulip_flags & TULIP_LINKUP) req->ifm_status |= IFM_ACTIVE; req->ifm_active = tulip_media_to_ifmedia[sc->tulip_media]; } #endif static void tulip_addr_filter( tulip_softc_t * const sc) { #if defined(__FreeBSD__) && __FreeBSD__ >= 3 struct ifmultiaddr *ifma; u_char *addrp; #else struct ether_multistep step; struct ether_multi *enm; #endif int multicnt; sc->tulip_flags &= ~(TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY|TULIP_ALLMULTI); sc->tulip_flags |= TULIP_WANTSETUP|TULIP_WANTTXSTART; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; #if defined(IFF_ALLMULTI) sc->tulip_if.if_flags &= ~IFF_ALLMULTI; #endif #if defined(__FreeBSD__) && __FreeBSD__ >= 3 multicnt = 0; for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family == AF_LINK) multicnt++; } #else multicnt = sc->tulip_multicnt; #endif sc->tulip_if.if_start = tulip_ifstart; /* so the setup packet gets queued */ if (multicnt > 14) { u_int32_t *sp = sc->tulip_setupdata; unsigned hash; /* * Some early passes of the 21140 have broken implementations of * hash-perfect mode. 
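 * (In hash mode the setup packet carries a 512-bit hash table rather than
 * 16 perfect addresses; as the code below shows, tulip_mchash() apparently
 * reduces a multicast address to a 9-bit index, with sp[hash >> 4] picking
 * one of 32 setup words and (hash & 0xF) the bit within its low 16 bits.)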
When we get too many multicasts for perfect * filtering with these chips, we need to switch into hash-only * mode (this is better than all-multicast on a network with lots * of multicast traffic). */ if (sc->tulip_features & TULIP_HAVE_BROKEN_HASH) sc->tulip_flags |= TULIP_WANTHASHONLY; else sc->tulip_flags |= TULIP_WANTHASHPERFECT; /* * If we have more than 14 multicasts, we have to * go into hash-perfect mode (a 512-bit multicast * hash and one perfect hardware address). */ bzero(sc->tulip_setupdata, sizeof(sc->tulip_setupdata)); #if defined(__FreeBSD__) && __FreeBSD__ >= 3 for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; hash = tulip_mchash(LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); sp[hash >> 4] |= 1 << (hash & 0xF); } #else ETHER_FIRST_MULTI(step, TULIP_ETHERCOM(sc), enm); while (enm != NULL) { if (bcmp(enm->enm_addrlo, enm->enm_addrhi, 6) == 0) { hash = tulip_mchash(enm->enm_addrlo); sp[hash >> 4] |= 1 << (hash & 0xF); } else { sc->tulip_flags |= TULIP_ALLMULTI; sc->tulip_flags &= ~(TULIP_WANTHASHONLY|TULIP_WANTHASHPERFECT); break; } ETHER_NEXT_MULTI(step, enm); } #endif /* * No reason to use a hash if we are going to be * receiving every multicast. */ if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { hash = tulip_mchash(etherbroadcastaddr); sp[hash >> 4] |= 1 << (hash & 0xF); if (sc->tulip_flags & TULIP_WANTHASHONLY) { hash = tulip_mchash(sc->tulip_enaddr); sp[hash >> 4] |= 1 << (hash & 0xF); } else { sp[39] = ((u_int16_t *) sc->tulip_enaddr)[0]; sp[40] = ((u_int16_t *) sc->tulip_enaddr)[1]; sp[41] = ((u_int16_t *) sc->tulip_enaddr)[2]; } } } if ((sc->tulip_flags & (TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY)) == 0) { u_int32_t *sp = sc->tulip_setupdata; int idx = 0; if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { /* * Otherwise we can get perfect filtering for 16 addresses. */ #if defined(__FreeBSD__) && __FreeBSD__ >= 3 for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; addrp = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); *sp++ = ((u_int16_t *) addrp)[0]; *sp++ = ((u_int16_t *) addrp)[1]; *sp++ = ((u_int16_t *) addrp)[2]; idx++; } #else ETHER_FIRST_MULTI(step, TULIP_ETHERCOM(sc), enm); for (; enm != NULL; idx++) { if (bcmp(enm->enm_addrlo, enm->enm_addrhi, 6) == 0) { *sp++ = ((u_int16_t *) enm->enm_addrlo)[0]; *sp++ = ((u_int16_t *) enm->enm_addrlo)[1]; *sp++ = ((u_int16_t *) enm->enm_addrlo)[2]; } else { sc->tulip_flags |= TULIP_ALLMULTI; break; } ETHER_NEXT_MULTI(step, enm); } #endif /* * Add the broadcast address. */ idx++; *sp++ = 0xFFFF; *sp++ = 0xFFFF; *sp++ = 0xFFFF; } /* * Pad the rest with our hardware address */ for (; idx < 16; idx++) { *sp++ = ((u_int16_t *) sc->tulip_enaddr)[0]; *sp++ = ((u_int16_t *) sc->tulip_enaddr)[1]; *sp++ = ((u_int16_t *) sc->tulip_enaddr)[2]; } } #if defined(IFF_ALLMULTI) if (sc->tulip_flags & TULIP_ALLMULTI) sc->tulip_if.if_flags |= IFF_ALLMULTI; #endif } static void tulip_reset( tulip_softc_t * const sc) { tulip_ringinfo_t *ri; tulip_desc_t *di; u_int32_t inreset = (sc->tulip_flags & TULIP_INRESET); /* * Brilliant. Simply brilliant. When switching modes/speeds * on a 2114*, you need to set the appropriate MII/PCS/SCL/PS * bits in CSR6 and then do a software reset to get the 21140 * to properly reset its internal pathways to the right places. * Grrrr.
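 *
 * (Roughly, the sequence below is: let the board-specific preset hook
 * touch CSR6 first, write TULIP_BUSMODE_SWRESET to the bus-mode CSR,
 * wait out the reset, then reload the transmit/receive descriptor list
 * bases and reprogram the bus-mode register before anything else.)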
*/ if (sc->tulip_boardsw->bd_media_preset != NULL) (*sc->tulip_boardsw->bd_media_preset)(sc); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyway) */ if (!inreset) { sc->tulip_flags |= TULIP_INRESET; sc->tulip_flags &= ~(TULIP_NEEDRESET|TULIP_RXBUFSLOW); sc->tulip_if.if_flags &= ~IFF_OACTIVE; } TULIP_CSR_WRITE(sc, csr_txlist, TULIP_KVATOPHYS(sc, &sc->tulip_txinfo.ri_first[0])); TULIP_CSR_WRITE(sc, csr_rxlist, TULIP_KVATOPHYS(sc, &sc->tulip_rxinfo.ri_first[0])); TULIP_CSR_WRITE(sc, csr_busmode, (1 << (TULIP_BURSTSIZE(sc->tulip_unit) + 8)) |TULIP_BUSMODE_CACHE_ALIGN8 |TULIP_BUSMODE_READMULTIPLE |(BYTE_ORDER != LITTLE_ENDIAN ? TULIP_BUSMODE_BIGENDIAN : 0)); sc->tulip_txtimer = 0; sc->tulip_txq.ifq_maxlen = TULIP_TXDESCS; /* * Free all the mbufs that were on the transmit ring. */ for (;;) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_txq, m); if (m == NULL) break; m_freem(m); } ri = &sc->tulip_txinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) di->d_status = 0; /* * We need to collect all the mbufs that were on the * receive ring before we reinit it either to put * them back on or to know if we have to allocate * more. */ ri = &sc->tulip_rxinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) { di->d_status = 0; di->d_length1 = 0; di->d_addr1 = 0; di->d_length2 = 0; di->d_addr2 = 0; } for (;;) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_rxq, m); if (m == NULL) break; m_freem(m); } /* * If tulip_reset is being called recursively, exit quickly knowing * that when the outer tulip_reset returns all the right stuff will * have happened.
*/ if (inreset) return; sc->tulip_intrmask |= TULIP_STS_NORMALINTR|TULIP_STS_RXINTR|TULIP_STS_TXINTR |TULIP_STS_ABNRMLINTR|TULIP_STS_SYSERROR|TULIP_STS_TXSTOPPED |TULIP_STS_TXUNDERFLOW|TULIP_STS_TXBABBLE|TULIP_STS_LINKFAIL |TULIP_STS_RXSTOPPED; if ((sc->tulip_flags & TULIP_DEVICEPROBE) == 0) (*sc->tulip_boardsw->bd_media_select)(sc); #if defined(TULIP_DEBUG) if ((sc->tulip_flags & TULIP_NEEDRESET) == TULIP_NEEDRESET) printf(TULIP_PRINTF_FMT ": tulip_reset: additional reset needed?!?\n", TULIP_PRINTF_ARGS); #endif tulip_media_print(sc); if (sc->tulip_features & TULIP_HAVE_DUALSENSE) TULIP_CSR_WRITE(sc, csr_sia_status, TULIP_CSR_READ(sc, csr_sia_status)); sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_WANTSETUP|TULIP_INRESET |TULIP_RXACT); tulip_addr_filter(sc); } static void tulip_init( tulip_softc_t * const sc) { if (sc->tulip_if.if_flags & IFF_UP) { if ((sc->tulip_if.if_flags & IFF_RUNNING) == 0) { /* initialize the media */ tulip_reset(sc); } sc->tulip_if.if_flags |= IFF_RUNNING; if (sc->tulip_if.if_flags & IFF_PROMISC) { sc->tulip_flags |= TULIP_PROMISC; sc->tulip_cmdmode |= TULIP_CMD_PROMISCUOUS; sc->tulip_intrmask |= TULIP_STS_TXINTR; } else { sc->tulip_flags &= ~TULIP_PROMISC; sc->tulip_cmdmode &= ~TULIP_CMD_PROMISCUOUS; if (sc->tulip_flags & TULIP_ALLMULTI) { sc->tulip_cmdmode |= TULIP_CMD_ALLMULTI; } else { sc->tulip_cmdmode &= ~TULIP_CMD_ALLMULTI; } } sc->tulip_cmdmode |= TULIP_CMD_TXRUN; if ((sc->tulip_flags & (TULIP_TXPROBE_ACTIVE|TULIP_WANTSETUP)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; } else { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; } TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); } else { sc->tulip_if.if_flags &= ~IFF_RUNNING; tulip_reset(sc); } } static void tulip_rx_intr( tulip_softc_t * const sc) { TULIP_PERFSTART(rxintr) tulip_ringinfo_t * const ri = &sc->tulip_rxinfo; struct ifnet * const ifp = &sc->tulip_if; int fillok = 1; #if defined(TULIP_DEBUG) int cnt = 0; #endif for (;;) { TULIP_PERFSTART(rxget) struct ether_header eh; tulip_desc_t *eop = ri->ri_nextin; int total_len = 0, last_offset = 0; struct mbuf *ms = NULL, *me = NULL; int accept = 0; if (fillok && sc->tulip_rxq.ifq_len < TULIP_RXQ_TARGET) goto queue_mbuf; #if defined(TULIP_DEBUG) if (cnt == ri->ri_max) break; #endif /* * If the TULIP has no descriptors, there can't be any receive * descriptors to process. */ if (eop == ri->ri_nextout) break; /* * 90% of the packets will fit in one descriptor. So we optimize * for that case. */ if ((((volatile tulip_desc_t *) eop)->d_status & (TULIP_DSTS_OWNER|TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) == (TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) { IF_DEQUEUE(&sc->tulip_rxq, ms); me = ms; } else { /* * If still owned by the TULIP, don't touch it. */ if (((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_OWNER) break; /* * It is possible (though improbable unless the BIG_PACKET support * is enabled or MCLBYTES < 1518) for a received packet to cross * more than one receive descriptor. 
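 *
 * (The loop below walks forward from the first descriptor until it
 * finds one with TULIP_DSTS_RxLASTDESC set, counting the extra buffers
 * in total_len; if it runs into a descriptor the chip still owns, the
 * whole packet hasn't arrived yet and we bail out until the next
 * interrupt.)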
*/ while ((((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_RxLASTDESC) == 0) { if (++eop == ri->ri_last) eop = ri->ri_first; if (eop == ri->ri_nextout || ((((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_OWNER))) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxget); TULIP_PERFEND(rxintr); return; } total_len++; } /* * Dequeue the first buffer for the start of the packet. Hopefully * this will be the only one we need to dequeue. However, if the * packet consumed multiple descriptors, then we need to dequeue * those buffers and chain to the starting mbuf. All buffers but * the last buffer have the same length so we can set that now. * (we add to last_offset instead of multiplying since we normally * won't go into the loop and thereby save ourselves from * doing a multiplication by 0 in the normal case). */ IF_DEQUEUE(&sc->tulip_rxq, ms); for (me = ms; total_len > 0; total_len--) { me->m_len = TULIP_RX_BUFLEN; last_offset += TULIP_RX_BUFLEN; IF_DEQUEUE(&sc->tulip_rxq, me->m_next); me = me->m_next; } } /* * Now get the size of the received packet (minus the CRC). */ total_len = ((eop->d_status >> 16) & 0x7FFF) - 4; if ((sc->tulip_flags & TULIP_RXIGNORE) == 0 && ((eop->d_status & TULIP_DSTS_ERRSUM) == 0 #ifdef BIG_PACKET || (total_len <= sc->tulip_if.if_mtu + sizeof(struct ether_header) && (eop->d_status & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxRUNT| TULIP_DSTS_RxCOLLSEEN|TULIP_DSTS_RxBADCRC| TULIP_DSTS_RxOVERFLOW)) == 0) #endif )) { me->m_len = total_len - last_offset; eh = *mtod(ms, struct ether_header *); #if NBPFILTER > 0 if (sc->tulip_bpf != NULL) if (me == ms) TULIP_BPF_TAP(sc, mtod(ms, caddr_t), total_len); else TULIP_BPF_MTAP(sc, ms); #endif sc->tulip_flags |= TULIP_RXACT; if ((sc->tulip_flags & (TULIP_PROMISC|TULIP_HASHONLY)) && (eh.ether_dhost[0] & 1) == 0 && !TULIP_ADDREQUAL(eh.ether_dhost, sc->tulip_enaddr)) goto next; accept = 1; total_len -= sizeof(struct ether_header); } else { ifp->if_ierrors++; if (eop->d_status & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxOVERFLOW|TULIP_DSTS_RxWATCHDOG)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; } else { const char *error = NULL; if (eop->d_status & TULIP_DSTS_RxTOOLONG) { sc->tulip_dot3stats.dot3StatsFrameTooLongs++; error = "frame too long"; } if (eop->d_status & TULIP_DSTS_RxBADCRC) { if (eop->d_status & TULIP_DSTS_RxDRBBLBIT) { sc->tulip_dot3stats.dot3StatsAlignmentErrors++; error = "alignment error"; } else { sc->tulip_dot3stats.dot3StatsFCSErrors++; error = "bad crc"; } } if (error != NULL && (sc->tulip_flags & TULIP_NOMESSAGES) == 0) { printf(TULIP_PRINTF_FMT ": receive: " TULIP_EADDR_FMT ": %s\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(mtod(ms, u_char *) + 6), error); sc->tulip_flags |= TULIP_NOMESSAGES; } } } next: #if defined(TULIP_DEBUG) cnt++; #endif ifp->if_ipackets++; if (++eop == ri->ri_last) eop = ri->ri_first; ri->ri_nextin = eop; queue_mbuf: /* * Either we are priming the TULIP with mbufs (ms == NULL) * or we are about to accept an mbuf for the upper layers * so we need to allocate an mbuf to replace it. If we * can't replace it, send it up anyway. This may cause * us to drop packets in the future but that's better than * being caught in livelock. * * Note that if this packet crossed multiple descriptors * we don't even try to reallocate all the mbufs here. * Instead we rely on the test at the beginning of * the loop to refill for the extra consumed mbufs.
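 *
 * (The replacement path below typically grabs an mbuf header with
 * MGETHDR and then a cluster with MCLGET; if the cluster can't be had
 * the header is freed and ms ends up NULL, which trips the
 * TULIP_RXBUFSLOW handling further down.)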
*/ if (accept || ms == NULL) { struct mbuf *m0; MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 != NULL) { #if defined(TULIP_COPY_RXDATA) if (!accept || total_len >= MHLEN) { #endif MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { m_freem(m0); m0 = NULL; } #if defined(TULIP_COPY_RXDATA) } #endif } if (accept #if defined(TULIP_COPY_RXDATA) && m0 != NULL #endif ) { #if defined(__bsdi__) eh.ether_type = ntohs(eh.ether_type); #endif #if !defined(TULIP_COPY_RXDATA) ms->m_data += sizeof(struct ether_header); ms->m_len -= sizeof(struct ether_header); ms->m_pkthdr.len = total_len; ms->m_pkthdr.rcvif = ifp; ether_input(ifp, &eh, ms); #else #ifdef BIG_PACKET #error BIG_PACKET is incompatible with TULIP_COPY_RXDATA #endif if (ms == me) bcopy(mtod(ms, caddr_t) + sizeof(struct ether_header), mtod(m0, caddr_t), total_len); else m_copydata(ms, 0, total_len, mtod(m0, caddr_t)); m0->m_len = m0->m_pkthdr.len = total_len; m0->m_pkthdr.rcvif = ifp; ether_input(ifp, &eh, m0); m0 = ms; #endif } ms = m0; } if (ms == NULL) { /* * Couldn't allocate a new buffer. Don't bother * trying to replenish the receive queue. */ fillok = 0; sc->tulip_flags |= TULIP_RXBUFSLOW; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxlowbufs++; #endif TULIP_PERFEND(rxget); continue; } /* * Now give the buffer(s) to the TULIP and save in our * receive queue. */ do { ri->ri_nextout->d_length1 = TULIP_RX_BUFLEN; ri->ri_nextout->d_addr1 = TULIP_KVATOPHYS(sc, mtod(ms, caddr_t)); ri->ri_nextout->d_status = TULIP_DSTS_OWNER; if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; me = ms->m_next; ms->m_next = NULL; IF_ENQUEUE(&sc->tulip_rxq, ms); } while ((ms = me) != NULL); if (sc->tulip_rxq.ifq_len >= TULIP_RXQ_TARGET) sc->tulip_flags &= ~TULIP_RXBUFSLOW; TULIP_PERFEND(rxget); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxintr); } static int tulip_tx_intr( tulip_softc_t * const sc) { TULIP_PERFSTART(txintr) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; struct mbuf *m; int xmits = 0; int descs = 0; while (ri->ri_free < ri->ri_max) { u_int32_t d_flag; if (((volatile tulip_desc_t *) ri->ri_nextin)->d_status & TULIP_DSTS_OWNER) break; d_flag = ri->ri_nextin->d_flag; if (d_flag & TULIP_DFLAG_TxLASTSEG) { if (d_flag & TULIP_DFLAG_TxSETUPPKT) { /* * We've just finished processing a setup packet. * Mark that we finished it. If there's not * another one pending, start up the TULIP receiver. * Make sure we ack the RXSTOPPED so we won't get * an abnormal interrupt indication.
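 *
 * (Whether the filter ended up hash-only is recovered from the
 * descriptor itself: the TULIP_DFLAG_TxINVRSFILT test below re-sets
 * TULIP_HASHONLY after both setup bits have been cleared.)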
*/ sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_HASHONLY); if (ri->ri_nextin->d_flag & TULIP_DFLAG_TxINVRSFILT) sc->tulip_flags |= TULIP_HASHONLY; if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } else { const u_int32_t d_status = ri->ri_nextin->d_status; IF_DEQUEUE(&sc->tulip_txq, m); if (m != NULL) { #if NBPFILTER > 0 if (sc->tulip_bpf != NULL) TULIP_BPF_MTAP(sc, m); #endif m_freem(m); #if defined(TULIP_DEBUG) } else { printf(TULIP_PRINTF_FMT ": tx_intr: failed to dequeue mbuf?!?\n", TULIP_PRINTF_ARGS); #endif } if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { tulip_mediapoll_event_t event = TULIP_MEDIAPOLL_TXPROBE_OK; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxEXCCOLL)) { #if defined(TULIP_DEBUG) if (d_status & TULIP_DSTS_TxNOCARR) sc->tulip_dbg.dbg_txprobe_nocarr++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dbg.dbg_txprobe_exccoll++; #endif event = TULIP_MEDIAPOLL_TXPROBE_FAILED; } (*sc->tulip_boardsw->bd_media_poll)(sc, event); /* * Escape from the loop before media poll has reset the TULIP! */ break; } else { xmits++; if (d_status & TULIP_DSTS_ERRSUM) { sc->tulip_if.if_oerrors++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dot3stats.dot3StatsExcessiveCollisions++; if (d_status & TULIP_DSTS_TxLATECOLL) sc->tulip_dot3stats.dot3StatsLateCollisions++; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxCARRLOSS)) sc->tulip_dot3stats.dot3StatsCarrierSenseErrors++; if (d_status & (TULIP_DSTS_TxUNDERFLOW|TULIP_DSTS_TxBABBLE)) sc->tulip_dot3stats.dot3StatsInternalMacTransmitErrors++; if (d_status & TULIP_DSTS_TxUNDERFLOW) sc->tulip_dot3stats.dot3StatsInternalTransmitUnderflows++; if (d_status & TULIP_DSTS_TxBABBLE) sc->tulip_dot3stats.dot3StatsInternalTransmitBabbles++; } else { u_int32_t collisions = (d_status & TULIP_DSTS_TxCOLLMASK) >> TULIP_DSTS_V_TxCOLLCNT; sc->tulip_if.if_collisions += collisions; if (collisions == 1) sc->tulip_dot3stats.dot3StatsSingleCollisionFrames++; else if (collisions > 1) sc->tulip_dot3stats.dot3StatsMultipleCollisionFrames++; else if (d_status & TULIP_DSTS_TxDEFERRED) sc->tulip_dot3stats.dot3StatsDeferredTransmissions++; /* * SQE is only valid for 10baseT/BNC/AUI when not * running in full-duplex. In order to speed up the * test, the corresponding bit in tulip_flags needs to * be set as well to get us to count SQE Test Errors. */ if (d_status & TULIP_DSTS_TxNOHRTBT & sc->tulip_flags) sc->tulip_dot3stats.dot3StatsSQETestErrors++; } } } if (++ri->ri_nextin == ri->ri_last) ri->ri_nextin = ri->ri_first; ri->ri_free++; descs++; if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) sc->tulip_if.if_flags &= ~IFF_OACTIVE; } /* * If nothing is left to transmit, disable the timer. * Else, if we made progress, reset the timer back to 2 ticks.
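 *
 * (The timer itself is serviced in tulip_ifwatchdog: once armed it is
 * decremented every tick, and if it hits zero before a transmit
 * completes, the driver prints a transmission timeout and resets the
 * chip; TULIP_TXTIMER is presumably the 2-tick value this refers to.)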
*/ if (ri->ri_free == ri->ri_max || (sc->tulip_flags & TULIP_TXPROBE_ACTIVE)) sc->tulip_txtimer = 0; else if (xmits > 0) sc->tulip_txtimer = TULIP_TXTIMER; sc->tulip_if.if_opackets += xmits; TULIP_PERFEND(txintr); return descs; } static void tulip_print_abnormal_interrupt( tulip_softc_t * const sc, u_int32_t csr) { const char * const *msgp = tulip_status_bits; const char *sep; u_int32_t mask; const char thrsh[] = "72|128\0\0\096|256\0\0\0128|512\0\0160|1024\0"; csr &= (1 << (sizeof(tulip_status_bits)/sizeof(tulip_status_bits[0]))) - 1; printf(TULIP_PRINTF_FMT ": abnormal interrupt:", TULIP_PRINTF_ARGS); for (sep = " ", mask = 1; mask <= csr; mask <<= 1, msgp++) { if ((csr & mask) && *msgp != NULL) { printf("%s%s", sep, *msgp); if (mask == TULIP_STS_TXUNDERFLOW && (sc->tulip_flags & TULIP_NEWTXTHRESH)) { sc->tulip_flags &= ~TULIP_NEWTXTHRESH; if (sc->tulip_cmdmode & TULIP_CMD_STOREFWD) { printf(" (switching to store-and-forward mode)"); } else { printf(" (raising TX threshold to %s)", &thrsh[9 * ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) >> 14)]); } } sep = ", "; } } printf("\n"); } static void tulip_intr_handler( tulip_softc_t * const sc, int *progress_p) { TULIP_PERFSTART(intr) u_int32_t csr; #if defined(__NetBSD__) && !defined(TULIP_USE_SOFTINTR) int only_once; only_once = 1; #endif while ((csr = TULIP_CSR_READ(sc, csr_status)) & sc->tulip_intrmask) { #if defined(__NetBSD__) && !defined(TULIP_USE_SOFTINTR) if (only_once == 1) { #if NRND > 0 rnd_add_uint32(&sc->tulip_rndsource, csr); #endif only_once = 0; } #endif *progress_p = 1; TULIP_CSR_WRITE(sc, csr_status, csr); if (csr & TULIP_STS_SYSERROR) { sc->tulip_last_system_error = (csr & TULIP_STS_ERRORMASK) >> TULIP_STS_ERR_SHIFT; if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_flags |= TULIP_SYSTEMERROR; } else { printf(TULIP_PRINTF_FMT ": system error: %s\n", TULIP_PRINTF_ARGS, tulip_system_errors[sc->tulip_last_system_error]); } sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_system_errors++; break; } if (csr & (TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_intrs++; #endif if (sc->tulip_boardsw->bd_media_poll != NULL) { (*sc->tulip_boardsw->bd_media_poll)(sc, csr & TULIP_STS_LINKFAIL ? TULIP_MEDIAPOLL_LINKFAIL : TULIP_MEDIAPOLL_LINKPASS); csr &= ~TULIP_STS_ABNRMLINTR; } tulip_media_print(sc); } if (csr & (TULIP_STS_RXINTR|TULIP_STS_RXNOBUF)) { u_int32_t misses = TULIP_CSR_READ(sc, csr_missed_frames); if (csr & TULIP_STS_RXNOBUF) sc->tulip_dot3stats.dot3StatsMissedFrames += misses & 0xFFFF; /* * Pass 2.[012] of the 21140A-A[CDE] may hang and/or corrupt data * on receive overflows. */ if ((misses & 0x0FFE0000) && (sc->tulip_features & TULIP_HAVE_RXBADOVRFLW)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; /* * Stop the receiver process and spin until it's stopped. * Tell rx_intr to drop the packets it dequeues. */ TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode & ~TULIP_CMD_RXRUN); while ((TULIP_CSR_READ(sc, csr_status) & TULIP_STS_RXSTOPPED) == 0) ; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); sc->tulip_flags |= TULIP_RXIGNORE; } tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_RXIGNORE) { /* * Restart the receiver. 
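 *
 * (Restarting is just a matter of clearing TULIP_RXIGNORE and rewriting
 * CSR6 with tulip_cmdmode, which still has TULIP_CMD_RXRUN set; the stop
 * above only masked RXRUN out of the value written to the chip.)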
*/ sc->tulip_flags &= ~TULIP_RXIGNORE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } if (csr & TULIP_STS_ABNRMLINTR) { u_int32_t tmp = csr & sc->tulip_intrmask & ~(TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR); if (csr & TULIP_STS_TXUNDERFLOW) { if ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) != TULIP_CMD_THRSHLD160) { sc->tulip_cmdmode += TULIP_CMD_THRSHLD96; sc->tulip_flags |= TULIP_NEWTXTHRESH; } else if (sc->tulip_features & TULIP_HAVE_STOREFWD) { sc->tulip_cmdmode |= TULIP_CMD_STOREFWD; sc->tulip_flags |= TULIP_NEWTXTHRESH; } } if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_statusbits |= tmp; } else { tulip_print_abnormal_interrupt(sc, tmp); sc->tulip_flags |= TULIP_NOMESSAGES; } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_TXPROBE_ACTIVE|TULIP_DOINGSETUP|TULIP_PROMISC)) { tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) tulip_ifstart(&sc->tulip_if); } } if (sc->tulip_flags & TULIP_NEEDRESET) { tulip_reset(sc); tulip_init(sc); } TULIP_PERFEND(intr); } #if defined(TULIP_USE_SOFTINTR) /* * This is an experimental idea to alleviate problems due to interrupt * livelock. What is interrupt livelock? It's when you spend all your * time servicing device interrupts and never drop below device ipl * to do "useful" work. * * So what we do here is see if the device needs service and if so, * disable interrupts (dismiss the interrupt), place it in a list of devices * needing service, and issue a network software interrupt. * * When our network software interrupt routine gets called, we simply * walk down the list of devices that we have created and deal with them * at splnet/splsoftnet. * */ static void tulip_hardintr_handler( tulip_softc_t * const sc, int *progress_p) { if ((TULIP_CSR_READ(sc, csr_status) & (TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR)) == 0) return; *progress_p = 1; /* * disable interrupts */ TULIP_CSR_WRITE(sc, csr_intr, 0); /* * mark it as needing a software interrupt */ tulip_softintr_mask |= (1U << sc->tulip_unit); #if defined(__NetBSD__) && NRND > 0 /* * This isn't all that random (the value we feed in) but it is * better than a constant probably. It isn't used in entropy * calculation anyway, just to add something to the pool. */ rnd_add_uint32(&sc->tulip_rndsource, sc->tulip_flags); #endif } static void tulip_softintr( void) { u_int32_t softintr_mask, mask; int progress = 0; int unit; tulip_spl_t s; /* * Copy the mask to a local copy and reset the global one to 0. */ s = TULIP_RAISESPL(); softintr_mask = tulip_softintr_mask; tulip_softintr_mask = 0; TULIP_RESTORESPL(s); /* * Optimize for the single unit case. */ if (tulip_softintr_max_unit == 0) { if (softintr_mask & 1) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(0); /* * Handle the "interrupt" and then reenable interrupts */ softintr_mask = 0; tulip_intr_handler(sc, &progress); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } return; } /* * Handle all "queued" interrupts in a round robin fashion. * This is done so as not to favor a particular interface. */ unit = tulip_softintr_last_unit; mask = (1U << unit); while (softintr_mask != 0) { if (tulip_softintr_max_unit == unit) { unit = 0; mask = 1; } else { unit += 1; mask <<= 1; } if (softintr_mask & mask) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(unit); /* * Handle the "interrupt" and then reenable interrupts */ softintr_mask ^= mask; tulip_intr_handler(sc, &progress); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } /* * Save where we ended up.
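 *
 * (Resuming the scan at the unit after the last one serviced is what
 * keeps the round-robin fair; a fixed starting point would always favor
 * the lowest-numbered interface under load.)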
*/ tulip_softintr_last_unit = unit; } #endif /* TULIP_USE_SOFTINTR */ static tulip_intrfunc_t tulip_intr_shared( void *arg) { tulip_softc_t * sc = arg; int progress = 0; for (; sc != NULL; sc = sc->tulip_slaves) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif #if defined(TULIP_USE_SOFTINTR) tulip_hardintr_handler(sc, &progress); #else tulip_intr_handler(sc, &progress); #endif } #if defined(TULIP_USE_SOFTINTR) if (progress) schednetisr(NETISR_DE); #endif #if !defined(TULIP_VOID_INTRFUNC) return progress; #endif } static tulip_intrfunc_t tulip_intr_normal( void *arg) { tulip_softc_t * sc = (tulip_softc_t *) arg; int progress = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif #if defined(TULIP_USE_SOFTINTR) tulip_hardintr_handler(sc, &progress); if (progress) schednetisr(NETISR_DE); #else tulip_intr_handler(sc, &progress); #endif #if !defined(TULIP_VOID_INTRFUNC) return progress; #endif } static struct mbuf * tulip_mbuf_compress( struct mbuf *m) { struct mbuf *m0; #if MCLBYTES >= ETHERMTU + 18 && !defined(BIG_PACKET) MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 != NULL) { if (m->m_pkthdr.len > MHLEN) { MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { m_freem(m); m_freem(m0); return NULL; } } m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t)); m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len; } #else int mlen = MHLEN; int len = m->m_pkthdr.len; struct mbuf **mp = &m0; while (len > 0) { if (mlen == MHLEN) { MGETHDR(*mp, M_DONTWAIT, MT_DATA); } else { MGET(*mp, M_DONTWAIT, MT_DATA); } if (*mp == NULL) { m_freem(m0); m0 = NULL; break; } if (len > MLEN) { MCLGET(*mp, M_DONTWAIT); if (((*mp)->m_flags & M_EXT) == 0) { m_freem(m0); m0 = NULL; break; } (*mp)->m_len = len <= MCLBYTES ? len : MCLBYTES; } else { (*mp)->m_len = len <= mlen ? len : mlen; } m_copydata(m, m->m_pkthdr.len - len, (*mp)->m_len, mtod((*mp), caddr_t)); len -= (*mp)->m_len; mp = &(*mp)->m_next; mlen = MLEN; } #endif m_freem(m); return m0; } static struct mbuf * tulip_txput( tulip_softc_t * const sc, struct mbuf *m) { TULIP_PERFSTART(txput) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *eop, *nextout; int segcnt, free; u_int32_t d_status; struct mbuf *m0; #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { printf(TULIP_PRINTF_FMT ": txput%s: tx not running\n", TULIP_PRINTF_ARGS, (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) ? "(probe)" : ""); sc->tulip_flags |= TULIP_WANTTXSTART; goto finish; } #endif /* * Now we try to fill in our transmit descriptors. This is * a bit reminiscent of going on the Ark two by two * since each descriptor for the TULIP can describe * two buffers. So we advance through the packet, filling * each of the two entries at a time, to fill each * descriptor. Clear the first and last segment bits * in each descriptor (actually just clear everything * but the end-of-ring or chain bits) to make sure * we don't get messed up by previously sent packets. * * We may fail to put the entire packet on the ring if * there are either not enough ring entries free or if the * packet has more than MAX_TXSEG segments. In the former * case we will just wait for the ring to empty. In the * latter case we have to recopy.
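 *
 * (For example, a 3-mbuf chain that stays within cluster boundaries
 * consumes 2 descriptors: mbuf 0 fills d_addr1/d_length1 and mbuf 1
 * fills d_addr2/d_length2 of the first descriptor, while mbuf 2 lands
 * in d_addr1 of the second. The CLBYTES arithmetic below splits any
 * segment that crosses a cluster boundary into two pieces first.)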
*/ again: d_status = 0; eop = nextout = ri->ri_nextout; m0 = m; segcnt = 0; free = ri->ri_free; do { int len = m0->m_len; caddr_t addr = mtod(m0, caddr_t); unsigned clsize = CLBYTES - (((u_long) addr) & (CLBYTES-1)); while (len > 0) { unsigned slen = min(len, clsize); #ifdef BIG_PACKET int partial = 0; if (slen >= 2048) slen = 2040, partial = 1; #endif segcnt++; if (segcnt > TULIP_MAX_TXSEG) { /* * The packet exceeds the number of transmit buffer * entries that we can use for one packet, so we have to * recopy it into one mbuf and then try again. */ m = tulip_mbuf_compress(m); if (m == NULL) goto finish; goto again; } if (segcnt & 1) { if (--free == 0) { /* * See if there's any unclaimed space in the * transmit ring. */ if ((free += tulip_tx_intr(sc)) == 0) { /* * There's no more room but since nothing * has been committed at this point, just * show output is active, put back the * mbuf and return. */ sc->tulip_flags |= TULIP_WANTTXSTART; goto finish; } } eop = nextout; if (++nextout == ri->ri_last) nextout = ri->ri_first; eop->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; eop->d_status = d_status; eop->d_addr1 = TULIP_KVATOPHYS(sc, addr); eop->d_length1 = slen; } else { /* * Fill in second half of descriptor */ eop->d_addr2 = TULIP_KVATOPHYS(sc, addr); eop->d_length2 = slen; } d_status = TULIP_DSTS_OWNER; len -= slen; addr += slen; #ifdef BIG_PACKET if (partial) continue; #endif clsize = CLBYTES; } } while ((m0 = m0->m_next) != NULL); /* * The descriptors have been filled in. Now get ready * to transmit. */ IF_ENQUEUE(&sc->tulip_txq, m); m = NULL; /* * Make sure the next descriptor after this packet is owned * by us since it may have been set up above if we ran out * of room in the ring. */ nextout->d_status = 0; /* * If we only used the first segment of the last descriptor, * make sure the second segment will not be used. */ if (segcnt & 1) { eop->d_addr2 = 0; eop->d_length2 = 0; } /* * Mark the last and first segments, indicate we want a transmit * complete interrupt, and tell it to transmit! */ eop->d_flag |= TULIP_DFLAG_TxLASTSEG|TULIP_DFLAG_TxWANTINTR; /* * Note that ri->ri_nextout is still the start of the packet * and until we set the OWNER bit, we can still back out of * everything we have done. */ ri->ri_nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG; ri->ri_nextout->d_status = TULIP_DSTS_OWNER; TULIP_CSR_WRITE(sc, csr_txpoll, 1); /* * This advances the ring for us. */ ri->ri_nextout = nextout; ri->ri_free = free; TULIP_PERFEND(txput); if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { sc->tulip_if.if_flags |= IFF_OACTIVE; TULIP_PERFEND(txput); return NULL; } /* * Switch back to the single-queueing ifstart. */ sc->tulip_flags &= ~TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart_one; if (sc->tulip_txtimer == 0) sc->tulip_txtimer = TULIP_TXTIMER; /* * If we want a txstart, there must not be enough space in the * transmit ring. So we want to enable transmit done interrupts * so we can immediately reclaim some space. When the transmit * interrupt is posted, the interrupt handler will call tx_intr * to reclaim space and then txstart (since WANTTXSTART is set). * txstart will move the packet into the transmit ring and clear * WANTTXSTART thereby causing TXINTR to be cleared.
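 *
 * (Conversely, when nothing is pending and we're not in promiscuous
 * mode, the finish path below turns TULIP_STS_TXINTR back off so routine
 * transmit completions don't interrupt at all and are reaped lazily.)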
*/ finish: if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_if.if_start = tulip_ifstart; if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } else if ((sc->tulip_flags & TULIP_PROMISC) == 0) { if (sc->tulip_intrmask & TULIP_STS_TXINTR) { sc->tulip_intrmask &= ~TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } TULIP_PERFEND(txput); return m; } static void tulip_txput_setup( tulip_softc_t * const sc) { tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *nextout; /* * We will transmit, at most, one setup packet per call to ifstart. */ #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { printf(TULIP_PRINTF_FMT ": txput_setup: tx not running\n", TULIP_PRINTF_ARGS); sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart; return; } #endif /* * Try to reclaim some free descriptors. */ if (ri->ri_free < 2) tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_DOINGSETUP) || ri->ri_free == 1) { sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart; return; } bcopy(sc->tulip_setupdata, sc->tulip_setupbuf, sizeof(sc->tulip_setupbuf)); /* * Clear WANTSETUP and set DOINGSETUP. Since we know that WANTSETUP is * set and DOINGSETUP is clear, doing an XOR of the two will DTRT. */ sc->tulip_flags ^= TULIP_WANTSETUP|TULIP_DOINGSETUP; ri->ri_free--; nextout = ri->ri_nextout; nextout->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG|TULIP_DFLAG_TxLASTSEG |TULIP_DFLAG_TxSETUPPKT|TULIP_DFLAG_TxWANTINTR; if (sc->tulip_flags & TULIP_WANTHASHPERFECT) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT; else if (sc->tulip_flags & TULIP_WANTHASHONLY) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT|TULIP_DFLAG_TxINVRSFILT; nextout->d_length1 = sizeof(sc->tulip_setupbuf); nextout->d_addr1 = TULIP_KVATOPHYS(sc, sc->tulip_setupbuf); nextout->d_length2 = 0; nextout->d_addr2 = 0; /* * Advance the ring for the next transmit packet. */ if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; /* * Make sure the next descriptor is owned by us since it * may have been set up above if we ran out of room in the * ring. */ ri->ri_nextout->d_status = 0; nextout->d_status = TULIP_DSTS_OWNER; TULIP_CSR_WRITE(sc, csr_txpoll, 1); if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } /* * This routine is entered at splnet() (splsoftnet() on NetBSD) * and thereby poses no problems whether TULIP_USE_SOFTINTR is * defined or not.
*/ static int tulip_ifioctl( struct ifnet * ifp, ioctl_cmd_t cmd, caddr_t data) { TULIP_PERFSTART(ifioctl) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); struct ifaddr *ifa = (struct ifaddr *)data; struct ifreq *ifr = (struct ifreq *) data; tulip_spl_t s; int error = 0; #if defined(TULIP_USE_SOFTINTR) s = TULIP_RAISESOFTSPL(); #else s = TULIP_RAISESPL(); #endif switch (cmd) { case SIOCSIFADDR: { ifp->if_flags |= IFF_UP; switch(ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: { tulip_init(sc); TULIP_ARP_IFINIT(sc, ifa); break; } #endif /* INET */ #ifdef IPX case AF_IPX: { struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr); if (ipx_nullhost(*ina)) { ina->x_host = *(union ipx_host *)(sc->tulip_enaddr); } else { ifp->if_flags &= ~IFF_RUNNING; bcopy((caddr_t)ina->x_host.c_host, (caddr_t)sc->tulip_enaddr, sizeof(sc->tulip_enaddr)); } tulip_init(sc); break; } #endif /* IPX */ #ifdef NS /* * This magic was copied from if_is.c; I don't use XNS, * so I have no way of telling if this actually * works or not. */ case AF_NS: { struct ns_addr *ina = &(IA_SNS(ifa)->sns_addr); if (ns_nullhost(*ina)) { ina->x_host = *(union ns_host *)(sc->tulip_enaddr); } else { ifp->if_flags &= ~IFF_RUNNING; bcopy((caddr_t)ina->x_host.c_host, (caddr_t)sc->tulip_enaddr, sizeof(sc->tulip_enaddr)); } tulip_init(sc); break; } #endif /* NS */ default: { tulip_init(sc); break; } } break; } case SIOCGIFADDR: { bcopy((caddr_t) sc->tulip_enaddr, (caddr_t) ((struct sockaddr *)&ifr->ifr_data)->sa_data, 6); break; } case SIOCSIFFLAGS: { #if !defined(IFM_ETHER) int flags = 0; if (ifp->if_flags & IFF_LINK0) flags |= 1; if (ifp->if_flags & IFF_LINK1) flags |= 2; if (ifp->if_flags & IFF_LINK2) flags |= 4; if (flags == 7) { ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2); sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP|TULIP_NOAUTOSENSE); tulip_reset(sc); } else if (flags) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && --flags == 0) { sc->tulip_flags |= TULIP_NOAUTOSENSE; if (sc->tulip_media != media || (sc->tulip_flags & TULIP_DIDNWAY)) { sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); } break; } } if (flags) printf(TULIP_PRINTF_FMT ": ignored invalid media request\n", TULIP_PRINTF_ARGS); } #endif tulip_init(sc); break; } #if defined(SIOCSIFMEDIA) case SIOCSIFMEDIA: case SIOCGIFMEDIA: { error = ifmedia_ioctl(ifp, ifr, &sc->tulip_ifmedia, cmd); break; } #endif case SIOCADDMULTI: case SIOCDELMULTI: { /* * Update multicast listeners */ #if defined(__FreeBSD__) && __FreeBSD__ >= 3 tulip_addr_filter(sc); /* reset multicast filtering */ tulip_init(sc); error = 0; #else if (cmd == SIOCADDMULTI) error = ether_addmulti(ifr, TULIP_ETHERCOM(sc)); else error = ether_delmulti(ifr, TULIP_ETHERCOM(sc)); if (error == ENETRESET) { tulip_addr_filter(sc); /* reset multicast filtering */ tulip_init(sc); error = 0; } #endif break; } #if defined(SIOCSIFMTU) #if !defined(ifr_mtu) #define ifr_mtu ifr_metric #endif case SIOCSIFMTU: /* * Set the interface MTU.
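 *
 * (Anything above ETHERMTU is rejected with EINVAL unless BIG_PACKET is
 * compiled in and the chip is one of the 21140/21140A/21041 variants the
 * test below exempts; a successful change with BIG_PACKET forces a
 * reset/init cycle so the new size takes effect.)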
*/ if (ifr->ifr_mtu > ETHERMTU #ifdef BIG_PACKET && sc->tulip_chipid != TULIP_21140 && sc->tulip_chipid != TULIP_21140A && sc->tulip_chipid != TULIP_21041 #endif ) { error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; #ifdef BIG_PACKET tulip_reset(sc); tulip_init(sc); #endif break; #endif /* SIOCSIFMTU */ #ifdef SIOCGADDRROM case SIOCGADDRROM: { error = copyout(sc->tulip_rombuf, ifr->ifr_data, sizeof(sc->tulip_rombuf)); break; } #endif #ifdef SIOCGCHIPID case SIOCGCHIPID: { ifr->ifr_metric = (int) sc->tulip_chipid; break; } #endif default: { error = EINVAL; break; } } TULIP_RESTORESPL(s); TULIP_PERFEND(ifioctl); return error; } /* * These routines get called at device spl (from ether_output). This might * pose a problem for TULIP_USE_SOFTINTR if ether_output is called at * device spl from another driver. */ static ifnet_ret_t tulip_ifstart( struct ifnet * const ifp) { TULIP_PERFSTART(ifstart) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); if (sc->tulip_if.if_flags & IFF_RUNNING) { if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); while (sc->tulip_if.if_snd.ifq_head != NULL) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_if.if_snd, m); if ((m = tulip_txput(sc, m)) != NULL) { IF_PREPEND(&sc->tulip_if.if_snd, m); break; } } } TULIP_PERFEND(ifstart); } static ifnet_ret_t tulip_ifstart_one( struct ifnet * const ifp) { TULIP_PERFSTART(ifstart_one) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); if ((sc->tulip_if.if_flags & IFF_RUNNING) && sc->tulip_if.if_snd.ifq_head != NULL) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_if.if_snd, m); if ((m = tulip_txput(sc, m)) != NULL) IF_PREPEND(&sc->tulip_if.if_snd, m); } TULIP_PERFEND(ifstart_one); } /* * Even though this routine runs at device spl, it does not break * our use of splnet (splsoftnet under NetBSD) for the majority * of this driver (if TULIP_USE_SOFTINTR defined) since * tulip_ifwatchdog is called from if_watchdog, which is called from * splsoftclock, which is below spl[soft]net. */ static void tulip_ifwatchdog( struct ifnet *ifp) { TULIP_PERFSTART(ifwatchdog) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); #if defined(TULIP_DEBUG) u_int32_t rxintrs = sc->tulip_dbg.dbg_rxintrs - sc->tulip_dbg.dbg_last_rxintrs; if (rxintrs > sc->tulip_dbg.dbg_high_rxintrs_hz) sc->tulip_dbg.dbg_high_rxintrs_hz = rxintrs; sc->tulip_dbg.dbg_last_rxintrs = sc->tulip_dbg.dbg_rxintrs; #endif /* TULIP_DEBUG */ sc->tulip_if.if_timer = 1; /* * These should be rare so do a bulk test up front so we can just skip * them if needed.
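 *
 * (The single mask test below covers the three deferred-work cases:
 * refill the receive ring if TULIP_RXBUFSLOW is set, report accumulated
 * system errors, and flush any abnormal-interrupt status that was muted
 * by TULIP_NOMESSAGES.)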
*/ if (sc->tulip_flags & (TULIP_SYSTEMERROR|TULIP_RXBUFSLOW|TULIP_NOMESSAGES)) { /* * If the number of receive buffers is low, try to refill */ if (sc->tulip_flags & TULIP_RXBUFSLOW) tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_SYSTEMERROR) { printf(TULIP_PRINTF_FMT ": %d system errors: last was %s\n", TULIP_PRINTF_ARGS, sc->tulip_system_errors, tulip_system_errors[sc->tulip_last_system_error]); } if (sc->tulip_statusbits) { tulip_print_abnormal_interrupt(sc, sc->tulip_statusbits); sc->tulip_statusbits = 0; } sc->tulip_flags &= ~(TULIP_NOMESSAGES|TULIP_SYSTEMERROR); } if (sc->tulip_txtimer) tulip_tx_intr(sc); if (sc->tulip_txtimer && --sc->tulip_txtimer == 0) { printf(TULIP_PRINTF_FMT ": transmission timeout\n", TULIP_PRINTF_ARGS); if (TULIP_DO_AUTOSENSE(sc)) { sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP); } tulip_reset(sc); tulip_init(sc); } TULIP_PERFEND(ifwatchdog); TULIP_PERFMERGE(sc, perf_intr_cycles); TULIP_PERFMERGE(sc, perf_ifstart_cycles); TULIP_PERFMERGE(sc, perf_ifioctl_cycles); TULIP_PERFMERGE(sc, perf_ifwatchdog_cycles); TULIP_PERFMERGE(sc, perf_timeout_cycles); TULIP_PERFMERGE(sc, perf_ifstart_one_cycles); TULIP_PERFMERGE(sc, perf_txput_cycles); TULIP_PERFMERGE(sc, perf_txintr_cycles); TULIP_PERFMERGE(sc, perf_rxintr_cycles); TULIP_PERFMERGE(sc, perf_rxget_cycles); TULIP_PERFMERGE(sc, perf_intr); TULIP_PERFMERGE(sc, perf_ifstart); TULIP_PERFMERGE(sc, perf_ifioctl); TULIP_PERFMERGE(sc, perf_ifwatchdog); TULIP_PERFMERGE(sc, perf_timeout); TULIP_PERFMERGE(sc, perf_ifstart_one); TULIP_PERFMERGE(sc, perf_txput); TULIP_PERFMERGE(sc, perf_txintr); TULIP_PERFMERGE(sc, perf_rxintr); TULIP_PERFMERGE(sc, perf_rxget); } #if defined(__bsdi__) || (defined(__FreeBSD__) && BSD < 199506) static ifnet_ret_t tulip_ifwatchdog_wrapper( int unit) { tulip_ifwatchdog(&TULIP_UNIT_TO_SOFTC(unit)->tulip_if); } #define tulip_ifwatchdog tulip_ifwatchdog_wrapper #endif /* * All printf's are real as of now! */ #ifdef printf #undef printf #endif #if !defined(IFF_NOTRAILERS) #define IFF_NOTRAILERS 0 #endif static void tulip_attach( tulip_softc_t * const sc) { struct ifnet * const ifp = &sc->tulip_if; ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST; ifp->if_ioctl = tulip_ifioctl; ifp->if_start = tulip_ifstart; ifp->if_watchdog = tulip_ifwatchdog; ifp->if_timer = 1; #if !defined(__bsdi__) || _BSDI_VERSION < 199401 ifp->if_output = ether_output; #endif #if defined(__bsdi__) && _BSDI_VERSION < 199401 ifp->if_mtu = ETHERMTU; #endif #if defined(__bsdi__) && _BSDI_VERSION >= 199510 aprint_naive(": DEC Ethernet"); aprint_normal(": %s%s", sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid]); aprint_verbose(" pass %d.%d", (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); printf("\n"); sc->tulip_pf = aprint_normal; aprint_normal(TULIP_PRINTF_FMT ": address " TULIP_EADDR_FMT "\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(sc->tulip_enaddr)); #else printf( #if defined(__bsdi__) "\n" #endif TULIP_PRINTF_FMT ": %s%s pass %d.%d%s\n", TULIP_PRINTF_ARGS, sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F, (sc->tulip_features & (TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM)) == TULIP_HAVE_ISVSROM ?
" (invalid EEPROM checksum)" : ""); printf(TULIP_PRINTF_FMT ": address " TULIP_EADDR_FMT "\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(sc->tulip_enaddr)); #endif #if defined(__alpha__) /* * In case the SRM console told us about bogus media, * we need to check to be safe. */ if (sc->tulip_mediums[sc->tulip_media] == NULL) sc->tulip_media = TULIP_MEDIA_UNKNOWN; #endif (*sc->tulip_boardsw->bd_media_probe)(sc); #if defined(IFM_ETHER) ifmedia_init(&sc->tulip_ifmedia, 0, tulip_ifmedia_change, tulip_ifmedia_status); #else { tulip_media_t media; int cnt; printf(TULIP_PRINTF_FMT ": media:", TULIP_PRINTF_ARGS); for (media = TULIP_MEDIA_UNKNOWN, cnt = 1; cnt < 7 && media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { printf(" %d=\"%s\"", cnt, tulip_mediums[media]); cnt++; } } if (cnt == 1) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; printf(" none\n"); } else { printf("\n"); } } #endif sc->tulip_flags &= ~TULIP_DEVICEPROBE; #if defined(IFM_ETHER) tulip_ifmedia_add(sc); #endif tulip_reset(sc); #if defined(__bsdi__) && _BSDI_VERSION >= 199510 sc->tulip_pf = printf; TULIP_ETHER_IFATTACH(sc); #else if_attach(ifp); #if defined(__NetBSD__) || (defined(__FreeBSD__) && BSD >= 199506) TULIP_ETHER_IFATTACH(sc); #endif #endif /* __bsdi__ */ #if NBPFILTER > 0 TULIP_BPF_ATTACH(sc); #endif #if defined(__NetBSD__) && NRND > 0 rnd_attach_source(&sc->tulip_rndsource, sc->tulip_dev.dv_xname, RND_TYPE_NET); #endif } static void tulip_initcsrs( tulip_softc_t * const sc, tulip_csrptr_t csr_base, size_t csr_size) { sc->tulip_csrs.csr_busmode = csr_base + 0 * csr_size; sc->tulip_csrs.csr_txpoll = csr_base + 1 * csr_size; sc->tulip_csrs.csr_rxpoll = csr_base + 2 * csr_size; sc->tulip_csrs.csr_rxlist = csr_base + 3 * csr_size; sc->tulip_csrs.csr_txlist = csr_base + 4 * csr_size; sc->tulip_csrs.csr_status = csr_base + 5 * csr_size; sc->tulip_csrs.csr_command = csr_base + 6 * csr_size; sc->tulip_csrs.csr_intr = csr_base + 7 * csr_size; sc->tulip_csrs.csr_missed_frames = csr_base + 8 * csr_size; sc->tulip_csrs.csr_9 = csr_base + 9 * csr_size; sc->tulip_csrs.csr_10 = csr_base + 10 * csr_size; sc->tulip_csrs.csr_11 = csr_base + 11 * csr_size; sc->tulip_csrs.csr_12 = csr_base + 12 * csr_size; sc->tulip_csrs.csr_13 = csr_base + 13 * csr_size; sc->tulip_csrs.csr_14 = csr_base + 14 * csr_size; sc->tulip_csrs.csr_15 = csr_base + 15 * csr_size; #if defined(TULIP_EISA) sc->tulip_csrs.csr_enetrom = csr_base + DE425_ENETROM_OFFSET; #endif } static void tulip_initring( tulip_softc_t * const sc, tulip_ringinfo_t * const ri, tulip_desc_t *descs, int ndescs) { ri->ri_max = ndescs; ri->ri_first = descs; ri->ri_last = ri->ri_first + ri->ri_max; bzero((caddr_t) ri->ri_first, sizeof(ri->ri_first[0]) * ri->ri_max); ri->ri_last[-1].d_flag = TULIP_DFLAG_ENDRING; } /* * This is the PCI configuration support. Since the 21040 is available * on both EISA and PCI boards, one must be careful in how one defines the * 21040 in the config file.
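 *
 * (The offsets defined below are the standard PCI configuration-space
 * registers: CFID at 0x00 holds the vendor/device ID, CBIO and CBMA at
 * 0x10/0x14 are the I/O and memory base address registers, and CFDA at
 * 0x40 is the 21x4x-specific driver area used later for the power
 * management SLEEP/SNOOZE bits.)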
*/ #define PCI_CFID 0x00 /* Configuration ID */ #define PCI_CFCS 0x04 /* Configuration Command/Status */ #define PCI_CFRV 0x08 /* Configuration Revision */ #define PCI_CFLT 0x0c /* Configuration Latency Timer */ #define PCI_CBIO 0x10 /* Configuration Base IO Address */ #define PCI_CBMA 0x14 /* Configuration Base Memory Address */ #define PCI_CFIT 0x3c /* Configuration Interrupt */ #define PCI_CFDA 0x40 /* Configuration Driver Area */ #if defined(TULIP_EISA) static const int tulip_eisa_irqs[4] = { IRQ5, IRQ9, IRQ10, IRQ11 }; #endif #if defined(__FreeBSD__) #define TULIP_PCI_ATTACH_ARGS pcici_t config_id, int unit #define TULIP_SHUTDOWN_ARGS int howto, void * arg #if defined(TULIP_DEVCONF) static void tulip_shutdown(TULIP_SHUTDOWN_ARGS); static int tulip_pci_shutdown( struct kern_devconf * const kdc, int force) { if (kdc->kdc_unit < TULIP_MAX_DEVICES) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(kdc->kdc_unit); if (sc != NULL) tulip_shutdown(0, sc); } (void) dev_detach(kdc); return 0; } #endif static char* tulip_pci_probe( pcici_t config_id, pcidi_t device_id) { if (PCI_VENDORID(device_id) != DEC_VENDORID) return NULL; if (PCI_CHIPID(device_id) == CHIPID_21040) return "Digital 21040 Ethernet"; if (PCI_CHIPID(device_id) == CHIPID_21041) return "Digital 21041 Ethernet"; if (PCI_CHIPID(device_id) == CHIPID_21140) { u_int32_t revinfo = pci_conf_read(config_id, PCI_CFRV) & 0xFF; if (revinfo >= 0x20) return "Digital 21140A Fast Ethernet"; else return "Digital 21140 Fast Ethernet"; } if (PCI_CHIPID(device_id) == CHIPID_21142) { u_int32_t revinfo = pci_conf_read(config_id, PCI_CFRV) & 0xFF; if (revinfo >= 0x20) return "Digital 21143 Fast Ethernet"; else return "Digital 21142 Fast Ethernet"; } return NULL; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); static u_long tulip_pci_count; static struct pci_device dedevice = { "de", tulip_pci_probe, tulip_pci_attach, &tulip_pci_count, #if defined(TULIP_DEVCONF) tulip_pci_shutdown, #endif }; DATA_SET (pcidevice_set, dedevice); #endif /* __FreeBSD__ */ #if defined(__bsdi__) #define TULIP_PCI_ATTACH_ARGS struct device * const parent, struct device * const self, void * const aux #define TULIP_SHUTDOWN_ARGS void *arg static int tulip_pci_match( pci_devaddr_t *pa) { int irq; unsigned id; id = pci_inl(pa, PCI_VENDOR_ID); if (PCI_VENDORID(id) != DEC_VENDORID) return 0; id = PCI_CHIPID(id); if (id != CHIPID_21040 && id != CHIPID_21041 && id != CHIPID_21140 && id != CHIPID_21142) return 0; irq = pci_inl(pa, PCI_I_LINE) & 0xFF; if (irq == 0 || irq >= 16) { printf("de?: invalid IRQ %d; skipping\n", irq); return 0; } return 1; } static int tulip_probe( struct device *parent, struct cfdata *cf, void *aux) { struct isa_attach_args * const ia = (struct isa_attach_args *) aux; unsigned irq, slot; pci_devaddr_t *pa; #if _BSDI_VERSION >= 199401 switch (ia->ia_bustype) { case BUS_PCI: #endif pa = pci_scan(tulip_pci_match); if (pa == NULL) return 0; irq = (1 << (pci_inl(pa, PCI_I_LINE) & 0xFF)); /* Get the base address; assume the BIOS set it up correctly */ #if defined(TULIP_IOMAPPED) ia->ia_maddr = NULL; ia->ia_msize = 0; ia->ia_iobase = pci_inl(pa, PCI_CBIO) & ~7; pci_outl(pa, PCI_CBIO, 0xFFFFFFFF); ia->ia_iosize = ((~pci_inl(pa, PCI_CBIO)) | 7) + 1; pci_outl(pa, PCI_CBIO, (int) ia->ia_iobase); /* Disable memory space access */ pci_outl(pa, PCI_COMMAND, pci_inl(pa, PCI_COMMAND) & ~2); #else ia->ia_maddr = (caddr_t) (pci_inl(pa, PCI_CBMA) & ~7); pci_outl(pa, PCI_CBMA, 0xFFFFFFFF); ia->ia_msize = ((~pci_inl(pa, PCI_CBMA)) | 7) + 1; pci_outl(pa, PCI_CBMA, (int)
ia->ia_maddr); ia->ia_iobase = 0; ia->ia_iosize = 0; /* Disable I/O space access */ pci_outl(pa, PCI_COMMAND, pci_inl(pa, PCI_COMMAND) & ~1); #endif /* TULIP_IOMAPPED */ ia->ia_aux = (void *) pa; #if _BSDI_VERSION >= 199401 break; #if defined(TULIP_EISA) case BUS_EISA: { unsigned tmp; if ((slot = eisa_match(cf, ia)) == 0) return 0; ia->ia_iobase = slot << 12; ia->ia_iosize = EISA_NPORT; eisa_slotalloc(slot); tmp = inb(ia->ia_iobase + DE425_CFG0); irq = tulip_eisa_irqs[(tmp >> 1) & 0x03]; /* * Until BSD/OS likes level interrupts, force * the DE425 into edge-triggered mode. */ if ((tmp & 1) == 0) outb(ia->ia_iobase + DE425_CFG0, tmp | 1); /* * CBIO needs to map to the EISA slot; * enable I/O access and bus mastering. */ outl(ia->ia_iobase + DE425_CBIO, ia->ia_iobase); outl(ia->ia_iobase + DE425_CFCS, 5 | inl(ia->ia_iobase + DE425_CFCS)); ia->ia_aux = NULL; break; } #endif /* TULIP_EISA */ default: return 0; } #endif /* PCI bus masters don't use host DMA channels */ ia->ia_drq = DRQNONE; if (ia->ia_irq != IRQUNK && irq != ia->ia_irq) { printf("de%d: error: desired IRQ of %d does not match device's " "actual IRQ of %d,\n", cf->cf_unit, ffs(ia->ia_irq) - 1, ffs(irq) - 1); return 0; } if (ia->ia_irq == IRQUNK) ia->ia_irq = irq; #ifdef IRQSHARE ia->ia_irq |= IRQSHARE; #endif return 1; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); #if defined(TULIP_EISA) static char *tulip_eisa_ids[] = { "DEC4250", NULL }; #endif struct cfdriver decd = { 0, "de", tulip_probe, tulip_pci_attach, #if _BSDI_VERSION >= 199401 DV_IFNET, #endif sizeof(tulip_softc_t), #if defined(TULIP_EISA) tulip_eisa_ids #endif }; #endif /* __bsdi__ */ #if defined(__NetBSD__) #define TULIP_PCI_ATTACH_ARGS struct device * const parent, struct device * const self, void * const aux #define TULIP_SHUTDOWN_ARGS void *arg static int tulip_pci_probe( struct device *parent, #ifdef __BROKEN_INDIRECT_CONFIG void *match, #else struct cfdata *match, #endif void *aux) { struct pci_attach_args *pa = (struct pci_attach_args *) aux; if (PCI_VENDORID(pa->pa_id) != DEC_VENDORID) return 0; if (PCI_CHIPID(pa->pa_id) == CHIPID_21040 || PCI_CHIPID(pa->pa_id) == CHIPID_21041 || PCI_CHIPID(pa->pa_id) == CHIPID_21140 || PCI_CHIPID(pa->pa_id) == CHIPID_21142) return 1; return 0; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); struct cfattach de_ca = { sizeof(tulip_softc_t), tulip_pci_probe, tulip_pci_attach }; struct cfdriver de_cd = { 0, "de", DV_IFNET }; #endif /* __NetBSD__ */ static void tulip_shutdown( TULIP_SHUTDOWN_ARGS) { tulip_softc_t * const sc = arg; TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyway) */ } static void tulip_pci_attach( TULIP_PCI_ATTACH_ARGS) { #if defined(__FreeBSD__) tulip_softc_t *sc; #define PCI_CONF_WRITE(r, v) pci_conf_write(config_id, (r), (v)) #define PCI_CONF_READ(r) pci_conf_read(config_id, (r)) #if __FreeBSD__ >= 3 #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = (config_id->bus), /* XXX */ \ (sc)->tulip_pci_devno = (config_id->slot))) /* XXX */ #else #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = ((config_id.cfg1 >> 16) & 0xFF), /* XXX */ \ (sc)->tulip_pci_devno = ((config_id.cfg1 >> 11) & 0x1F))) /* XXX */ #endif #endif #if defined(__bsdi__) tulip_softc_t * const sc = (tulip_softc_t *) self; struct isa_attach_args * const ia = (struct isa_attach_args *) aux; pci_devaddr_t *pa = (pci_devaddr_t *) ia->ia_aux; const int unit = sc->tulip_dev.dv_unit;
#define PCI_CONF_WRITE(r, v) pci_outl(pa, (r), (v)) #define PCI_CONF_READ(r) pci_inl(pa, (r)) #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = pa->d_bus, \ (sc)->tulip_pci_devno = pa->d_agent)) #endif #if defined(__NetBSD__) tulip_softc_t * const sc = (tulip_softc_t *) self; struct pci_attach_args * const pa = (struct pci_attach_args *) aux; const int unit = sc->tulip_dev.dv_unit; #define PCI_CONF_WRITE(r, v) pci_conf_write(pa->pa_pc, pa->pa_tag, (r), (v)) #define PCI_CONF_READ(r) pci_conf_read(pa->pa_pc, pa->pa_tag, (r)) #define PCI_GETBUSDEVINFO(sc) do { \ (sc)->tulip_pci_busno = parent; \ (sc)->tulip_pci_devno = pa->pa_device; \ } while (0) #endif /* __NetBSD__ */ #if defined(__alpha__) tulip_media_t media = TULIP_MEDIA_UNKNOWN; #endif int retval, idx; u_int32_t revinfo, cfdainfo, id; #if !defined(TULIP_IOMAPPED) && defined(__FreeBSD__) vm_offset_t pa_csrs; #endif unsigned csroffset = TULIP_PCI_CSROFFSET; unsigned csrsize = TULIP_PCI_CSRSIZE; tulip_csrptr_t csr_base; tulip_chipid_t chipid = TULIP_CHIPID_UNKNOWN; if (unit >= TULIP_MAX_DEVICES) { #ifdef __FreeBSD__ printf("de%d", unit); #endif printf(": not configured; limit of %d reached or exceeded\n", TULIP_MAX_DEVICES); return; } #if defined(__bsdi__) if (pa != NULL) { revinfo = pci_inl(pa, PCI_CFRV) & 0xFF; id = pci_inl(pa, PCI_CFID); cfdainfo = pci_inl(pa, PCI_CFDA); #if defined(TULIP_EISA) } else { revinfo = inl(ia->ia_iobase + DE425_CFRV) & 0xFF; csroffset = TULIP_EISA_CSROFFSET; csrsize = TULIP_EISA_CSRSIZE; chipid = TULIP_DE425; cfdainfo = 0; #endif /* TULIP_EISA */ } #else /* __bsdi__ */ revinfo = PCI_CONF_READ(PCI_CFRV) & 0xFF; id = PCI_CONF_READ(PCI_CFID); cfdainfo = PCI_CONF_READ(PCI_CFDA); #endif /* __bsdi__ */ if (PCI_VENDORID(id) == DEC_VENDORID) { if (PCI_CHIPID(id) == CHIPID_21040) chipid = TULIP_21040; else if (PCI_CHIPID(id) == CHIPID_21140) { chipid = (revinfo >= 0x20) ? TULIP_21140A : TULIP_21140; } else if (PCI_CHIPID(id) == CHIPID_21142) { chipid = (revinfo >= 0x20) ? 
	    TULIP_21143 : TULIP_21142;
	} else if (PCI_CHIPID(id) == CHIPID_21041)
	    chipid = TULIP_21041;
    }
    if (chipid == TULIP_CHIPID_UNKNOWN)
	return;
    if ((chipid == TULIP_21040 || chipid == TULIP_DE425) && revinfo < 0x20) {
#ifdef __FreeBSD__
	printf("de%d", unit);
#endif
	printf(": not configured; 21040 pass 2.0 required (%d.%d found)\n",
	       revinfo >> 4, revinfo & 0x0f);
	return;
    } else if (chipid == TULIP_21140 && revinfo < 0x11) {
#ifndef __FreeBSD__
	printf("\n");
#endif
	printf("de%d: not configured; 21140 pass 1.1 required (%d.%d found)\n",
	       unit, revinfo >> 4, revinfo & 0x0f);
	return;
    }
#if defined(__FreeBSD__)
    sc = (tulip_softc_t *) malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT);
    if (sc == NULL)
	return;
    bzero(sc, sizeof(*sc));			/* Zero out the softc */
    sc->tulip_rxdescs = (tulip_desc_t *) malloc(sizeof(tulip_desc_t) * TULIP_RXDESCS, M_DEVBUF, M_NOWAIT);
    sc->tulip_txdescs = (tulip_desc_t *) malloc(sizeof(tulip_desc_t) * TULIP_TXDESCS, M_DEVBUF, M_NOWAIT);
    if (sc->tulip_rxdescs == NULL || sc->tulip_txdescs == NULL) {
	if (sc->tulip_rxdescs)
	    free((caddr_t) sc->tulip_rxdescs, M_DEVBUF);
	if (sc->tulip_txdescs)
	    free((caddr_t) sc->tulip_txdescs, M_DEVBUF);
	free((caddr_t) sc, M_DEVBUF);
	return;
    }
#endif

    PCI_GETBUSDEVINFO(sc);
    sc->tulip_chipid = chipid;
    sc->tulip_flags |= TULIP_DEVICEPROBE;
    if (chipid == TULIP_21140 || chipid == TULIP_21140A)
	sc->tulip_features |= TULIP_HAVE_GPR|TULIP_HAVE_STOREFWD;
    if (chipid == TULIP_21140A && revinfo <= 0x22)
	sc->tulip_features |= TULIP_HAVE_RXBADOVRFLW;
    if (chipid == TULIP_21140)
	sc->tulip_features |= TULIP_HAVE_BROKEN_HASH;
    if (chipid != TULIP_21040 && chipid != TULIP_DE425 && chipid != TULIP_21140)
	sc->tulip_features |= TULIP_HAVE_POWERMGMT;
    if (chipid == TULIP_21041 || chipid == TULIP_21142 || chipid == TULIP_21143) {
	sc->tulip_features |= TULIP_HAVE_DUALSENSE;
	/* sc->tulip_revinfo is not filled in until later; test the probed value */
	if (chipid != TULIP_21041 || revinfo >= 0x20)
	    sc->tulip_features |= TULIP_HAVE_SIANWAY;
	if (chipid != TULIP_21041)
	    sc->tulip_features |= TULIP_HAVE_SIAGP|TULIP_HAVE_RXBADOVRFLW|TULIP_HAVE_STOREFWD;
	if (chipid != TULIP_21041 && revinfo >= 0x20)
	    sc->tulip_features |= TULIP_HAVE_SIA100;
    }
    if (sc->tulip_features & TULIP_HAVE_POWERMGMT
	    && (cfdainfo & (TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE))) {
	cfdainfo &= ~(TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE);
	PCI_CONF_WRITE(PCI_CFDA, cfdainfo);
	DELAY(11*1000);
    }
#if defined(__alpha__) && defined(__NetBSD__)
    /*
     * The Alpha SRM console encodes a console set media in the driver
     * part of the CFDA register.  Note that the Multia presents a
     * problem in that its BNC mode is really EXTSIA.  So in that case
     * force a probe.
     */
    switch ((cfdainfo >> 8) & 0xff) {
    case 1: media = chipid > TULIP_DE425 ?
	TULIP_MEDIA_AUI : TULIP_MEDIA_AUIBNC; break;
    case 2: media = chipid > TULIP_DE425 ?
TULIP_MEDIA_BNC : TULIP_MEDIA_UNKNOWN; break; case 3: media = TULIP_MEDIA_10BASET; break; case 4: media = TULIP_MEDIA_10BASET_FD; break; case 5: media = TULIP_MEDIA_100BASETX; break; case 6: media = TULIP_MEDIA_100BASETX_FD; break; } #endif #if defined(__NetBSD__) bcopy(self->dv_xname, sc->tulip_if.if_xname, IFNAMSIZ); sc->tulip_if.if_softc = sc; sc->tulip_pc = pa->pa_pc; #else sc->tulip_unit = unit; sc->tulip_name = "de"; #endif sc->tulip_revinfo = revinfo; #if defined(__FreeBSD__) #if BSD >= 199506 sc->tulip_if.if_softc = sc; #endif #if defined(TULIP_IOMAPPED) retval = pci_map_port(config_id, PCI_CBIO, &csr_base); #else retval = pci_map_mem(config_id, PCI_CBMA, (vm_offset_t *) &csr_base, &pa_csrs); #endif if (!retval) { free((caddr_t) sc->tulip_rxdescs, M_DEVBUF); free((caddr_t) sc->tulip_txdescs, M_DEVBUF); free((caddr_t) sc, M_DEVBUF); return; } tulips[unit] = sc; #endif /* __FreeBSD__ */ #if defined(__bsdi__) sc->tulip_pf = printf; #if defined(TULIP_IOMAPPED) csr_base = ia->ia_iobase; #else csr_base = (vm_offset_t) mapphys((vm_offset_t) ia->ia_maddr, ia->ia_msize); #endif #endif /* __bsdi__ */ #if defined(__NetBSD__) csr_base = 0; { bus_space_tag_t iot, memt; bus_space_handle_t ioh, memh; int ioh_valid, memh_valid; ioh_valid = (pci_mapreg_map(pa, PCI_CBIO, PCI_MAPREG_TYPE_IO, 0, &iot, &ioh, NULL, NULL) == 0); memh_valid = (pci_mapreg_map(pa, PCI_CBMA, PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT, 0, &memt, &memh, NULL, NULL) == 0); if (memh_valid) { sc->tulip_bustag = memt; sc->tulip_bushandle = memh; } else if (ioh_valid) { sc->tulip_bustag = iot; sc->tulip_bushandle = ioh; } else { printf(": unable to map device registers\n"); return; } } #endif /* __NetBSD__ */ tulip_initcsrs(sc, csr_base + csroffset, csrsize); tulip_initring(sc, &sc->tulip_rxinfo, sc->tulip_rxdescs, TULIP_RXDESCS); tulip_initring(sc, &sc->tulip_txinfo, sc->tulip_txdescs, TULIP_TXDESCS); /* * Make sure there won't be any interrupts or such... 
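 *
 * (the software reset that follows is what guarantees this: writing
 * TULIP_BUSMODE_SWRESET into the busmode CSR stops the chip's receive
 * and transmit engines before the freshly initialized rings are
 * handed to it.  The same idiom appears in tulip_shutdown():
 *
 *	TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET);
 *	DELAY(100);
 *
 * per the existing comments the reset takes 50 PCI cycles, roughly
 * two microseconds at 33 MHz, so the DELAY only adds margin.)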
*/ TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(100); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ if ((retval = tulip_read_macaddr(sc)) < 0) { #if defined(__FreeBSD__) printf(TULIP_PRINTF_FMT, TULIP_PRINTF_ARGS); #endif printf(": can't read ENET ROM (why=%d) (", retval); for (idx = 0; idx < 32; idx++) printf("%02x", sc->tulip_rombuf[idx]); printf("\n"); printf(TULIP_PRINTF_FMT ": %s%s pass %d.%d\n", TULIP_PRINTF_ARGS, sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); printf(TULIP_PRINTF_FMT ": address unknown\n", TULIP_PRINTF_ARGS); } else { tulip_spl_t s; tulip_intrfunc_t (*intr_rtn)(void *) = tulip_intr_normal; if (sc->tulip_features & TULIP_HAVE_SHAREDINTR) intr_rtn = tulip_intr_shared; #if defined(__NetBSD__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { pci_intr_handle_t intrhandle; const char *intrstr; if (pci_intr_map(pa->pa_pc, pa->pa_intrtag, pa->pa_intrpin, pa->pa_intrline, &intrhandle)) { printf(": couldn't map interrupt\n"); return; } intrstr = pci_intr_string(pa->pa_pc, intrhandle); sc->tulip_ih = pci_intr_establish(pa->pa_pc, intrhandle, IPL_NET, intr_rtn, sc); if (sc->tulip_ih == NULL) printf(": couldn't establish interrupt"); if (intrstr != NULL) printf(" at %s", intrstr); printf("\n"); if (sc->tulip_ih == NULL) return; } sc->tulip_ats = shutdownhook_establish(tulip_shutdown, sc); if (sc->tulip_ats == NULL) printf("\n%s: warning: couldn't establish shutdown hook\n", sc->tulip_xname); #endif #if defined(__FreeBSD__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { if (!pci_map_int (config_id, intr_rtn, (void*) sc, &net_imask)) { printf(TULIP_PRINTF_FMT ": couldn't map interrupt\n", TULIP_PRINTF_ARGS); return; } } #if !defined(TULIP_DEVCONF) at_shutdown(tulip_shutdown, sc, SHUTDOWN_POST_SYNC); #endif #endif #if defined(__bsdi__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { isa_establish(&sc->tulip_id, &sc->tulip_dev); sc->tulip_ih.ih_fun = intr_rtn; sc->tulip_ih.ih_arg = (void *) sc; intr_establish(ia->ia_irq, &sc->tulip_ih, DV_NET); } sc->tulip_ats.func = tulip_shutdown; sc->tulip_ats.arg = (void *) sc; atshutdown(&sc->tulip_ats, ATSH_ADD); #endif #if defined(TULIP_USE_SOFTINTR) if (sc->tulip_unit > tulip_softintr_max_unit) tulip_softintr_max_unit = sc->tulip_unit; #endif s = TULIP_RAISESPL(); tulip_reset(sc); tulip_attach(sc); #if defined(__alpha__) && defined(__NetBSD__) if (media != TULIP_MEDIA_UNKNOWN) tulip_linkup(sc, media); #endif TULIP_RESTORESPL(s); } } Index: head/sys/sys/bio.h =================================================================== --- head/sys/sys/bio.h (revision 34265) +++ head/sys/sys/bio.h (revision 34266) @@ -1,309 +1,329 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.45 1998/01/22 17:30:10 dyson Exp $ + * $Id: buf.h,v 1.46 1998/03/07 21:36:20 dyson Exp $ */ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ #include #define NOLIST ((struct buf *)0x87654321) struct buf; +struct mount; +/* + * To avoid including + */ +LIST_HEAD(workhead, worklist); +/* + * These are currently used only by the soft dependency code, hence + * are stored once in a global variable. If other subsystems wanted + * to use these hooks, a pointer to a set of bio_ops could be added + * to each buffer. + */ +extern struct bio_ops { + void (*io_start) __P((struct buf *)); + void (*io_complete) __P((struct buf *)); + void (*io_deallocate) __P((struct buf *)); + int (*io_sync) __P((struct mount *)); +} bioops; + struct iodone_chain { long ic_prev_flags; void (*ic_prev_iodone) __P((struct buf *)); void *ic_prev_iodone_chain; struct { long ia_long; void *ia_ptr; } ic_args[5]; }; /* * The buffer header describes an I/O operation in the kernel. */ struct buf { LIST_ENTRY(buf) b_hash; /* Hash chain. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ TAILQ_ENTRY(buf) b_act; /* Device driver queue when active. *new* */ struct proc *b_proc; /* Associated proc; NULL if kernel. */ long b_flags; /* B_* flags. */ unsigned short b_qindex; /* buffer queue index */ unsigned char b_usecount; /* buffer use count */ int b_error; /* Errno value. */ long b_bufsize; /* Allocated buffer size. */ long b_bcount; /* Valid bytes in buffer. */ long b_resid; /* Remaining I/O. */ dev_t b_dev; /* Device associated with buffer. */ caddr_t b_data; /* Memory, superblocks, indirect etc. */ caddr_t b_kvabase; /* base kva for buffer */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ daddr_t b_blkno; /* Underlying physical block number. */ /* Function to call upon completion. */ void (*b_iodone) __P((struct buf *)); /* For nested b_iodone's. */ struct iodone_chain *b_iodone_chain; struct vnode *b_vp; /* Device vnode. */ int b_dirtyoff; /* Offset in buffer of dirty region. 
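 *
 * (An aside on the bio_ops table introduced above: the buffer cache
 * fires these hooks at I/O start, completion, deallocation and
 * mount-sync time, and only the soft dependency code is expected to
 * fill them in.  A minimal sketch of a client, with hypothetical
 * myfs_* names that are not part of this commit:
 *
 *	static void myfs_io_start(struct buf *bp)      { (queue work) }
 *	static void myfs_io_complete(struct buf *bp)   { (resolve deps) }
 *	static void myfs_io_deallocate(struct buf *bp) { (drop b_dep) }
 *	static int  myfs_io_sync(struct mount *mp)     { return (0); }
 *
 *	void
 *	myfs_init(void)
 *	{
 *		bioops.io_start      = myfs_io_start;
 *		bioops.io_complete   = myfs_io_complete;
 *		bioops.io_deallocate = myfs_io_deallocate;
 *		bioops.io_sync       = myfs_io_sync;
 *	}
 *
 * call sites must check each pointer for NULL before dispatching,
 * since nothing registers on kernels built without soft updates.)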
*/ int b_dirtyend; /* Offset of end of dirty region. */ int b_generation; /* Generation count of buffer */ struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. */ int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ daddr_t b_pblkno; /* physical block number */ void *b_saveaddr; /* Original b_addr for physio. */ caddr_t b_savekva; /* saved kva for transfer while bouncing */ void *b_driver1; /* for private use by the driver */ void *b_driver2; /* for private use by the driver */ void *b_spc; union cluster_info { TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; + struct workhead b_dep; /* List of filesystem dependencies. */ }; /* * These flags are kept in b_flags. */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ #define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_BAD 0x00000008 /* Bad block revectoring in progress. */ #define B_BUSY 0x00000010 /* I/O in progress. */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_CALL 0x00000040 /* Call b_iodone from biodone. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ #define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_ERROR 0x00000800 /* I/O error occurred. */ #define B_GATHERED 0x00001000 /* LFS: already in a segment. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ #define B_MALLOC 0x00010000 /* malloced b_data */ #define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_TAPE 0x00200000 /* Magnetic tape I/O. */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_WRITEINPROG 0x01000000 /* Write in progress. */ #define B_XXX 0x02000000 /* Debugging flag. 
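 *
 * (b_flags is manipulated with ordinary bit operations.  The standard
 * completion check, sketched against the routines declared later in
 * this header:
 *
 *	if (bp->b_flags & B_ERROR) {
 *		int error = bp->b_error ? bp->b_error : EIO;
 *		brelse(bp);
 *		return (error);
 *	}
 *
 * the fallback to EIO covers drivers that set B_ERROR without
 * filling in an errno value.)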
*/ #define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */ #define B_ORDERED 0x08000000 /* Must guarantee I/O ordering */ #define B_RAM 0x10000000 /* Read ahead mark (flag) */ #define B_VMIO 0x20000000 /* VMIO flag */ #define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */ #define B_BOUNCE 0x80000000 /* bounce buffer flag */ typedef struct buf_queue_head { TAILQ_HEAD(, buf) queue; struct buf *insert_point; struct buf *switch_point; } buf_queue_head, *buf_queue_head_t; static __inline void bufq_init __P((buf_queue_head *head)); static __inline void bufq_insert_tail __P((buf_queue_head *head, struct buf *bp)); static __inline void bufq_remove __P((buf_queue_head *head, struct buf *bp)); static __inline struct buf *bufq_first __P((buf_queue_head *head)); static __inline void bufq_init(buf_queue_head *head) { TAILQ_INIT(&head->queue); head->insert_point = NULL; head->switch_point = NULL; } static __inline void bufq_insert_tail(buf_queue_head *head, struct buf *bp) { if ((bp->b_flags & B_ORDERED) != 0) { head->insert_point = bp; head->switch_point = NULL; } TAILQ_INSERT_TAIL(&head->queue, bp, b_act); } static __inline void bufq_remove(buf_queue_head *head, struct buf *bp) { if (bp == TAILQ_FIRST(&head->queue)) { if (bp == head->insert_point) head->insert_point = NULL; if (TAILQ_NEXT(bp, b_act) == head->switch_point) head->switch_point = NULL; } else { if (bp == head->insert_point) { /* * Not 100% correct (we really want the * previous bp), but it will ensure queue * ordering and is less expensive than * using a CIRCLEQ. */ head->insert_point = TAILQ_NEXT(bp, b_act); } if (bp == head->switch_point) { head->switch_point = TAILQ_NEXT(bp, b_act); } } TAILQ_REMOVE(&head->queue, bp, b_act); } static __inline struct buf * bufq_first(buf_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } /* * number of buffer hash entries */ #define BUFHSZ 512 /* * buffer hash table calculation, originally by David Greenman */ #define BUFHASH(vnp, bn) \ (&bufhashtbl[(((unsigned long)(vnp) >> 7)+(int)(bn)) % BUFHSZ]) /* * Definitions for the buffer free lists. */ #define BUFFER_QUEUES 6 /* number of free buffer queues */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_LOCKED 1 /* locked buffers */ #define QUEUE_LRU 2 /* useful buffers */ #define QUEUE_VMIO 3 /* VMIO buffers */ #define QUEUE_AGE 4 /* not-useful buffers */ #define QUEUE_EMPTY 5 /* empty buffer headers*/ /* * Zero out the buffer's data area. */ #define clrbuf(bp) { \ bzero((bp)->b_data, (u_int)(bp)->b_bcount); \ (bp)->b_resid = 0; \ } /* Flags to low-level allocation routines. */ #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */ #ifdef KERNEL extern int nbuf; /* The number of buffer headers */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ extern int bufpages; /* Number of memory pages in the buffer pool. */ extern struct buf *swbuf; /* Swap I/O buffer headers. */ extern int nswbuf; /* Number of swap I/O buffer headers. 
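 *
 * (The bufq_* inlines above give drivers a small FIFO whose
 * insert_point tracks B_ORDERED barriers; the disk sort code uses it
 * to keep ordered writes from being reordered around a barrier.  A
 * hypothetical strategy routine, mydev_* names assumed:
 *
 *	static buf_queue_head mydev_bufq;	(bufq_init'ed at attach)
 *
 *	void
 *	mydev_strategy(struct buf *bp)
 *	{
 *		bufq_insert_tail(&mydev_bufq, bp);
 *		while ((bp = bufq_first(&mydev_bufq)) != NULL) {
 *			bufq_remove(&mydev_bufq, bp);
 *			(program the controller for bp)
 *		}
 *	}
 *
 * a real driver would drain the queue from its interrupt handler
 * rather than synchronously, but the queue calls are the same.)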
*/ extern int needsbuffer, numdirtybuffers; extern TAILQ_HEAD(swqueue, buf) bswlist; extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; void bufinit __P((void)); void bremfree __P((struct buf *)); int bread __P((struct vnode *, daddr_t, int, struct ucred *, struct buf **)); int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, struct ucred *, struct buf **)); int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); +void bdirty __P((struct buf *)); int bowrite __P((struct buf *)); void brelse __P((struct buf *)); void bqrelse __P((struct buf *)); int vfs_bio_awrite __P((struct buf *)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); struct buf *gbincore __P((struct vnode *, daddr_t)); int inmem __P((struct vnode *, daddr_t)); struct buf *getblk __P((struct vnode *, daddr_t, int, int, int)); struct buf *geteblk __P((int)); int allocbuf __P((struct buf *, int)); int biowait __P((struct buf *)); void biodone __P((struct buf *)); void cluster_callback __P((struct buf *)); int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long, struct ucred *, long, int, struct buf **)); int cluster_wbuild __P((struct vnode *, long, daddr_t, int)); void cluster_write __P((struct buf *, u_quad_t)); int physio __P((void (*)(struct buf *), struct buf *, dev_t, int, u_int (*)(struct buf *), struct uio *)); u_int minphys __P((struct buf *)); void vfs_bio_clrbuf __P((struct buf *)); void vfs_busy_pages __P((struct buf *, int clear_modify)); void vfs_unbusy_pages __P((struct buf *)); void vwakeup __P((struct buf *)); void vmapbuf __P((struct buf *)); void vunmapbuf __P((struct buf *)); void relpbuf __P((struct buf *)); void brelvp __P((struct buf *)); void bgetvp __P((struct vnode *, struct buf *)); void pbgetvp __P((struct vnode *, struct buf *)); void pbrelvp __P((struct buf *)); void reassignbuf __P((struct buf *, struct vnode *)); struct buf *trypbuf __P((void)); void vm_bounce_alloc __P((struct buf *)); void vm_bounce_free __P((struct buf *)); vm_offset_t vm_bounce_kva_alloc __P((int)); void vm_bounce_kva_alloc_free __P((vm_offset_t, int)); void vfs_bio_need_satisfy __P((void)); #endif /* KERNEL */ #endif /* !_SYS_BUF_H_ */ Index: head/sys/sys/buf.h =================================================================== --- head/sys/sys/buf.h (revision 34265) +++ head/sys/sys/buf.h (revision 34266) @@ -1,309 +1,329 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.45 1998/01/22 17:30:10 dyson Exp $ + * $Id: buf.h,v 1.46 1998/03/07 21:36:20 dyson Exp $ */ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ #include #define NOLIST ((struct buf *)0x87654321) struct buf; +struct mount; +/* + * To avoid including + */ +LIST_HEAD(workhead, worklist); +/* + * These are currently used only by the soft dependency code, hence + * are stored once in a global variable. If other subsystems wanted + * to use these hooks, a pointer to a set of bio_ops could be added + * to each buffer. + */ +extern struct bio_ops { + void (*io_start) __P((struct buf *)); + void (*io_complete) __P((struct buf *)); + void (*io_deallocate) __P((struct buf *)); + int (*io_sync) __P((struct mount *)); +} bioops; + struct iodone_chain { long ic_prev_flags; void (*ic_prev_iodone) __P((struct buf *)); void *ic_prev_iodone_chain; struct { long ia_long; void *ia_ptr; } ic_args[5]; }; /* * The buffer header describes an I/O operation in the kernel. */ struct buf { LIST_ENTRY(buf) b_hash; /* Hash chain. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ TAILQ_ENTRY(buf) b_act; /* Device driver queue when active. *new* */ struct proc *b_proc; /* Associated proc; NULL if kernel. */ long b_flags; /* B_* flags. */ unsigned short b_qindex; /* buffer queue index */ unsigned char b_usecount; /* buffer use count */ int b_error; /* Errno value. */ long b_bufsize; /* Allocated buffer size. */ long b_bcount; /* Valid bytes in buffer. */ long b_resid; /* Remaining I/O. */ dev_t b_dev; /* Device associated with buffer. */ caddr_t b_data; /* Memory, superblocks, indirect etc. */ caddr_t b_kvabase; /* base kva for buffer */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ daddr_t b_blkno; /* Underlying physical block number. */ /* Function to call upon completion. */ void (*b_iodone) __P((struct buf *)); /* For nested b_iodone's. */ struct iodone_chain *b_iodone_chain; struct vnode *b_vp; /* Device vnode. */ int b_dirtyoff; /* Offset in buffer of dirty region. */ int b_dirtyend; /* Offset of end of dirty region. */ int b_generation; /* Generation count of buffer */ struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. 
*/ int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ daddr_t b_pblkno; /* physical block number */ void *b_saveaddr; /* Original b_addr for physio. */ caddr_t b_savekva; /* saved kva for transfer while bouncing */ void *b_driver1; /* for private use by the driver */ void *b_driver2; /* for private use by the driver */ void *b_spc; union cluster_info { TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; + struct workhead b_dep; /* List of filesystem dependencies. */ }; /* * These flags are kept in b_flags. */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ #define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_BAD 0x00000008 /* Bad block revectoring in progress. */ #define B_BUSY 0x00000010 /* I/O in progress. */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_CALL 0x00000040 /* Call b_iodone from biodone. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ #define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_ERROR 0x00000800 /* I/O error occurred. */ #define B_GATHERED 0x00001000 /* LFS: already in a segment. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ #define B_MALLOC 0x00010000 /* malloced b_data */ #define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_TAPE 0x00200000 /* Magnetic tape I/O. */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_WRITEINPROG 0x01000000 /* Write in progress. */ #define B_XXX 0x02000000 /* Debugging flag. 
*/ #define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */ #define B_ORDERED 0x08000000 /* Must guarantee I/O ordering */ #define B_RAM 0x10000000 /* Read ahead mark (flag) */ #define B_VMIO 0x20000000 /* VMIO flag */ #define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */ #define B_BOUNCE 0x80000000 /* bounce buffer flag */ typedef struct buf_queue_head { TAILQ_HEAD(, buf) queue; struct buf *insert_point; struct buf *switch_point; } buf_queue_head, *buf_queue_head_t; static __inline void bufq_init __P((buf_queue_head *head)); static __inline void bufq_insert_tail __P((buf_queue_head *head, struct buf *bp)); static __inline void bufq_remove __P((buf_queue_head *head, struct buf *bp)); static __inline struct buf *bufq_first __P((buf_queue_head *head)); static __inline void bufq_init(buf_queue_head *head) { TAILQ_INIT(&head->queue); head->insert_point = NULL; head->switch_point = NULL; } static __inline void bufq_insert_tail(buf_queue_head *head, struct buf *bp) { if ((bp->b_flags & B_ORDERED) != 0) { head->insert_point = bp; head->switch_point = NULL; } TAILQ_INSERT_TAIL(&head->queue, bp, b_act); } static __inline void bufq_remove(buf_queue_head *head, struct buf *bp) { if (bp == TAILQ_FIRST(&head->queue)) { if (bp == head->insert_point) head->insert_point = NULL; if (TAILQ_NEXT(bp, b_act) == head->switch_point) head->switch_point = NULL; } else { if (bp == head->insert_point) { /* * Not 100% correct (we really want the * previous bp), but it will ensure queue * ordering and is less expensive than * using a CIRCLEQ. */ head->insert_point = TAILQ_NEXT(bp, b_act); } if (bp == head->switch_point) { head->switch_point = TAILQ_NEXT(bp, b_act); } } TAILQ_REMOVE(&head->queue, bp, b_act); } static __inline struct buf * bufq_first(buf_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } /* * number of buffer hash entries */ #define BUFHSZ 512 /* * buffer hash table calculation, originally by David Greenman */ #define BUFHASH(vnp, bn) \ (&bufhashtbl[(((unsigned long)(vnp) >> 7)+(int)(bn)) % BUFHSZ]) /* * Definitions for the buffer free lists. */ #define BUFFER_QUEUES 6 /* number of free buffer queues */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_LOCKED 1 /* locked buffers */ #define QUEUE_LRU 2 /* useful buffers */ #define QUEUE_VMIO 3 /* VMIO buffers */ #define QUEUE_AGE 4 /* not-useful buffers */ #define QUEUE_EMPTY 5 /* empty buffer headers*/ /* * Zero out the buffer's data area. */ #define clrbuf(bp) { \ bzero((bp)->b_data, (u_int)(bp)->b_bcount); \ (bp)->b_resid = 0; \ } /* Flags to low-level allocation routines. */ #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */ #ifdef KERNEL extern int nbuf; /* The number of buffer headers */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ extern int bufpages; /* Number of memory pages in the buffer pool. */ extern struct buf *swbuf; /* Swap I/O buffer headers. */ extern int nswbuf; /* Number of swap I/O buffer headers. 
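 *
 * (The declarations just below are the consumer interface to the
 * buffer cache.  The canonical read-modify-write cycle, sketched:
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lbn, size, NOCRED, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	(modify bp->b_data)
 *	bdwrite(bp);		(delayed write: mark dirty and release)
 *
 * bwrite(bp) would instead write synchronously, sleeping in
 * biowait(); with soft updates, the b_dep list added to struct buf
 * above carries whatever ordering constraints the filesystem attached
 * to bp before either write happens.)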
*/ extern int needsbuffer, numdirtybuffers; extern TAILQ_HEAD(swqueue, buf) bswlist; extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; void bufinit __P((void)); void bremfree __P((struct buf *)); int bread __P((struct vnode *, daddr_t, int, struct ucred *, struct buf **)); int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, struct ucred *, struct buf **)); int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); +void bdirty __P((struct buf *)); int bowrite __P((struct buf *)); void brelse __P((struct buf *)); void bqrelse __P((struct buf *)); int vfs_bio_awrite __P((struct buf *)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); struct buf *gbincore __P((struct vnode *, daddr_t)); int inmem __P((struct vnode *, daddr_t)); struct buf *getblk __P((struct vnode *, daddr_t, int, int, int)); struct buf *geteblk __P((int)); int allocbuf __P((struct buf *, int)); int biowait __P((struct buf *)); void biodone __P((struct buf *)); void cluster_callback __P((struct buf *)); int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long, struct ucred *, long, int, struct buf **)); int cluster_wbuild __P((struct vnode *, long, daddr_t, int)); void cluster_write __P((struct buf *, u_quad_t)); int physio __P((void (*)(struct buf *), struct buf *, dev_t, int, u_int (*)(struct buf *), struct uio *)); u_int minphys __P((struct buf *)); void vfs_bio_clrbuf __P((struct buf *)); void vfs_busy_pages __P((struct buf *, int clear_modify)); void vfs_unbusy_pages __P((struct buf *)); void vwakeup __P((struct buf *)); void vmapbuf __P((struct buf *)); void vunmapbuf __P((struct buf *)); void relpbuf __P((struct buf *)); void brelvp __P((struct buf *)); void bgetvp __P((struct vnode *, struct buf *)); void pbgetvp __P((struct vnode *, struct buf *)); void pbrelvp __P((struct buf *)); void reassignbuf __P((struct buf *, struct vnode *)); struct buf *trypbuf __P((void)); void vm_bounce_alloc __P((struct buf *)); void vm_bounce_free __P((struct buf *)); vm_offset_t vm_bounce_kva_alloc __P((int)); void vm_bounce_kva_alloc_free __P((vm_offset_t, int)); void vfs_bio_need_satisfy __P((void)); #endif /* KERNEL */ #endif /* !_SYS_BUF_H_ */ Index: head/sys/sys/malloc.h =================================================================== --- head/sys/sys/malloc.h (revision 34265) +++ head/sys/sys/malloc.h (revision 34266) @@ -1,216 +1,218 @@ /* * Copyright (c) 1987, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)malloc.h 8.5 (Berkeley) 5/3/95 - * $Id: malloc.h,v 1.35 1997/12/05 19:14:36 bde Exp $ + * $Id: malloc.h,v 1.36 1997/12/27 09:42:03 bde Exp $ */ #ifndef _SYS_MALLOC_H_ #define _SYS_MALLOC_H_ +#define splmem splhigh + #define KMEMSTATS /* * flags to malloc */ #define M_WAITOK 0x0000 #define M_NOWAIT 0x0001 #define M_KERNEL 0x0002 #define M_MAGIC 877983977 /* time when first defined :-) */ struct malloc_type { struct malloc_type *ks_next; /* next in list */ long ks_memuse; /* total memory held in bytes */ long ks_limit; /* most that are allowed to exist */ long ks_size; /* sizes of this thing that are allocated */ long ks_inuse; /* # of packets of this type currently in use */ long ks_calls; /* total packets of this type ever allocated */ long ks_maxused; /* maximum number ever used */ u_long ks_magic; /* if it's not magic, don't touch it */ const char *ks_shortdesc; /* short description */ u_short ks_limblocks; /* number of times blocked for hitting limit */ u_short ks_mapblocks; /* number of times blocked for kernel map */ }; #define MALLOC_DEFINE(type, shortdesc, longdesc) \ struct malloc_type type[1] = { \ { NULL, 0, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, 0, 0 } \ }; \ struct __hack #define MALLOC_DECLARE(type) \ extern struct malloc_type type[1]; \ struct __hack #ifdef MALLOC_INSTANTIATE #define MALLOC_MAKE_TYPE(type, shortdesc, longdesc) \ MALLOC_DEFINE(type, shortdesc, longdesc) #else #define MALLOC_MAKE_TYPE(type, shortdesc, longdesc) \ MALLOC_DECLARE(type) #endif MALLOC_MAKE_TYPE(M_CACHE, "namecache", "Dynamically allocated cache entries"); MALLOC_MAKE_TYPE(M_DEVBUF, "devbuf", "device driver memory"); MALLOC_MAKE_TYPE(M_TEMP, "temp", "misc temporary data buffers"); /* * Array of descriptors that describe the contents of each page */ struct kmemusage { short ku_indx; /* bucket index */ union { u_short freecnt;/* for small allocations, free pieces in page */ u_short pagecnt;/* for large allocations, pages alloced */ } ku_un; }; #define ku_freecnt ku_un.freecnt #define ku_pagecnt ku_un.pagecnt /* * Set of buckets for each size of memory block that is retained */ struct kmembuckets { caddr_t kb_next; /* list of free blocks */ caddr_t kb_last; /* last free block */ long kb_total; /* total number of blocks allocated */ long kb_elmpercl; /* # of elements in this sized allocation */ long kb_totalfree; /* # of free elements in this bucket */ long kb_calls; /* total calls to allocate this size */ long kb_highwat; /* high water mark */ long kb_couldfree; /* over high water mark and could free */ }; #ifdef KERNEL #define MINALLOCSIZE (1 << MINBUCKET) #define BUCKETINDX(size) \ ((size) <= (MINALLOCSIZE * 128) \ ? (size) <= (MINALLOCSIZE * 8) \ ? (size) <= (MINALLOCSIZE * 2) \ ? (size) <= (MINALLOCSIZE * 1) \ ? 
(MINBUCKET + 0) \ : (MINBUCKET + 1) \ : (size) <= (MINALLOCSIZE * 4) \ ? (MINBUCKET + 2) \ : (MINBUCKET + 3) \ : (size) <= (MINALLOCSIZE* 32) \ ? (size) <= (MINALLOCSIZE * 16) \ ? (MINBUCKET + 4) \ : (MINBUCKET + 5) \ : (size) <= (MINALLOCSIZE * 64) \ ? (MINBUCKET + 6) \ : (MINBUCKET + 7) \ : (size) <= (MINALLOCSIZE * 2048) \ ? (size) <= (MINALLOCSIZE * 512) \ ? (size) <= (MINALLOCSIZE * 256) \ ? (MINBUCKET + 8) \ : (MINBUCKET + 9) \ : (size) <= (MINALLOCSIZE * 1024) \ ? (MINBUCKET + 10) \ : (MINBUCKET + 11) \ : (size) <= (MINALLOCSIZE * 8192) \ ? (size) <= (MINALLOCSIZE * 4096) \ ? (MINBUCKET + 12) \ : (MINBUCKET + 13) \ : (size) <= (MINALLOCSIZE * 16384) \ ? (MINBUCKET + 14) \ : (MINBUCKET + 15)) /* * Turn virtual addresses into kmem map indices */ #define kmemxtob(alloc) (kmembase + (alloc) * PAGE_SIZE) #define btokmemx(addr) (((caddr_t)(addr) - kmembase) / PAGE_SIZE) #define btokup(addr) (&kmemusage[(caddr_t)(addr) - kmembase >> PAGE_SHIFT]) /* * Macro versions for the usual cases of malloc/free */ #if defined(KMEMSTATS) || defined(DIAGNOSTIC) #define MALLOC(space, cast, size, type, flags) \ (space) = (cast)malloc((u_long)(size), type, flags) #define FREE(addr, type) free((addr), type) #else /* do not collect statistics */ #define MALLOC(space, cast, size, type, flags) do { \ register struct kmembuckets *kbp = &bucket[BUCKETINDX(size)]; \ - long s = splimp(); \ + long s = splmem(); \ if (kbp->kb_next == NULL) { \ (space) = (cast)malloc((u_long)(size), type, flags); \ } else { \ (space) = (cast)kbp->kb_next; \ kbp->kb_next = *(caddr_t *)(space); \ } \ splx(s); \ } while (0) #define FREE(addr, type) do { \ register struct kmembuckets *kbp; \ register struct kmemusage *kup = btokup(addr); \ - long s = splimp(); \ + long s = splmem(); \ if (1 << kup->ku_indx > MAXALLOCSAVE) { \ free((addr), type); \ } else { \ kbp = &bucket[kup->ku_indx]; \ if (kbp->kb_next == NULL) \ kbp->kb_next = (caddr_t)(addr); \ else \ *(caddr_t *)(kbp->kb_last) = (caddr_t)(addr); \ *(caddr_t *)(addr) = NULL; \ kbp->kb_last = (caddr_t)(addr); \ } \ splx(s); \ } while (0) extern struct kmemusage *kmemusage; extern char *kmembase; extern struct kmembuckets bucket[]; #endif /* do not collect statistics */ /* * XXX this should be declared in , but that tends to fail * because is included in a header before the source file * has a chance to include to get MALLOC_DECLARE() defined. */ MALLOC_DECLARE(M_IOV); void *contigmalloc __P((unsigned long size, struct malloc_type *type, int flags, unsigned long low, unsigned long high, unsigned long alignment, unsigned long boundary)); void free __P((void *addr, struct malloc_type *type)); void *malloc __P((unsigned long size, struct malloc_type *type, int flags)); #endif /* KERNEL */ #endif /* !_SYS_MALLOC_H_ */ Index: head/sys/sys/mount.h =================================================================== --- head/sys/sys/mount.h (revision 34265) +++ head/sys/sys/mount.h (revision 34266) @@ -1,488 +1,493 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mount.h 8.21 (Berkeley) 5/20/95 - * $Id: mount.h,v 1.56 1998/02/22 01:17:51 jkh Exp $ + * $Id: mount.h,v 1.57 1998/03/01 22:46:36 msmith Exp $ */ #ifndef _SYS_MOUNT_H_ #define _SYS_MOUNT_H_ #ifndef KERNEL #include #endif #include #include #include #include /* XXX for AF_MAX */ typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ /* * File identifier. * These are unique per filesystem on a single machine. */ #define MAXFIDSZ 16 struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_reserved; /* force longword alignment */ char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* * file system statistics */ #define MFSNAMELEN 16 /* length of fs type name, including null */ #define MNAMELEN 90 /* length of buffer for returned name */ struct statfs { long f_spare2; /* placeholder */ long f_bsize; /* fundamental file system block size */ long f_iosize; /* optimal transfer block size */ long f_blocks; /* total data blocks in file system */ long f_bfree; /* free blocks in fs */ long f_bavail; /* free blocks avail to non-superuser */ long f_files; /* total file nodes in file system */ long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* file system id */ uid_t f_owner; /* user that mounted the filesystem */ int f_type; /* type of filesystem (see below) */ int f_flags; /* copy of mount exported flags */ - long f_spare[2]; /* spare for later */ + long f_syncwrites; /* count of sync writes since mount */ + long f_asyncwrites; /* count of async writes since mount */ char f_fstypename[MFSNAMELEN]; /* fs type name */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ char f_mntfromname[MNAMELEN];/* mounted filesystem */ }; /* * File system types (for backwards compat with 4.4Lite.) 
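 *
 * (The statfs counters above are new in this revision; they replace
 * two spare longs, so the structure's size and layout do not change.
 * A hypothetical consumer, using the statfs() call declared near the
 * end of this header:
 *
 *	struct statfs fs;
 *
 *	if (statfs("/usr", &fs) == 0)
 *		printf("%ld sync / %ld async writes since mount\n",
 *		    fs.f_syncwrites, fs.f_asyncwrites);
 *
 * reporting tools can use the two counts to gauge how well soft
 * updates converts synchronous metadata writes into asynchronous
 * ones.)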
 */
#define	MOUNT_NONE	0
#define	MOUNT_UFS	1	/* Fast Filesystem */
#define	MOUNT_NFS	2	/* Sun-compatible Network Filesystem */
#define	MOUNT_MFS	3	/* Memory-based Filesystem */
#define	MOUNT_MSDOS	4	/* MS/DOS Filesystem */
#define	MOUNT_LFS	5	/* Log-based Filesystem */
#define	MOUNT_LOFS	6	/* Loopback Filesystem */
#define	MOUNT_FDESC	7	/* File Descriptor Filesystem */
#define	MOUNT_PORTAL	8	/* Portal Filesystem */
#define	MOUNT_NULL	9	/* Minimal Filesystem Layer */
#define	MOUNT_UMAP	10	/* User/Group Identifier Remapping Filesystem */
#define	MOUNT_KERNFS	11	/* Kernel Information Filesystem */
#define	MOUNT_PROCFS	12	/* /proc Filesystem */
#define	MOUNT_AFS	13	/* Andrew Filesystem */
#define	MOUNT_CD9660	14	/* ISO9660 (aka CDROM) Filesystem */
#define	MOUNT_UNION	15	/* Union (translucent) Filesystem */
#define	MOUNT_DEVFS	16	/* existing device Filesystem */
#define	MOUNT_EXT2FS	17	/* Linux EXT2FS */
#define	MOUNT_TFS	18	/* Netcon Novell filesystem */
#define	MOUNT_CFS	19	/* Coda filesystem */
#define	MOUNT_MAXTYPE	19

#define INITMOUNTNAMES { \
	"none",		/*  0 MOUNT_NONE */ \
	"ufs",		/*  1 MOUNT_UFS */ \
	"nfs",		/*  2 MOUNT_NFS */ \
	"mfs",		/*  3 MOUNT_MFS */ \
	"msdos",	/*  4 MOUNT_MSDOS */ \
	"lfs",		/*  5 MOUNT_LFS */ \
	"lofs",		/*  6 MOUNT_LOFS */ \
	"fdesc",	/*  7 MOUNT_FDESC */ \
	"portal",	/*  8 MOUNT_PORTAL */ \
	"null",		/*  9 MOUNT_NULL */ \
	"umap",		/* 10 MOUNT_UMAP */ \
	"kernfs",	/* 11 MOUNT_KERNFS */ \
	"procfs",	/* 12 MOUNT_PROCFS */ \
	"afs",		/* 13 MOUNT_AFS */ \
	"cd9660",	/* 14 MOUNT_CD9660 */ \
	"union",	/* 15 MOUNT_UNION */ \
	"devfs",	/* 16 MOUNT_DEVFS */ \
	"ext2fs",	/* 17 MOUNT_EXT2FS */ \
	"tfs",		/* 18 MOUNT_TFS */ \
	"cfs",		/* 19 MOUNT_CFS */ \
	0,		/* 20 MOUNT_SPARE */ \
}

/*
 * Structure per mounted file system.  Each mounted file system has an
 * array of operations and an instance record.  The file systems are
 * put on a doubly linked list.
 */
LIST_HEAD(vnodelst, vnode);

struct mount {
	CIRCLEQ_ENTRY(mount) mnt_list;		/* mount list */
	struct vfsops	*mnt_op;		/* operations on fs */
	struct vfsconf	*mnt_vfc;		/* configuration info */
	struct vnode	*mnt_vnodecovered;	/* vnode we mounted on */
+	struct vnode	*mnt_syncer;		/* syncer vnode */
	struct vnodelst	mnt_vnodelist;		/* list of vnodes this mount */
	struct lock	mnt_lock;		/* mount structure lock */
	int		mnt_flag;		/* flags shared with user */
	int		mnt_kern_flag;		/* kernel only flags */
	int		mnt_maxsymlinklen;	/* max size of short symlink */
	struct statfs	mnt_stat;		/* cache of filesystem stats */
	qaddr_t		mnt_data;		/* private data */
	time_t		mnt_time;		/* last time written */
};

/*
 * User specifiable flags.
 */
#define	MNT_RDONLY	0x00000001	/* read only filesystem */
#define	MNT_SYNCHRONOUS	0x00000002	/* file system written synchronously */
#define	MNT_NOEXEC	0x00000004	/* can't exec from filesystem */
#define	MNT_NOSUID	0x00000008	/* don't honor setuid bits on fs */
#define	MNT_NODEV	0x00000010	/* don't interpret special files */
#define	MNT_UNION	0x00000020	/* union with underlying filesystem */
#define	MNT_ASYNC	0x00000040	/* file system written asynchronously */
#define	MNT_SUIDDIR	0x00100000	/* special handling of SUID on dirs */
+#define	MNT_SOFTDEP	0x00200000	/* soft updates being done */
#define	MNT_NOATIME	0x10000000	/* disable update of file access time */
#define	MNT_NOCLUSTERR	0x40000000	/* disable cluster read */
#define	MNT_NOCLUSTERW	0x80000000	/* disable cluster write */

/*
 * NFS export related mount flags.
*/ #define MNT_EXRDONLY 0x00000080 /* exported read only */ #define MNT_EXPORTED 0x00000100 /* file system is exported */ #define MNT_DEFEXPORTED 0x00000200 /* exported to the world */ #define MNT_EXPORTANON 0x00000400 /* use anon uid mapping for everyone */ #define MNT_EXKERB 0x00000800 /* exported with Kerberos uid mapping */ #define MNT_EXPUBLIC 0x20000000 /* public export (WebNFS) */ /* * Flags set by internal operations, * but visible to the user. * XXX some of these are not quite right.. (I've never seen the root flag set) */ #define MNT_LOCAL 0x00001000 /* filesystem is stored locally */ #define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */ #define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */ #define MNT_USER 0x00008000 /* mounted by a user */ /* * Mask of flags that are visible to statfs() * XXX I think that this could now become (~(MNT_CMDFLAGS)) * but the 'mount' program may need changing to handle this. * XXX MNT_EXPUBLIC is presently left out. I don't know why. */ #define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ MNT_NOSUID | MNT_NODEV | MNT_UNION | \ MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ - MNT_NOCLUSTERW | MNT_SUIDDIR/* | MNT_EXPUBLIC */) + MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP \ + /* | MNT_EXPUBLIC */) /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. * XXX These are not STATES and really should be somewhere else. */ #define MNT_UPDATE 0x00010000 /* not a real mount, just an update */ #define MNT_DELEXPORT 0x00020000 /* delete export host lists */ #define MNT_RELOAD 0x00040000 /* reload filesystem data */ #define MNT_FORCE 0x00080000 /* force unmount or readonly change */ #define MNT_CMDFLAGS (MNT_UPDATE|MNT_DELEXPORT|MNT_RELOAD|MNT_FORCE) /* * Internal filesystem control flags stored in mnt_kern_flag. * * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed * past the mount point. This keeps the subtree stable during mounts * and unmounts. */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ /* * Sysctl CTL_VFS definitions. * * Second level identifier specifies which filesystem. Second level * identifier VFS_VFSCONF returns information about all filesystems. * Second level identifier VFS_GENERIC is non-terminal. */ #define VFS_VFSCONF 0 /* get configured filesystems */ #define VFS_GENERIC 0 /* generic filesystem information */ /* * Third level identifiers for VFS_GENERIC are given below; third * level identifiers for specific filesystems are given in their * mount specific header files. */ #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ /* * Flags for various system call interfaces. 
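 *
 * (A sketch of how the command modifiers combine with the visible
 * flags; hypothetical userland fragment, using the mount() and
 * statfs() calls declared at the bottom of this header:
 *
 *	struct statfs fs;
 *
 *	if (mount("ufs", path, MNT_UPDATE, fsargs) == 0 &&
 *	    statfs(path, &fs) == 0 &&
 *	    (fs.f_flags & MNT_SOFTDEP))
 *		printf("%s is running with soft updates\n", path);
 *
 * MNT_SOFTDEP is only reportable this way because it was added to
 * MNT_VISFLAGMASK above; statfs() filters mnt_flag through that
 * mask.)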
* * waitfor flags to vfs_sync() and getfsstat() */ -#define MNT_WAIT 1 -#define MNT_NOWAIT 2 +#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ +#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ #define MNT_LAZY 3 /* push data not written by filesystem syncer */ /* * Generic file handle */ struct fhandle { fsid_t fh_fsid; /* File system id of mount point */ struct fid fh_fid; /* File sys specific id */ }; typedef struct fhandle fhandle_t; /* * Export arguments for local filesystem mount calls. */ struct export_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct ucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ int ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ int ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ }; /* * Structure holding information for a publicly exported filesystem * (WebNFS). Currently the specs allow just for one such filesystem. */ struct nfs_public { int np_valid; /* Do we hold valid information */ fhandle_t np_handle; /* Filehandle for pub fs (internal) */ struct mount *np_mount; /* Mountpoint of exported fs */ char *np_index; /* Index file */ }; /* * Filesystem configuration information. One of these exists for each * type of filesystem supported by the kernel. These are searched at * mount time to identify the requested filesystem. */ struct vfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsconf *vfc_next; /* next in list */ }; struct ovfsconf { void *vfc_vfsops; char vfc_name[32]; int vfc_index; int vfc_refcount; int vfc_flags; }; /* * NB: these flags refer to IMPLEMENTATION properties, not properties of * any actual mounts; i.e., it does not make sense to change the flags. */ #define VFCF_STATIC 0x00010000 /* statically compiled into kernel */ #define VFCF_NETWORK 0x00020000 /* may get data over the network */ #define VFCF_READONLY 0x00040000 /* writes are not implemented */ #define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ #define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode*/ #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MOUNT); #endif extern int maxvfsconf; /* highest defined filesystem type */ extern struct vfsconf *vfsconf; /* head of list of filesystem types */ /* * Operations supported on mounted file system. 
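 *
 * (Each filesystem exports one of the vectors defined below, and the
 * kernel always dispatches through the VFS_* macros rather than
 * calling a filesystem's functions by name.  A sketch with
 * hypothetical myfs_* entry points, registered into the spare type
 * slot 20 from INITMOUNTNAMES:
 *
 *	static struct vfsops myfs_vfsops = {
 *		myfs_mount, myfs_start, myfs_unmount, myfs_root,
 *		myfs_quotactl, myfs_statfs, myfs_sync, myfs_vget,
 *		myfs_vrele, myfs_fhtovp, myfs_vptofh, myfs_init,
 *	};
 *	VFS_SET(myfs_vfsops, myfs, 20, 0);
 *
 * after which VFS_SYNC(mp, MNT_WAIT, cred, p) on one of its mounts
 * lands in myfs_sync().)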
*/ #ifdef __STDC__ struct nameidata; struct mbuf; #endif struct vfsops { int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); int (*vfs_start) __P((struct mount *mp, int flags, struct proc *p)); int (*vfs_unmount) __P((struct mount *mp, int mntflags, struct proc *p)); int (*vfs_root) __P((struct mount *mp, struct vnode **vpp)); int (*vfs_quotactl) __P((struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p)); int (*vfs_statfs) __P((struct mount *mp, struct statfs *sbp, struct proc *p)); int (*vfs_sync) __P((struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)); int (*vfs_vget) __P((struct mount *mp, ino_t ino, struct vnode **vpp)); int (*vfs_vrele) __P((struct mount *mp, struct vnode *vp)); int (*vfs_fhtovp) __P((struct mount *mp, struct fid *fhp, struct sockaddr *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp)); int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp)); int (*vfs_init) __P((struct vfsconf *)); }; #define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ (*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P) #define VFS_START(MP, FLAGS, P) (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P) #define VFS_UNMOUNT(MP, FORCE, P) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE, P) #define VFS_ROOT(MP, VPP) (*(MP)->mnt_op->vfs_root)(MP, VPP) #define VFS_QUOTACTL(MP,C,U,A,P) (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P) #define VFS_STATFS(MP, SBP, P) (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P) #define VFS_SYNC(MP, WAIT, C, P) (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P) #define VFS_VGET(MP, INO, VPP) (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP) #define VFS_VRELE(MP, VP) (*(MP)->mnt_op->vfs_vrele)(MP, VP) #define VFS_FHTOVP(MP, FIDP, NAM, VPP, EXFLG, CRED) \ (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, NAM, VPP, EXFLG, CRED) #define VFS_VPTOFH(VP, FIDP) (*(VP)->v_mount->mnt_op->vfs_vptofh)(VP, FIDP) #ifdef VFS_LKM #include #include #include #include #define VFS_SET(vfsops, fsname, index, flags) \ static struct vfsconf _fs_vfsconf = { \ &vfsops, \ #fsname, \ index, \ 0, \ flags, \ }; \ extern struct linker_set MODVNOPS; \ MOD_VFS(fsname,&MODVNOPS,&_fs_vfsconf); \ extern int \ fsname ## _mod __P((struct lkm_table *, int, int)); \ int \ fsname ## _mod(struct lkm_table *lkmtp, int cmd, int ver) { \ MOD_DISPATCH(fsname, \ lkmtp, cmd, ver, lkm_nullcmd, lkm_nullcmd, lkm_nullcmd); } #else #define VFS_SET(vfsops, fsname, index, flags) \ static struct vfsconf _fs_vfsconf = { \ &vfsops, \ #fsname, \ index, \ 0, \ flags | VFCF_STATIC, \ }; \ DATA_SET(vfs_set,_fs_vfsconf) #endif /* VFS_LKM */ #endif /* KERNEL */ #ifdef KERNEL #include #include /* XXX for AF_MAX */ /* * Network address lookup element */ struct netcred { struct radix_node netc_rnodes[2]; int netc_exflags; struct ucred netc_anon; }; /* * Network export information */ struct netexport { struct netcred ne_defexported; /* Default export */ struct radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */ }; extern char *mountrootfsname; /* * exported vnode operations */ int dounmount __P((struct mount *, int, struct proc *)); int vfs_setpublicfs /* set publicly exported fs */ __P((struct mount *, struct netexport *, struct export_args *)); int vfs_lock __P((struct mount *)); /* lock a vfs */ void vfs_msync __P((struct mount *, int)); void vfs_unlock __P((struct mount *)); /* unlock a vfs */ int vfs_busy __P((struct mount *, int, struct simplelock *, struct proc *)); int vfs_export /* process mount export info */ __P((struct mount *, struct netexport *, struct export_args *)); int 
vfs_vrele __P((struct mount *, struct vnode *)); struct netcred *vfs_export_lookup /* lookup host in fs export list */ __P((struct mount *, struct netexport *, struct sockaddr *)); +int vfs_allocate_syncvnode __P((struct mount *)); void vfs_getnewfsid __P((struct mount *)); struct mount *vfs_getvfs __P((fsid_t *)); /* return vfs given fsid */ int vfs_mountedon __P((struct vnode *)); /* is a vfs mounted on vp */ int vfs_rootmountalloc __P((char *, char *, struct mount **)); void vfs_unbusy __P((struct mount *, struct proc *)); void vfs_unmountall __P((void)); extern CIRCLEQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct simplelock mountlist_slock; extern struct nfs_public nfs_pub; #else /* !KERNEL */ #include __BEGIN_DECLS int fstatfs __P((int, struct statfs *)); int getfh __P((const char *, fhandle_t *)); int getfsstat __P((struct statfs *, long, int)); int getmntinfo __P((struct statfs **, int)); int mount __P((const char *, const char *, int, void *)); int statfs __P((const char *, struct statfs *)); int unmount __P((const char *, int)); /* C library stuff */ void endvfsent __P((void)); struct ovfsconf *getvfsbyname __P((const char *)); struct ovfsconf *getvfsbytype __P((int)); struct ovfsconf *getvfsent __P((void)); #define getvfsbyname new_getvfsbyname int new_getvfsbyname __P((const char *, struct vfsconf *)); void setvfsent __P((int)); int vfsisloadable __P((const char *)); int vfsload __P((const char *)); __END_DECLS #endif /* KERNEL */ #endif /* !_SYS_MOUNT_H_ */ Index: head/sys/sys/vnode.h =================================================================== --- head/sys/sys/vnode.h (revision 34265) +++ head/sys/sys/vnode.h (revision 34266) @@ -1,539 +1,545 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vnode.h 8.7 (Berkeley) 2/4/94 - * $Id: vnode.h,v 1.66 1998/01/24 02:01:31 dyson Exp $ + * $Id: vnode.h,v 1.67 1998/03/07 21:36:27 dyson Exp $ */ #ifndef _SYS_VNODE_H_ #define _SYS_VNODE_H_ #include #include /* needed for struct selinfo in vnodes */ #include /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, * each mounted-on file, text file, and the root. */ /* * Vnode types. VNON means no type. */ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD }; /* * Vnode tag types. * These are for the benefit of external programs only (e.g., pstat) * and should NEVER be inspected by the kernel. */ enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, - VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS + VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS, VT_VFS }; /* * Each underlying filesystem allocates its own private area and hangs * it from v_data. If non-null, this area is freed in getnewvnode(). */ LIST_HEAD(buflists, buf); typedef int vop_t __P((void *)); struct namecache; /* * Reading or writing any of these items requires holding the appropriate lock. * v_freelist is locked by the global vnode_free_list simple lock. * v_mntvnodes is locked by the global mntvnodes simple lock. * v_flag, v_usecount, v_holdcount and v_writecount are * locked by the v_interlock simple lock. * v_pollinfo is locked by the lock contained inside it. */ struct vnode { u_long v_flag; /* vnode flags (see below) */ int v_usecount; /* reference count of users */ int v_writecount; /* reference count of writers */ int v_holdcnt; /* page & buffer references */ daddr_t v_lastr; /* last read (read-ahead) */ u_long v_id; /* capability identifier */ struct mount *v_mount; /* ptr to vfs we are in */ vop_t **v_op; /* vnode operations vector */ TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ + LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */ long v_numoutput; /* num of writes in progress */ enum vtype v_type; /* vnode type */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ struct socket *vu_socket; /* unix ipc (VSOCK) */ struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ } v_un; struct nqlease *v_lease; /* Soft reference to lease */ daddr_t v_lastw; /* last write (write cluster) */ daddr_t v_cstart; /* start block of cluster */ daddr_t v_lasta; /* last allocation */ int v_clen; /* length of current cluster */ int v_maxio; /* maximum I/O cluster size */ struct vm_object *v_object; /* Place to store VM object */ struct simplelock v_interlock; /* lock on usecount and flag */ struct lock *v_vnlock; /* used for non-locking fs's */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ LIST_HEAD(, namecache) v_cache_src; /* Cache entries from us */ TAILQ_HEAD(, namecache) v_cache_dst; /* Cache entries to us */ struct vnode *v_dd; /* .. vnode */ u_long v_ddid; /* .. 
capability identifier */ struct { struct simplelock vpi_lock; /* lock to protect below */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ } v_pollinfo; }; #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket #define v_specinfo v_un.vu_specinfo #define v_fifoinfo v_un.vu_fifoinfo #define VN_POLLEVENT(vp, events) \ do { \ if ((vp)->v_pollinfo.vpi_events & (events)) \ vn_pollevent((vp), (events)); \ } while (0) /* * Vnode flags. */ #define VROOT 0x00001 /* root of its file system */ #define VTEXT 0x00002 /* vnode is a pure text prototype */ #define VSYSTEM 0x00004 /* vnode being used by kernel */ #define VISTTY 0x00008 /* vnode represents a tty */ #define VXLOCK 0x00100 /* vnode is locked to change underlying type */ #define VXWANT 0x00200 /* process is waiting for vnode */ #define VBWAIT 0x00400 /* waiting for output to complete */ #define VALIASED 0x00800 /* vnode has an alias */ #define VDIROP 0x01000 /* LFS: vnode is involved in a directory op */ #define VOBJBUF 0x02000 /* Allocate buffers in VM object */ #define VNINACT 0x04000 /* LFS: skip ufs_inactive() in lfs_vunref */ #define VAGE 0x08000 /* Insert vnode at head of free list */ #define VOLOCK 0x10000 /* vnode is locked waiting for an object */ #define VOWANT 0x20000 /* a process is waiting for VOLOCK */ #define VDOOMED 0x40000 /* This vnode is being recycled */ #define VFREE 0x80000 /* This vnode is on the freelist */ -#define VTBFREE 0x100000 /* This vnode is no the to be freelist */ +#define VTBFREE 0x100000 /* This vnode is on the to-be-freelist */ +#define VONWORKLST 0x200000 /* On syncer work-list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value * is unavailable (getattr) or which is not to be changed (setattr). */ struct vattr { enum vtype va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ short va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ long va_fsid; /* file system id (dev for now) */ long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ struct timespec va_atime; /* time of last access */ struct timespec va_mtime; /* time of last modification */ struct timespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device the special file represents */ u_quad_t va_bytes; /* bytes of disk space held by file */ u_quad_t va_filerev; /* file modification number */ u_int va_vaflags; /* operations flags, see below */ long va_spare; /* remain quad aligned */ }; /* * Flags for va_vaflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ #define VA_EXCLUSIVE 0x02 /* exclusive create request */ /* * Flags for ioflag. */ #define IO_UNIT 0x01 /* do I/O as atomic unit */ #define IO_APPEND 0x02 /* append write to end */ #define IO_SYNC 0x04 /* do I/O synchronously */ #define IO_NODELOCKED 0x08 /* underlying node already locked */ #define IO_NDELAY 0x10 /* FNDELAY flag set in file table */ #define IO_VMIO 0x20 /* data already in VMIO space */ #define IO_INVAL 0x40 /* invalidate after I/O */ /* * Modes. Some values same as Ixxx entries from inode.h for now. 
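* (E.g., VSUID | VREAD | VWRITE works out to 04600, and the octal values match the corresponding Ixxx bits in the inode mode word.)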
*/ #define VSUID 04000 /* set user id on execution */ #define VSGID 02000 /* set group id on execution */ #define VSVTX 01000 /* save swapped text even after use */ #define VREAD 00400 /* read, write, execute permissions */ #define VWRITE 00200 #define VEXEC 00100 /* * Token indicating no attribute value yet assigned. */ #define VNOVAL (-1) #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_VNODE); #endif /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). */ extern enum vtype iftovt_tab[]; extern int vttoif_tab[]; #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* * Flags to various vnode functions. */ #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ #define DOCLOSE 0x0008 /* vclean: close active files */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define VREF(vp) vref(vp) + #ifdef DIAGNOSTIC #define VATTR_NULL(vap) vattr_null(vap) #else #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ #endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) #ifdef VFS_LKM #define VNODEOP_SET(f) DATA_SET(MODVNOPS,f) #else #define VNODEOP_SET(f) DATA_SET(vfs_opv_descs_,f) #endif /* * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ +extern time_t syncdelay; /* time to delay syncing vnodes */ +extern int rushjob; /* # of slots filesys_syncer should run ASAP */ extern struct vm_zone *namei_zone; extern int prtactive; /* nonzero to call vprint() */ extern struct vattr va_null; /* predefined null vattr structure */ extern int vfs_ioopt; /* * Macro/function to check for client cache inconsistency w.r.t. leasing. */ #define LEASE_READ 0x1 /* Check lease for readers */ #define LEASE_WRITE 0x2 /* Check lease for modifiers */ extern void (*lease_updatetime) __P((int deltat)); #define VSHOULDFREE(vp) \ (!((vp)->v_flag & (VFREE|VDOOMED)) && \ !(vp)->v_holdcnt && !(vp)->v_usecount && \ (!(vp)->v_object || \ !((vp)->v_object->ref_count || (vp)->v_object->resident_page_count))) #define VSHOULDBUSY(vp) \ (((vp)->v_flag & (VFREE|VTBFREE)) && \ ((vp)->v_holdcnt || (vp)->v_usecount)) #endif /* KERNEL */ /* * Mods for extensibility. */ /* * Flags for vdesc_flags: */ #define VDESC_MAX_VPS 16 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ #define VDESC_VP0_WILLRELE 0x0001 #define VDESC_VP1_WILLRELE 0x0002 #define VDESC_VP2_WILLRELE 0x0004 #define VDESC_VP3_WILLRELE 0x0008 #define VDESC_NOMAP_VPP 0x0100 #define VDESC_VPP_WILLRELE 0x0200 /* * VDESC_NO_OFFSET is used to identify the end of the offset list * and in places where no such field exists. */ #define VDESC_NO_OFFSET -1 /* * This structure describes the vnode operation taking place. */ struct vnodeop_desc { int vdesc_offset; /* offset in vector--first for speed */ char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ /* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. 
* Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_proc_offset; /* proc location, if any */ int vdesc_componentname_offset; /* if any */ /* * Finally, we've got a list of private data (about each operation) * for each transport layer. (Support to manage this list is not * yet part of BSD.) */ caddr_t *vdesc_transports; }; #ifdef KERNEL /* * A list of all the operation descs. */ extern struct vnodeop_desc *vnodeop_descs[]; /* * Interlock for scanning list of vnodes attached to a mountpoint */ extern struct simplelock mntvnode_slock; /* * This macro is very helpful in defining those offsets in the vdesc struct. * * This is stolen from X11R4. I ignored all the fancy stuff for * Crays, so if you decide to port this to such a serious machine, * you might want to consult Intrinsic.h's XtOffset{,Of,To}. */ #define VOPARG_OFFSET(p_type,field) \ ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL))) #define VOPARG_OFFSETOF(s_type,field) \ VOPARG_OFFSET(s_type*,field) #define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \ ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET))) /* * This structure is used to configure the new vnodeops vector. */ struct vnodeopv_entry_desc { struct vnodeop_desc *opve_op; /* which operation this is */ vop_t *opve_impl; /* code implementing this operation */ }; struct vnodeopv_desc { /* ptr to the ptr to the vector where op should go */ vop_t ***opv_desc_vector_p; struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */ }; /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ struct vop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; #ifdef DEBUG_VFS_LOCKS /* * Macros to aid in tracing VFS locking problems. Not totally * reliable since if the process sleeps between changing the lock * state and checking it with the assert, some other process could * change the state. They are good enough for debugging a single * filesystem using a single-threaded test. I find that 'cvs co src' * is a pretty good test. */ /* * [dfr] Kludge until I get around to fixing all the vfs locking. */ #define IS_LOCKING_VFS(vp) ((vp)->v_tag == VT_UFS \ || (vp)->v_tag == VT_MFS \ || (vp)->v_tag == VT_NFS \ || (vp)->v_tag == VT_LFS \ || (vp)->v_tag == VT_ISOFS \ || (vp)->v_tag == VT_MSDOSFS \ || (vp)->v_tag == VT_DEVFS) #define ASSERT_VOP_LOCKED(vp, str) \ if ((vp) && IS_LOCKING_VFS(vp) && !VOP_ISLOCKED(vp)) { \ panic("%s: %x is not locked but should be", str, vp); \ } #define ASSERT_VOP_UNLOCKED(vp, str) \ if ((vp) && IS_LOCKING_VFS(vp) && VOP_ISLOCKED(vp)) { \ panic("%s: %x is locked but shouldn't be", str, vp); \ } #else #define ASSERT_VOP_LOCKED(vp, str) #define ASSERT_VOP_UNLOCKED(vp, str) #endif /* * VOCALL calls an op given an ops vector. We break it out because BSD's * vclean changes the ops vector and then wants to call ops with the old * vector. */ #define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP)) /* * This call works for vnodes in the kernel. */ #define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP)) #define VDESC(OP) (& __CONCAT(OP,_desc)) #define VOFFSET(OP) (VDESC(OP)->vdesc_offset) /* * Finally, include the default set of vnode operations. */ #include "vnode_if.h" /* * Public vnode manipulation functions. 
*/ struct componentname; struct file; struct mount; struct nameidata; struct ostat; struct proc; struct stat; struct ucred; struct uio; struct vattr; struct vnode; struct vop_bwrite_args; extern int (*lease_check_hook) __P((struct vop_lease_args *)); int bdevvp __P((dev_t dev, struct vnode **vpp)); /* cache_* may belong in namei.h. */ void cache_enter __P((struct vnode *dvp, struct vnode *vp, struct componentname *cnp)); int cache_lookup __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)); void cache_purge __P((struct vnode *vp)); void cache_purgevfs __P((struct mount *mp)); void cvtstat __P((struct stat *st, struct ostat *ost)); int getnewvnode __P((enum vtagtype tag, struct mount *mp, vop_t **vops, struct vnode **vpp)); int lease_check __P((struct vop_lease_args *ap)); void vattr_null __P((struct vattr *vap)); int vcount __P((struct vnode *vp)); void vdrop __P((struct vnode *)); int vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp)); void vfs_opv_init __P((struct vnodeopv_desc **them)); int vflush __P((struct mount *mp, struct vnode *skipvp, int flags)); int vget __P((struct vnode *vp, int lockflag, struct proc *p)); void vgone __P((struct vnode *vp)); void vhold __P((struct vnode *)); int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred, struct proc *p, int slpflag, int slptimeo)); void vprint __P((char *label, struct vnode *vp)); int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp, struct proc *p)); int vn_close __P((struct vnode *vp, int flags, struct ucred *cred, struct proc *p)); int vn_lock __P((struct vnode *vp, int flags, struct proc *p)); int vn_open __P((struct nameidata *ndp, int fmode, int cmode)); void vn_pollevent __P((struct vnode *vp, int events)); void vn_pollgone __P((struct vnode *vp)); int vn_pollrecord __P((struct vnode *vp, struct proc *p, int events)); int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, int *aresid, struct proc *p)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); +void vn_syncer_add_to_worklist __P((struct vnode *vp, int delay)); int vfs_cache_lookup __P((struct vop_lookup_args *ap)); int vfs_object_create __P((struct vnode *vp, struct proc *p, struct ucred *cred, int waslocked)); int vn_writechk __P((struct vnode *vp)); int vop_stdbwrite __P((struct vop_bwrite_args *ap)); int vop_stdislocked __P((struct vop_islocked_args *)); int vop_stdlock __P((struct vop_lock_args *)); int vop_stdunlock __P((struct vop_unlock_args *)); int vop_noislocked __P((struct vop_islocked_args *)); int vop_nolock __P((struct vop_lock_args *)); int vop_nopoll __P((struct vop_poll_args *)); int vop_nounlock __P((struct vop_unlock_args *)); int vop_stdpathconf __P((struct vop_pathconf_args *)); int vop_stdpoll __P((struct vop_poll_args *)); int vop_revoke __P((struct vop_revoke_args *)); int vop_sharedlock __P((struct vop_lock_args *)); int vop_eopnotsupp __P((struct vop_generic_args *ap)); int vop_ebadf __P((struct vop_generic_args *ap)); int vop_einval __P((struct vop_generic_args *ap)); int vop_enotty __P((struct vop_generic_args *ap)); int vop_defaultop __P((struct vop_generic_args *ap)); int vop_null __P((struct vop_generic_args *ap)); struct vnode * checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp)); void vput __P((struct vnode *vp)); void vrele __P((struct vnode *vp)); void vref __P((struct vnode *vp)); void vbusy __P((struct vnode *vp)); extern vop_t **default_vnodeop_p; extern 
TAILQ_HEAD(tobefreelist, vnode) vnode_tobefree_list; /* vnode free list */ #endif /* KERNEL */ #endif /* !_SYS_VNODE_H_ */ Index: head/sys/ufs/ffs/ffs_alloc.c =================================================================== --- head/sys/ufs/ffs/ffs_alloc.c (revision 34265) +++ head/sys/ufs/ffs/ffs_alloc.c (revision 34266) @@ -1,1636 +1,1683 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 - * $Id: ffs_alloc.c,v 1.46 1998/02/04 22:33:27 eivind Exp $ + * $Id: ffs_alloc.c,v 1.47 1998/02/06 12:14:13 eivind Exp $ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef ufs_daddr_t allocfcn_t __P((struct inode *ip, int cg, ufs_daddr_t bpref, int size)); static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int)); -static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t)); +static ufs_daddr_t + ffs_alloccgblk __P((struct inode *, struct buf *, ufs_daddr_t)); #ifdef DIAGNOSTIC static int ffs_checkblk __P((struct inode *, ufs_daddr_t, long)); #endif static void ffs_clusteracct __P((struct fs *, struct cg *, ufs_daddr_t, int)); #ifdef notyet static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t, int)); #endif static ino_t ffs_dirpref __P((struct fs *)); static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int)); static void ffs_fserr __P((struct fs *, u_int, char *)); static u_long ffs_hashalloc __P((struct inode *, int, long, int, allocfcn_t *)); static ino_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int)); static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t, int)); /* * Allocate a block in the file system. 
* * The size of the requested block is given, which must be some * multiple of fs_fsize and <= fs_bsize. * A preference may be optionally specified. If a preference is given * the following hierarchy is used to allocate a block: * 1) allocate the requested block. * 2) allocate a rotationally optimal block in the same cylinder. * 3) allocate a block in the same cylinder group. * 4) quadratically rehash into other cylinder groups, until an * available block is located. * If no block preference is given the following hierarchy is used * to allocate a block: * 1) allocate a block in the cylinder group that contains the * inode for the file. * 2) quadratically rehash into other cylinder groups, until an * available block is located. */ int ffs_alloc(ip, lbn, bpref, size, cred, bnp) register struct inode *ip; ufs_daddr_t lbn, bpref; int size; struct ucred *cred; ufs_daddr_t *bnp; { register struct fs *fs; ufs_daddr_t bno; int cg; #ifdef QUOTA int error; #endif *bnp = 0; fs = ip->i_fs; #ifdef DIAGNOSTIC if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { printf("dev = 0x%lx, bsize = %ld, size = %d, fs = %s\n", (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_alloc: bad size"); } if (cred == NOCRED) panic("ffs_alloc: missing credential"); #endif /* DIAGNOSTIC */ if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) goto nospace; if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) goto nospace; #ifdef QUOTA error = chkdq(ip, (long)btodb(size), cred, 0); if (error) return (error); #endif if (bpref >= fs->fs_size) bpref = 0; if (bpref == 0) cg = ino_to_cg(fs, ip->i_number); else cg = dtog(fs, bpref); bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, ffs_alloccg); if (bno > 0) { ip->i_blocks += btodb(size); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bnp = bno; return (0); } #ifdef QUOTA /* * Restore user's disk quota because allocation failed. */ (void) chkdq(ip, (long)-btodb(size), cred, FORCE); #endif nospace: ffs_fserr(fs, cred->cr_uid, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); return (ENOSPC); } /* * Reallocate a fragment to a bigger size * * The number and size of the old block is given, and a preference * and new size is also specified. The allocator attempts to extend * the original block. Failing that, the regular block allocator is * invoked to get an appropriate block. */ int ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) register struct inode *ip; ufs_daddr_t lbprev; ufs_daddr_t bpref; int osize, nsize; struct ucred *cred; struct buf **bpp; { register struct fs *fs; struct buf *bp; int cg, request, error; ufs_daddr_t bprev, bno; *bpp = 0; fs = ip->i_fs; #ifdef DIAGNOSTIC if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { printf( "dev = 0x%lx, bsize = %ld, osize = %d, " "nsize = %d, fs = %s\n", (u_long)ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); panic("ffs_realloccg: bad size"); } if (cred == NOCRED) panic("ffs_realloccg: missing credential"); #endif /* DIAGNOSTIC */ if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) goto nospace; if ((bprev = ip->i_db[lbprev]) == 0) { printf("dev = 0x%lx, bsize = %ld, bprev = %ld, fs = %s\n", (u_long) ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt); panic("ffs_realloccg: bad bprev"); } /* * Allocate the extra space in the buffer. 
*/ error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } if( bp->b_blkno == bp->b_lblkno) { if( lbprev >= NDADDR) panic("ffs_realloccg: lbprev out of range"); bp->b_blkno = fsbtodb(fs, bprev); } #ifdef QUOTA error = chkdq(ip, (long)btodb(nsize - osize), cred, 0); if (error) { brelse(bp); return (error); } #endif /* * Check for extension in the existing location. */ cg = dtog(fs, bprev); bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize); if (bno) { if (bp->b_blkno != fsbtodb(fs, bno)) panic("ffs_realloccg: bad blockno"); ip->i_blocks += btodb(nsize - osize); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; bzero((char *)bp->b_data + osize, (u_int)nsize - osize); *bpp = bp; return (0); } /* * Allocate a new disk location. */ if (bpref >= fs->fs_size) bpref = 0; switch ((int)fs->fs_optim) { case FS_OPTSPACE: /* * Allocate an exact sized fragment. Although this makes * best use of space, we will waste time relocating it if * the file continues to grow. If the fragmentation is * less than half of the minimum free reserve, we choose * to begin optimizing for time. */ request = nsize; if (fs->fs_minfree <= 5 || fs->fs_cstotal.cs_nffree > fs->fs_dsize * fs->fs_minfree / (2 * 100)) break; log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", fs->fs_fsmnt); fs->fs_optim = FS_OPTTIME; break; case FS_OPTTIME: /* * At this point we have discovered a file that is trying to * grow a small fragment to a larger fragment. To save time, * we allocate a full sized block, then free the unused portion. * If the file continues to grow, the `ffs_fragextend' call * above will be able to grow it in place without further * copying. If aberrant programs cause disk fragmentation to * grow within 2% of the free reserve, we choose to begin * optimizing for space. */ request = fs->fs_bsize; if (fs->fs_cstotal.cs_nffree < fs->fs_dsize * (fs->fs_minfree - 2) / 100) break; log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", fs->fs_fsmnt); fs->fs_optim = FS_OPTSPACE; break; default: printf("dev = 0x%lx, optim = %ld, fs = %s\n", (u_long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt); panic("ffs_realloccg: bad optim"); /* NOTREACHED */ } bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, ffs_alloccg); if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); - ffs_blkfree(ip, bprev, (long)osize); + if (!DOINGSOFTDEP(ITOV(ip))) + ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) ffs_blkfree(ip, bno + numfrags(fs, nsize), (long)(request - nsize)); ip->i_blocks += btodb(nsize - osize); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; bzero((char *)bp->b_data + osize, (u_int)nsize - osize); *bpp = bp; return (0); } #ifdef QUOTA /* * Restore user's disk quota because allocation failed. */ (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); #endif brelse(bp); nospace: /* * no space available */ ffs_fserr(fs, cred->cr_uid, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); return (ENOSPC); } #ifdef notyet /* * Reallocate a sequence of blocks into a contiguous sequence of blocks. * * The vnode and an array of buffer pointers for a range of sequential * logical blocks to be made contiguous is given. The allocator attempts * to find a range of sequential blocks starting as close as possible to * an fs_rotdelay offset from the end of the allocation for the logical * block immediately preceding the current range. 
If successful, the * physical block numbers in the buffer pointers and in the inode are * changed to reflect the new allocation. If unsuccessful, the allocation * is left unchanged. The success in doing the reallocation is returned. * Note that the error return is not reflected back to the user. Rather * the previous block allocation will be used. */ static int doasyncfree = 1; SYSCTL_INT(_vfs_ffs, FFS_ASYNCFREE, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); static int doreallocblks = 1; SYSCTL_INT(_vfs_ffs, FFS_REALLOCBLKS, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); static int prtrealloc = 0; #endif int ffs_reallocblks(ap) struct vop_reallocblks_args /* { struct vnode *a_vp; struct cluster_save *a_buflist; } */ *ap; { #if !defined (not_yes) return (ENOSPC); #else struct fs *fs; struct inode *ip; struct vnode *vp; struct buf *sbp, *ebp; ufs_daddr_t *bap, *sbap, *ebap = 0; struct cluster_save *buflist; ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno; struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; int i, len, start_lvl, end_lvl, pref, ssize; struct timeval tv; if (doreallocblks == 0) return (ENOSPC); vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_fs; if (fs->fs_contigsumsize <= 0) return (ENOSPC); buflist = ap->a_buflist; len = buflist->bs_nchildren; start_lbn = buflist->bs_children[0]->b_lblkno; end_lbn = start_lbn + len - 1; #ifdef DIAGNOSTIC for (i = 0; i < len; i++) if (!ffs_checkblk(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) panic("ffs_reallocblks: unallocated block 1"); for (i = 1; i < len; i++) if (buflist->bs_children[i]->b_lblkno != start_lbn + i) panic("ffs_reallocblks: non-logical cluster"); blkno = buflist->bs_children[0]->b_blkno; ssize = fsbtodb(fs, fs->fs_frag); for (i = 1; i < len - 1; i++) if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) panic("ffs_reallocblks: non-physical cluster %d", i); #endif /* * If the latest allocation is in a new cylinder group, assume that * the filesystem has decided to move and do not force it back to * the previous cylinder group. */ if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) return (ENOSPC); if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) return (ENOSPC); /* * Get the starting offset and block map for the first block. */ if (start_lvl == 0) { sbap = &ip->i_db[0]; soff = start_lbn; } else { idp = &start_ap[start_lvl - 1]; if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { brelse(sbp); return (ENOSPC); } sbap = (ufs_daddr_t *)sbp->b_data; soff = idp->in_off; } /* * Find the preferred location for the cluster. */ pref = ffs_blkpref(ip, start_lbn, soff, sbap); /* * If the block range spans two block maps, get the second map. */ if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { ssize = len; } else { #ifdef DIAGNOSTIC if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) panic("ffs_reallocblk: start == end"); #endif ssize = len - (idp->in_off + 1); if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) goto fail; ebap = (ufs_daddr_t *)ebp->b_data; } /* * Search the block map looking for an allocation of the desired size. */ if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, len, ffs_clusteralloc)) == 0) goto fail; /* * We have found a new contiguous block. * * First we have to replace the old block pointers with the new * block pointers in the inode and indirect blocks associated * with the file. 
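* (Sketch of the loop below: bap starts in the first block map, sbap, and switches to the second map, ebap, once i reaches ssize, the number of pointers held by the first map.)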
*/ #ifdef DEBUG if (prtrealloc) printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number, start_lbn, end_lbn); #endif blkno = newblk; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { - if (i == ssize) + if (i == ssize) { bap = ebap; + soff = -i; + } #ifdef DIAGNOSTIC if (!ffs_checkblk(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) panic("ffs_reallocblks: unallocated block 2"); if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) panic("ffs_reallocblks: alloc mismatch"); #endif #ifdef DEBUG if (prtrealloc) printf(" %d,", *bap); #endif + if (DOINGSOFTDEP(vp)) { + if (sbap == &ip->i_db[0] && i < ssize) + softdep_setup_allocdirect(ip, start_lbn + i, + blkno, *bap, fs->fs_bsize, fs->fs_bsize, + buflist->bs_children[i]); + else + softdep_setup_allocindir_page(ip, start_lbn + i, + i < ssize ? sbp : ebp, soff + i, blkno, + *bap, buflist->bs_children[i]); + } *bap++ = blkno; } /* * Next we must write out the modified inode and indirect blocks. * For strict correctness, the writes should be synchronous since * the old block values may have been written to disk. In practice * they are almost never written, but if we are concerned about * strict correctness, the `doasyncfree' flag should be set to zero. * * The test on `doasyncfree' should be changed to test a flag * that shows whether the associated buffers and inodes have * been written. The flag should be set when the cluster is * started and cleared whenever the buffer or inode is flushed. * We can then check below to see if it is set, and do the * synchronous write only when it has been cleared. */ if (sbap != &ip->i_db[0]) { if (doasyncfree) bdwrite(sbp); else bwrite(sbp); } else { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (!doasyncfree) { gettime(&tv); UFS_UPDATE(vp, &tv, &tv, 1); } } if (ssize < len) if (doasyncfree) bdwrite(ebp); else bwrite(ebp); /* * Last, free the old blocks and assign the new blocks to the buffers. */ #ifdef DEBUG if (prtrealloc) printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { - ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize); + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef DEBUG if (!ffs_checkblk(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) panic("ffs_reallocblks: unallocated block 3"); if (prtrealloc) printf(" %d,", blkno); #endif } #ifdef DEBUG if (prtrealloc) { prtrealloc--; printf("\n"); } #endif return (0); fail: if (ssize < len) brelse(ebp); if (sbap != &ip->i_db[0]) brelse(sbp); return (ENOSPC); #endif } /* * Allocate an inode in the file system. * * If allocating a directory, use ffs_dirpref to select the inode. * If allocating in a directory, the following hierarchy is followed: * 1) allocate the preferred inode. * 2) allocate an inode in the same cylinder group. * 3) quadratically rehash into other cylinder groups, until an * available inode is located. * If no inode preference is given the following hierarchy is used * to allocate an inode: * 1) allocate an inode in cylinder group 0. * 2) quadratically rehash into other cylinder groups, until an * available inode is located. 
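* (E.g., with 32 cylinder groups and a preferred group of 5, the quadratic rehash in ffs_hashalloc probes groups 6, 8, 12, 20 and 4 before falling back to the brute-force sweep.)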
*/ int ffs_valloc(pvp, mode, cred, vpp) struct vnode *pvp; int mode; struct ucred *cred; struct vnode **vpp; { register struct inode *pip; register struct fs *fs; register struct inode *ip; ino_t ino, ipref; int cg, error; *vpp = NULL; pip = VTOI(pvp); fs = pip->i_fs; if (fs->fs_cstotal.cs_nifree == 0) goto noinodes; if ((mode & IFMT) == IFDIR) ipref = ffs_dirpref(fs); else ipref = pip->i_number; if (ipref >= fs->fs_ncg * fs->fs_ipg) ipref = 0; cg = ino_to_cg(fs, ipref); ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, (allocfcn_t *)ffs_nodealloccg); if (ino == 0) goto noinodes; error = VFS_VGET(pvp->v_mount, ino, vpp); if (error) { UFS_VFREE(pvp, ino, mode); return (error); } ip = VTOI(*vpp); if (ip->i_mode) { printf("mode = 0%o, inum = %ld, fs = %s\n", ip->i_mode, ip->i_number, fs->fs_fsmnt); panic("ffs_valloc: dup alloc"); } if (ip->i_blocks) { /* XXX */ printf("free inode %s/%ld had %ld blocks\n", fs->fs_fsmnt, ino, ip->i_blocks); ip->i_blocks = 0; } ip->i_flags = 0; /* * Set up a new generation number for this inode. */ if (ip->i_gen == 0 || ++ip->i_gen == 0) ip->i_gen = random() / 2 + 1; return (0); noinodes: ffs_fserr(fs, cred->cr_uid, "out of inodes"); uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); return (ENOSPC); } /* * Find a cylinder to place a directory. * * The policy implemented by this algorithm is to select from * among those cylinder groups with above the average number of * free inodes, the one with the smallest number of directories. */ static ino_t ffs_dirpref(fs) register struct fs *fs; { int cg, minndir, mincg, avgifree; avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; minndir = fs->fs_ipg; mincg = 0; for (cg = 0; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_ndir < minndir && fs->fs_cs(fs, cg).cs_nifree >= avgifree) { mincg = cg; minndir = fs->fs_cs(fs, cg).cs_ndir; } return ((ino_t)(fs->fs_ipg * mincg)); } /* * Select the desired position for the next block in a file. The file is * logically divided into sections. The first section is composed of the * direct blocks. Each additional section contains fs_maxbpg blocks. * * If no blocks have been allocated in the first section, the policy is to * request a block in the same cylinder group as the inode that describes * the file. If no blocks have been allocated in any other section, the * policy is to place the section in a cylinder group with a greater than * average number of free blocks. An appropriate cylinder group is found * by using a rotor that sweeps the cylinder groups. When a new group of * blocks is needed, the sweep begins in the cylinder group following the * cylinder group from which the previous allocation was made. The sweep * continues until a cylinder group with greater than the average number * of free blocks is found. If the allocation is for the first block in an * indirect block, the information on the previous allocation is unavailable; * here a best guess is made based upon the logical block number being * allocated. * * If a section is already partially allocated, the policy is to * contiguously allocate fs_maxcontig blocks. The end of one of these * contiguous blocks and the beginning of the next is physically separated * so that the disk head will be in transit between them for at least * fs_rotdelay milliseconds. This is to allow time for the processor to * schedule another I/O transfer. 
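* (Illustrative numbers, assuming 512-byte sectors and 1K frags: fs_rotdelay = 4 ms, fs_rps = 60 and fs_nsect = 32 give 4*60*32/(2*1000) = 3 frags of separation, which the code below rounds up to a full block.)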
*/ ufs_daddr_t ffs_blkpref(ip, lbn, indx, bap) struct inode *ip; ufs_daddr_t lbn; int indx; ufs_daddr_t *bap; { register struct fs *fs; register int cg; int avgbfree, startcg; ufs_daddr_t nextblk; fs = ip->i_fs; if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { if (lbn < NDADDR) { cg = ino_to_cg(fs, ip->i_number); return (fs->fs_fpg * cg + fs->fs_frag); } /* * Find a cylinder with greater than average number of * unused data blocks. */ if (indx == 0 || bap[indx - 1] == 0) startcg = ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; else startcg = dtog(fs, bap[indx - 1]) + 1; startcg %= fs->fs_ncg; avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; for (cg = startcg; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; return (fs->fs_fpg * cg + fs->fs_frag); } for (cg = 0; cg <= startcg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; return (fs->fs_fpg * cg + fs->fs_frag); } return (0); } /* * One or more previous blocks have been laid out. If less * than fs_maxcontig previous blocks are contiguous, the * next block is requested contiguously, otherwise it is * requested rotationally delayed by fs_rotdelay milliseconds. */ nextblk = bap[indx - 1] + fs->fs_frag; if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig) != nextblk) return (nextblk); /* * Here we convert ms of delay to frags as: * (frags) = (ms) * (rev/sec) * (sect/rev) / * ((sect/frag) * (ms/sec)) * then round up to the next block. */ nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / (NSPF(fs) * 1000), fs->fs_frag); return (nextblk); } /* * Implement the cylinder overflow algorithm. * * The policy implemented by this algorithm is: * 1) allocate the block in its requested cylinder group. * 2) quadratically rehash on the cylinder group number. * 3) brute force search for a free block. */ /*VARARGS5*/ static u_long ffs_hashalloc(ip, cg, pref, size, allocator) struct inode *ip; int cg; long pref; int size; /* size for data blocks, mode for inodes */ allocfcn_t *allocator; { register struct fs *fs; long result; /* XXX why not same type as we return? */ int i, icg = cg; fs = ip->i_fs; /* * 1: preferred cylinder group */ result = (*allocator)(ip, cg, pref, size); if (result) return (result); /* * 2: quadratic rehash */ for (i = 1; i < fs->fs_ncg; i *= 2) { cg += i; if (cg >= fs->fs_ncg) cg -= fs->fs_ncg; result = (*allocator)(ip, cg, 0, size); if (result) return (result); } /* * 3: brute force search * Note that we start at i == 2, since 0 was checked initially, * and 1 is always checked in the quadratic rehash. */ cg = (icg + 2) % fs->fs_ncg; for (i = 2; i < fs->fs_ncg; i++) { result = (*allocator)(ip, cg, 0, size); if (result) return (result); cg++; if (cg == fs->fs_ncg) cg = 0; } return (0); } /* * Determine whether a fragment can be extended. * * Check to see if the necessary fragments are available, and * if they are, allocate them. 
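* (E.g., a 2-frag fragment can grow to 5 frags only if the next 3 frags in the free map are clear and the extended piece stays within one block.)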
*/ static ufs_daddr_t ffs_fragextend(ip, cg, bprev, osize, nsize) struct inode *ip; int cg; long bprev; int osize, nsize; { register struct fs *fs; register struct cg *cgp; struct buf *bp; long bno; int frags, bbase; int i, error; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) return (0); frags = numfrags(fs, nsize); bbase = fragnum(fs, bprev); if (bbase > fragnum(fs, (bprev + frags - 1))) { /* cannot extend across a block boundary */ return (0); } error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); return (0); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { brelse(bp); return (0); } cgp->cg_time = time.tv_sec; bno = dtogd(fs, bprev); for (i = numfrags(fs, osize); i < frags; i++) if (isclr(cg_blksfree(cgp), bno + i)) { brelse(bp); return (0); } /* * the current fragment can be extended * deduct the count on fragment being extended into * increase the count on the remaining fragment (if any) * allocate the extended piece */ for (i = frags; i < fs->fs_frag - bbase; i++) if (isclr(cg_blksfree(cgp), bno + i)) break; cgp->cg_frsum[i - numfrags(fs, osize)]--; if (i != frags) cgp->cg_frsum[i - frags]++; for (i = numfrags(fs, osize); i < frags; i++) { clrbit(cg_blksfree(cgp), bno + i); cgp->cg_cs.cs_nffree--; fs->fs_cstotal.cs_nffree--; fs->fs_cs(fs, cg).cs_nffree--; } fs->fs_fmod = 1; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, bprev); bdwrite(bp); return (bprev); } /* * Determine whether a block can be allocated. * * Check to see if a block of the appropriate size is available, * and if it is, allocate it. */ static ufs_daddr_t ffs_alloccg(ip, cg, bpref, size) struct inode *ip; int cg; ufs_daddr_t bpref; int size; { register struct fs *fs; register struct cg *cgp; struct buf *bp; register int i; - int error, bno, frags, allocsiz; + ufs_daddr_t bno, blkno; + int allocsiz, error, frags; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) return (0); error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); return (0); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp) || (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { brelse(bp); return (0); } cgp->cg_time = time.tv_sec; if (size == fs->fs_bsize) { - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bdwrite(bp); return (bno); } /* * check to see if any fragments are already available * allocsiz is the size which will be allocated, hacking * it down to a smaller size if necessary */ frags = numfrags(fs, size); for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) if (cgp->cg_frsum[allocsiz] != 0) break; if (allocsiz == fs->fs_frag) { /* * no fragments were available, so a block will be * allocated, and hacked up */ if (cgp->cg_cs.cs_nbfree == 0) { brelse(bp); return (0); } - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bpref = dtogd(fs, bno); for (i = frags; i < fs->fs_frag; i++) setbit(cg_blksfree(cgp), bpref + i); i = fs->fs_frag - frags; cgp->cg_cs.cs_nffree += i; fs->fs_cstotal.cs_nffree += i; fs->fs_cs(fs, cg).cs_nffree += i; fs->fs_fmod = 1; cgp->cg_frsum[i]++; bdwrite(bp); return (bno); } bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); if (bno < 0) { brelse(bp); return (0); } for (i = 0; i < frags; i++) clrbit(cg_blksfree(cgp), bno + i); cgp->cg_cs.cs_nffree -= frags; fs->fs_cstotal.cs_nffree -= frags; fs->fs_cs(fs, cg).cs_nffree -= frags; fs->fs_fmod = 1; 
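/* Fragment-summary bookkeeping: the free run of allocsiz frags found above is consumed; if only frags of them were needed, the allocsiz - frags leftover is re-counted below as a smaller free fragment. */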
cgp->cg_frsum[allocsiz]--; if (frags != allocsiz) cgp->cg_frsum[allocsiz - frags]++; + blkno = cg * fs->fs_fpg + bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); bdwrite(bp); - return (cg * fs->fs_fpg + bno); + return ((u_long)blkno); } /* * Allocate a block in a cylinder group. * * This algorithm implements the following policy: * 1) allocate the requested block. * 2) allocate a rotationally optimal block in the same cylinder. * 3) allocate the next available block on the block rotor for the * specified cylinder group. * Note that this routine only allocates fs_bsize blocks; these * blocks may be fragmented by the routine that allocates them. */ static ufs_daddr_t -ffs_alloccgblk(fs, cgp, bpref) - register struct fs *fs; - register struct cg *cgp; +ffs_alloccgblk(ip, bp, bpref) + struct inode *ip; + struct buf *bp; ufs_daddr_t bpref; { + struct fs *fs; + struct cg *cgp; ufs_daddr_t bno, blkno; int cylno, pos, delta; short *cylbp; register int i; + fs = ip->i_fs; + cgp = (struct cg *)bp->b_data; if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { bpref = cgp->cg_rotor; goto norot; } bpref = blknum(fs, bpref); bpref = dtogd(fs, bpref); /* * if the requested block is available, use it */ if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) { bno = bpref; goto gotit; } if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { /* * Block layout information is not available. * Leaving bpref unchanged means we take the * next available free block following the one * we just allocated. Hopefully this will at * least hit a track cache on drives of unknown * geometry (e.g. SCSI). */ goto norot; } /* * check for a block available on the same cylinder */ cylno = cbtocylno(fs, bpref); if (cg_blktot(cgp)[cylno] == 0) goto norot; /* * check the summary information to see if a block is * available in the requested cylinder starting at the * requested rotational position and proceeding around. */ cylbp = cg_blks(fs, cgp, cylno); pos = cbtorpos(fs, bpref); for (i = pos; i < fs->fs_nrpos; i++) if (cylbp[i] > 0) break; if (i == fs->fs_nrpos) for (i = 0; i < pos; i++) if (cylbp[i] > 0) break; if (cylbp[i] > 0) { /* * found a rotational position, now find the actual * block. A panic if none is actually there. */ pos = cylno % fs->fs_cpc; bno = (cylno - pos) * fs->fs_spc / NSPB(fs); if (fs_postbl(fs, pos)[i] == -1) { printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); panic("ffs_alloccgblk: cyl groups corrupted"); } for (i = fs_postbl(fs, pos)[i];; ) { if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) { bno = blkstofrags(fs, (bno + i)); goto gotit; } delta = fs_rotbl(fs)[i]; if (delta <= 0 || delta + i > fragstoblks(fs, fs->fs_fpg)) break; i += delta; } printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); panic("ffs_alloccgblk: can't find blk in cyl"); } norot: /* * no blocks in the requested cylinder, so take next * available one in this cylinder group. 
*/ bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); if (bno < 0) return (0); cgp->cg_rotor = bno; gotit: blkno = fragstoblks(fs, bno); ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno); ffs_clusteracct(fs, cgp, blkno, -1); cgp->cg_cs.cs_nbfree--; fs->fs_cstotal.cs_nbfree--; fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; cylno = cbtocylno(fs, bno); cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; cg_blktot(cgp)[cylno]--; fs->fs_fmod = 1; - return (cgp->cg_cgx * fs->fs_fpg + bno); + blkno = cgp->cg_cgx * fs->fs_fpg + bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); + return (blkno); } #ifdef notyet /* * Determine whether a cluster can be allocated. * * We do not currently check for optimal rotational layout if there * are multiple choices in the same cylinder group. Instead we just * take the first one that we find following bpref. */ static ufs_daddr_t ffs_clusteralloc(ip, cg, bpref, len) struct inode *ip; int cg; ufs_daddr_t bpref; int len; { register struct fs *fs; register struct cg *cgp; struct buf *bp; int i, got, run, bno, bit, map; u_char *mapp; int32_t *lp; fs = ip->i_fs; if (fs->fs_maxcluster[cg] < len) return (NULL); if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp)) goto fail; cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) goto fail; /* * Check to see if a cluster of the needed size (or bigger) is * available in this cylinder group. */ lp = &cg_clustersum(cgp)[len]; for (i = len; i <= fs->fs_contigsumsize; i++) if (*lp++ > 0) break; if (i > fs->fs_contigsumsize) { /* * This is the first time looking for a cluster in this * cylinder group. Update the cluster summary information * to reflect the true maximum sized cluster so that * future cluster allocation requests can avoid reading * the cylinder group map only to find no clusters. */ lp = &cg_clustersum(cgp)[len - 1]; for (i = len - 1; i > 0; i--) if (*lp-- > 0) break; fs->fs_maxcluster[cg] = i; goto fail; } /* * Search the cluster map to find a big enough cluster. * We take the first one that we find, even if it is larger * than we need as we prefer to get one close to the previous * block allocation. We do not search before the current * preference point as we do not want to allocate a block * that is allocated before the previous one (as we will * then have to wait for another pass of the elevator * algorithm before it will be read). We prefer to fail and * be recalled to try an allocation in the next cylinder group. */ if (dtog(fs, bpref) != cg) bpref = 0; else bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); mapp = &cg_clustersfree(cgp)[bpref / NBBY]; map = *mapp++; bit = 1 << (bpref % NBBY); for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { if ((map & bit) == 0) { run = 0; } else { run++; if (run == len) break; } if ((got & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (got >= cgp->cg_nclusterblks) goto fail; /* * Allocate the cluster that we have found. 
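* (The cluster found runs from block got - run + 1 through got; the loop below claims each constituent block with ffs_alloccgblk.)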
*/ for (i = 1; i <= len; i++) if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i)) panic("ffs_clusteralloc: map mismatch"); bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); if (dtog(fs, bno) != cg) panic("ffs_clusteralloc: allocated out of group"); len = blkstofrags(fs, len); for (i = 0; i < len; i += fs->fs_frag) - if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i) + if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) panic("ffs_clusteralloc: lost block"); bdwrite(bp); return (bno); fail: brelse(bp); return (0); } #endif /* * Determine whether an inode can be allocated. * * Check to see if an inode is available, and if it is, * allocate it using the following policy: * 1) allocate the requested inode. * 2) allocate the next available inode after the requested * inode in the specified cylinder group. */ static ino_t ffs_nodealloccg(ip, cg, ipref, mode) struct inode *ip; int cg; ufs_daddr_t ipref; int mode; { register struct fs *fs; register struct cg *cgp; struct buf *bp; int error, start, len, loc, map, i; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nifree == 0) return (0); error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); return (0); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { brelse(bp); return (0); } cgp->cg_time = time.tv_sec; if (ipref) { ipref %= fs->fs_ipg; if (isclr(cg_inosused(cgp), ipref)) goto gotit; } start = cgp->cg_irotor / NBBY; len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); loc = skpc(0xff, len, &cg_inosused(cgp)[start]); if (loc == 0) { len = start + 1; start = 0; loc = skpc(0xff, len, &cg_inosused(cgp)[0]); if (loc == 0) { printf("cg = %d, irotor = %ld, fs = %s\n", cg, cgp->cg_irotor, fs->fs_fsmnt); panic("ffs_nodealloccg: map corrupted"); /* NOTREACHED */ } } i = start + len - loc; map = cg_inosused(cgp)[i]; ipref = i * NBBY; for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { if ((map & i) == 0) { cgp->cg_irotor = ipref; goto gotit; } } printf("fs = %s\n", fs->fs_fsmnt); panic("ffs_nodealloccg: block not in map"); /* NOTREACHED */ gotit: + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); setbit(cg_inosused(cgp), ipref); cgp->cg_cs.cs_nifree--; fs->fs_cstotal.cs_nifree--; fs->fs_cs(fs, cg).cs_nifree--; fs->fs_fmod = 1; if ((mode & IFMT) == IFDIR) { cgp->cg_cs.cs_ndir++; fs->fs_cstotal.cs_ndir++; fs->fs_cs(fs, cg).cs_ndir++; } bdwrite(bp); return (cg * fs->fs_ipg + ipref); } /* * Free a block or fragment. * * The specified block or fragment is placed back in the * free map. If a fragment is deallocated, a possible * block reassembly is checked. 
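* (E.g., with the common fs_frag of 8, freeing 3 frags adjacent to 5 already-free frags reassembles a whole block, and the code below moves that space from the fragment counts to the free-block counts.)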
*/ void ffs_blkfree(ip, bno, size) register struct inode *ip; ufs_daddr_t bno; long size; { register struct fs *fs; register struct cg *cgp; struct buf *bp; ufs_daddr_t blkno; int i, error, cg, blk, frags, bbase; fs = ip->i_fs; - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { - printf("dev = 0x%lx, bsize = %ld, size = %ld, fs = %s\n", - (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || + fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { + printf("dev=0x%lx, bno = %d, bsize = %d, size = %ld, fs = %s\n", + (u_long)ip->i_dev, bno, fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_blkfree: bad size"); } cg = dtog(fs, bno); if ((u_int)bno >= fs->fs_size) { printf("bad block %ld, ino %ld\n", bno, ip->i_number); ffs_fserr(fs, ip->i_uid, "bad block"); return; } error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); return; } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { brelse(bp); return; } cgp->cg_time = time.tv_sec; bno = dtogd(fs, bno); if (size == fs->fs_bsize) { blkno = fragstoblks(fs, bno); - if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) { printf("dev = 0x%lx, block = %ld, fs = %s\n", (u_long) ip->i_dev, bno, fs->fs_fsmnt); panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, cg_blksfree(cgp), blkno); ffs_clusteracct(fs, cgp, blkno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; i = cbtocylno(fs, bno); cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; cg_blktot(cgp)[i]++; } else { bbase = bno - fragnum(fs, bno); /* * decrement the counts associated with the old frags */ blk = blkmap(fs, cg_blksfree(cgp), bbase); ffs_fragacct(fs, blk, cgp->cg_frsum, -1); /* * deallocate the fragment */ frags = numfrags(fs, size); for (i = 0; i < frags; i++) { if (isset(cg_blksfree(cgp), bno + i)) { printf("dev = 0x%lx, block = %ld, fs = %s\n", (u_long) ip->i_dev, bno + i, fs->fs_fsmnt); panic("ffs_blkfree: freeing free frag"); } setbit(cg_blksfree(cgp), bno + i); } cgp->cg_cs.cs_nffree += i; fs->fs_cstotal.cs_nffree += i; fs->fs_cs(fs, cg).cs_nffree += i; /* * add back in counts associated with the new frags */ blk = blkmap(fs, cg_blksfree(cgp), bbase); ffs_fragacct(fs, blk, cgp->cg_frsum, 1); /* * if a complete block has been reassembled, account for it */ blkno = fragstoblks(fs, bbase); if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { cgp->cg_cs.cs_nffree -= fs->fs_frag; fs->fs_cstotal.cs_nffree -= fs->fs_frag; fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; ffs_clusteracct(fs, cgp, blkno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; i = cbtocylno(fs, bbase); cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; cg_blktot(cgp)[i]++; } } fs->fs_fmod = 1; bdwrite(bp); } #ifdef DIAGNOSTIC /* * Verify allocation of a block or fragment. Returns true if block or * fragment is allocated, false if it is free. 
*/ static int ffs_checkblk(ip, bno, size) struct inode *ip; ufs_daddr_t bno; long size; { struct fs *fs; struct cg *cgp; struct buf *bp; int i, error, frags, free; fs = ip->i_fs; if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { printf("bsize = %d, size = %d, fs = %s\n", fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_checkblk: bad size"); } if ((u_int)bno >= fs->fs_size) panic("ffs_checkblk: bad block %d", bno); error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), (int)fs->fs_cgsize, NOCRED, &bp); if (error) panic("ffs_checkblk: cg bread failed"); cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) panic("ffs_checkblk: cg magic mismatch"); bno = dtogd(fs, bno); if (size == fs->fs_bsize) { free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); } else { frags = numfrags(fs, size); for (free = 0, i = 0; i < frags; i++) if (isset(cg_blksfree(cgp), bno + i)) free++; if (free != 0 && free != frags) panic("ffs_checkblk: partially free fragment"); } brelse(bp); return (!free); } #endif /* DIAGNOSTIC */ /* * Free an inode. - * - * The specified inode is placed back in the free map. */ int -ffs_vfree(pvp, ino, mode) +ffs_vfree( pvp, ino, mode) struct vnode *pvp; ino_t ino; int mode; { + if (DOINGSOFTDEP(pvp)) { + softdep_freefile(pvp, ino, mode); + return (0); + } + return (ffs_freefile(pvp, ino, mode)); +} + +/* + * Do the actual free operation. + * The specified inode is placed back in the free map. + */ + int + ffs_freefile( pvp, ino, mode) + struct vnode *pvp; + ino_t ino; + int mode; +{ register struct fs *fs; register struct cg *cgp; register struct inode *pip; struct buf *bp; int error, cg; pip = VTOI(pvp); fs = pip->i_fs; if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) panic("ffs_vfree: range: dev = 0x%x, ino = %d, fs = %s", pip->i_dev, ino, fs->fs_fsmnt); cg = ino_to_cg(fs, ino); error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); - return (0); + return (error); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { brelse(bp); return (0); } cgp->cg_time = time.tv_sec; ino %= fs->fs_ipg; if (isclr(cg_inosused(cgp), ino)) { printf("dev = 0x%lx, ino = %ld, fs = %s\n", (u_long)pip->i_dev, ino, fs->fs_fsmnt); if (fs->fs_ronly == 0) panic("ffs_vfree: freeing free inode"); } clrbit(cg_inosused(cgp), ino); if (ino < cgp->cg_irotor) cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; fs->fs_cstotal.cs_nifree++; fs->fs_cs(fs, cg).cs_nifree++; if ((mode & IFMT) == IFDIR) { cgp->cg_cs.cs_ndir--; fs->fs_cstotal.cs_ndir--; fs->fs_cs(fs, cg).cs_ndir--; } fs->fs_fmod = 1; bdwrite(bp); return (0); } /* * Find a block of the specified size in the specified cylinder group. * * It is a panic if a request is made to find a block if none are * available. 
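* (The scan starts at the byte containing bpref, or at cg_frotor when no preference is given, and wraps to the front of the map if the tail yields nothing.)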
*/ static ufs_daddr_t ffs_mapsearch(fs, cgp, bpref, allocsiz) register struct fs *fs; register struct cg *cgp; ufs_daddr_t bpref; int allocsiz; { ufs_daddr_t bno; int start, len, loc, i; int blk, field, subfield, pos; /* * find the fragment by searching through the free block * map for an appropriate bit pattern */ if (bpref) start = dtogd(fs, bpref) / NBBY; else start = cgp->cg_frotor / NBBY; len = howmany(fs->fs_fpg, NBBY) - start; loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start], (u_char *)fragtbl[fs->fs_frag], (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); if (loc == 0) { len = start + 1; start = 0; loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0], (u_char *)fragtbl[fs->fs_frag], (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); if (loc == 0) { printf("start = %d, len = %d, fs = %s\n", start, len, fs->fs_fsmnt); panic("ffs_alloccg: map corrupted"); /* NOTREACHED */ } } bno = (start + len - loc) * NBBY; cgp->cg_frotor = bno; /* * found the byte in the map * sift through the bits to find the selected frag */ for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { blk = blkmap(fs, cg_blksfree(cgp), bno); blk <<= 1; field = around[allocsiz]; subfield = inside[allocsiz]; for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { if ((blk & field) == subfield) return (bno + pos); field <<= 1; subfield <<= 1; } } printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); panic("ffs_alloccg: block not in map"); return (-1); } /* * Update the cluster map because of an allocation or free. * * Cnt == 1 means free; cnt == -1 means allocating. */ static void ffs_clusteracct(fs, cgp, blkno, cnt) struct fs *fs; struct cg *cgp; ufs_daddr_t blkno; int cnt; { int32_t *sump; int32_t *lp; u_char *freemapp, *mapp; int i, start, end, forw, back, map, bit; if (fs->fs_contigsumsize <= 0) return; freemapp = cg_clustersfree(cgp); sump = cg_clustersum(cgp); /* * Allocate or clear the actual block. */ if (cnt > 0) setbit(freemapp, blkno); else clrbit(freemapp, blkno); /* * Find the size of the cluster going forward. */ start = blkno + 1; end = start + fs->fs_contigsumsize; if (end >= cgp->cg_nclusterblks) end = cgp->cg_nclusterblks; mapp = &freemapp[start / NBBY]; map = *mapp++; bit = 1 << (start % NBBY); for (i = start; i < end; i++) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } forw = i - start; /* * Find the size of the cluster going backward. */ start = blkno - 1; end = start - fs->fs_contigsumsize; if (end < 0) end = -1; mapp = &freemapp[start / NBBY]; map = *mapp--; bit = 1 << (start % NBBY); for (i = start; i > end; i--) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != 0) { bit >>= 1; } else { map = *mapp--; bit = 1 << (NBBY - 1); } } back = start - i; /* * Account for old cluster and the possibly new forward and * back clusters. */ i = back + forw + 1; if (i > fs->fs_contigsumsize) i = fs->fs_contigsumsize; sump[i] += cnt; if (back > 0) sump[back] -= cnt; if (forw > 0) sump[forw] -= cnt; /* * Update cluster summary information. */ lp = &sump[fs->fs_contigsumsize]; for (i = fs->fs_contigsumsize; i > 0; i--) if (*lp-- > 0) break; fs->fs_maxcluster[cgp->cg_cgx] = i; } /* * Fserr prints the name of a file system with an error diagnostic. * * The form of the error message is: * fs: error message */ static void ffs_fserr(fs, uid, cp) struct fs *fs; u_int uid; char *cp; { struct proc *p = curproc; /* XXX */ log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1, p ? 
p->p_comm : "-", uid, fs->fs_fsmnt, cp); } Index: head/sys/ufs/ffs/ffs_balloc.c =================================================================== --- head/sys/ufs/ffs/ffs_balloc.c (revision 34265) +++ head/sys/ufs/ffs/ffs_balloc.c (revision 34266) @@ -1,312 +1,354 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 - * $Id: ffs_balloc.c,v 1.18 1998/02/04 22:33:31 eivind Exp $ + * $Id: ffs_balloc.c,v 1.19 1998/02/06 12:14:14 eivind Exp $ */ #include #include #include #include +#include #include #include #include #include #include #include /* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. 
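To make the comment above concrete: a logical block number is served by the inode's direct pointers, or by one of NIADDR levels of indirect blocks. A small sketch of working out the indirection level for a given lbn, in the spirit of what ufs_getlbns() computes (assumed 8K blocks and 4-byte pointers; not the kernel code):

#include <stdio.h>

#define NDADDR	12	/* direct pointers in an FFS inode */
#define NIADDR	3	/* single, double, triple indirect */

/* Indirection level needed to reach logical block lbn: 0 = direct,
 * 1..3 = single/double/triple indirect, -1 = beyond triple (EFBIG). */
static int
indir_level(long long lbn, long long nindir)
{
	long long span = NDADDR;	/* blocks reachable so far */
	long long per = 1;		/* data blocks under one pointer */
	int level;

	if (lbn < NDADDR)
		return (0);
	for (level = 1; level <= NIADDR; level++) {
		per *= nindir;
		if (lbn < span + per)
			return (level);
		span += per;
	}
	return (-1);
}

int
main(void)
{
	long long nindir = 8192 / 4;	/* NINDIR(fs) for 8K blocks */

	printf("lbn 5    -> level %d\n", indir_level(5, nindir));	/* 0 */
	printf("lbn 100  -> level %d\n", indir_level(100, nindir));	/* 1 */
	printf("lbn 5000 -> level %d\n", indir_level(5000, nindir));	/* 2 */
	return (0);
}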
*/ int -ffs_balloc(ip, lbn, size, cred, bpp, flags) +ffs_balloc(ap) + struct vop_balloc_args /* { + struct vnode *a_vp; + off_t a_startoffset; + int a_size; + struct ucred *a_cred; + int a_flags; + struct buf **a_bpp; + } */ *ap; +{ register struct inode *ip; register ufs_daddr_t lbn; int size; struct ucred *cred; - struct buf **bpp; int flags; -{ - register struct fs *fs; - register ufs_daddr_t nb; + struct fs *fs; + ufs_daddr_t nb; struct buf *bp, *nbp; - struct vnode *vp = ITOV(ip); + struct vnode *vp; struct indir indirs[NIADDR + 2]; ufs_daddr_t newb, *bap, pref; int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; - *bpp = NULL; + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_fs; + lbn = lblkno(fs, ap->a_startoffset); + size = blkoff(fs, ap->a_startoffset) + ap->a_size; + if (size > fs->fs_bsize) + panic("ffs_balloc: blk too big"); + *ap->a_bpp = NULL; if (lbn < 0) return (EFBIG); - fs = ip->i_fs; + cred = ap->a_cred; + flags = ap->a_flags; /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_size); if (nb < NDADDR && nb < lbn) { osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, nb, + dbtofsb(fs, bp->b_blkno), ip->i_db[nb], + fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); ip->i_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & B_SYNC) bwrite(bp); else bawrite(bp); } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); - *bpp = bp; + *ap->a_bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { error = ffs_realloccg(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, + dbtofsb(fs, bp->b_blkno), nb, + nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), nsize, cred, &newb); if (error) return (error); bp = getblk(vp, lbn, nsize, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) vfs_bio_clrbuf(bp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, newb, 0, + nsize, 0, bp); } ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; + *ap->a_bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if (error = ufs_getlbns(vp, lbn, indirs, &num)) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ffs_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary.
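The converted entry point above no longer receives (lbn, size) from the caller; it derives them from the byte offset carried in the args structure: lbn = lblkno(fs, a_startoffset) and size = blkoff(fs, a_startoffset) + a_size, and it panics if that exceeds one block. Because fs_bsize is a power of two, both macros reduce to a shift and a mask. A userland sketch of the arithmetic (8K blocks assumed; plain constants stand in for the kernel macros):

#include <stdio.h>

#define BSHIFT	13			/* log2(8192), i.e. fs_bshift */
#define BSIZE	(1L << BSHIFT)
#define BMASK	(BSIZE - 1)

int
main(void)
{
	long startoffset = 20000;	/* byte offset of the write */
	int a_size = 512;		/* bytes to allocate */

	long lbn = startoffset >> BSHIFT;	/* lblkno() */
	long off = startoffset & BMASK;		/* blkoff() */
	long size = off + a_size;		/* bytes needed in that block */

	printf("lbn %ld, offset in block %ld, size %ld\n", lbn, off, size);
	/* ffs_balloc() panics if size > fs_bsize: a caller may only ask
	 * for space within a single block per call. */
	return (0);
}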
*/ --num; nb = ip->i_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) return (error); nb = newb; *allocblk++ = nb; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if (error = bwrite(bp)) - goto fail; + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, + newb, 0, fs->fs_bsize, 0, bp); + bdwrite(bp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(bp)) + goto fail; + } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if (error = bwrite(nbp)) { - brelse(bp); - goto fail; + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocindir_meta(nbp, ip, bp, + indirs[i - 1].in_off, nb); + bdwrite(nbp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(nbp)) { + brelse(bp); + goto fail; + } } bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) vfs_bio_clrbuf(nbp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocindir_page(ip, lbn, bp, + indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); } brelse(bp); if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); if (error) { brelse(nbp); goto fail; } } else { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (allocib != NULL) *allocib = 0; if (deallocated) { #ifdef QUOTA /* * Restore user's disk quota because allocation failed. 
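Both hunks above preserve the same on-disk invariant in two different ways: an indirect block must never reach the disk while it still points at uninitialized blocks. Without soft updates the freshly allocated indirect block is written synchronously (bwrite) before anything points at it; with soft updates the write is delayed (bdwrite) and softdep_setup_allocindir_meta()/softdep_setup_allocdirect() record the dependency so the buffer layer enforces the order. A purely schematic sketch of the two strategies (hypothetical helpers, not kernel calls):

#include <stdio.h>

static void write_sync(const char *what)    { printf("sync write:    %s\n", what); }
static void write_delayed(const char *what) { printf("delayed write: %s\n", what); }

/* Install a pointer to a freshly allocated indirect block. */
static void
install_indirect(int softdep)
{
	if (softdep) {
		/* Record "parent must not hit disk before child" and let
		 * both writes be delayed; the dependency code orders them. */
		write_delayed("new indirect block (dependency recorded)");
		write_delayed("parent block naming it");
	} else {
		/* The child must be on disk before the parent names it. */
		write_sync("new indirect block");
		write_delayed("parent block naming it");
	}
}

int
main(void)
{
	install_indirect(0);	/* the bwrite() path above */
	install_indirect(1);	/* the bdwrite() + softdep path above */
	return (0);
}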
*/ (void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE); #endif ip->i_blocks -= btodb(deallocated); ip->i_flag |= IN_CHANGE | IN_UPDATE; } return (error); } Index: head/sys/ufs/ffs/ffs_extern.h =================================================================== --- head/sys/ufs/ffs/ffs_extern.h (revision 34265) +++ head/sys/ufs/ffs/ffs_extern.h (revision 34266) @@ -1,105 +1,133 @@ /*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 - * $Id: ffs_extern.h,v 1.21 1997/11/22 08:35:45 bde Exp $ + * $Id: ffs_extern.h,v 1.22 1998/02/03 21:52:00 bde Exp $ */ #ifndef _UFS_FFS_EXTERN_H #define _UFS_FFS_EXTERN_H /* * Sysctl values for the fast filesystem. 
*/ #define FFS_REALLOCBLKS 3 /* block reallocation enabled */ #define FFS_ASYNCFREE 4 /* asynchronous block freeing enabled */ #define FFS_MAXID 5 /* number of valid ffs ids */ #define FFS_NAMES { \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { "doreallocblks", CTLTYPE_INT }, \ { "doasyncfree", CTLTYPE_INT }, \ } struct buf; struct fid; struct fs; struct inode; struct malloc_type; struct mount; struct proc; struct sockaddr; struct statfs; struct ucred; struct vnode; struct vop_bmap_args; struct vop_reallocblks_args; int ffs_alloc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *)); -int ffs_balloc __P((struct inode *, - ufs_daddr_t, int, struct ucred *, struct buf **, int)); +int ffs_balloc __P((struct vop_balloc_args *)); int ffs_blkatoff __P((struct vnode *, off_t, char **, struct buf **)); void ffs_blkfree __P((struct inode *, ufs_daddr_t, long)); ufs_daddr_t ffs_blkpref __P((struct inode *, ufs_daddr_t, int, ufs_daddr_t *)); int ffs_bmap __P((struct vop_bmap_args *)); void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); int ffs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); int ffs_flushfiles __P((struct mount *, int, struct proc *)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); +int ffs_freefile __P(( struct vnode *, ino_t, int )); int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); +int ffs_isfreeblock __P((struct fs *, unsigned char *, ufs_daddr_t)); int ffs_mountfs __P((struct vnode *, struct mount *, struct proc *, struct malloc_type *)); int ffs_mountroot __P((void)); int ffs_reallocblks __P((struct vop_reallocblks_args *)); int ffs_realloccg __P((struct inode *, ufs_daddr_t, ufs_daddr_t, int, int, struct ucred *, struct buf **)); void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); int ffs_statfs __P((struct mount *, struct statfs *, struct proc *)); int ffs_sync __P((struct mount *, int, struct ucred *, struct proc *)); int ffs_truncate __P((struct vnode *, off_t, int, struct ucred *, struct proc *)); int ffs_unmount __P((struct mount *, int, struct proc *)); int ffs_update __P((struct vnode *, struct timeval *, struct timeval *, int)); int ffs_valloc __P((struct vnode *, int, struct ucred *, struct vnode **)); int ffs_vfree __P((struct vnode *, ino_t, int)); int ffs_vget __P((struct mount *, ino_t, struct vnode **)); int ffs_vptofh __P((struct vnode *, struct fid *)); extern vop_t **ffs_vnodeop_p; extern vop_t **ffs_specop_p; extern vop_t **ffs_fifoop_p; +/* + * Soft update function prototypes. 
+ */ +void softdep_initialize __P((void)); +int softdep_process_worklist __P((struct mount *)); +int softdep_mount __P((struct vnode *, struct mount *, struct fs *, + struct ucred *)); +int softdep_flushfiles __P((struct mount *, int, struct proc *)); +void softdep_update_inodeblock __P((struct inode *, struct buf *, int)); +void softdep_load_inodeblock __P((struct inode *)); +int softdep_fsync __P((struct vnode *)); +void softdep_freefile __P((struct vnode *, ino_t, int)); +void softdep_setup_freeblocks __P((struct inode *, off_t)); +void softdep_deallocate_dependencies __P((struct buf *)); +void softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t)); +void softdep_setup_blkmapdep __P((struct buf *, struct fs *, ufs_daddr_t)); +void softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, ufs_daddr_t, + ufs_daddr_t, long, long, struct buf *)); +void softdep_setup_allocindir_meta __P((struct buf *, struct inode *, + struct buf *, int, ufs_daddr_t)); +void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t, + struct buf *, int, ufs_daddr_t, ufs_daddr_t, struct buf *)); +void softdep_disk_io_initiation __P((struct buf *)); +void softdep_disk_write_complete __P((struct buf *)); +int softdep_sync_metadata __P((struct vop_fsync_args *)); + #endif /* !_UFS_FFS_EXTERN_H */ + Index: head/sys/ufs/ffs/ffs_inode.c =================================================================== --- head/sys/ufs/ffs/ffs_inode.c (revision 34265) +++ head/sys/ufs/ffs/ffs_inode.c (revision 34266) @@ -1,520 +1,558 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 - * $Id: ffs_inode.c,v 1.34 1998/02/06 12:14:14 eivind Exp $ + * $Id: ffs_inode.c,v 1.35 1998/03/07 21:36:33 dyson Exp $ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ffs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, ufs_daddr_t, int, long *)); /* * Update the access, modified, and inode change times as specified by the * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED * flag is used to specify that the inode needs to be updated even if none * of the times needs to be updated. The access and modified times are taken * from the second and third parameters; the inode change time is always * taken from the current time. If waitfor is set, then wait for the disk * write of the inode to complete. */ int ffs_update(vp, access, modify, waitfor) struct vnode *vp; struct timeval *access; struct timeval *modify; int waitfor; { register struct fs *fs; struct buf *bp; struct inode *ip; int error; time_t tv_sec; ip = VTOI(vp); if (vp->v_mount->mnt_flag & MNT_RDONLY) { ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); return (0); } - if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + if (((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && + (waitfor != MNT_WAIT)) return (0); /* * Use a copy of the current time to get consistent timestamps * (a_access and a_modify are sometimes aliases for &time). * * XXX in 2.0, a_access and a_modify are often pointers to the * same copy of `time'. This is not as good. Some callers forget * to make a copy; others make a copy too early (before the i/o * has completed)... * * XXX there should be a function or macro for reading the time * (e.g., some machines may require splclock()). */ tv_sec = time.tv_sec; if (ip->i_flag & IN_ACCESS) ip->i_atime = (access == &time ? tv_sec : access->tv_sec); if (ip->i_flag & IN_UPDATE) { ip->i_mtime = (modify == &time ? tv_sec : modify->tv_sec); ip->i_modrev++; } if (ip->i_flag & IN_CHANGE) ip->i_ctime = tv_sec; ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); fs = ip->i_fs; /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ ip->i_din.di_ouid = ip->i_uid; /* XXX */ ip->i_din.di_ogid = ip->i_gid; /* XXX */ } /* XXX */ error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } + if (DOINGSOFTDEP(vp)) + softdep_update_inodeblock(ip, bp, waitfor); + else if (ip->i_effnlink != ip->i_nlink) + panic("ffs_update: bad link cnt"); *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; - if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) + if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { return (bwrite(bp)); - else { + } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); return (0); } } #define SINGLE 0 /* index of single indirect block */ #define DOUBLE 1 /* index of double indirect block */ #define TRIPLE 2 /* index of triple indirect block */ /* * Truncate the inode oip to at most length size, freeing the * disk blocks. 
*/ int ffs_truncate(vp, length, flags, cred, p) struct vnode *vp; off_t length; int flags; struct ucred *cred; struct proc *p; { register struct vnode *ovp = vp; ufs_daddr_t lastblock; register struct inode *oip; ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; register struct fs *fs; struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; struct timeval tv; register int i; int aflags, error, allerror; off_t osize; oip = VTOI(ovp); + if (oip->i_size == length) + return (0); fs = oip->i_fs; if (length < 0) return (EINVAL); if (length > fs->fs_maxfilesize) return (EFBIG); gettime(&tv); if (ovp->v_type == VLNK && (oip->i_size < ovp->v_mount->mnt_maxsymlinklen || oip->i_din.di_blocks == 0)) { #ifdef DIAGNOSTIC if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); oip->i_size = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(ovp, &tv, &tv, 1)); } if (oip->i_size == length) { oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(ovp, &tv, &tv, 0)); } #ifdef QUOTA error = getinoquota(oip); if (error) return (error); #endif + ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0; + if (DOINGSOFTDEP(ovp)) { + if (length > 0) { + /* + * If a file is only partially truncated, then + * we have to clean up the data structures + * describing the allocation past the truncation + * point. Finding and deallocating those structures + * is a lot of work. Since partial truncation occurs + * rarely, we solve the problem by syncing the file + * so that it will have no data structures left. + */ + if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, + p)) != 0) + return (error); + } else { +#ifdef QUOTA + (void) chkdq(oip, -oip->i_blocks, NOCRED, 0); +#endif + softdep_setup_freeblocks(oip, length); + (void) vinvalbuf(ovp, 0, cred, p, 0, 0); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (ffs_update(ovp, &tv, &tv, 0)); + } + } osize = oip->i_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { vnode_pager_setsize(ovp, length); +#if 0 offset = blkoff(fs, length - 1); lbn = lblkno(fs, length - 1); +#endif aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset + 1, cred, - &bp, aflags); + error = VOP_BALLOC(ovp, length - 1, 1, + cred, aflags, &bp); if (error) return (error); oip->i_size = length; if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (aflags & B_SYNC) bwrite(bp); else if (ovp->v_mount->mnt_flag & MNT_ASYNC) bdwrite(bp); else bawrite(bp); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(ovp, &tv, &tv, 1)); } /* * Shorten the size of the file. If the file is not being * truncated to a block boundary, the contents of the * partial block following the end of the file must be * zero'ed in case it ever becomes accessible again because * of subsequent file growth.
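When the file is being shortened to a mid-block length, the code that follows zeroes the tail of the new last block so stale bytes cannot reappear if the file later grows back over them. A tiny userland sketch of that arithmetic (8K blocks assumed; memset stands in for the kernel bzero on bp->b_data):

#include <stdio.h>
#include <string.h>

#define BSIZE	8192L

int
main(void)
{
	char block[BSIZE];
	long length = 20000;		/* new file size */
	long offset = length % BSIZE;	/* blkoff(fs, length) */

	memset(block, 'A', sizeof(block));	/* pretend old file data */
	if (offset != 0) {
		/* bzero((char *)bp->b_data + offset, size - offset) above */
		memset(block + offset, 0, BSIZE - offset);
		printf("zeroed bytes %ld..%ld of the last block\n",
		    offset, BSIZE - 1);
	}
	return (0);
}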
*/ offset = blkoff(fs, length); if (offset == 0) { oip->i_size = length; } else { lbn = lblkno(fs, length); aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset, cred, &bp, aflags); - if (error) + error = VOP_BALLOC(ovp, length - 1, 1, cred, aflags, &bp); + if (error) { +#if 0 /* kirk's version had this */ + vnode_pager_setsize(ovp, (u_long)osize); +#endif return (error); + } oip->i_size = length; size = blksize(fs, oip, lbn); bzero((char *)bp->b_data + offset, (u_int)(size - offset)); allocbuf(bp, size); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (aflags & B_SYNC) bwrite(bp); else if (ovp->v_mount->mnt_flag & MNT_ASYNC) bdwrite(bp); else bawrite(bp); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. */ bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); for (level = TRIPLE; level >= SINGLE; level--) if (lastiblock[level] < 0) { oip->i_ib[level] = 0; lastiblock[level] = -1; } for (i = NDADDR - 1; i > lastblock; i--) oip->i_db[i] = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; error = UFS_UPDATE(ovp, &tv, &tv, ((length > 0) ? 0 : 1)); if (error) allerror = error; /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); oip->i_size = osize; vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; allerror = vinvalbuf(ovp, vflags, cred, p, 0, 0); vnode_pager_setsize(ovp, length); /* * Indirect blocks first. */ indir_lbn[SINGLE] = -NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = oip->i_ib[level]; if (bn != 0) { error = ffs_indirtrunc(oip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { oip->i_ib[level] = 0; ffs_blkfree(oip, bn, fs->fs_bsize); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. */ for (i = NDADDR - 1; i > lastblock; i--) { register long bsize; bn = oip->i_db[i]; if (bn == 0) continue; oip->i_db[i] = 0; bsize = blksize(fs, oip, i); ffs_blkfree(oip, bn, bsize); blocksreleased += btodb(bsize); } if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = oip->i_db[lastblock]; if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. 
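The fragment-release arithmetic that follows is easiest to check with numbers: with 8K blocks and 1K fragments, shrinking the last block from 8K to a 3K tail keeps numfrags(fs, newspace) = 3 fragments, so the freed range starts at bn + 3 and spans oldspace - newspace = 5K. A worked sketch (assumed sizes, not the kernel macros):

#include <stdio.h>

#define FSIZE	1024L	/* fragment size */

int
main(void)
{
	long bn = 1000;		/* first fragment of the old last block */
	long oldspace = 8192;	/* blksize() before the size change */
	long newspace = 3072;	/* blksize() after oip->i_size = length */

	if (oldspace - newspace > 0) {
		long keep = newspace / FSIZE;		/* numfrags(fs, newspace) */
		printf("free %ld bytes starting at fragment %ld\n",
		    oldspace - newspace, bn + keep);	/* 5120 at 1003 */
	}
	return (0);
}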
*/ oldspace = blksize(fs, oip, lastblock); oip->i_size = length; newspace = blksize(fs, oip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(oip, bn, oldspace - newspace); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef DIAGNOSTIC for (level = SINGLE; level <= TRIPLE; level++) if (newblks[NDADDR + level] != oip->i_ib[level]) panic("ffs_truncate1"); for (i = 0; i < NDADDR; i++) if (newblks[i] != oip->i_db[i]) panic("ffs_truncate2"); if (length == 0 && (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first)) panic("ffs_truncate3"); #endif /* DIAGNOSTIC */ /* * Put back the real size. */ oip->i_size = length; oip->i_blocks -= blocksreleased; if (oip->i_blocks < 0) /* sanity */ oip->i_blocks = 0; oip->i_flag |= IN_CHANGE; vnode_pager_setsize(ovp, length); #ifdef QUOTA (void) chkdq(oip, -blocksreleased, NOCRED, 0); #endif return (allerror); } /* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ static int ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) register struct inode *ip; ufs_daddr_t lbn, lastbn; ufs_daddr_t dbn; int level; long *countp; { register int i; struct buf *bp; register struct fs *fs = ip->i_fs; register ufs_daddr_t *bap; struct vnode *vp; ufs_daddr_t *copy = NULL, nb, nlbn, last; long blkcount, factor; int nblocks, blocksreleased = 0; int error = 0, allerror = 0; /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { curproc->p_stats->p_ru.ru_inblock++; /* pay for read */ bp->b_flags |= B_READ; if (bp->b_bcount > bp->b_bufsize) panic("ffs_indirtrunc: bad buffer size"); bp->b_blkno = dbn; vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); error = biowait(bp); } if (error) { brelse(bp); *countp = 0; return (error); } bap = (ufs_daddr_t *)bp->b_data; if (lastbn != -1) { MALLOC(copy, ufs_daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK); bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); bzero((caddr_t)&bap[last + 1], (u_int)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t)); if ((vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { error = bwrite(bp); if (error) allerror = error; } else { bawrite(bp); } bap = copy; } /* * Recursively free totally unused blocks. 
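In ffs_indirtrunc() above, factor is NINDIR(fs) raised to the number of indirection levels below the current block, so last = lastbn / factor is the last pointer slot in this block that still leads to data being kept. A small sketch of that computation (NINDIR assumed 2048, i.e. 8K blocks with 4-byte pointers):

#include <stdio.h>

#define NINDIR	2048L	/* pointers per indirect block */

int
main(void)
{
	long lastbn = 3000000;	/* last data block to keep, relative lbn */
	int level = 1;		/* 0 = single, 1 = double, 2 = triple */
	long factor = 1;
	int i;

	for (i = 0; i < level; i++)	/* for (i = SINGLE; i < level; i++) */
		factor *= NINDIR;
	/* Each slot of this indirect block covers `factor` data blocks. */
	printf("keep pointers 0..%ld in this block\n", lastbn / factor);
	return (0);
}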
*/ for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = bap[i]; if (nb == 0) continue; if (level > SINGLE) { if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), (ufs_daddr_t)-1, level - 1, &blkcount)) allerror = error; blocksreleased += blkcount; } ffs_blkfree(ip, nb, fs->fs_bsize); blocksreleased += nblocks; } /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = bap[i]; if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), last, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } } if (copy != NULL) { FREE(copy, M_TEMP); } else { bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); } *countp = blocksreleased; return (allerror); } Index: head/sys/ufs/ffs/ffs_subr.c =================================================================== --- head/sys/ufs/ffs/ffs_subr.c (revision 34265) +++ head/sys/ufs/ffs/ffs_subr.c (revision 34266) @@ -1,248 +1,272 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 - * $Id: ffs_subr.c,v 1.18 1998/02/06 12:14:14 eivind Exp $ + * $Id: ffs_subr.c,v 1.19 1998/02/13 00:20:36 bde Exp $ */ #include #include #ifndef KERNEL #include #else #include "opt_ddb.h" #include #include #include #include #include #include #include #ifdef DDB static void ffs_checkoverlap __P((struct buf *, struct inode *)); #endif /* * Return buffer with the contents of block "offset" from the beginning of * directory "ip". If "res" is non-zero, fill it in with a pointer to the * remaining space in the directory. 
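ffs_blkatoff(), whose body follows, amounts to "read the block containing byte offset and return a pointer into its data": lblkno() picks the buffer and blkoff() indexes into it. A minimal userland stand-in (toy in-memory blocks, 8K size assumed):

#include <stdio.h>

#define BSIZE	8192L

/* Toy backing store in place of bread()'s buffer cache. */
static char blocks[4][BSIZE];

static char *
blkatoff(long offset)
{
	long lbn = offset / BSIZE;		/* lblkno(fs, offset) */

	return (&blocks[lbn][offset % BSIZE]);	/* b_data + blkoff() */
}

int
main(void)
{
	blocks[2][3616] = 'X';
	printf("byte at offset 20000: %c\n", *blkatoff(20000));	/* X */
	return (0);
}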
*/ int ffs_blkatoff(vp, offset, res, bpp) struct vnode *vp; off_t offset; char **res; struct buf **bpp; { struct inode *ip; register struct fs *fs; struct buf *bp; ufs_daddr_t lbn; int bsize, error; ip = VTOI(vp); fs = ip->i_fs; lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); *bpp = NULL; error = bread(vp, lbn, bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } if (res) *res = (char *)bp->b_data + blkoff(fs, offset); *bpp = bp; return (0); } #endif /* * Update the frsum fields to reflect addition or deletion * of some frags. */ void ffs_fragacct(fs, fragmap, fraglist, cnt) struct fs *fs; int fragmap; int32_t fraglist[]; int cnt; { int inblk; register int field, subfield; register int siz, pos; inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; fragmap <<= 1; for (siz = 1; siz < fs->fs_frag; siz++) { if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) continue; field = around[siz]; subfield = inside[siz]; for (pos = siz; pos <= fs->fs_frag; pos++) { if ((fragmap & field) == subfield) { fraglist[siz] += cnt; pos += siz; field <<= siz; subfield <<= siz; } field <<= 1; subfield <<= 1; } } } #ifdef DDB static void ffs_checkoverlap(bp, ip) struct buf *bp; struct inode *ip; { register struct buf *ebp, *ep; register ufs_daddr_t start, last; struct vnode *vp; ebp = &buf[nbuf]; start = bp->b_blkno; last = start + btodb(bp->b_bcount) - 1; for (ep = buf; ep < ebp; ep++) { if (ep == bp || (ep->b_flags & B_INVAL) || ep->b_vp == NULLVP) continue; if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, NULL, NULL)) continue; if (vp != ip->i_devvp) continue; /* look for overlap */ if (ep->b_bcount == 0 || ep->b_blkno > last || ep->b_blkno + btodb(ep->b_bcount) <= start) continue; vprint("Disk overlap", vp); (void)printf("\tstart %lu, end %lu overlap start %lu, end %lu\n", (u_long)start, (u_long)last, (u_long)ep->b_blkno, (u_long)(ep->b_blkno + btodb(ep->b_bcount) - 1)); panic("ffs_checkoverlap: Disk buffer overlap"); } } #endif /* DDB */ /* * block operations * * check if a block is available */ int ffs_isblock(fs, cp, h) struct fs *fs; unsigned char *cp; ufs_daddr_t h; { unsigned char mask; switch ((int)fs->fs_frag) { case 8: return (cp[h] == 0xff); case 4: mask = 0x0f << ((h & 0x1) << 2); return ((cp[h >> 1] & mask) == mask); case 2: mask = 0x03 << ((h & 0x3) << 1); return ((cp[h >> 2] & mask) == mask); case 1: mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: panic("ffs_isblock"); + } +} + +/* + * check that no fragment of the block is free, i.e. that the block + * is completely allocated + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + ufs_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: + panic("ffs_isfreeblock"); } } /* * take a block out of the map */ void ffs_clrblock(fs, cp, h) struct fs *fs; u_char *cp; ufs_daddr_t h; { switch ((int)fs->fs_frag) { case 8: cp[h] = 0; return; case 4: cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: panic("ffs_clrblock"); } } /* * put a block into the map */ void ffs_setblock(fs, cp, h) struct fs *fs; unsigned char *cp; ufs_daddr_t h; { switch ((int)fs->fs_frag) { case 8: cp[h] = 0xff; return; case 4: cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] |= (0x03 << ((h & 0x3) <<
1)); return; case 1: cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: panic("ffs_setblock"); } } Index: head/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c (revision 34265) +++ head/sys/ufs/ffs/ffs_vfsops.c (revision 34266) @@ -1,1223 +1,1264 @@ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 - * $Id: ffs_vfsops.c,v 1.74 1998/03/07 14:59:44 bde Exp $ + * $Id: ffs_vfsops.c,v 1.75 1998/03/07 21:36:36 dyson Exp $ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part"); static int ffs_sbupdate __P((struct ufsmount *, int)); static int ffs_reload __P((struct mount *,struct ucred *,struct proc *)); static int ffs_oldfscompat __P((struct fs *)); static int ffs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int ffs_init __P((struct vfsconf *)); static struct vfsops ufs_vfsops = { ffs_mount, ufs_start, ffs_unmount, ufs_root, ufs_quotactl, ffs_statfs, ffs_sync, ffs_vget, vfs_vrele, ffs_fhtovp, ffs_vptofh, ffs_init, }; VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0); /* * ffs_mount * * Called when mounting local physical media * * PARAMETERS: * mountroot * mp mount point structure * path NULL (flag for root mount!!!) * data * ndp * p process (user credentials check [statfs]) * * mount * mp mount point structure * path path to mount point * data pointer to argument struct in user space * ndp mount point namei() return (used for * credentials on reload), reused to look * up block device. 
* p process (user credentials check) * * RETURNS: 0 Success * !0 error number (errno.h) * * LOCK STATE: * * ENTRY * mount point is locked * EXIT * mount point is locked * * NOTES: * A NULL path can be used for a flag since the mount * system call will fail with EFAULT in copyinstr in * namei() if it is a genuine NULL from the user. */ static int ffs_mount( mp, path, data, ndp, p) struct mount *mp; /* mount struct pointer*/ char *path; /* path to mount point*/ caddr_t data; /* arguments to FS specific mount*/ struct nameidata *ndp; /* mount point credentials*/ struct proc *p; /* process requesting mount*/ { u_int size; int err = 0; struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; register struct fs *fs; int error, flags; mode_t accessmode; /* * Use NULL path to flag a root mount */ if( path == NULL) { /* *** * Mounting root file system *** */ if ((err = bdevvp(rootdev, &rootvp))) { printf("ffs_mountroot: can't find rootvp"); return (err); } if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) { /* fs specific cleanup (if any)*/ goto error_1; } goto dostatfs; /* success*/ } /* *** * Mounting non-root file system or updating a file system *** */ /* copy in user arguments*/ err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)); if (err) goto error_1; /* can't get arguments*/ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags, * if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; err = 0; if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; - err = ffs_flushfiles(mp, flags, p); + if (mp->mnt_flag & MNT_SOFTDEP) { + err = softdep_flushfiles(mp, flags, p); + } else { + err = ffs_flushfiles(mp, flags, p); + } } if (!err && (mp->mnt_flag & MNT_RELOAD)) err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); if (err) { goto error_1; } if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { if (!fs->fs_clean) { if (mp->mnt_flag & MNT_FORCE) { printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt); } else { printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n", fs->fs_fsmnt); err = EPERM; goto error_1; } } /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { devvp = ump->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if (error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p)) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } fs->fs_ronly = 0; } if (fs->fs_ronly == 0) { fs->fs_clean = 0; ffs_sbupdate(ump, MNT_WAIT); } /* if not updating name...*/ if (args.fspec == 0) { /* * Process export requests. Jumping to "success" * will return the vfs_export() error code. */ err = vfs_export(mp, &ump->um_export, &args.export); goto success; } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (major(devvp->v_rdev) >= nblkdev) { err = ENXIO; goto error_2; } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if (error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if (mp->mnt_flag & MNT_UPDATE) { /* ******************** * UPDATE ******************** */ if (devvp != ump->um_devvp) err = EINVAL; /* needs translation */ else vrele(devvp); /* * Update device name only on success */ if( !err) { /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); } } else { /* ******************** * NEW MOUNT ******************** */ if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. */ /* Save "last mounted on" info for mount point (NULL pad)*/ copyinstr( path, /* mount point*/ mp->mnt_stat.f_mntonname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, /* device name*/ mp->mnt_stat.f_mntfromname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); err = ffs_mountfs(devvp, mp, p, M_FFSNODE); } if (err) { goto error_2; } dostatfs: /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: return( err); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ static int ffs_reload(mp, cred, p) register struct mount *mp; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp, *devvp; struct inode *ip; struct csum *space; struct buf *bp; struct fs *fs, *newfs; struct partinfo dpart; dev_t dev; int i, blks, size, error; int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mp)->um_devvp; - if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, 0, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) panic("ffs_reload: dirty1"); dev = devvp->v_rdev; /* * Only VMIO the backing device if the backing device is a real * block device. This excludes the original MFS implementation. * Note that it is optional that the backing device be VMIOed. This * increases the opportunity for metadata caching. */ if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) { simple_lock(&devvp->v_interlock); vfs_object_create(devvp, p, p->p_ucred, 0); } /* * Step 2: re-read superblock from disk. */ if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) size = DEV_BSIZE; else size = dpart.disklab->d_secsize; if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) return (error); newfs = (struct fs *)bp->b_data; if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mp)->um_fs; /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp)); newfs->fs_maxcluster = fs->fs_maxcluster; bcopy(newfs, fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat(fs); /* * Step 3: re-read summary information from disk. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = fs->fs_csp[0]; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); brelse(bp); } /* * We no longer know anything about clusters per cylinder group. */ if (fs->fs_contigsumsize > 0) { lp = fs->fs_maxcluster; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } loop: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); goto loop; } nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp, &mntvnode_slock, p)) goto loop; /* * Step 5: invalidate all cached file data. */ simple_lock(&vp->v_interlock); simple_unlock(&mntvnode_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; } if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. 
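Step 6 below also seeds the inode's new i_effnlink field from the on-disk link count. As I read the change, the two counts may legitimately diverge under soft updates while directory operations are in flight, i_effnlink tracking the operations already completed in memory and i_nlink what the disk currently reflects, which is why the non-softdep path in ffs_update() now panics when they disagree. A toy sketch of that invariant (field names from the diff; the scenario is hypothetical):

#include <assert.h>
#include <stdio.h>

struct toy_inode {
	int i_nlink;	/* link count as reflected on disk */
	int i_effnlink;	/* link count including not-yet-written ops */
};

int
main(void)
{
	struct toy_inode ip = { 2, 2 };
	int softdep = 1;

	ip.i_effnlink--;	/* unlink done in memory, not yet on disk */
	if (!softdep)
		assert(ip.i_effnlink == ip.i_nlink); /* ffs_update()'s check */
	ip.i_nlink--;		/* dependency resolved: disk caught up */
	printf("eff %d, disk %d\n", ip.i_effnlink, ip.i_nlink);
	return (0);
}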
*/ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { vput(vp); return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); + ip->i_effnlink = ip->i_nlink; brelse(bp); vput(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); return (0); } /* * Common code for mount and mountroot */ int ffs_mountfs(devvp, mp, p, malloctype) register struct vnode *devvp; struct mount *mp; struct proc *p; struct malloc_type *malloctype; { register struct ufsmount *ump; struct buf *bp; register struct fs *fs; + struct cg *cgp; dev_t dev; struct partinfo dpart; + struct csum cstotal; caddr_t base, space; - int error, i, blks, size, ronly; + int error, i, cyl, blks, size, ronly; int32_t *lp; struct ucred *cred; u_int64_t maxfilesize; /* XXX */ u_int strsize; int ncount; dev = devvp->v_rdev; cred = p ? p->p_ucred : NOCRED; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); if (ncount > 1 && devvp != rootvp) return (EBUSY); - if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) return (error); /* * Only VMIO the backing device if the backing device is a real * block device. This excludes the original MFS implementation. * Note that it is optional that the backing device be VMIOed. This * increases the opportunity for metadata caching. */ if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) { simple_lock(&devvp->v_interlock); vfs_object_create(devvp, p, p->p_ucred, 0); } ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) size = DEV_BSIZE; else size = dpart.disklab->d_secsize; bp = NULL; ump = NULL; if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) goto out; fs = (struct fs *)bp->b_data; if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || fs->fs_bsize < sizeof(struct fs)) { error = EINVAL; /* XXX needs translation */ goto out; } fs->fs_fmod = 0; if (!fs->fs_clean) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt); } else { printf("WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck.\n",fs->fs_fsmnt); error = EPERM; goto out; } } /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */ if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) { error = EROFS; /* needs translation */ goto out; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_malloctype = malloctype; ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); bp = NULL; fs = ump->um_fs; fs->fs_ronly = ronly; if (ronly == 0) { fs->fs_fmod = 1; fs->fs_clean = 0; } size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK); for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, cred, &bp)) { free(base, M_UFSMNT); goto out; } bcopy(bp->b_data, space, (u_int)size); fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; space += size; brelse(bp); bp = NULL; } if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = (int32_t *)space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = (long)dev; if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0) mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; else mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; ffs_oldfscompat(fs); /* * Set FS local "last mounted on" information (NULL pad) */ copystr( mp->mnt_stat.f_mntonname, /* mount point*/ fs->fs_fsmnt, /* copy area*/ sizeof(fs->fs_fsmnt) - 1, /* max size*/ &strsize); /* real size*/ bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); if( mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */ if (fs->fs_maxfilesize > maxfilesize) /* XXX */ fs->fs_maxfilesize = maxfilesize; /* XXX */ if (ronly == 0) { + if ((fs->fs_flags & FS_DOSOFTDEP) && + (error = softdep_mount(devvp, mp, fs, cred)) != 0) { + free(base, M_UFSMNT); + goto out; + } fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } return (0); out: + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); if (ump) { free(ump->um_fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } /* * Sanity checks for old file systems. * * XXX - goes away some day. 
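The XXX-marked lines above cap fs_maxfilesize at 2^30 blocks less one byte, stashing the on-disk value in um_savedmaxfilesize so that ffs_sbupdate() can restore it before writing the superblock back. A small sketch of the cap; the block size and on-disk value are samples:

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint64_t bsize = 8192;		/* sample fs_bsize */
		uint64_t ondisk = ~(uint64_t)0;	/* sample on-disk fs_maxfilesize */
		uint64_t clamp = (uint64_t)0x40000000 * bsize - 1;

		/* mirror the XXX-marked mount-time cap above */
		uint64_t maxfilesize = ondisk > clamp ? clamp : ondisk;
		printf("bsize %llu: maxfilesize capped at %llu bytes\n",
		    (unsigned long long)bsize, (unsigned long long)maxfilesize);
		return (0);
	}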
*/ static int ffs_oldfscompat(fs) struct fs *fs; { fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ fs->fs_nrpos = 8; /* XXX */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ #if 0 int i; /* XXX */ u_int64_t sizepb = fs->fs_bsize; /* XXX */ /* XXX */ fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ for (i = 0; i < NIADDR; i++) { /* XXX */ sizepb *= NINDIR(fs); /* XXX */ fs->fs_maxfilesize += sizepb; /* XXX */ } /* XXX */ #endif fs->fs_maxfilesize = (u_quad_t) 1LL << 39; fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ } /* XXX */ return (0); } /* * unmount system call */ int ffs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct ufsmount *ump; register struct fs *fs; int error, flags; flags = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } - error = ffs_flushfiles(mp, flags, p); - if (error) - return (error); + if (mp->mnt_flag & MNT_SOFTDEP) { + if ((error = softdep_flushfiles(mp, flags, p)) != 0) + return (error); + } else { + if ((error = ffs_flushfiles(mp, flags, p)) != 0) + return (error); + } ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_ronly == 0) { fs->fs_clean = 1; error = ffs_sbupdate(ump, MNT_WAIT); if (error) { fs->fs_clean = 0; return (error); } } - ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + ump->um_devvp->v_specmountpoint = NULL; vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0); error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); free(fs->fs_csp[0], M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ int ffs_flushfiles(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { register struct ufsmount *ump; int error; ump = VFSTOUFS(mp); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, NULLVP, SKIPSYSTEM|flags); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(p, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } #endif - error = vflush(mp, NULLVP, flags); + /* + * Flush all the files. + */ + if ((error = vflush(mp, NULL, flags)) != 0) + return (error); + /* + * Flush filesystem metadata. + */ + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); + VOP_UNLOCK(ump->um_devvp, 0, p); return (error); } /* * Get file system statistics. 
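ffs_statfs() below derives f_bfree from the cylinder-group summary totals and f_bavail through the freespace() macro defined in the fs.h hunk further down. A sketch of that arithmetic with invented parameters and summary counts:

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* invented filesystem parameters and summary totals */
		int32_t frag = 8;		/* fs_frag */
		int32_t minfree = 8;		/* fs_minfree (the MINFREE default) */
		int32_t dsize = 1000000;	/* fs_dsize, in frags */
		int32_t nbfree = 50000;		/* cs_nbfree, whole free blocks */
		int32_t nffree = 2000;		/* cs_nffree, free fragments */

		int32_t bfree = nbfree * frag + nffree;		/* f_bfree */
		/* freespace(fs, fs_minfree): frags available to ordinary users */
		int32_t bavail = bfree - dsize * minfree / 100;	/* f_bavail */

		printf("f_bfree %d frags, f_bavail %d frags\n",
		    (int)bfree, (int)bavail);
		return (0);
	}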
*/ int ffs_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct ufsmount *ump; register struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_MAGIC) panic("ffs_statfs"); sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree; sbp->f_bavail = freespace(fs, fs->fs_minfree); sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree; if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ int ffs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *nvp, *vp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; struct timeval tv; int error, allerror = 0; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ffs_sync: rofs mod"); } /* * Write back each (modified) inode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); - if (((ip->i_flag & + if ((vp->v_type == VNON) || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && - vp->v_dirtyblkhd.lh_first == NULL) { + ((vp->v_dirtyblkhd.lh_first == NULL) || (waitfor == MNT_LAZY))) { simple_unlock(&vp->v_interlock); continue; } if (vp->v_type != VCHR) { simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } if (error = VOP_FSYNC(vp, cred, waitfor, p)) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); simple_lock(&mntvnode_slock); } else { simple_unlock(&mntvnode_slock); simple_unlock(&vp->v_interlock); gettime(&tv); /* UFS_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */ UFS_UPDATE(vp, &tv, &tv, 0); simple_lock(&mntvnode_slock); } } simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. */ - error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p); - if (error) - allerror = error; + if (waitfor != MNT_LAZY) { + if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) + waitfor = MNT_NOWAIT; + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) + allerror = error; + VOP_UNLOCK(ump->um_devvp, 0, p); + } #ifdef QUOTA qsync(mp); #endif /* * Write back modified superblock. */ - if (fs->fs_fmod != 0) { - fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; - if (error = ffs_sbupdate(ump, waitfor)) - allerror = error; - } + if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) + allerror = error; return (allerror); } /* * Look up a FFS dinode number to find its incore vnode, otherwise read it * in from disk. 
If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ static int ffs_inode_hash_lock; int ffs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int error; ump = VFSTOUFS(mp); dev = ump->um_dev; restart: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { return (0); } /* * Lock out the creation of new entries in the FFS hash table in * case getnewvnode() or MALLOC() blocks, otherwise a duplicate * may occur! */ if (ffs_inode_hash_lock) { while (ffs_inode_hash_lock) { ffs_inode_hash_lock = -1; tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0); } goto restart; } ffs_inode_hash_lock = 1; /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ffs_sync() if a sync happens to fire right then, * which will cause a panic because ffs_sync() blindly * dereferences vp->v_data (as well it should). */ MALLOC(ip, struct inode *, sizeof(struct inode), ump->um_malloctype, M_WAITOK); /* Allocate a new vnode/inode. */ error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp); if (error) { if (ffs_inode_hash_lock < 0) wakeup(&ffs_inode_hash_lock); ffs_inode_hash_lock = 0; *vpp = NULL; FREE(ip, ump->um_malloctype); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); lockinit(&ip->i_lock, PINOD, "inode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ ufs_ihashins(ip); if (ffs_inode_hash_lock < 0) wakeup(&ffs_inode_hash_lock); ffs_inode_hash_lock = 0; /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + if (DOINGSOFTDEP(vp)) + softdep_load_inodeblock(ip); + else + ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { ip->i_gen = random() / 2 + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. 
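The bread() above locates the inode's filesystem block via ino_to_fsba() and then indexes into the buffer with ino_to_fsbo(); both macros appear in the fs.h hunk below. A simplified userland sketch of the mapping, with invented geometry and cgimin() reduced to cgbase() + fs_iblkno (that is, fs_cgoffset taken as zero):

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* invented geometry */
		int32_t ipg = 1792;	/* fs_ipg: inodes per cylinder group */
		int32_t inopb = 64;	/* INOPB: inodes per block */
		int32_t fragshift = 3;	/* fs_fragshift: 8 frags per block */
		int32_t fpg = 16384;	/* fs_fpg: frags per group */
		int32_t iblkno = 24;	/* fs_iblkno */
		int32_t dbshift = 1;	/* fs_fsbtodb */
		uint32_t ino = 40000;	/* sample inode number */

		int32_t cg = ino / ipg;				/* ino_to_cg() */
		int32_t fsba = fpg * cg + iblkno +		/* cgimin(), with */
		    ((ino % ipg) / inopb << fragshift);		/* fs_cgoffset == 0 */
		int32_t slot = ino % inopb;			/* ino_to_fsbo() */

		printf("ino %u: cg %d, fs block %d, dev block %d, slot %d\n",
		    (unsigned)ino, (int)cg, (int)fsba, (int)(fsba << dbshift),
		    (int)slot);
		return (0);
	}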
*/ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ ip->i_uid = ip->i_din.di_ouid; /* XXX */ ip->i_gid = ip->i_din.di_ogid; /* XXX */ } /* XXX */ *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via exflagsp and credanonp */ int ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { register struct ufid *ufhp; struct fs *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int ffs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct inode *ip; register struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Initialize the filesystem; just use ufs_init. */ static int ffs_init(vfsp) struct vfsconf *vfsp; { + softdep_initialize(); return (ufs_init(vfsp)); } /* * Write a superblock and associated information back to disk. */ static int ffs_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { register struct fs *dfs, *fs = mp->um_fs; register struct buf *bp; int blks; caddr_t space; int i, size, error, allerror = 0; /* * First write back the summary information. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = (caddr_t)fs->fs_csp[0]; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 0, 0); bcopy(space, bp->b_data, (u_int)size); space += size; if (waitfor != MNT_WAIT) bawrite(bp); else if (error = bwrite(bp)) allerror = error; } /* * Now write back the superblock itself. If any errors occurred * up to this point, then fail so that the superblock avoids * being written out as clean. */ if (allerror) return (allerror); bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); + fs->fs_fmod = 0; + fs->fs_time = time.tv_sec; bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); /* Restore compatibility to old file systems. XXX */ dfs = (struct fs *)bp->b_data; /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ dfs->fs_nrpos = -1; /* XXX */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ int32_t *lp, tmp; /* XXX */ /* XXX */ lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ tmp = lp[4]; /* XXX */ for (i = 4; i > 0; i--) /* XXX */ lp[i] = lp[i-1]; /* XXX */ lp[0] = tmp; /* XXX */ } /* XXX */ dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ if (waitfor != MNT_WAIT) bawrite(bp); else if (error = bwrite(bp)) allerror = error; return (allerror); } Index: head/sys/ufs/ffs/ffs_vnops.c =================================================================== --- head/sys/ufs/ffs/ffs_vnops.c (revision 34265) +++ head/sys/ufs/ffs/ffs_vnops.c (revision 34266) @@ -1,212 +1,253 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - * $Id: ffs_vnops.c,v 1.42 1998/02/06 12:14:16 eivind Exp $ + * $Id: ffs_vnops.c,v 1.43 1998/02/26 06:39:38 msmith Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ffs_fsync __P((struct vop_fsync_args *)); static int ffs_getpages __P((struct vop_getpages_args *)); static int ffs_putpages __P((struct vop_putpages_args *)); static int ffs_read __P((struct vop_read_args *)); static int ffs_write __P((struct vop_write_args *)); /* Global vfs data structures for ufs. 
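The operation tables that follow register FFS-specific entry points (this diff adds vop_balloc) over a default that falls through to the generic UFS routines. A toy model of that default-plus-override dispatch; the string-keyed lookup and all names here are invented for illustration, where the kernel dispatches on operation descriptor pointers:

	#include <stdio.h>
	#include <string.h>

	typedef void (*vop_t)(void);

	static void generic_op(void) { printf("generic ufs operation\n"); }
	static void fsync_op(void) { printf("ffs-specific fsync\n"); }

	struct opv_entry {
		const char *desc;	/* stands in for a vop_*_desc pointer */
		vop_t fn;
	};

	static struct opv_entry ops[] = {
		{ "default", generic_op },	/* like vop_default_desc */
		{ "fsync", fsync_op },		/* like vop_fsync_desc */
		{ NULL, NULL }
	};

	static vop_t
	opv_lookup(const char *desc)
	{
		struct opv_entry *e;

		for (e = ops; e->desc != NULL; e++)
			if (strcmp(e->desc, desc) == 0)
				return (e->fn);
		return (ops[0].fn);	/* no specific entry: use the default */
	}

	int
	main(void)
	{
		opv_lookup("fsync")();	/* dispatches to the FFS override */
		opv_lookup("read")();	/* unregistered: handled by the default */
		return (0);
	}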
*/ vop_t **ffs_vnodeop_p; static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperate }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { &vop_getpages_desc, (vop_t *) ffs_getpages }, { &vop_putpages_desc, (vop_t *) ffs_putpages }, { &vop_read_desc, (vop_t *) ffs_read }, + { &vop_balloc_desc, (vop_t *) ffs_balloc }, { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks }, { &vop_write_desc, (vop_t *) ffs_write }, { NULL, NULL } }; static struct vnodeopv_desc ffs_vnodeop_opv_desc = { &ffs_vnodeop_p, ffs_vnodeop_entries }; vop_t **ffs_specop_p; static struct vnodeopv_entry_desc ffs_specop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatespec }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { NULL, NULL } }; static struct vnodeopv_desc ffs_specop_opv_desc = { &ffs_specop_p, ffs_specop_entries }; vop_t **ffs_fifoop_p; static struct vnodeopv_entry_desc ffs_fifoop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatefifo }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { NULL, NULL } }; static struct vnodeopv_desc ffs_fifoop_opv_desc = { &ffs_fifoop_p, ffs_fifoop_entries }; VNODEOP_SET(ffs_vnodeop_opv_desc); VNODEOP_SET(ffs_specop_opv_desc); VNODEOP_SET(ffs_fifoop_opv_desc); SYSCTL_NODE(_vfs, MOUNT_UFS, ffs, CTLFLAG_RW, 0, "FFS filesystem"); #include /* * Synch an open file. */ /* ARGSUSED */ static int ffs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct buf *bp; + struct vnode *vp = ap->a_vp; + struct buf *bp; struct timeval tv; struct buf *nbp; - int pass; - int s; + int s, error, passes, skipmeta; daddr_t lbn; if (vp->v_type == VBLK) { lbn = INT_MAX; } else { struct inode *ip; ip = VTOI(vp); lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); } - pass = 0; /* * Flush all dirty buffers associated with a vnode. */ + passes = NIADDR; + skipmeta = 0; + if (ap->a_waitfor == MNT_WAIT) + skipmeta = 1; loop: s = splbio(); +loop2: for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; - if ((bp->b_flags & B_BUSY) || (pass == 0 && (bp->b_lblkno < 0))) + /* + * First time through on a synchronous call, + * or if it's already scheduled, skip to the next + * buffer. + */ + if ((bp->b_flags & B_BUSY) || + ((skipmeta == 1) && (bp->b_lblkno < 0))) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); - - if (((bp->b_vp != vp) || (ap->a_waitfor != MNT_NOWAIT)) || - ((vp->v_type != VREG) && (vp->v_type != VBLK))) { - + /* + * If data is outstanding to another vnode, or we were + * asked to wait for everything, or it's not a file or BDEV, + * start the IO on this buffer immediately. + */ + if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) || + ((vp->v_type != VREG) && (vp->v_type != VBLK))) { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); /* - * Wait for I/O associated with indirect blocks to complete, - * since there is no way to quickly wait for them below. + * Wait for I/O associated with indirect blocks to + * complete, since there is no way to quickly wait + * for them below.
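The skipmeta logic above defers metadata (negative b_lblkno, i.e. indirect blocks) to a second pass when the call is synchronous, with passes = NIADDR bounding the retries taken further down. A stripped-down userland model of that two-pass ordering over a fake buffer list:

	#include <stdio.h>

	struct fakebuf {
		long lblkno;	/* negative lblkno == indirect (metadata) block */
		int dirty;
	};

	int
	main(void)
	{
		struct fakebuf bufs[] = { { 0, 1 }, { -1, 1 }, { 3, 1 }, { -2, 1 } };
		int n = sizeof(bufs) / sizeof(bufs[0]);
		int skipmeta = 1;	/* synchronous call: push data first */
		int i;

	loop:
		for (i = 0; i < n; i++) {
			if (bufs[i].dirty == 0 || (skipmeta && bufs[i].lblkno < 0))
				continue;	/* defer metadata on the first pass */
			printf("write lblkno %ld\n", bufs[i].lblkno);
			bufs[i].dirty = 0;
		}
		if (skipmeta) {
			skipmeta = 0;	/* second pass picks up the indirect blocks */
			goto loop;
		}
		return (0);
	}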
*/ - if ((bp->b_vp == vp) && (ap->a_waitfor == MNT_NOWAIT)) { + if ((bp->b_vp == vp) || (ap->a_waitfor != MNT_WAIT)) { if (bp->b_flags & B_CLUSTEROK) { bdwrite(bp); (void) vfs_bio_awrite(bp); } else { (void) bawrite(bp); } } else { (void) bwrite(bp); } - } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { - + /* + * If the buffer is for data that has been truncated + * off the file, then throw it away. + */ bremfree(bp); bp->b_flags |= B_BUSY | B_INVAL | B_NOCACHE; brelse(bp); splx(s); - } else { vfs_bio_awrite(bp); splx(s); } goto loop; } + /* + * If we were asked to do this synchronously, then go back for + * another pass, this time doing the metadata. + */ + if (skipmeta) { + skipmeta = 0; + goto loop2; /* stay within the splbio() */ + } splx(s); - if (pass == 0) { - pass = 1; - goto loop; - } - if (ap->a_waitfor == MNT_WAIT) { s = splbio(); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "ffsfsn", 0); } + /* + * Ensure that any filesystem metadata associated + * with the vnode has been written. + */ splx(s); -#ifdef DIAGNOSTIC + if ((error = softdep_sync_metadata(ap)) != 0) + return (error); + s = splbio(); if (vp->v_dirtyblkhd.lh_first) { - vprint("ffs_fsync: dirty", vp); - goto loop; - } + /* + * Block devices associated with filesystems may + * have new I/O requests posted for them even if + * the vnode is locked, so no amount of trying will + * get them clean. Thus we give block devices a + * good effort, then just give up. For all other file + * types, go around and try again until it is clean. + */ + if (passes > 0) { + passes -= 1; + goto loop2; + } +#ifdef DIAGNOSTIC + if (vp->v_type != VBLK) + vprint("ffs_fsync: dirty", vp); #endif + } } - gettime(&tv); - return (UFS_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); + error = UFS_UPDATE(ap->a_vp, &tv, &tv, (ap->a_waitfor == MNT_WAIT)); + if (error) + return (error); + if (DOINGSOFTDEP(vp) && ap->a_waitfor == MNT_WAIT) + error = softdep_fsync(vp); + return (error); } Index: head/sys/ufs/ffs/fs.h =================================================================== --- head/sys/ufs/ffs/fs.h (revision 34265) +++ head/sys/ufs/ffs/fs.h (revision 34266) @@ -1,510 +1,522 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fs.h 8.13 (Berkeley) 3/21/95 - * $Id: fs.h,v 1.11 1997/03/23 20:08:22 guido Exp $ + * $Id: fs.h,v 1.12 1997/03/24 03:19:37 bde Exp $ */ #ifndef _UFS_FFS_FS_H_ #define _UFS_FFS_FS_H_ /* * Each disk drive contains some number of file systems. * A file system consists of a number of cylinder groups. * Each cylinder group has inodes and data. * * A file system is described by its super-block, which in turn * describes the cylinder groups. The super-block is critical * data and is replicated in each cylinder group to protect against * catastrophic loss. This is done at `newfs' time and the critical * super-block data does not change, so the copies need not be * referenced further unless disaster strikes. * * For file system fs, the offsets of the various blocks of interest * are given in the super block as: * [fs->fs_sblkno] Super-block * [fs->fs_cblkno] Cylinder group block * [fs->fs_iblkno] Inode blocks * [fs->fs_dblkno] Data blocks * The beginning of cylinder group cg in fs, is given by * the ``cgbase(fs, cg)'' macro. * * The first boot and super blocks are given in absolute disk addresses. * The byte-offset forms are preferred, as they don't imply a sector size. */ #define BBSIZE 8192 #define SBSIZE 8192 #define BBOFF ((off_t)(0)) #define SBOFF ((off_t)(BBOFF + BBSIZE)) #define BBLOCK ((ufs_daddr_t)(0)) #define SBLOCK ((ufs_daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE)) /* * Addresses stored in inodes are capable of addressing fragments * of `blocks'. File system blocks of at most size MAXBSIZE can * be optionally broken into 2, 4, or 8 pieces, each of which is * addressable; these pieces may be DEV_BSIZE, or some multiple of * a DEV_BSIZE unit. * * Large files consist of exclusively large data blocks. To avoid * undue wasted disk space, the last data block of a small file may be * allocated as only as many fragments of a large block as are * necessary. The file system format retains only a single pointer * to such a fragment, which is a piece of a single large block that * has been divided. The size of such a fragment is determinable from * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. * * The file system records space availability at the fragment level; * to determine block availability, aligned fragments are examined. */ /* * MINBSIZE is the smallest allowable block size. * In order to insure that it is possible to create files of size * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. * MINBSIZE must be big enough to hold a cylinder group block, * thus changes to (struct cg) must keep its size within MINBSIZE. * Note that super blocks are always of size SBSIZE, * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE. */ #define MINBSIZE 4096 /* * The path name on which the file system is mounted is maintained * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in * the super block for this name. */ #define MAXMNTLEN 512 /* * The limit on the amount of summary information per file system * is defined by MAXCSBUFS. 
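As the comment here goes on to explain, MAXCSBUFS is carved out of 128 bytes of superblock pointer space, minus one pointer reserved for the cluster-size array, so the buffer count halves on 64-bit machines. The sizing as a quick check you can run:

	#include <stdio.h>

	#define MAXCSBUFS_FOR(ptrsize)	((128 / (ptrsize)) - 1)

	int
	main(void)
	{
		printf("32-bit pointers: %d summary buffers\n", MAXCSBUFS_FOR(4));
		printf("64-bit pointers: %d summary buffers\n", MAXCSBUFS_FOR(8));
		printf("this machine:    %d summary buffers\n",
		    (int)(128 / sizeof(void *)) - 1);
		return (0);
	}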
It is currently parameterized for a * size of 128 bytes (2 million cylinder groups on machines with * 32-bit pointers, and 1 million on 64-bit machines). One pointer * is taken away to point to an array of cluster sizes that is * computed as cylinder groups are inspected. */ #define MAXCSBUFS ((128 / sizeof(void *)) - 1) /* * A summary of contiguous blocks of various sizes is maintained * in each cylinder group. Normally this is set by the initial * value of fs_maxcontig. To conserve space, a maximum summary size * is set by FS_MAXCONTIG. */ #define FS_MAXCONTIG 16 /* * MINFREE gives the minimum acceptable percentage of file system * blocks which may be free. If the freelist drops below this level * only the superuser may continue to allocate blocks. This may * be set to 0 if no reserve of free blocks is deemed necessary, * however throughput drops by fifty percent if the file system * is run at between 95% and 100% full; thus the minimum default * value of fs_minfree is 5%. However, to get good clustering * performance, 10% is a better choice. hence we use 10% as our * default value. With 10% free space, fragmentation is not a * problem, so we choose to optimize for time. */ #define MINFREE 8 #define DEFAULTOPT FS_OPTTIME /* * Per cylinder group information; summarized in blocks allocated * from first cylinder group data blocks. These blocks have to be * read in from fs_csaddr (size fs_cssize) in addition to the * super block. * * N.B. sizeof(struct csum) must be a power of two in order for * the ``fs_cs'' macro to work (see below). */ struct csum { int32_t cs_ndir; /* number of directories */ int32_t cs_nbfree; /* number of free blocks */ int32_t cs_nifree; /* number of free inodes */ int32_t cs_nffree; /* number of free frags */ }; /* * Super block for an FFS file system. 
*/ struct fs { int32_t fs_firstfield; /* historic file system linked list, */ int32_t fs_unused_1; /* used for incore super blocks */ ufs_daddr_t fs_sblkno; /* addr of super-block in filesys */ ufs_daddr_t fs_cblkno; /* offset of cyl-block in filesys */ ufs_daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ ufs_daddr_t fs_dblkno; /* offset of first data after cg */ int32_t fs_cgoffset; /* cylinder group offset in cylinder */ int32_t fs_cgmask; /* used to calc mod fs_ntrak */ time_t fs_time; /* last time written */ int32_t fs_size; /* number of blocks in fs */ int32_t fs_dsize; /* number of data blocks in fs */ int32_t fs_ncg; /* number of cylinder groups */ int32_t fs_bsize; /* size of basic blocks in fs */ int32_t fs_fsize; /* size of frag blocks in fs */ int32_t fs_frag; /* number of frags in a block in fs */ /* these are configuration parameters */ int32_t fs_minfree; /* minimum percentage of free blocks */ int32_t fs_rotdelay; /* num of ms for optimal next block */ int32_t fs_rps; /* disk revolutions per second */ /* these fields can be computed from the others */ int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */ int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */ int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */ int32_t fs_fshift; /* ``numfrags'' calc number of frags */ /* these are configuration parameters */ int32_t fs_maxcontig; /* max number of contiguous blks */ int32_t fs_maxbpg; /* max number of blks per cyl group */ /* these fields can be computed from the others */ int32_t fs_fragshift; /* block to frag shift */ int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ int32_t fs_sbsize; /* actual size of super block */ int32_t fs_csmask; /* csum block offset */ int32_t fs_csshift; /* csum block number */ int32_t fs_nindir; /* value of NINDIR */ int32_t fs_inopb; /* value of INOPB */ int32_t fs_nspf; /* value of NSPF */ /* yet another configuration parameter */ int32_t fs_optim; /* optimization preference, see below */ /* these fields are derived from the hardware */ int32_t fs_npsect; /* # sectors/track including spares */ int32_t fs_interleave; /* hardware sector interleave */ int32_t fs_trackskew; /* sector 0 skew, per track */ /* fs_id takes the space of the unused fs_headswitch and fs_trkseek fields */ int32_t fs_id[2]; /* unique filesystem id */ /* sizes determined by number of cylinder groups and their sizes */ ufs_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ int32_t fs_cssize; /* size of cyl grp summary area */ int32_t fs_cgsize; /* cylinder group size */ /* these fields are derived from the hardware */ int32_t fs_ntrak; /* tracks per cylinder */ int32_t fs_nsect; /* sectors per track */ int32_t fs_spc; /* sectors per cylinder */ /* this comes from the disk driver partitioning */ int32_t fs_ncyl; /* cylinders in file system */ /* these fields can be computed from the others */ int32_t fs_cpg; /* cylinders per group */ int32_t fs_ipg; /* inodes per group */ int32_t fs_fpg; /* blocks per group * fs_frag */ /* this data must be re-computed after crashes */ struct csum fs_cstotal; /* cylinder summary information */ /* these fields are cleared at mount time */ int8_t fs_fmod; /* super block modified flag */ int8_t fs_clean; /* file system is clean flag */ int8_t fs_ronly; /* mounted read-only flag */ - int8_t fs_flags; /* currently unused flag */ + int8_t fs_flags; /* see FS_ flags below */ u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ /* these fields retain the current block allocation info */ int32_t fs_cgrotor; /* last cg 
searched */ struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */ int32_t *fs_maxcluster; /* max cluster in each cyl group */ int32_t fs_cpc; /* cyl per cycle in postbl */ int16_t fs_opostbl[16][8]; /* old rotation block list head */ int32_t fs_sparecon[50]; /* reserved for future constants */ int32_t fs_contigsumsize; /* size of cluster summary array */ int32_t fs_maxsymlinklen; /* max length of an internal symlink */ int32_t fs_inodefmt; /* format of on-disk inodes */ u_int64_t fs_maxfilesize; /* maximum representable file size */ int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */ int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */ int32_t fs_state; /* validate fs_clean field */ int32_t fs_postblformat; /* format of positional layout tables */ int32_t fs_nrpos; /* number of rotational positions */ int32_t fs_postbloff; /* (u_int16) rotation block list head */ int32_t fs_rotbloff; /* (u_int8) blocks for each rotation */ int32_t fs_magic; /* magic number */ u_int8_t fs_space[1]; /* list of blocks for each rotation */ /* actually longer */ }; /* * Filesystem identification */ #define FS_MAGIC 0x011954 /* the fast filesystem magic number */ #define FS_OKAY 0x7c269d38 /* superblock checksum */ #define FS_42INODEFMT -1 /* 4.2BSD inode format */ #define FS_44INODEFMT 2 /* 4.4BSD inode format */ + /* * Preference for optimization. */ #define FS_OPTTIME 0 /* minimize allocation time */ #define FS_OPTSPACE 1 /* minimize disk fragmentation */ /* + * Filesystem flags. + */ +#define FS_UNCLEAN 0x01 /* filesystem not clean at mount */ +#define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */ + +/* * Rotational layout table format types */ #define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */ #define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */ /* * Macros for access to superblock array structures */ #define fs_postbl(fs, cylno) \ (((fs)->fs_postblformat == FS_42POSTBLFMT) \ ? ((fs)->fs_opostbl[cylno]) \ : ((int16_t *)((u_int8_t *)(fs) + \ (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos)) #define fs_rotbl(fs) \ (((fs)->fs_postblformat == FS_42POSTBLFMT) \ ? ((fs)->fs_space) \ : ((u_int8_t *)((u_int8_t *)(fs) + (fs)->fs_rotbloff))) /* * The size of a cylinder group is calculated by CGSIZE. The maximum size * is limited by the fact that cylinder groups are at most one block. * Its size is derived from the size of the maps maintained in the * cylinder group and the (struct cg) size. */ #define CGSIZE(fs) \ /* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \ /* blktot size */ (fs)->fs_cpg * sizeof(int32_t) + \ /* blks size */ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof(int16_t) + \ /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ /* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\ /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ /* cluster sum */ (fs)->fs_contigsumsize * sizeof(int32_t) + \ /* cluster map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY))) /* * Convert cylinder group to base address of its global summary info. * * N.B. This macro assumes that sizeof(struct csum) is a power of two. */ #define fs_cs(fs, indx) \ fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask] /* * Cylinder group block for a file system. 
*/ #define CG_MAGIC 0x090255 struct cg { int32_t cg_firstfield; /* historic cyl groups linked list */ int32_t cg_magic; /* magic number */ time_t cg_time; /* time last written */ int32_t cg_cgx; /* we are the cgx'th cylinder group */ int16_t cg_ncyl; /* number of cyl's this cg */ int16_t cg_niblk; /* number of inode blocks this cg */ int32_t cg_ndblk; /* number of data blocks this cg */ struct csum cg_cs; /* cylinder summary information */ int32_t cg_rotor; /* position of last used block */ int32_t cg_frotor; /* position of last used frag */ int32_t cg_irotor; /* position of last used inode */ int32_t cg_frsum[MAXFRAG]; /* counts of available frags */ int32_t cg_btotoff; /* (int32) block totals per cylinder */ int32_t cg_boff; /* (u_int16) free block positions */ int32_t cg_iusedoff; /* (u_int8) used inode map */ int32_t cg_freeoff; /* (u_int8) free block map */ int32_t cg_nextfreeoff; /* (u_int8) next available space */ int32_t cg_clustersumoff; /* (u_int32) counts of avail clusters */ int32_t cg_clusteroff; /* (u_int8) free cluster map */ int32_t cg_nclusterblks; /* number of clusters this cg */ int32_t cg_sparecon[13]; /* reserved for future use */ u_int8_t cg_space[1]; /* space for cylinder group maps */ /* actually longer */ }; /* * Macros for access to cylinder group array structures */ #define cg_blktot(cgp) \ (((cgp)->cg_magic != CG_MAGIC) \ ? (((struct ocg *)(cgp))->cg_btot) \ : ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_btotoff))) #define cg_blks(fs, cgp, cylno) \ (((cgp)->cg_magic != CG_MAGIC) \ ? (((struct ocg *)(cgp))->cg_b[cylno]) \ : ((int16_t *)((u_int8_t *)(cgp) + \ (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos)) #define cg_inosused(cgp) \ (((cgp)->cg_magic != CG_MAGIC) \ ? (((struct ocg *)(cgp))->cg_iused) \ : ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff))) #define cg_blksfree(cgp) \ (((cgp)->cg_magic != CG_MAGIC) \ ? (((struct ocg *)(cgp))->cg_free) \ : ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff))) #define cg_chkmagic(cgp) \ ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC) #define cg_clustersfree(cgp) \ ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff)) #define cg_clustersum(cgp) \ ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_clustersumoff)) /* * The following structure is defined * for compatibility with old file systems. */ struct ocg { int32_t cg_firstfield; /* historic linked list of cyl groups */ int32_t cg_unused_1; /* used for incore cyl groups */ time_t cg_time; /* time last written */ int32_t cg_cgx; /* we are the cgx'th cylinder group */ int16_t cg_ncyl; /* number of cyl's this cg */ int16_t cg_niblk; /* number of inode blocks this cg */ int32_t cg_ndblk; /* number of data blocks this cg */ struct csum cg_cs; /* cylinder summary information */ int32_t cg_rotor; /* position of last used block */ int32_t cg_frotor; /* position of last used frag */ int32_t cg_irotor; /* position of last used inode */ int32_t cg_frsum[8]; /* counts of available frags */ int32_t cg_btot[32]; /* block totals per cylinder */ int16_t cg_b[32][8]; /* positions of free blocks */ u_int8_t cg_iused[256]; /* used inode map */ int32_t cg_magic; /* magic number */ u_int8_t cg_free[1]; /* free block map */ /* actually longer */ }; /* * Turn file system block numbers into disk block addresses. * This maps file system blocks to device size blocks. */ #define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb) #define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) /* * Cylinder group macros to locate things in cylinder groups. 
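The macros in this stretch of fs.h, defined just below, map filesystem block addresses to cylinder groups: dtog()/dtogd() are plain division and modulus by fs_fpg, and cgbase() inverts them. A quick round-trip check with sample numbers:

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		int32_t fpg = 16384;	/* sample fs_fpg, frags per group */
		int32_t d = 123456;	/* sample filesystem block address */

		int32_t cg = d / fpg;		/* dtog() */
		int32_t off = d % fpg;		/* dtogd() */
		int32_t base = fpg * cg;	/* cgbase() */

		printf("blk %d -> cg %d, offset %d, cgbase %d\n",
		    (int)d, (int)cg, (int)off, (int)base);
		return (base + off == d ? 0 : 1);	/* the round trip must hold */
	}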
* They calc file system addresses of cylinder group data structures. */ #define cgbase(fs, c) ((ufs_daddr_t)((fs)->fs_fpg * (c))) #define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ #define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ #define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ #define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ #define cgstart(fs, c) \ (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask))) /* * Macros for handling inode numbers: * inode number to file system block offset. * inode number to cylinder group number. * inode number to file system block address. */ #define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg) #define ino_to_fsba(fs, x) \ ((ufs_daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \ (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) #define ino_to_fsbo(fs, x) ((x) % INOPB(fs)) /* * Give cylinder group number for a file system block. * Give cylinder group block number for a file system block. */ #define dtog(fs, d) ((d) / (fs)->fs_fpg) #define dtogd(fs, d) ((d) % (fs)->fs_fpg) /* * Extract the bits for a block from a map. * Compute the cylinder and rotational position of a cyl block addr. */ #define blkmap(fs, map, loc) \ (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) #define cbtocylno(fs, bno) \ ((bno) * NSPF(fs) / (fs)->fs_spc) #define cbtorpos(fs, bno) \ (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \ (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \ (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect) /* * The following macros optimize certain frequently calculated * quantities by using shifts and masks in place of divisions * modulos and multiplications. */ #define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ ((loc) & (fs)->fs_qbmask) #define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ ((loc) & (fs)->fs_qfmask) #define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \ ((off_t)(blk) << (fs)->fs_bshift) /* Use this only when `blk' is known to be small, e.g., < NDADDR. */ #define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ ((blk) << (fs)->fs_bshift) #define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ ((loc) >> (fs)->fs_bshift) #define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ ((loc) >> (fs)->fs_fshift) #define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) #define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) #define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ ((frags) >> (fs)->fs_fragshift) #define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ ((blks) << (fs)->fs_fragshift) #define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ ((fsb) & ((fs)->fs_frag - 1)) #define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ ((fsb) &~ ((fs)->fs_frag - 1)) /* * Determine the number of available frags given a * percentage to hold in reserve. */ #define freespace(fs, percentreserved) \ (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100)) /* * Determining the size of a file block in the file system. */ #define blksize(fs, ip, lbn) \ (((lbn) >= NDADDR || (ip)->i_size >= smalllblktosize(fs, (lbn) + 1)) \ ? 
(fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) #define dblksize(fs, dip, lbn) \ (((lbn) >= NDADDR || (dip)->di_size >= smalllblktosize(fs, (lbn) + 1)) \ ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) +#define sblksize(fs, size, lbn) \ + (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (size))))) + /* * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte * sector size. */ #define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift) #define NSPF(fs) ((fs)->fs_nspf) /* * Number of inodes in a secondary storage block/fragment. */ #define INOPB(fs) ((fs)->fs_inopb) #define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) /* * Number of indirects in a file system block. */ #define NINDIR(fs) ((fs)->fs_nindir) extern int inside[], around[]; extern u_char *fragtbl[]; #endif Index: head/sys/ufs/ufs/inode.h =================================================================== --- head/sys/ufs/ufs/inode.h (revision 34265) +++ head/sys/ufs/ufs/inode.h (revision 34266) @@ -1,172 +1,181 @@ /* * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ #define _UFS_UFS_INODE_H_ #include #include /* + * The size of a logical block number. + */ +typedef long ufs_lbn_t; + +/* * This must agree with the definition in . 
*/ #define doff_t int32_t /* * The inode is used to describe each active (or recently active) file in the * UFS filesystem. It is composed of two types of information. The first part * is the information that is needed only while the file is active (such as * the identity of the file and linkage to speed its lookup). The second part * is the permanent meta-data associated with the file which is read in * from the permanent dinode from long term storage when the file becomes * active, and is put back when the file is no longer being used. */ struct inode { struct lock i_lock; /* Inode lock. >Keep this first< */ LIST_ENTRY(inode) i_hash;/* Hash chain. */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ struct ext2_sb_info *e2fs; /* EXT2FS */ } inode_u; #define i_fs inode_u.fs #define i_e2fs inode_u.e2fs struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ /* * Side effects; used during directory lookup. */ int32_t i_count; /* Size of free slot in directory. */ doff_t i_endoff; /* End of useful stuff in directory. */ doff_t i_diroff; /* Offset in dir, where we found last entry. */ doff_t i_offset; /* Offset of free space in directory. */ ino_t i_ino; /* Inode number of found directory. */ u_int32_t i_reclen; /* Size of found directory entry. */ int i_spare[5]; /* XXX actually non-spare (for ext2fs). */ /* * The on-disk dinode itself. */ struct dinode i_din; /* 128 bytes of the on-disk dinode. */ }; #define i_atime i_din.di_atime #define i_atimensec i_din.di_atimensec #define i_blocks i_din.di_blocks #define i_ctime i_din.di_ctime #define i_ctimensec i_din.di_ctimensec #define i_db i_din.di_db #define i_flags i_din.di_flags #define i_gen i_din.di_gen #define i_gid i_din.di_gid #define i_ib i_din.di_ib #define i_mode i_din.di_mode #define i_mtime i_din.di_mtime #define i_mtimensec i_din.di_mtimensec #define i_nlink i_din.di_nlink #define i_rdev i_din.di_rdev #define i_shortlink i_din.di_shortlink #define i_size i_din.di_size #define i_uid i_din.di_uid /* These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ #define IN_UPDATE 0x0004 /* Modification time update request. */ #define IN_MODIFIED 0x0008 /* Inode has been modified. */ #define IN_RENAME 0x0010 /* Inode is being renamed. */ #define IN_SHLOCK 0x0020 /* File has shared lock. */ #define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #define IN_HASHED 0x0080 /* Inode is on hash list */ #ifdef KERNEL /* * Structure used to pass around logical block paths generated by * ufs_getlbns and used by truncate and bmap code. */ struct indir { ufs_daddr_t in_lbn; /* Logical block number. */ int in_off; /* Offset in buffer. */ int in_exists; /* Flag if the block exists. */ }; /* Convert between inode pointers and vnode pointers. */ #define VTOI(vp) ((struct inode *)(vp)->v_data) #define ITOV(ip) ((ip)->i_vnode) /* * XXX this is too long to be a macro, and isn't used in any time-critical * place; in fact it is only used in ufs_vnops.c so it shouldn't be in a * header file. 
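The new i_effnlink field above ("i_nlink when I/O completes") is what lets soft updates decouple the effective link count from the one that is safe to write: namespace operations adjust i_effnlink at once, while i_nlink may only follow when the corresponding directory write is on disk (see the softdep_* prototypes in the ufs_extern.h hunk below). A toy illustration of the two counters around an unlink; the struct and sequencing here are invented for illustration:

	#include <stdio.h>

	struct toy_inode {
		int nlink;	/* the count the disk currently shows */
		int effnlink;	/* the count once queued directory I/O completes */
	};

	int
	main(void)
	{
		struct toy_inode ip = { 2, 2 };

		/* unlink(2): the effective count drops immediately */
		ip.effnlink--;
		printf("pending: effnlink %d, on-disk nlink %d\n",
		    ip.effnlink, ip.nlink);

		/* only when the directory block that removed the name is
		 * safely written may the inode's on-disk count follow */
		ip.nlink = ip.effnlink;
		printf("dependency complete: nlink %d\n", ip.nlink);
		return (0);
	}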
*/ #define ITIMES(ip, t1, t2) { \ long tv_sec = time.tv_sec; \ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ (ip)->i_flag |= IN_MODIFIED; \ if ((ip)->i_flag & IN_ACCESS) \ (ip)->i_atime \ = ((t1) == &time ? tv_sec : (t1)->tv_sec); \ if ((ip)->i_flag & IN_UPDATE) { \ (ip)->i_mtime \ = ((t2) == &time ? tv_sec : (t2)->tv_sec); \ (ip)->i_modrev++; \ } \ if ((ip)->i_flag & IN_CHANGE) \ (ip)->i_ctime = tv_sec; \ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ } + +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ u_int16_t ufid_pad; /* Force 32-bit alignment. */ ino_t ufid_ino; /* File number (ino). */ int32_t ufid_gen; /* Generation number. */ }; #endif /* KERNEL */ #endif /* !_UFS_UFS_INODE_H_ */ Index: head/sys/ufs/ufs/ufs_extern.h =================================================================== --- head/sys/ufs/ufs/ufs_extern.h (revision 34265) +++ head/sys/ufs/ufs/ufs_extern.h (revision 34266) @@ -1,93 +1,106 @@ /*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95 - * $Id: ufs_extern.h,v 1.21 1997/10/16 11:59:09 phk Exp $ + * $Id: ufs_extern.h,v 1.22 1997/10/27 12:50:57 bde Exp $ */ #ifndef _UFS_UFS_EXTERN_H_ #define _UFS_UFS_EXTERN_H_ struct componentname; struct direct; struct indir; struct inode; struct mount; struct proc; struct sockaddr; struct ucred; struct ufid; struct vfsconf; struct vnode; struct vop_bmap_args; struct vop_cachedlookup_args; struct vop_generic_args; struct vop_inactive_args; struct vop_reclaim_args; int ufs_vnoperate __P((struct vop_generic_args *)); int ufs_vnoperatefifo __P((struct vop_generic_args *)); int ufs_vnoperatespec __P((struct vop_generic_args *)); int ufs_bmap __P((struct vop_bmap_args *)); int ufs_bmaparray __P((struct vnode *, daddr_t, daddr_t *, struct indir *, int *, int *, int *)); int ufs_check_export __P((struct mount *, struct ufid *, struct sockaddr *, struct vnode **, int *exflagsp, struct ucred **)); int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *)); void ufs_dirbad __P((struct inode *, doff_t, char *)); int ufs_dirbadentry __P((struct vnode *, struct direct *, int)); int ufs_dirempty __P((struct inode *, ino_t, struct ucred *)); -int ufs_direnter __P((struct inode *, struct vnode *,struct componentname *)); -int ufs_direnter2 __P((struct vnode *, struct direct *, struct ucred *, - struct proc *)); -int ufs_dirremove __P((struct vnode *, struct componentname*)); -int ufs_dirrewrite - __P((struct inode *, struct inode *, struct componentname *)); +void ufs_makedirentry __P((struct inode *, struct componentname *, + struct direct *)); +int ufs_direnter __P((struct vnode *, struct vnode *, struct direct *, + struct componentname *, struct buf *)); +int ufs_dirremove __P((struct vnode *, struct inode *, int, int)); +int ufs_dirrewrite __P((struct inode *, struct inode *, ino_t, int, int)); int ufs_getlbns __P((struct vnode *, ufs_daddr_t, struct indir *, int *)); struct vnode * ufs_ihashget __P((dev_t, ino_t)); void ufs_ihashinit __P((void)); void ufs_ihashins __P((struct inode *)); struct vnode * ufs_ihashlookup __P((dev_t, ino_t)); void ufs_ihashrem __P((struct inode *)); int ufs_inactive __P((struct vop_inactive_args *)); int ufs_init __P((struct vfsconf *)); int ufs_lookup __P((struct vop_cachedlookup_args *)); int ufs_reclaim __P((struct vop_reclaim_args *)); int ufs_root __P((struct mount *, struct vnode **)); int ufs_start __P((struct mount *, int, struct proc *)); int ufs_vinit __P((struct mount *, vop_t **, vop_t **, struct vnode **)); + +/* + * Soft update function prototypes. + */ +void softdep_setup_directory_add __P((struct buf *, struct inode *, off_t, + long, struct buf *)); +void softdep_change_directoryentry_offset __P((struct inode *, caddr_t, + caddr_t, caddr_t, int)); +void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *, + int)); +void softdep_setup_directory_change __P((struct buf *, struct inode *, + struct inode *, long, int)); +void softdep_increase_linkcnt __P((struct inode *)); #endif /* !_UFS_UFS_EXTERN_H_ */ Index: head/sys/ufs/ufs/ufs_lookup.c =================================================================== --- head/sys/ufs/ufs/ufs_lookup.c (revision 34265) +++ head/sys/ufs/ufs/ufs_lookup.c (revision 34266) @@ -1,1009 +1,1098 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 - * $Id: ufs_lookup.c,v 1.20 1998/02/04 22:33:36 eivind Exp $ + * $Id: ufs_lookup.c,v 1.21 1998/02/06 12:14:18 eivind Exp $ */ #include #include +#include #include #include +#include +#include #include #include +#include +#include + #include #include #include #include #include #ifdef DIAGNOSTIC int dirchk = 1; #else int dirchk = 0; #endif /* true if old FS format...*/ #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) /* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. * If the file system is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending * on whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be ".", but the caller must check to ensure it does a vrele and vput * instead of two vputs. * * This routine is actually used as the VOP_CACHEDLOOKUP method, and the * filesystem employs the generic vfs_cache_lookup() as the VOP_LOOKUP * method.
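The OFSFMT() test defined above is why little-endian byte-order checks recur throughout this file. The pre-4.4BSD struct direct carried a 16-bit d_namlen where the new layout has d_type followed by d_namlen; on a little-endian machine the old length's low byte therefore sits in the slot new code reads as d_type. A stand-alone sketch of the accessor logic, with endianness passed in rather than detected:

#include <stdio.h>

static int
entry_namlen(int oldformat, int little_endian,
    unsigned char d_type, unsigned char d_namlen)
{
        if (oldformat && little_endian)
                return (d_type);        /* old namlen low byte lives here */
        return (d_namlen);
}

int
main(void)
{
        /* old-format entry with name length 5, as seen on little-endian */
        printf("%d\n", entry_namlen(1, 1, 5, 0));       /* prints 5 */
        return (0);
}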
* * vfs_cache_lookup() performs the following for us: * check that it is a directory * check accessibility of directory * check for modification attempts on read-only mounts * if name found in cache * if at end of path and deleting or creating * drop it * else * return name. * return VOP_CACHEDLOOKUP() * * Overall outline of ufs_lookup: * * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache */ int ufs_lookup(ap) struct vop_cachedlookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vdp; /* vnode for directory being searched */ register struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ register struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ int lockparent; /* 1 => lockparent flag is set */ int wantparent; /* 1 => wantparent or lockparent flag */ int namlen, error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; struct proc *p = cnp->cn_proc; bp = NULL; slotoffset = -1; +/* + * XXX there was a soft-update diff about this I couldn't merge. + * I think this was the equiv. + */ *vpp = NULL; + vdp = ap->a_dvp; dp = VTOI(vdp); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); /* * We now have a segment name to search for, and a directory to search. * * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slotstatus = NONE; slotneeded = DIRECTSIZ(cnp->cn_namelen); } /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. 
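The slotneeded value computed above via DIRECTSIZ() reflects the variable-length on-disk entry format: an 8-byte header (d_ino, d_reclen, d_type, d_namlen) followed by the NUL-terminated name, padded to a 4-byte boundary. A small demonstration of that arithmetic, assuming the standard new-format layout:

#include <stdio.h>

/* Mirrors what DIRECTSIZ() computes for slotneeded above. */
static int
entry_size(int namlen)
{
        return (8 + ((namlen + 1 + 3) & ~3));
}

int
main(void)
{
        printf("%d %d %d\n", entry_size(1), entry_size(4), entry_size(12));
        /* prints: 12 16 24 */
        return (0);
}

Because reclen is what the allocator hands out, a record can be larger than its entry needs, and that slack is exactly what the slot search in the loop below hunts for.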
*/ bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; if (nameiop != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) { entryoffsetinblock = 0; dp->i_offset = 0; numdirpasses = 1; } else { dp->i_offset = dp->i_diroff; if ((entryoffsetinblock = dp->i_offset & bmask) && (error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) return (error); numdirpasses = 2; nchstats.ncs_2passes++; } prevoff = dp->i_offset; endsearch = roundup2(dp->i_size, DIRBLKSIZ); enduseful = 0; searchloop: while (dp->i_offset < endsearch) { /* * If necessary, get the next directory block. */ if ((dp->i_offset & bmask) == 0) { if (bp != NULL) brelse(bp); error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp); if (error) return (error); entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZE * boundary, have to start looking for free space again. */ if (slotstatus == NONE && (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); if (ep->d_reclen == 0 || (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; ufs_dirbad(dp, dp->i_offset, "mangled entry"); i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); dp->i_offset += i; entryoffsetinblock += i; continue; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. Also accumulate space * in the current block so that we can determine if * compaction is viable. */ if (slotstatus != FOUND) { int size = ep->d_reclen; if (ep->d_ino != 0) size -= DIRSIZ(OFSFMT(vdp), ep); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; slotoffset = dp->i_offset; slotsize = ep->d_reclen; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) slotoffset = dp->i_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = dp->i_offset + ep->d_reclen - slotoffset; } } } } /* * Check for a name match. */ if (ep->d_ino) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vdp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (namlen == cnp->cn_namelen && (cnp->cn_nameptr[0] == ep->d_name[0]) && !bcmp(cnp->cn_nameptr, ep->d_name, (unsigned)namlen)) { /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release * directory buffer. */ if (vdp->v_mount->mnt_maxsymlinklen > 0 && ep->d_type == DT_WHT) { slotstatus = FOUND; slotoffset = dp->i_offset; slotsize = ep->d_reclen; dp->i_reclen = slotsize; enduseful = dp->i_size; ap->a_cnp->cn_flags |= ISWHITEOUT; numdirpasses--; goto notfound; } dp->i_ino = ep->d_ino; dp->i_reclen = ep->d_reclen; brelse(bp); goto found; } } prevoff = dp->i_offset; dp->i_offset += ep->d_reclen; entryoffsetinblock += ep->d_reclen; if (ep->d_ino) enduseful = dp->i_offset; } notfound: /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; dp->i_offset = 0; endsearch = dp->i_diroff; goto searchloop; } if (bp != NULL) brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. 
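The free-slot bookkeeping in the loop above is a small state machine: FOUND when a single record has enough slack for the new entry, COMPACT when several adjacent records together do. A condensed model of one step of that scan, with the same quantities the loop uses and everything else stripped away:

enum slotstate { NONE, COMPACT, FOUND };

/*
 * Feed one record's (reclen, bytes actually used) pair into the
 * search; 'needed' is the size of the entry to be inserted.
 */
static enum slotstate
slot_scan(enum slotstate st, int reclen, int used, int needed,
    int *freespace)
{
        int slack = reclen - used;      /* space past the live entry */

        if (st == FOUND || slack <= 0)
                return (st);
        if (slack >= needed)
                return (FOUND);         /* one record suffices */
        *freespace += slack;            /* accumulate for compaction */
        return (*freespace >= needed ? COMPACT : st);
}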
*/ if ((nameiop == CREATE || nameiop == RENAME || (nameiop == DELETE && (ap->a_cnp->cn_flags & DOWHITEOUT) && (ap->a_cnp->cn_flags & ISWHITEOUT))) && - (flags & ISLASTCN) && dp->i_nlink != 0) { + (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. */ error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, * then set dp->i_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry * can be put in the range from dp->i_offset to * dp->i_offset + dp->i_count. */ if (slotstatus == NONE) { dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); dp->i_count = 0; enduseful = dp->i_offset; } else if (nameiop == DELETE) { dp->i_offset = slotoffset; if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; } else { dp->i_offset = slotoffset; dp->i_count = slotsize; if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(vdp, 0, p); return (EJUSTRETURN); } /* * Insert name into cache (as non-existent) if appropriate. */ if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) cache_enter(vdp, *vpp, cnp); return (ENOENT); found: if (numdirpasses == 2) nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. */ if (entryoffsetinblock + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { ufs_dirbad(dp, dp->i_offset, "i_size too small"); dp->i_size = entryoffsetinblock + DIRSIZ(OFSFMT(vdp), ep); dp->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. * If the wantparent flag isn't set, we return only * the directory (in ndp->ni_dvp), otherwise we go * on and lock the inode, being careful with ".". */ if (nameiop == DELETE && (flags & ISLASTCN)) { /* * Write access to directory required to delete files. */ error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there * is a previous entry in this block) in dp->i_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). */ if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; if (dp->i_number == dp->i_ino) { VREF(vdp); *vpp = vdp; return (0); } error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). 
This * implements append-only directories. */ if ((dp->i_mode & ISVTX) && cred->cr_uid != 0 && cred->cr_uid != dp->i_uid && VTOI(tdp)->i_uid != cred->cr_uid) { vput(tdp); return (EPERM); } *vpp = tdp; if (!lockparent) VOP_UNLOCK(vdp, 0, p); return (0); } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) return (error); /* * Careful about locking second inode. * This can only occur if the target is ".". */ if (dp->i_number == dp->i_ino) return (EISDIR); error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); *vpp = tdp; cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(vdp, 0, p); return (0); } /* * Step through the translation in the name. We do not `vput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the VFS_VGET for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the file system has any hard links other than ".." * that point backwards in the directory structure. */ pdp = vdp; if (flags & ISDOTDOT) { VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) { vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } if (lockparent && (flags & ISLASTCN) && (error = vn_lock(pdp, LK_EXCLUSIVE, p))) { vput(tdp); return (error); } *vpp = tdp; } else if (dp->i_number == dp->i_ino) { VREF(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(pdp, 0, p); *vpp = tdp; } /* * Insert name into cache if appropriate. 
*/ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); return (0); } void ufs_dirbad(ip, offset, how) struct inode *ip; doff_t offset; char *how; { struct mount *mp; mp = ITOV(ip)->v_mount; (void)printf("%s: bad dir ino %ld at offset %ld: %s\n", mp->mnt_stat.f_mntonname, ip->i_number, offset, how); if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0) panic("ufs_dirbad: bad dir"); } /* * Do consistency checking on a directory entry: * record length must be multiple of 4 * entry must fit in rest of its DIRBLKSIZ block * record must be large enough to contain entry * name is not longer than MAXNAMLEN * name must be as long as advertised, and null terminated */ int ufs_dirbadentry(dp, ep, entryoffsetinblock) struct vnode *dp; register struct direct *ep; int entryoffsetinblock; { register int i; int namlen; # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { /*return (1); */ printf("First bad\n"); goto bad; } if (ep->d_ino == 0) return (0); for (i = 0; i < namlen; i++) if (ep->d_name[i] == '\0') { /*return (1); */ printf("Second bad\n"); goto bad; } if (ep->d_name[i]) goto bad; return (0); bad: return (1); } /* - * Write a directory entry after a call to namei, using the parameters - * that it left in nameidata. The argument ip is the inode which the new - * directory entry will refer to. Dvp is a pointer to the directory to - * be written, which was left locked by namei. Remaining parameters - * (dp->i_offset, dp->i_count) indicate how the space for the new - * entry is to be obtained. + * Construct a new directory entry after a call to namei, using the + * parameters that it left in the componentname argument cnp. The + * argument ip is the inode to which the new directory entry will refer. */ -int -ufs_direnter(ip, dvp, cnp) +void +ufs_makedirentry(ip, cnp, newdirp) struct inode *ip; - struct vnode *dvp; - register struct componentname *cnp; + struct componentname *cnp; + struct direct *newdirp; { - register struct inode *dp; - struct direct newdir; #ifdef DIAGNOSTIC if ((cnp->cn_flags & SAVENAME) == 0) - panic("ufs_direnter: missing name"); + panic("ufs_makedirentry: missing name"); #endif - dp = VTOI(dvp); - newdir.d_ino = ip->i_number; - newdir.d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); - if (!OFSFMT(dvp)) - newdir.d_type = IFTODT(ip->i_mode); + newdirp->d_ino = ip->i_number; + newdirp->d_namlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); + if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) + newdirp->d_type = IFTODT(ip->i_mode); else { - newdir.d_type = 0; + newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) - { u_char tmp = newdir.d_namlen; - newdir.d_namlen = newdir.d_type; - newdir.d_type = tmp; } + { u_char tmp = newdirp->d_namlen; + newdirp->d_namlen = newdirp->d_type; + newdirp->d_type = tmp; } # endif } - return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc)); } /* - * Common entry point for directory entry removal used by ufs_direnter - * and ufs_whiteout + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument dirp is the new directory + * entry contents. Dvp is a pointer to the directory to be written, + * which was left locked by namei. 
Remaining parameters (dp->i_offset, + * dp->i_count) indicate how the space for the new entry is to be obtained. + * Non-null bp indicates that a directory is being created (for the + * soft dependency code). */ int -ufs_direnter2(dvp, dirp, cr, p) +ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) struct vnode *dvp; + struct vnode *tvp; struct direct *dirp; + struct componentname *cnp; + struct buf *newdirbp; +{ struct ucred *cr; struct proc *p; -{ int newentrysize; struct inode *dp; struct buf *bp; - struct iovec aiov; - struct uio auio; u_int dsize; struct direct *ep, *nep; - int error, loc, spacefree; + int error, ret, blkoff, loc, spacefree, flags; char *dirbuf; + p = curproc; /* XXX */ + cr = p->p_ucred; + dp = VTOI(dvp); newentrysize = DIRSIZ(OFSFMT(dvp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (dp->i_offset & (DIRBLKSIZ - 1)) - panic("ufs_direnter2: newblk"); - auio.uio_offset = dp->i_offset; + panic("ufs_direnter: newblk"); + flags = B_CLRBUF; + if (!DOINGSOFTDEP(dvp)) + flags |= B_SYNC; + if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, + cr, flags, &bp)) != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); + } + dp->i_size = dp->i_offset + DIRBLKSIZ; + dp->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(dvp, (u_long)dp->i_size); dirp->d_reclen = DIRBLKSIZ; - auio.uio_resid = newentrysize; - aiov.iov_len = newentrysize; - aiov.iov_base = (caddr_t)dirp; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; - error = VOP_WRITE(dvp, &auio, IO_SYNC, cr); - if (DIRBLKSIZ > - VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - /* XXX should grow with balloc() */ - panic("ufs_direnter2: frag size"); - else if (!error) { - dp->i_size = roundup2(dp->i_size, DIRBLKSIZ); - dp->i_flag |= IN_CHANGE; + blkoff = dp->i_offset & + (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); + bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); + if (DOINGSOFTDEP(dvp)) { + /* + * Ensure that the entire newly allocated block is a + * valid directory so that future growth within the + * block does not have to ensure that the block is + * written before the inode. + */ + blkoff += DIRBLKSIZ; + while (blkoff < bp->b_bcount) { + ((struct direct *) + (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; + blkoff += DIRBLKSIZ; + } + softdep_setup_directory_add(bp, dp, dp->i_offset, + dirp->d_ino, newdirbp); + bdwrite(bp); + } else { + error = VOP_BWRITE(bp); } + ret = UFS_UPDATE(dvp, &time, &time, !DOINGSOFTDEP(dvp)); + if (error == 0) + return (ret); return (error); } /* - * If dp->i_count is non-zero, then namei found space - * for the new entry in the range dp->i_offset to - * dp->i_offset + dp->i_count in the directory. - * To use this space, we may have to compact the entries located - * there, by copying them together towards the beginning of the - * block, leaving the free space in one usable chunk at the end. + * If dp->i_count is non-zero, then namei found space for the new + * entry in the range dp->i_offset to dp->i_offset + dp->i_count + * in the directory. To use this space, we may have to compact + * the entries located there, by copying them together towards the + * beginning of the block, leaving the free space in one usable + * chunk at the end. 
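A stand-alone model of the compaction invariant just described: within the region namei identified, each record's slack is its reclen minus the space its live entry needs (or all of reclen for a deleted entry), and packing live entries toward the front leaves exactly that sum in one chunk at the tail. The real loop that follows also moves the bytes, via bcopy() or, under soft updates, the new softdep_change_directoryentry_offset() hook so the dependency code can track each moved entry. Types here are stand-ins:

struct dent { unsigned ino; int reclen; int size; };  /* size = DIRSIZ */

static int
compact_free(const struct dent *e, int n)
{
        int i, freebytes = 0;

        for (i = 0; i < n; i++)
                freebytes += e[i].reclen - (e[i].ino ? e[i].size : 0);
        return (freebytes);     /* coalesced at the tail after packing */
}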
*/ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (dp->i_offset + dp->i_count > dp->i_size) dp->i_size = dp->i_offset + dp->i_count; /* * Get the block containing the space for the new directory entry. */ error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); - if (error) + if (error) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); return (error); + } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to - * dp->i_offset + dp->i_count would yield the - * space. + * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = DIRSIZ(OFSFMT(dvp), ep); spacefree = ep->d_reclen - dsize; for (loc = ep->d_reclen; loc < dp->i_count; ) { nep = (struct direct *)(dirbuf + loc); if (ep->d_ino) { /* trim the existing slot */ ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } else { /* overwrite; nothing there; header is ours */ spacefree += dsize; } dsize = DIRSIZ(OFSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; loc += nep->d_reclen; - bcopy((caddr_t)nep, (caddr_t)ep, dsize); + if (DOINGSOFTDEP(dvp)) + softdep_change_directoryentry_offset(dp, dirbuf, + (caddr_t)nep, (caddr_t)ep, dsize); + else + bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. */ if (ep->d_ino == 0 || (ep->d_ino == WINO && bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) - panic("ufs_direnter2: compact1"); + panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) - panic("ufs_direnter2: compact2"); + panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); - if (dvp->v_mount->mnt_flag & MNT_ASYNC) { + if (DOINGSOFTDEP(dvp)) { + softdep_setup_directory_add(bp, dp, + dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); bdwrite(bp); - error = 0; } else { - error = bowrite(bp); + if (dvp->v_mount->mnt_flag & MNT_ASYNC) { + bdwrite(bp); + error = 0; + } else { + error = bowrite(bp); + } } dp->i_flag |= IN_CHANGE | IN_UPDATE; - if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) - error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); + /* + * If all went well, and the directory can be shortened, proceed + * with the truncation. Note that we have to unlock the inode for + * the entry that we just entered, as the truncation may need to + * lock other inodes which can lead to deadlock if we also hold a + * lock on the newly entered node. + */ + if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { + if (tvp != NULL) + VOP_UNLOCK(tvp, 0, p); + (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); + if (tvp != NULL) + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + } return (error); } /* * Remove a directory entry after a call to namei, using * the parameters which it left in nameidata. The entry * dp->i_offset contains the offset into the directory of the * entry to be eliminated. The dp->i_count field contains the * size of the previous record in the directory. 
If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ int -ufs_dirremove(dvp, cnp) +ufs_dirremove(dvp, ip, flags, isrmdir) struct vnode *dvp; - struct componentname *cnp; + struct inode *ip; + int flags; + int isrmdir; { - register struct inode *dp; + struct inode *dp; struct direct *ep; struct buf *bp; int error; dp = VTOI(dvp); - if (cnp->cn_flags & DOWHITEOUT) { + if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to WINO. */ if (error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) return (error); ep->d_ino = WINO; ep->d_type = DT_WHT; - error = VOP_BWRITE(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); + goto out; } + if ((error = UFS_BLKATOFF(dvp, + (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) + return (error); if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ +#if 0 error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); +#endif ep->d_ino = 0; - error = bowrite(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); + } else { + /* + * Collapse new free space into previous entry. + */ + ep->d_reclen += dp->i_reclen; } +out: + if (ip) { + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + } + if (DOINGSOFTDEP(dvp)) { + if (ip) + softdep_setup_remove(bp, dp, ip, isrmdir); + bdwrite(bp); + } else { + if (ip) + ip->i_nlink--; + error = bowrite(bp); /* maybe this should be as below? */ + } +#if 0 /* * Collapse new free space into previous entry. */ error = UFS_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp); if (error) return (error); ep->d_reclen += dp->i_reclen; if (dvp->v_mount->mnt_flag & MNT_ASYNC) { bdwrite(bp); error = 0; } else { error = bowrite(bp); } +#endif dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } /* * Rewrite an existing directory entry to point at the inode * supplied. The parameters describing the directory entry are * set up by a call to namei. */ int -ufs_dirrewrite(dp, ip, cnp) - struct inode *dp, *ip; - struct componentname *cnp; +ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) + struct inode *dp, *oip; + ino_t newinum; + int newtype; + int isrmdir; { struct buf *bp; struct direct *ep; struct vnode *vdp = ITOV(dp); int error; error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); - ep->d_ino = ip->i_number; + ep->d_ino = newinum; if (!OFSFMT(vdp)) - ep->d_type = IFTODT(ip->i_mode); - if (vdp->v_mount->mnt_flag & MNT_ASYNC) { + ep->d_type = newtype; + oip->i_effnlink--; + oip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); - error = 0; } else { - error = bowrite(bp); + oip->i_nlink--; + if (vdp->v_mount->mnt_flag & MNT_ASYNC) { + bdwrite(bp); + error = 0; + } else { + error = bowrite(bp); + } } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. * * Using a struct dirtemplate here is not precisely * what we want, but better than using a struct direct. * * NB: does not handle corrupted directories. 
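The dirremove/dirrewrite changes above introduce the two link counts the soft-updates code maintains: i_effnlink is the in-core truth and drops the moment the name goes away, while i_nlink is the on-disk count and, under soft updates, is decremented later by the dependency machinery once the directory block is safely on disk. A sketch of that split with a stand-in structure:

struct xinode { int i_effnlink; int i_nlink; };

static void
remove_name(struct xinode *ip, int softdep)
{
        ip->i_effnlink--;               /* immediate, in-core */
        if (!softdep)
                ip->i_nlink--;          /* otherwise deferred to softdep */
}

This is also why the lookup change earlier in the patch tests dp->i_effnlink rather than dp->i_nlink: the directory may already be logically removed even though its on-disk count has not yet caught up.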
*/ int ufs_dirempty(ip, parentino, cred) register struct inode *ip; ino_t parentino; struct ucred *cred; { register off_t off; struct dirtemplate dbuf; register struct direct *dp = (struct direct *)&dbuf; int error, count, namlen; #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) for (off = 0; off < ip->i_size; off += dp->d_reclen) { error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. */ if (error || count != 0) return (0); /* avoid infinite loops */ if (dp->d_reclen == 0) return (0); /* skip empty entries */ if (dp->d_ino == 0 || dp->d_ino == WINO) continue; /* accept only "." and ".." */ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(ITOV(ip))) namlen = dp->d_type; else namlen = dp->d_namlen; # else namlen = dp->d_namlen; # endif if (namlen > 2) return (0); if (dp->d_name[0] != '.') return (0); /* * At this point namlen must be 1 or 2. * 1 implies ".", 2 implies ".." if second * char is also "." */ - if (namlen == 1) + if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' && dp->d_ino == parentino) continue; return (0); } return (1); } /* * Check if source directory is in the path of the target directory. * Target is supplied locked, source is unlocked. * The target is always vput before returning. */ int ufs_checkpath(source, target, cred) struct inode *source, *target; struct ucred *cred; { struct vnode *vp; int error, rootino, namlen; struct dirtemplate dirbuf; vp = ITOV(target); if (target->i_number == source->i_number) { error = EEXIST; goto out; } rootino = ROOTINO; error = 0; if (target->i_number == rootino) goto out; for (;;) { if (vp->v_type != VDIR) { error = ENOTDIR; break; } error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, cred, (int *)0, (struct proc *)0); if (error != 0) break; # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vp)) namlen = dirbuf.dotdot_type; else namlen = dirbuf.dotdot_namlen; # else namlen = dirbuf.dotdot_namlen; # endif if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { error = ENOTDIR; break; } if (dirbuf.dotdot_ino == source->i_number) { error = EINVAL; break; } if (dirbuf.dotdot_ino == rootino) break; vput(vp); error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp); if (error) { vp = NULL; break; } } out: if (error == ENOTDIR) printf("checkpath: .. not a directory\n"); if (vp != NULL) vput(vp); return (error); } Index: head/sys/ufs/ufs/ufs_quota.c =================================================================== --- head/sys/ufs/ufs/ufs_quota.c (revision 34265) +++ head/sys/ufs/ufs/ufs_quota.c (revision 34266) @@ -1,941 +1,945 @@ /* * Copyright (c) 1982, 1986, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Robert Elz at The University of Melbourne. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 - * $Id: ufs_quota.c,v 1.18 1998/02/06 12:14:18 eivind Exp $ + * $Id: ufs_quota.c,v 1.19 1998/02/09 06:11:12 eivind Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_DQUOT, "UFS quota", "UFS quota entries"); /* * Quota name to error message mapping. */ static char *quotatypes[] = INITQFNAMES; static int chkdqchg __P((struct inode *, long, struct ucred *, int)); static int chkiqchg __P((struct inode *, long, struct ucred *, int)); static int dqget __P((struct vnode *, u_long, struct ufsmount *, int, struct dquot **)); static int dqsync __P((struct vnode *, struct dquot *)); static void dqflush __P((struct vnode *)); #ifdef DIAGNOSTIC static void dqref __P((struct dquot *)); static void chkdquot __P((struct inode *)); #endif /* * Set up the quotas for an inode. * * This routine completely defines the semantics of quotas. * If other criteria are to be used to establish quotas, the * MAXQUOTAS value in quota.h should be increased, and the * additional dquots set up here. */ int getinoquota(ip) register struct inode *ip; { struct ufsmount *ump; struct vnode *vp = ITOV(ip); int error; ump = VFSTOUFS(vp->v_mount); /* * Set up the user quota based on file uid. * EINVAL means that quotas are not enabled. */ if (ip->i_dquot[USRQUOTA] == NODQUOT && (error = dqget(vp, ip->i_uid, ump, USRQUOTA, &ip->i_dquot[USRQUOTA])) && error != EINVAL) return (error); /* * Set up the group quota based on file gid. * EINVAL means that quotas are not enabled. */ if (ip->i_dquot[GRPQUOTA] == NODQUOT && (error = dqget(vp, ip->i_gid, ump, GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && error != EINVAL) return (error); return (0); } /* * Update disk usage, and take corrective action.
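One detail of the routine that follows is worth calling out: when usage shrinks (change < 0), chkdq() applies the delta unconditionally but clamps the counter at zero, so a stale or corrupted counter cannot wedge the user; only growth goes through the limit check. A sketch of that shrink path:

static long
apply_shrink(long curblocks, long change)       /* change < 0 */
{
        long ncur = curblocks + change;

        return (ncur >= 0 ? ncur : 0);          /* never go negative */
}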
*/ int chkdq(ip, change, cred, flags) register struct inode *ip; long change; struct ucred *cred; int flags; { register struct dquot *dq; register int i; int ncurblocks, error; #ifdef DIAGNOSTIC if ((flags & CHOWN) == 0) chkdquot(ip); #endif if (change == 0) return (0); if (change < 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "chkdq1", 0); } ncurblocks = dq->dq_curblocks + change; if (ncurblocks >= 0) dq->dq_curblocks = ncurblocks; else dq->dq_curblocks = 0; dq->dq_flags &= ~DQ_BLKS; dq->dq_flags |= DQ_MOD; } return (0); } if ((flags & FORCE) == 0 && cred->cr_uid != 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; error = chkdqchg(ip, change, cred, i); if (error) return (error); } } for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "chkdq2", 0); } dq->dq_curblocks += change; dq->dq_flags |= DQ_MOD; } return (0); } /* * Check for a valid change to a users allocation. * Issue an error message if appropriate. */ static int chkdqchg(ip, change, cred, type) struct inode *ip; long change; struct ucred *cred; int type; { register struct dquot *dq = ip->i_dquot[type]; long ncurblocks = dq->dq_curblocks + change; /* * If user would exceed their hard limit, disallow space allocation. */ if (ncurblocks >= dq->dq_bhardlimit && dq->dq_bhardlimit) { if ((dq->dq_flags & DQ_BLKS) == 0 && ip->i_uid == cred->cr_uid) { uprintf("\n%s: write failed, %s disk limit reached\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type]); dq->dq_flags |= DQ_BLKS; } return (EDQUOT); } /* * If user is over their soft limit for too long, disallow space * allocation. Reset time limit as they cross their soft limit. */ if (ncurblocks >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { if (dq->dq_curblocks < dq->dq_bsoftlimit) { dq->dq_btime = time.tv_sec + VFSTOUFS(ITOV(ip)->v_mount)->um_btime[type]; if (ip->i_uid == cred->cr_uid) uprintf("\n%s: warning, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "disk quota exceeded"); return (0); } if (time.tv_sec > dq->dq_btime) { if ((dq->dq_flags & DQ_BLKS) == 0 && ip->i_uid == cred->cr_uid) { uprintf("\n%s: write failed, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "disk quota exceeded for too long"); dq->dq_flags |= DQ_BLKS; } return (EDQUOT); } } return (0); } /* * Check the inode limit, applying corrective action. 
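chkdqchg() above (and chkiqchg() below, which is its inode-count twin) implements the classic soft/hard limit semantics: a hard limit denies outright, while first crossing the soft limit merely starts a grace timer, and only an expired timer turns the soft limit into a denial. A condensed, self-contained version of that decision:

#include <errno.h>
#ifndef EDQUOT
#define EDQUOT 69               /* fallback value for non-BSD hosts */
#endif

static int
quota_check(long ncur, long cur, long soft, long hard,
    long now, long *graceend, long graceperiod)
{
        if (hard && ncur >= hard)
                return (EDQUOT);
        if (soft && ncur >= soft) {
                if (cur < soft) {               /* just crossed soft */
                        *graceend = now + graceperiod;
                        return (0);             /* allowed, timer armed */
                }
                if (now > *graceend)
                        return (EDQUOT);        /* grace expired */
        }
        return (0);
}

Resetting the timer only at the crossing, not on every allocation, is what keeps a user from staying over the soft limit indefinitely by trickling in writes.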
*/ int chkiq(ip, change, cred, flags) register struct inode *ip; long change; struct ucred *cred; int flags; { register struct dquot *dq; register int i; int ncurinodes, error; #ifdef DIAGNOSTIC if ((flags & CHOWN) == 0) chkdquot(ip); #endif if (change == 0) return (0); if (change < 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "chkiq1", 0); } ncurinodes = dq->dq_curinodes + change; if (ncurinodes >= 0) dq->dq_curinodes = ncurinodes; else dq->dq_curinodes = 0; dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; } return (0); } if ((flags & FORCE) == 0 && cred->cr_uid != 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; error = chkiqchg(ip, change, cred, i); if (error) return (error); } } for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "chkiq2", 0); } dq->dq_curinodes += change; dq->dq_flags |= DQ_MOD; } return (0); } /* * Check for a valid change to a users allocation. * Issue an error message if appropriate. */ static int chkiqchg(ip, change, cred, type) struct inode *ip; long change; struct ucred *cred; int type; { register struct dquot *dq = ip->i_dquot[type]; long ncurinodes = dq->dq_curinodes + change; /* * If user would exceed their hard limit, disallow inode allocation. */ if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { if ((dq->dq_flags & DQ_INODS) == 0 && ip->i_uid == cred->cr_uid) { uprintf("\n%s: write failed, %s inode limit reached\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type]); dq->dq_flags |= DQ_INODS; } return (EDQUOT); } /* * If user is over their soft limit for too long, disallow inode * allocation. Reset time limit as they cross their soft limit. */ if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { if (dq->dq_curinodes < dq->dq_isoftlimit) { dq->dq_itime = time.tv_sec + VFSTOUFS(ITOV(ip)->v_mount)->um_itime[type]; if (ip->i_uid == cred->cr_uid) uprintf("\n%s: warning, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "inode quota exceeded"); return (0); } if (time.tv_sec > dq->dq_itime) { if ((dq->dq_flags & DQ_INODS) == 0 && ip->i_uid == cred->cr_uid) { uprintf("\n%s: write failed, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "inode quota exceeded for too long"); dq->dq_flags |= DQ_INODS; } return (EDQUOT); } } return (0); } #ifdef DIAGNOSTIC /* * On filesystems with quotas enabled, it is an error for a file to change * size and not to have a dquot structure associated with it. */ static void chkdquot(ip) register struct inode *ip; { struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount); register int i; for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP || (ump->um_qflags[i] & (QTF_OPENING|QTF_CLOSING))) continue; if (ip->i_dquot[i] == NODQUOT) { vprint("chkdquot: missing dquot", ITOV(ip)); panic("chkdquot: missing dquot"); } } } #endif /* * Code to process quotactl commands. */ /* * Q_QUOTAON - set up a quota file for a particular file system. 
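The mount-vnode scans in quotaon(), quotaoff(), and qsync() below all share one shape, and this patch adds the same guard to each: skip vnodes whose type is still VNON (not yet fully initialized) and restart from the head whenever the list changes underneath the scan. A stand-alone model of the idiom, with all locking abstracted into the two callbacks:

#include <stddef.h>

struct xvnode { struct xvnode *next; int type; };
#define XVNON 0                 /* stand-in for the kernel's VNON */

static void
scan_vnodes(struct xvnode *head, int (*work)(struct xvnode *),
    int (*list_changed)(void))
{
again:
        for (struct xvnode *vp = head; vp != NULL; vp = vp->next) {
                if (vp->type == XVNON)
                        continue;       /* not fully set up yet */
                if (work(vp) != 0)
                        break;
                if (list_changed())
                        goto again;     /* list moved; start over */
        }
}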
*/ int quotaon(p, mp, type, fname) struct proc *p; struct mount *mp; register int type; caddr_t fname; { struct ufsmount *ump = VFSTOUFS(mp); struct vnode *vp, **vpp; struct vnode *nextvp; struct dquot *dq; int error; struct nameidata nd; vpp = &ump->um_quotas[type]; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, p); error = vn_open(&nd, FREAD|FWRITE, 0); if (error) return (error); vp = nd.ni_vp; VOP_UNLOCK(vp, 0, p); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (EACCES); } if (*vpp != vp) quotaoff(p, mp, type); ump->um_qflags[type] |= QTF_OPENING; mp->mnt_flag |= MNT_QUOTA; vp->v_flag |= VSYSTEM; *vpp = vp; /* * Save the credential of the process that turned on quotas. * Set up the time limits for this quota. */ crhold(p->p_ucred); ump->um_cred[type] = p->p_ucred; ump->um_btime[type] = MAX_DQ_TIME; ump->um_itime[type] = MAX_IQ_TIME; if (dqget(NULLVP, 0, ump, type, &dq) == 0) { if (dq->dq_btime > 0) ump->um_btime[type] = dq->dq_btime; if (dq->dq_itime > 0) ump->um_itime[type] = dq->dq_itime; dqrele(NULLVP, dq); } /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. */ again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; - if (vp->v_writecount == 0) + if (vp->v_type == VNON || vp->v_writecount == 0) continue; if (vget(vp, LK_EXCLUSIVE, p)) goto again; error = getinoquota(VTOI(vp)); if (error) { vput(vp); break; } vput(vp); if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) goto again; } ump->um_qflags[type] &= ~QTF_OPENING; if (error) quotaoff(p, mp, type); return (error); } /* * Q_QUOTAOFF - turn off disk quotas for a filesystem. */ int quotaoff(p, mp, type) struct proc *p; struct mount *mp; register int type; { struct vnode *vp; struct vnode *qvp, *nextvp; struct ufsmount *ump = VFSTOUFS(mp); struct dquot *dq; struct inode *ip; int error; if ((qvp = ump->um_quotas[type]) == NULLVP) return (0); ump->um_qflags[type] |= QTF_CLOSING; /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed. */ again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; + if (vp->v_type == VNON) + continue; if (vget(vp, LK_EXCLUSIVE, p)) goto again; ip = VTOI(vp); dq = ip->i_dquot[type]; ip->i_dquot[type] = NODQUOT; dqrele(vp, dq); vput(vp); if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) goto again; } dqflush(qvp); qvp->v_flag &= ~VSYSTEM; error = vn_close(qvp, FREAD|FWRITE, p->p_ucred, p); ump->um_quotas[type] = NULLVP; crfree(ump->um_cred[type]); ump->um_cred[type] = NOCRED; ump->um_qflags[type] &= ~QTF_CLOSING; for (type = 0; type < MAXQUOTAS; type++) if (ump->um_quotas[type] != NULLVP) break; if (type == MAXQUOTAS) mp->mnt_flag &= ~MNT_QUOTA; return (error); } /* * Q_GETQUOTA - return current values in a dqblk structure. */ int getquota(mp, id, type, addr) struct mount *mp; u_long id; int type; caddr_t addr; { struct dquot *dq; int error; error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq); if (error) return (error); error = copyout((caddr_t)&dq->dq_dqb, addr, sizeof (struct dqblk)); dqrele(NULLVP, dq); return (error); } /* * Q_SETQUOTA - assign an entire dqblk structure. 
*/ int setquota(mp, id, type, addr) struct mount *mp; u_long id; int type; caddr_t addr; { register struct dquot *dq; struct dquot *ndq; struct ufsmount *ump = VFSTOUFS(mp); struct dqblk newlim; int error; error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)); if (error) return (error); error = dqget(NULLVP, id, ump, type, &ndq); if (error) return (error); dq = ndq; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "setqta", 0); } /* * Copy all but the current values. * Reset time limit if previously had no soft limit or were * under it, but now have a soft limit and are over it. */ newlim.dqb_curblocks = dq->dq_curblocks; newlim.dqb_curinodes = dq->dq_curinodes; if (dq->dq_id != 0) { newlim.dqb_btime = dq->dq_btime; newlim.dqb_itime = dq->dq_itime; } if (newlim.dqb_bsoftlimit && dq->dq_curblocks >= newlim.dqb_bsoftlimit && (dq->dq_bsoftlimit == 0 || dq->dq_curblocks < dq->dq_bsoftlimit)) newlim.dqb_btime = time.tv_sec + ump->um_btime[type]; if (newlim.dqb_isoftlimit && dq->dq_curinodes >= newlim.dqb_isoftlimit && (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) newlim.dqb_itime = time.tv_sec + ump->um_itime[type]; dq->dq_dqb = newlim; if (dq->dq_curblocks < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) dq->dq_flags |= DQ_FAKE; else dq->dq_flags &= ~DQ_FAKE; dq->dq_flags |= DQ_MOD; dqrele(NULLVP, dq); return (0); } /* * Q_SETUSE - set current inode and block usage. */ int setuse(mp, id, type, addr) struct mount *mp; u_long id; int type; caddr_t addr; { register struct dquot *dq; struct ufsmount *ump = VFSTOUFS(mp); struct dquot *ndq; struct dqblk usage; int error; error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk)); if (error) return (error); error = dqget(NULLVP, id, ump, type, &ndq); if (error) return (error); dq = ndq; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "setuse", 0); } /* * Reset time limit if have a soft limit and were * previously under it, but are now over it. */ if (dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit && usage.dqb_curblocks >= dq->dq_bsoftlimit) dq->dq_btime = time.tv_sec + ump->um_btime[type]; if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && usage.dqb_curinodes >= dq->dq_isoftlimit) dq->dq_itime = time.tv_sec + ump->um_itime[type]; dq->dq_curblocks = usage.dqb_curblocks; dq->dq_curinodes = usage.dqb_curinodes; if (dq->dq_curblocks < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; dqrele(NULLVP, dq); return (0); } /* * Q_SYNC - sync quota files to disk. */ int qsync(mp) struct mount *mp; { struct ufsmount *ump = VFSTOUFS(mp); struct proc *p = curproc; /* XXX */ struct vnode *vp, *nextvp; struct dquot *dq; int i, error; /* * Check if the mount point has any quotas. * If not, simply return. */ for (i = 0; i < MAXQUOTAS; i++) if (ump->um_quotas[i] != NULLVP) break; if (i == MAXQUOTAS) return (0); /* * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. 
*/ simple_lock(&mntvnode_slock); again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { if (vp->v_mount != mp) goto again; nextvp = vp->v_mntvnodes.le_next; + if (vp->v_type == VNON) + continue; simple_lock(&vp->v_interlock); simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto again; continue; } for (i = 0; i < MAXQUOTAS; i++) { dq = VTOI(vp)->i_dquot[i]; if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) dqsync(vp, dq); } vput(vp); simple_lock(&mntvnode_slock); if (vp->v_mntvnodes.le_next != nextvp) goto again; } simple_unlock(&mntvnode_slock); return (0); } /* * Code pertaining to management of the in-core dquot data structures. */ #define DQHASH(dqvp, id) \ (&dqhashtbl[((((int)(dqvp)) >> 8) + id) & dqhash]) static LIST_HEAD(dqhash, dquot) *dqhashtbl; static u_long dqhash; /* * Dquot free list. */ #define DQUOTINC 5 /* minimum free dquots desired */ static TAILQ_HEAD(dqfreelist, dquot) dqfreelist; static long numdquot, desireddquot = DQUOTINC; /* * Initialize the quota system. */ void dqinit() { dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash); TAILQ_INIT(&dqfreelist); } /* * Obtain a dquot structure for the specified identifier and quota file * reading the information from the file if necessary. */ static int dqget(vp, id, ump, type, dqp) struct vnode *vp; u_long id; register struct ufsmount *ump; register int type; struct dquot **dqp; { struct proc *p = curproc; /* XXX */ struct dquot *dq; struct dqhash *dqh; struct vnode *dqvp; struct iovec aiov; struct uio auio; int error; dqvp = ump->um_quotas[type]; if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) { *dqp = NODQUOT; return (EINVAL); } /* * Check the cache first. */ dqh = DQHASH(dqvp, id); for (dq = dqh->lh_first; dq; dq = dq->dq_hash.le_next) { if (dq->dq_id != id || dq->dq_ump->um_quotas[dq->dq_type] != dqvp) continue; /* * Cache hit with no references. Take * the structure off the free list. */ if (dq->dq_cnt == 0) TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); DQREF(dq); *dqp = dq; return (0); } /* * Not in cache, allocate a new one. */ if (dqfreelist.tqh_first == NODQUOT && numdquot < MAXQUOTAS * desiredvnodes) desireddquot += DQUOTINC; if (numdquot < desireddquot) { dq = (struct dquot *)malloc(sizeof *dq, M_DQUOT, M_WAITOK); bzero((char *)dq, sizeof *dq); numdquot++; } else { if ((dq = dqfreelist.tqh_first) == NULL) { tablefull("dquot"); *dqp = NODQUOT; return (EUSERS); } if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) panic("dqget: free dquot isn't"); TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); LIST_REMOVE(dq, dq_hash); } /* * Initialize the contents of the dquot structure. 
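The DQHASH() macro above buckets cached dquots by mixing the quota-file vnode pointer with the user or group id; the shift discards the low pointer bits that are always zero for aligned allocations, and dqhash is the power-of-two mask hashinit() returned. A stand-alone version of the same computation:

#include <stdint.h>

static unsigned long
dq_bucket(const void *dqvp, unsigned long id, unsigned long mask)
{
        return ((((uintptr_t)dqvp >> 8) + id) & mask);
}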
*/ if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); LIST_INSERT_HEAD(dqh, dq, dq_hash); DQREF(dq); dq->dq_flags = DQ_LOCK; dq->dq_id = id; dq->dq_ump = ump; dq->dq_type = type; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = (caddr_t)&dq->dq_dqb; aiov.iov_len = sizeof (struct dqblk); auio.uio_resid = sizeof (struct dqblk); auio.uio_offset = (off_t)(id * sizeof (struct dqblk)); auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = (struct proc *)0; error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]); if (auio.uio_resid == sizeof(struct dqblk) && error == 0) bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk)); if (vp != dqvp) VOP_UNLOCK(dqvp, 0, p); if (dq->dq_flags & DQ_WANT) wakeup((caddr_t)dq); dq->dq_flags = 0; /* * I/O error in reading quota file, release * quota structure and reflect problem to caller. */ if (error) { LIST_REMOVE(dq, dq_hash); dqrele(vp, dq); *dqp = NODQUOT; return (error); } /* * Check for no limit to enforce. * Initialize time values if necessary. */ if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) dq->dq_flags |= DQ_FAKE; if (dq->dq_id != 0) { if (dq->dq_btime == 0) dq->dq_btime = time.tv_sec + ump->um_btime[type]; if (dq->dq_itime == 0) dq->dq_itime = time.tv_sec + ump->um_itime[type]; } *dqp = dq; return (0); } #ifdef DIAGNOSTIC /* * Obtain a reference to a dquot. */ static void dqref(dq) struct dquot *dq; { dq->dq_cnt++; } #endif /* * Release a reference to a dquot. */ void dqrele(vp, dq) struct vnode *vp; register struct dquot *dq; { if (dq == NODQUOT) return; if (dq->dq_cnt > 1) { dq->dq_cnt--; return; } if (dq->dq_flags & DQ_MOD) (void) dqsync(vp, dq); if (--dq->dq_cnt > 0) return; TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); } /* * Update the disk quota in the quota file. */ static int dqsync(vp, dq) struct vnode *vp; struct dquot *dq; { struct proc *p = curproc; /* XXX */ struct vnode *dqvp; struct iovec aiov; struct uio auio; int error; if (dq == NODQUOT) panic("dqsync: dquot"); if ((dq->dq_flags & DQ_MOD) == 0) return (0); if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) panic("dqsync: file"); if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+2, "dqsync", 0); if ((dq->dq_flags & DQ_MOD) == 0) { if (vp != dqvp) VOP_UNLOCK(dqvp, 0, p); return (0); } } dq->dq_flags |= DQ_LOCK; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = (caddr_t)&dq->dq_dqb; aiov.iov_len = sizeof (struct dqblk); auio.uio_resid = sizeof (struct dqblk); auio.uio_offset = (off_t)(dq->dq_id * sizeof (struct dqblk)); auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_procp = (struct proc *)0; error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]); if (auio.uio_resid && error == 0) error = EIO; if (dq->dq_flags & DQ_WANT) wakeup((caddr_t)dq); dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); if (vp != dqvp) VOP_UNLOCK(dqvp, 0, p); return (error); } /* * Flush all entries from the cache for a particular vnode. */ static void dqflush(vp) register struct vnode *vp; { register struct dquot *dq, *nextdq; struct dqhash *dqh; /* * Move all dquot's that used to refer to this quota * file off their hash chains (they will eventually * fall off the head of the free list and be re-used). 
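The release discipline in dqrele() above is worth spelling out, since it explains the dqflush() pass that follows: a dquot whose reference count reaches zero is not freed, it is flushed if modified and then parked at the tail of the free list while remaining hashed, so a later dqget() can revive it cheaply. A sketch with stand-in types:

struct cdq { int cnt; int mod; };

static void
release(struct cdq *dq, void (*sync)(struct cdq *),
    void (*park_tail)(struct cdq *))
{
        if (dq->cnt > 1) {              /* not the last reference */
                dq->cnt--;
                return;
        }
        if (dq->mod)
                sync(dq);               /* may sleep; still referenced */
        if (--dq->cnt > 0)
                return;                 /* revived while we slept */
        park_tail(dq);                  /* stays hashed, reusable */
}

Because parked entries stay hashed, closing a quota file must walk the table and unhash any dquot still pointing at it, which is exactly what dqflush() does.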
*/ for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) { for (dq = dqh->lh_first; dq; dq = nextdq) { nextdq = dq->dq_hash.le_next; if (dq->dq_ump->um_quotas[dq->dq_type] != vp) continue; if (dq->dq_cnt) panic("dqflush: stray dquot"); LIST_REMOVE(dq, dq_hash); dq->dq_ump = (struct ufsmount *)0; } } } Index: head/sys/ufs/ufs/ufs_readwrite.c =================================================================== --- head/sys/ufs/ufs/ufs_readwrite.c (revision 34265) +++ head/sys/ufs/ufs/ufs_readwrite.c (revision 34266) @@ -1,609 +1,609 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 - * $Id: ufs_readwrite.c,v 1.43 1998/02/26 06:39:50 msmith Exp $ + * $Id: ufs_readwrite.c,v 1.44 1998/03/07 21:36:42 dyson Exp $ */ #define BLKSIZE(a, b, c) blksize(a, b, c) #define FS struct fs #define I_FS i_fs #define READ ffs_read #define READ_S "ffs_read" #define WRITE ffs_write #define WRITE_S "ffs_write" #include #include #include #include #include #include #include /* * Vnode op for reading. 
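Before the read routine below, a note on the per-iteration arithmetic it repeats: the current offset is split into a logical block number and an offset within that block, and the transfer size is the distance to the block end clamped by both the caller's residual count and the bytes left in the file. A self-contained version of that computation, with bsize standing in for fs_bsize:

#include <stdio.h>

static long
xfer_size(long bsize, long offset, long resid, long bytesinfile)
{
        long blkoffset = offset % bsize;        /* cf. blkoff() */
        long xfersize = bsize - blkoffset;      /* to end of block */

        if (resid < xfersize)
                xfersize = resid;
        if (bytesinfile < xfersize)
                xfersize = bytesinfile;
        return (xfersize);
}

int
main(void)
{
        /* 8k blocks: a 100-byte read at offset 8190 crosses a block */
        printf("%ld\n", xfer_size(8192, 8190, 100, 1000000)); /* 2 */
        return (0);
}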
*/ /* ARGSUSED */ int READ(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp; register struct inode *ip; register struct uio *uio; register FS *fs; struct buf *bp; ufs_daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; int error; u_short mode; int seqcount; int ioflag; vm_object_t object; vp = ap->a_vp; seqcount = ap->a_ioflag >> 16; ip = VTOI(vp); mode = ip->i_mode; uio = ap->a_uio; ioflag = ap->a_ioflag; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("%s: mode", READ_S); if (vp->v_type == VLNK) { if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) panic("%s: short symlink", READ_S); } else if (vp->v_type != VREG && vp->v_type != VDIR) panic("%s: type %d", READ_S, vp->v_type); #endif fs = ip->I_FS; if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize) return (EFBIG); object = vp->v_object; bytesinfile = ip->i_size - uio->uio_offset; if (bytesinfile <= 0) { return 0; } if (object) vm_object_reference(object); #if 1 if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) { int nread, toread; toread = uio->uio_resid; if (toread > bytesinfile) toread = bytesinfile; if (toread >= PAGE_SIZE) { error = uioread(toread, uio, object, &nread); if ((uio->uio_resid == 0) || (error != 0)) { if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) ip->i_flag |= IN_ACCESS; if (object) vm_object_vndeallocate(object); return error; } } } #endif for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; #if 1 if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) { int nread, toread; toread = uio->uio_resid; if (toread > bytesinfile) toread = bytesinfile; if (toread >= PAGE_SIZE) { error = uioread(toread, uio, object, &nread); if ((uio->uio_resid == 0) || (error != 0)) { if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) ip->i_flag |= IN_ACCESS; if (object) vm_object_vndeallocate(object); return error; } if (nread > 0) { continue; } } } #endif lbn = lblkno(fs, uio->uio_offset); nextlbn = lbn + 1; size = BLKSIZE(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; if (lblktosize(fs, nextlbn) >= ip->i_size) error = bread(vp, lbn, size, NOCRED, &bp); else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, uio->uio_resid, seqcount, &bp); else if (lbn - 1 == vp->v_lastr) { int nextsize = BLKSIZE(fs, ip, nextlbn); error = breadn(vp, lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); if (error) { brelse(bp); bp = NULL; break; } vp->v_lastr = lbn; /* * We should only get non-zero b_resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data. 
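Each pass of the read loop above sizes its copy with pure arithmetic: lblkno() and blkoff() split the file offset into a logical block and an intra-block offset, and the transfer is the remainder of the block clamped by both the caller's residual count and end of file. A self-contained sketch of that clamping, with a fixed 8K block standing in for fs_bsize:

#include <stdio.h>

#define BSIZE 8192L	/* stand-in for fs->fs_bsize */

/* Compute one iteration's transfer size, as in the READ loop. */
static long
xfersize(long offset, long resid, long filesize)
{
	long lbn = offset / BSIZE;		/* lblkno() */
	long blkoffset = offset % BSIZE;	/* blkoff() */
	long xfer = BSIZE - blkoffset;		/* rest of this block */
	long bytesinfile = filesize - offset;

	if (resid < xfer)
		xfer = resid;			/* caller wants less */
	if (bytesinfile < xfer)
		xfer = bytesinfile;		/* EOF caps the copy */
	printf("lbn %ld, blkoffset %ld -> xfersize %ld\n",
	    lbn, blkoffset, xfer);
	return (xfer);
}

int
main(void)
{
	xfersize(12000, 100000, 20000);	/* mid-block, EOF-limited */
	xfersize(0, 512, 20000);	/* resid-limited */
	return (0);
}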
*/ size -= bp->b_resid; if (size < xfersize) { if (size == 0) break; xfersize = size; } if (vfs_ioopt && object && (bp->b_flags & B_VMIO) && ((blkoffset & PAGE_MASK) == 0) && ((xfersize & PAGE_MASK) == 0)) { error = uiomoveco((char *)bp->b_data + blkoffset, (int)xfersize, uio, object); } else { error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); } if (error) break; if (ioflag & IO_VMIO) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bqrelse(bp); } } if (bp != NULL) { if (ioflag & IO_VMIO) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bqrelse(bp); } } if (object) vm_object_vndeallocate(object); if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) ip->i_flag |= IN_ACCESS; return (error); } /* * Vnode op for writing. */ int WRITE(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp; register struct uio *uio; register struct inode *ip; register FS *fs; struct buf *bp; struct proc *p; ufs_daddr_t lbn; off_t osize; int blkoffset, error, extended, flags, ioflag, resid, size, xfersize; struct timeval tv; vm_object_t object; extended = 0; ioflag = ap->a_ioflag; uio = ap->a_uio; vp = ap->a_vp; ip = VTOI(vp); object = vp->v_object; if (object) vm_object_reference(object); #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("%s: mode", WRITE_S); #endif switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = ip->i_size; if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) { if (object) vm_object_vndeallocate(object); return (EPERM); } /* FALLTHROUGH */ case VLNK: break; case VDIR: if ((ioflag & IO_SYNC) == 0) panic("%s: nonsync dir write", WRITE_S); break; default: panic("%s: type", WRITE_S); } fs = ip->I_FS; if (uio->uio_offset < 0 || (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) { if (object) vm_object_vndeallocate(object); return (EFBIG); } /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. */ p = uio->uio_procp; if (vp->v_type == VREG && p && uio->uio_offset + uio->uio_resid > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { psignal(p, SIGXFSZ); if (object) vm_object_vndeallocate(object); return (EFBIG); } resid = uio->uio_resid; osize = ip->i_size; flags = ioflag & IO_SYNC ? B_SYNC : 0; if (object && (object->flags & OBJ_OPT)) { vm_freeze_copyopts(object, OFF_TO_IDX(uio->uio_offset), OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK)); } for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (uio->uio_offset + xfersize > ip->i_size) vnode_pager_setsize(vp, uio->uio_offset + xfersize); if (fs->fs_bsize > xfersize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; - - error = ffs_balloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); - if (error) +/* XXX is uio->uio_offset the right thing here? 
*/ + error = VOP_BALLOC(vp, uio->uio_offset, xfersize, + ap->a_cred, flags, &bp); + if (error != 0) break; if (uio->uio_offset + xfersize > ip->i_size) { ip->i_size = uio->uio_offset + xfersize; extended = 1; } size = BLKSIZE(fs, ip, lbn) - bp->b_resid; if (size < xfersize) xfersize = size; error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (ioflag & IO_VMIO) bp->b_flags |= B_RELBUF; if (ioflag & IO_SYNC) { (void)bwrite(bp); } else if (xfersize + blkoffset == fs->fs_bsize) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; cluster_write(bp, ip->i_size); } else { bawrite(bp); } } else { bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } if (error || xfersize == 0) break; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_mode &= ~(ISUID | ISGID); if (error) { if (ioflag & IO_UNIT) { (void)UFS_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred, uio->uio_procp); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { gettime(&tv); error = UFS_UPDATE(vp, &tv, &tv, 1); } if (!error) VN_POLLEVENT(vp, POLLWRITE | (extended ? POLLEXTEND : 0)); if (object) vm_object_vndeallocate(object); return (error); } /* * get page routine */ int ffs_getpages(ap) struct vop_getpages_args *ap; { off_t foff, physoffset; int i, size, bsize; struct vnode *dp, *vp; vm_object_t obj; vm_pindex_t pindex, firstindex; vm_page_t m, mreq; int bbackwards, bforwards; int pbackwards, pforwards; int firstpage; int reqlblkno; daddr_t reqblkno; int poff; int pcount; int rtval; int pagesperblock; pcount = round_page(ap->a_count) / PAGE_SIZE; mreq = ap->a_m[ap->a_reqpage]; firstindex = ap->a_m[0]->pindex; /* * if ANY DEV_BSIZE blocks are valid on a large filesystem block * then, the entire page is valid -- */ if (mreq->valid) { mreq->valid = VM_PAGE_BITS_ALL; for (i = 0; i < pcount; i++) { if (i != ap->a_reqpage) { vm_page_free(ap->a_m[i]); } } return VM_PAGER_OK; } vp = ap->a_vp; obj = vp->v_object; bsize = vp->v_mount->mnt_stat.f_iosize; pindex = mreq->pindex; foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */; if (firstindex == 0) vp->v_lastr = 0; if ((obj->behavior != OBJ_RANDOM) && ((firstindex != 0) && (firstindex <= vp->v_lastr) && ((firstindex + pcount) > vp->v_lastr)) || (obj->behavior == OBJ_SEQUENTIAL)) { struct uio auio; struct iovec aiov; int error; for (i = 0; i < pcount; i++) { m = ap->a_m[i]; vm_page_activate(m); m->busy++; m->flags &= ~PG_BUSY; } auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = 0; aiov.iov_len = MAXBSIZE; auio.uio_resid = MAXBSIZE; auio.uio_offset = foff; auio.uio_segflg = UIO_NOCOPY; auio.uio_rw = UIO_READ; auio.uio_procp = curproc; error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), curproc->p_ucred); for (i = 0; i < pcount; i++) { m = ap->a_m[i]; m->busy--; if ((m != mreq) && (m->wire_count == 0) && (m->hold_count == 0) && (m->valid == 0) && (m->busy == 0) && (m->flags & PG_BUSY) == 0) { m->flags |= PG_BUSY; vm_page_free(m); } else if (m == mreq) { while (m->flags & PG_BUSY) { vm_page_sleep(m, "ffspwt", NULL); } m->flags |= PG_BUSY; vp->v_lastr = m->pindex + 1; } else { if (m->wire_count == 0) { if (m->busy || (m->flags & PG_MAPPED) || (m->flags & (PG_WANTED | PG_BUSY)) == PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } } 
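The WRITE loop above releases each buffer according to how much of the block it filled: IO_SYNC forces an immediate bwrite(), a completely filled block is pushed right away (clustered when the mount allows it), and a partially filled block is left as a delayed write in the hope it will be completed later. That policy, reduced to a small decision function with illustrative names:

#include <stdio.h>

#define BSIZE 8192

enum flush { FLUSH_SYNC, FLUSH_ASYNC_FULL, FLUSH_DELAYED };

/*
 * Mirror of the WRITE loop's buffer-release policy: synchronous
 * writes go out immediately, a completely filled block is pushed
 * (possibly clustered) right away, and a partial block stays
 * dirty in the cache via a delayed write.
 */
static enum flush
flush_policy(int io_sync, int blkoffset, int xfersize)
{
	if (io_sync)
		return (FLUSH_SYNC);		/* bwrite() */
	if (blkoffset + xfersize == BSIZE)
		return (FLUSH_ASYNC_FULL);	/* cluster_write()/bawrite() */
	return (FLUSH_DELAYED);			/* bdwrite() */
}

int
main(void)
{
	printf("%d %d %d\n",
	    flush_policy(1, 0, 100),		/* FLUSH_SYNC */
	    flush_policy(0, 0, BSIZE),		/* FLUSH_ASYNC_FULL */
	    flush_policy(0, 0, 100));		/* FLUSH_DELAYED */
	return (0);
}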
vp->v_lastr = m->pindex + 1; } } if (mreq->valid == 0) return VM_PAGER_ERROR; mreq->valid = VM_PAGE_BITS_ALL; return VM_PAGER_OK; } /* * foff is the file offset of the required page * reqlblkno is the logical block that contains the page * poff is the index of the page into the logical block */ reqlblkno = foff / bsize; poff = (foff % bsize) / PAGE_SIZE; if ( VOP_BMAP( vp, reqlblkno, &dp, &reqblkno, &bforwards, &bbackwards) || (reqblkno == -1)) { for(i = 0; i < pcount; i++) { if (i != ap->a_reqpage) vm_page_free(ap->a_m[i]); } if (reqblkno == -1) { if ((mreq->flags & PG_ZERO) == 0) vm_page_zero_fill(mreq); mreq->dirty = 0; mreq->valid = VM_PAGE_BITS_ALL; return VM_PAGER_OK; } else { return VM_PAGER_ERROR; } } physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE; pagesperblock = bsize / PAGE_SIZE; /* * find the first page that is contiguous... * note that pbackwards is the number of pages that are contiguous * backwards. */ firstpage = 0; if (ap->a_count) { pbackwards = poff + bbackwards * pagesperblock; if (ap->a_reqpage > pbackwards) { firstpage = ap->a_reqpage - pbackwards; for(i=0;i<firstpage;i++) vm_page_free(ap->a_m[i]); } /* * pforwards is the number of pages that are contiguous * after the current page. */ pforwards = (pagesperblock - (poff + 1)) + bforwards * pagesperblock; if (pforwards < (pcount - (ap->a_reqpage + 1))) { for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++) vm_page_free(ap->a_m[i]); pcount = ap->a_reqpage + pforwards + 1; } /* * number of pages for I/O corrected for the non-contig pages at * the beginning of the array. */ pcount -= firstpage; } /* * calculate the size of the transfer */ size = pcount * PAGE_SIZE; vp->v_lastr = mreq->pindex + pcount; if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) > obj->un_pager.vnp.vnp_size) size = obj->un_pager.vnp.vnp_size - foff; physoffset -= foff; rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size, (ap->a_reqpage - firstpage), physoffset); return (rtval); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int ffs_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } Index: head/sys/ufs/ufs/ufs_vnops.c =================================================================== --- head/sys/ufs/ufs/ufs_vnops.c (revision 34265) +++ head/sys/ufs/ufs/ufs_vnops.c (revision 34266) @@ -1,2245 +1,2255 @@ /* * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
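ffs_getpages() above trims the caller's page array down to the run that is physically contiguous on disk: VOP_BMAP() reports how many whole blocks are contiguous before and after the requested block, and pbackwards/pforwards convert those counts into pages. A user-space rendition of the trimming arithmetic, with illustrative page and block sizes:

#include <stdio.h>

#define PAGE_SIZE 4096
#define BSIZE 8192	/* stand-in for f_iosize */

/*
 * Given the index of the requested page within its fs block (poff),
 * BMAP's contiguous-block counts before/after it, and the page
 * array geometry, compute the [firstpage, firstpage + pcount)
 * window that one contiguous device transfer can satisfy.
 */
static void
trim_run(int poff, int bbackwards, int bforwards, int reqpage, int pcount)
{
	int pagesperblock = BSIZE / PAGE_SIZE;
	int firstpage = 0;
	int pbackwards = poff + bbackwards * pagesperblock;
	int pforwards = (pagesperblock - (poff + 1)) +
	    bforwards * pagesperblock;

	if (reqpage > pbackwards)	/* leading pages not contiguous */
		firstpage = reqpage - pbackwards;
	if (pforwards < pcount - (reqpage + 1))	/* trailing pages */
		pcount = reqpage + pforwards + 1;
	pcount -= firstpage;
	printf("read %d pages starting at array slot %d\n",
	    pcount, firstpage);
}

int
main(void)
{
	trim_run(0, 0, 1, 4, 16);	/* one extra contiguous block after */
	return (0);
}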
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 - * $Id: ufs_vnops.c,v 1.77 1998/02/06 12:14:19 eivind Exp $ + * $Id: ufs_vnops.c,v 1.78 1998/02/09 06:11:14 eivind Exp $ */ #include "opt_quota.h" #include "opt_suiddir.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include #include #include #include #include #include static int ufs_abortop __P((struct vop_abortop_args *)); static int ufs_access __P((struct vop_access_args *)); static int ufs_advlock __P((struct vop_advlock_args *)); static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *)); static int ufs_chown __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *)); static int ufs_close __P((struct vop_close_args *)); static int ufs_create __P((struct vop_create_args *)); static int ufs_getattr __P((struct vop_getattr_args *)); static int ufs_link __P((struct vop_link_args *)); static int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *)); static int ufs_missingop __P((struct vop_generic_args *ap)); static int ufs_mkdir __P((struct vop_mkdir_args *)); static int ufs_mknod __P((struct vop_mknod_args *)); static int ufs_mmap __P((struct vop_mmap_args *)); static int ufs_open __P((struct vop_open_args *)); static int ufs_pathconf __P((struct vop_pathconf_args *)); static int ufs_print __P((struct vop_print_args *)); static int ufs_readdir __P((struct vop_readdir_args *)); static int ufs_readlink __P((struct vop_readlink_args *)); static int ufs_remove __P((struct vop_remove_args *)); static int ufs_rename __P((struct vop_rename_args *)); static int ufs_rmdir __P((struct vop_rmdir_args *)); static int ufs_setattr __P((struct vop_setattr_args *)); static int ufs_strategy __P((struct vop_strategy_args *)); static int ufs_symlink __P((struct vop_symlink_args *)); static int ufs_whiteout __P((struct vop_whiteout_args *)); static int ufsfifo_close __P((struct vop_close_args *)); static int ufsfifo_read __P((struct vop_read_args *)); static int ufsfifo_write __P((struct vop_write_args *)); static int ufsspec_close __P((struct vop_close_args *)); static int ufsspec_read __P((struct vop_read_args *)); static int ufsspec_write __P((struct vop_write_args *)); union _qcvt { int64_t qcvt; int32_t val[2]; }; 
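The _qcvt union that closes the declarations above exists so the two 32-bit halves of a 64-bit quantity can be addressed by word index; the SETHIGH/SETLOW macros defined next use it to splice one word into i_modrev without shifts or masks. A standalone demonstration; the word indexes are hard-coded little-endian here, where the kernel takes _QUAD_HIGHWORD/_QUAD_LOWWORD from its endian headers:

#include <stdint.h>
#include <stdio.h>

union qcvt {
	int64_t qcvt;
	int32_t val[2];
};

/* Little-endian word order; the kernel derives these indexes. */
#define QUAD_LOWWORD  0
#define QUAD_HIGHWORD 1

#define SETHIGH(q, h) do {			\
	union qcvt tmp;				\
	tmp.qcvt = (q);				\
	tmp.val[QUAD_HIGHWORD] = (h);		\
	(q) = tmp.qcvt;				\
} while (0)

#define SETLOW(q, l) do {			\
	union qcvt tmp;				\
	tmp.qcvt = (q);				\
	tmp.val[QUAD_LOWWORD] = (l);		\
	(q) = tmp.qcvt;				\
} while (0)

int
main(void)
{
	int64_t modrev = 0;

	/* As in ufs_vinit(): seconds in the high word, scaled usecs low. */
	SETHIGH(modrev, 889000000);
	SETLOW(modrev, 123456 * 4294);
	printf("modrev = %lld\n", (long long)modrev);
	return (0);
}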
#define SETHIGH(q, h) { \ union _qcvt tmp; \ tmp.qcvt = (q); \ tmp.val[_QUAD_HIGHWORD] = (h); \ (q) = tmp.qcvt; \ } #define SETLOW(q, l) { \ union _qcvt tmp; \ tmp.qcvt = (q); \ tmp.val[_QUAD_LOWWORD] = (l); \ (q) = tmp.qcvt; \ } /* + * A virgin directory (no blushing please). + */ +static struct dirtemplate mastertemplate = { + 0, 12, DT_DIR, 1, ".", + 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." +}; +static struct odirtemplate omastertemplate = { + 0, 12, 1, ".", + 0, DIRBLKSIZ - 12, 2, ".." +}; + +/* * Create a regular file */ int ufs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ int ufs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ ip->i_rdev = vap->va_rdev; } /* * Remove inode so that it will be reloaded by VFS_VGET and * checked to see if it is an alias of an existing entry in * the inode cache. */ vput(*vpp); (*vpp)->v_type = VNON; vgone(*vpp); *vpp = 0; return (0); } /* * Open called. * * Nothing to do. */ /* ARGSUSED */ int ufs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { /* * Files marked append-only must be opened for appending. */ if ((VTOI(ap->a_vp)->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); return (0); } /* * Close called. * * Update the times on the inode. */ /* ARGSUSED */ int ufs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) ITIMES(ip, &time, &time); simple_unlock(&vp->v_interlock); return (0); } int ufs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; mode_t mask, mode = ap->a_mode; register gid_t *gp; int i; #ifdef QUOTA int error; #endif /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA if (error = getinoquota(ip)) return (error); #endif break; + default: + break; } } /* If immutable bit set, nobody gets to write it. */ if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) return (EPERM); /* Otherwise, user id 0 always gets access. */ if (cred->cr_uid == 0) return (0); mask = 0; /* Otherwise, check the owner. 
*/ if (cred->cr_uid == ip->i_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (ip->i_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* ARGSUSED */ int ufs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); register struct vattr *vap = ap->a_vap; ITIMES(ip, &time, &time); /* * Copy from inode table */ vap->va_fsid = ip->i_dev; vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; - vap->va_nlink = ip->i_nlink; + vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; vap->va_rdev = (dev_t)ip->i_rdev; vap->va_size = ip->i_din.di_size; vap->va_atime.tv_sec = ip->i_atime; vap->va_atime.tv_nsec = ip->i_atimensec; vap->va_mtime.tv_sec = ip->i_mtime; vap->va_mtime.tv_nsec = ip->i_mtimensec; vap->va_ctime.tv_sec = ip->i_ctime; vap->va_ctime.tv_nsec = ip->i_ctimensec; vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; /* this doesn't belong here */ if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; else vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); vap->va_type = vp->v_type; vap->va_filerev = ip->i_modrev; return (0); } /* * Set attribute vnode op. called from several syscalls */ int ufs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; struct timeval atimeval, mtimeval; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != ip->i_uid && (error = suser(cred, &p->p_acflag))) return (error); if (cred->cr_uid == 0) { if ((ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) && securelevel > 0) return (EPERM); ip->i_flags = vap->va_flags; } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || (vap->va_flags & UF_SETTABLE) != vap->va_flags) return (EPERM); ip->i_flags &= SF_SETTABLE; ip->i_flags |= (vap->va_flags & UF_SETTABLE); } ip->i_flag |= IN_CHANGE; if (vap->va_flags & (IMMUTABLE | APPEND)) return (0); } if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. 
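ufs_access() above settles a request against exactly one permission class: owner if the uid matches, otherwise the first matching group, otherwise "other"; the requested VREAD/VWRITE/VEXEC bits are translated into that class's mode bits and tested in a single comparison. The same logic as a user-space function; the VREAD/VWRITE/VEXEC values below are local stand-ins, not the kernel's:

#include <stdio.h>
#include <sys/stat.h>

#define VREAD  4
#define VWRITE 2
#define VEXEC  1

/* Returns 0 if granted, -1 if denied; cf. ufs_access(). */
static int
access_check(mode_t fmode, unsigned fuid, unsigned fgid,
    unsigned uid, const unsigned *groups, int ngroups, int req)
{
	mode_t mask = 0;
	int i;

	if (uid == 0)
		return (0);		/* root always wins here */
	if (uid == fuid) {		/* owner class */
		if (req & VEXEC)  mask |= S_IXUSR;
		if (req & VREAD)  mask |= S_IRUSR;
		if (req & VWRITE) mask |= S_IWUSR;
		return ((fmode & mask) == mask ? 0 : -1);
	}
	for (i = 0; i < ngroups; i++)	/* group class */
		if (groups[i] == fgid) {
			if (req & VEXEC)  mask |= S_IXGRP;
			if (req & VREAD)  mask |= S_IRGRP;
			if (req & VWRITE) mask |= S_IWGRP;
			return ((fmode & mask) == mask ? 0 : -1);
		}
	if (req & VEXEC)  mask |= S_IXOTH;	/* other class */
	if (req & VREAD)  mask |= S_IROTH;
	if (req & VWRITE) mask |= S_IWOTH;
	return ((fmode & mask) == mask ? 0 : -1);
}

int
main(void)
{
	unsigned groups[] = { 100 };

	/* group member asking to read a 0640 file: granted (0) */
	printf("%d\n", access_check(0640, 1000, 100,
	    1001, groups, 1, VREAD));
	return (0);
}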
*/ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p)) return (error); } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; + default: + break; } if (error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, p)) return (error); } ip = VTOI(vp); if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != ip->i_uid && (error = suser(cred, &p->p_acflag)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(vp, VWRITE, cred, p)))) return (error); if (vap->va_atime.tv_sec != VNOVAL) ip->i_flag |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) ip->i_flag |= IN_CHANGE | IN_UPDATE; atimeval.tv_sec = vap->va_atime.tv_sec; atimeval.tv_usec = vap->va_atime.tv_nsec / 1000; mtimeval.tv_sec = vap->va_mtime.tv_sec; mtimeval.tv_usec = vap->va_mtime.tv_nsec / 1000; - error = UFS_UPDATE(vp, &atimeval, &mtimeval, 1); + error = UFS_UPDATE(vp, &atimeval, &mtimeval, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); error = ufs_chmod(vp, (int)vap->va_mode, cred, p); } VN_POLLEVENT(vp, POLLATTRIB); return (error); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(vp, mode, cred, p) register struct vnode *vp; register int mode; register struct ucred *cred; struct proc *p; { register struct inode *ip = VTOI(vp); int error; if (cred->cr_uid != ip->i_uid) { error = suser(cred, &p->p_acflag); if (error) return (error); } if (cred->cr_uid) { if (vp->v_type != VDIR && (mode & S_ISTXT)) return (EFTYPE); if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) return (EPERM); } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; return (0); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ufs_chown(vp, uid, gid, cred, p) register struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct proc *p; { register struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA register int i; long change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * If we don't own the file, are trying to change the owner * of the file, or are not a member of the target group, * the caller must be superuser or the call fails. 
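ufs_chmod() above combines two privilege checks with a careful bit splice: a non-root caller may not set the sticky bit on a non-directory (EFTYPE) nor the setgid bit for a group it does not belong to (EPERM), and the update touches only the ALLPERMS bits, never the IFMT type bits. A sketch with flags standing in for vnode and credential state; EFTYPE is the BSD errno used here:

#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>

#define ALLPERMS (S_ISUID|S_ISGID|S_ISVTX|S_IRWXU|S_IRWXG|S_IRWXO)

/* cf. ufs_chmod(); is_dir/in_group stand in for vnode/cred state. */
static int
chmod_check(mode_t *imode, mode_t newmode, int is_root, int is_dir,
    int in_group)
{
	if (!is_root) {
		if (!is_dir && (newmode & S_ISVTX))
			return (EFTYPE);	/* sticky only on dirs */
		if (!in_group && (newmode & S_ISGID))
			return (EPERM);		/* can't grant foreign gid */
	}
	/* Splice in permission bits only; file type is untouched. */
	*imode = (*imode & ~ALLPERMS) | (newmode & ALLPERMS);
	return (0);
}

int
main(void)
{
	mode_t m = S_IFREG | 0644;

	printf("%d\n", chmod_check(&m, 0755, 0, 0, 1));	 /* 0: ok */
	printf("%o\n", (unsigned)(m & ALLPERMS));	 /* 755 */
	printf("%d\n", chmod_check(&m, 01644, 0, 0, 1)); /* EFTYPE */
	return (0);
}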
*/ if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid || (gid != ip->i_gid && !groupmember((gid_t)gid, cred))) && (error = suser(cred, &p->p_acflag))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if (error = getinoquota(ip)) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = ip->i_blocks; (void) chkdq(ip, -change, cred, CHOWN); (void) chkiq(ip, -1, cred, CHOWN); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; ip->i_uid = uid; #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; ip->i_uid = ouid; if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ ip->i_flag |= IN_CHANGE; if (cred->cr_uid != 0 && (ouid != uid || ogid != gid)) ip->i_mode &= ~(ISUID | ISGID); return (0); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ int ufs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } int ufs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) { error = EPERM; goto out; } - error = ufs_dirremove(dvp, ap->a_cnp); - if (error == 0) { - ip->i_nlink--; - ip->i_flag |= IN_CHANGE; - } + error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(dvp, POLLWRITE); out: if (dvp == vp) vrele(vp); else vput(vp); vput(dvp); return (error); } /* * link vnode call */ int ufs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct inode *ip; struct timeval tv; + struct direct newdir; int error; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (tdvp->v_mount != vp->v_mount) { VOP_ABORTOP(tdvp, cnp); error = EXDEV; goto out2; } if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(tdvp, cnp); goto out2; } ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= LINK_MAX) { VOP_ABORTOP(tdvp, cnp); error = EMLINK; goto out1; } if (ip->i_flags & (IMMUTABLE | APPEND)) { VOP_ABORTOP(tdvp, cnp); error = EPERM; goto out1; } + ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vp)) + softdep_increase_linkcnt(ip); gettime(&tv); - error 
= UFS_UPDATE(vp, &tv, &tv, 1); + error = UFS_UPDATE(vp, &tv, &tv, !DOINGSOFTDEP(vp)); if (!error) { - error = ufs_direnter(ip, tdvp, cnp); + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); } if (error) { + ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; } zfree(namei_zone, cnp->cn_pnbuf); out1: if (tdvp != vp) VOP_UNLOCK(vp, 0, p); out2: VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(tdvp, POLLWRITE); vput(tdvp); return (error); } /* * whiteout vnode call */ int ufs_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; - error = ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc); + error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); break; case DELETE: /* remove an existing directory whiteout */ #ifdef DIAGNOSTIC if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; - error = ufs_dirremove(dvp, cnp); + error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; + default: + panic("ufs_whiteout: unknown op"); } if (cnp->cn_flags & HASBUF) { zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_flags &= ~HASBUF; } return (error); } /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ int ufs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; register struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct inode *ip, *xp, *dp; - struct dirtemplate dirbuf; + struct direct newdir; struct timeval tv; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; - u_char namlen; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif /* * Check for cross-device rename. 
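ufs_link() above shows the ordering pattern this commit introduces throughout: bump both i_nlink (the on-disk count) and the new i_effnlink (entries actually in existence), push the inode to disk first, synchronously unless soft updates will order the writes, and only then add the directory entry, undoing both counters if that fails. The skeleton, with hypothetical stand-ins for UFS_UPDATE() and ufs_direnter():

#include <stdio.h>

struct xinode {
	int nlink;	/* on-disk link count */
	int effnlink;	/* in-core count: entries actually present */
};

/* Stand-ins for UFS_UPDATE() and ufs_direnter(); either may fail. */
static int update_inode(struct xinode *ip, int waitfor) { (void)ip; (void)waitfor; return (0); }
static int enter_name(const char *name) { return (name[0] ? 0 : -1); }

static int
do_link(struct xinode *ip, const char *name, int softdep)
{
	int error;

	ip->effnlink++;
	ip->nlink++;
	/*
	 * Without soft updates the inode must reach disk (waitfor=1)
	 * before the name that references it becomes visible.
	 */
	error = update_inode(ip, !softdep);
	if (error == 0)
		error = enter_name(name);
	if (error) {		/* roll back both counters */
		ip->effnlink--;
		ip->nlink--;
	}
	return (error);
}

int
main(void)
{
	struct xinode ino = { 1, 1 };

	printf("%d nlink=%d\n", do_link(&ino, "b", 0), ino.nlink);
	return (0);
}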
*/ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */ if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ vrele(fdvp); vrele(fvp); return (error); } if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto abortit; } /* * Check if just deleting a link name or if we've lost a race. * If another process completes the same rename after we've looked * up the source and have blocked looking up the target, then the * source and target inodes may be identical now although the * names were never linked. */ if (fvp == tvp) { if (fvp->v_type == VDIR) { /* * Linked directories are impossible, so we must * have lost the race. Pretend that the rename * completed before the lookup. */ #ifdef UFS_RENAME_DEBUG printf("ufs_rename: fvp == tvp for directories\n"); #endif error = ENOENT; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* * Delete source. There is another race now that everything * is unlocked, but this doesn't cause any new complications. * Relookup() may find a file that is unrelated to the * original one, or it may fail. Too bad. */ vrele(fdvp); vrele(fvp); fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); fcnp->cn_nameiop = DELETE; VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp == NULL) { #ifdef UFS_RENAME_DEBUG printf("ufs_rename: from name disappeared\n"); #endif return (ENOENT); } return (VOP_REMOVE(fdvp, fvp, fcnp)); } if (error = vn_lock(fvp, LK_EXCLUSIVE, p)) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { VOP_UNLOCK(fvp, 0, p); error = EPERM; goto abortit; } if ((ip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory++; } VN_POLLEVENT(fdvp, POLLWRITE); vrele(fdvp); /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ + ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_increase_linkcnt(ip); gettime(&tv); - if (error = UFS_UPDATE(fvp, &tv, &tv, 1)) { + if (error = UFS_UPDATE(fvp, &tv, &tv, !DOINGSOFTDEP(fvp))) { VOP_UNLOCK(fvp, 0, p); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). 
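ufs_checkpath(), called shortly after this comment, prevents a rename from moving a directory beneath itself: starting at the proposed new parent it follows ".." links toward the root, and if it meets the source directory the rename would orphan the entire subtree. The walk, reduced to an array of parent inode numbers:

#include <stdio.h>

#define ROOT 2		/* ROOTINO */

/* parent[i] = inode number of directory i's ".." entry */
static int
checkpath(int source, int target, const int *parent, int nino)
{
	int ino = target;

	while (ino != ROOT) {
		if (ino == source)
			return (-1);	/* would orphan the subtree: EINVAL */
		if (ino <= 0 || ino >= nino)
			return (-1);	/* corrupt ".." chain */
		ino = parent[ino];	/* follow ".." one level up */
	}
	return (0);
}

int
main(void)
{
	/* 2 is root; dir 3 lives under 2; dir 4 lives under 3 */
	int parent[] = { 0, 0, 2, 2, 3 };

	printf("%d\n", checkpath(3, 4, parent, 5));	/* -1: 3 above 4 */
	printf("%d\n", checkpath(4, 3, parent, 5));	/* 0: legal */
	return (0);
}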
*/ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); error = ufs_checkpath(ip, dp, tcnp->cn_cred); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); VREF(tdvp); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; vrele(tdvp); dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. */ if (doingdirectory && newparent) { if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto bad; } + dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; - error = UFS_UPDATE(tdvp, &tv, &tv, 1); + if (DOINGSOFTDEP(tdvp)) + softdep_increase_linkcnt(dp); + error = UFS_UPDATE(tdvp, &tv, &tv, !DOINGSOFTDEP(tdvp)); if (error) goto bad; } - error = ufs_direnter(ip, tdvp, tcnp); + ufs_makedirentry(ip, tcnp, &newdir); + error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); if (error) { if (doingdirectory && newparent) { + dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; (void)UFS_UPDATE(tdvp, &tv, &tv, 1); } goto bad; } VN_POLLEVENT(tdvp, POLLWRITE); vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != dp->i_uid && xp->i_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { - if (! ufs_dirempty - (xp, dp->i_number, tcnp->cn_cred) || - xp->i_nlink > 2) { + if ((xp->i_effnlink > 2) || + !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } - error = ufs_dirrewrite(dp, ip, tcnp); + error = ufs_dirrewrite(dp, xp, ip->i_number, + IFTODT(ip->i_mode), doingdirectory); if (error) goto bad; - /* - * If the target directory is in the same - * directory as the source directory, - * decrement the link count on the parent - * of the target directory. - */ - if (doingdirectory && !newparent) { - dp->i_nlink--; + if (doingdirectory) { + dp->i_effnlink--; dp->i_flag |= IN_CHANGE; + xp->i_effnlink--; + xp->i_flag |= IN_CHANGE; } VN_POLLEVENT(tdvp, POLLWRITE); - vput(tdvp); - /* - * Adjust the link count of the target to - * reflect the dirrewrite above. If this is - * a directory it is empty and there are - * no links to it, so we can squash the inode and - * any space associated with it. 
We disallowed - * renaming over top of a directory with links to - * it above, as the remaining link would point to - * a directory without "." or ".." entries. - */ - xp->i_nlink--; - if (doingdirectory) { - if (--xp->i_nlink != 0) - panic("ufs_rename: linked directory"); - error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, - tcnp->cn_cred, tcnp->cn_proc); + if (doingdirectory && !DOINGSOFTDEP(tvp)) { + /* + * Truncate inode. The only stuff left in the directory + * is "." and "..". The "." reference is inconsequential + * since we are quashing it. We have removed the "." + * reference and the reference in the parent directory, + * but there may be other hard links. The soft + * dependency code will arrange to do these operations + * after the parent directory entry has been deleted on + * disk, so when running with that code we avoid doing + * them now. + */ + dp->i_nlink--; + xp->i_nlink--; + if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, + tcnp->cn_cred, tcnp->cn_proc)) != 0) + goto bad; } - xp->i_flag |= IN_CHANGE; - VN_POLLEVENT(tvp, POLLNLINK); + vput(tdvp); + VN_POLLEVENT(tvp, POLLNLINK); /* XXX this right? */ vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("ufs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; its link - * count of three would cause a rmdir to fail with ENOTEMPTY. - * The IN_RENAME flag ensures that it cannot be moved by another - * rename. + * is a directory then it cannot have been rmdir'ed; the IN_RENAME + * flag ensures that it cannot be moved by another rename or removed + * by a rmdir. */ if (xp != ip) { if (doingdirectory) panic("ufs_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED, - tcnp->cn_cred, (int *)0, (struct proc *)0); - if (error == 0) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (fvp->v_mount->mnt_maxsymlinklen <= 0) - namlen = dirbuf.dotdot_type; - else - namlen = dirbuf.dotdot_namlen; -# else - namlen = dirbuf.dotdot_namlen; -# endif - if (namlen != 2 || - dirbuf.dotdot_name[0] != '.' 
|| - dirbuf.dotdot_name[1] != '.') { - ufs_dirbad(xp, (doff_t)12, - "rename: mangled dir"); - } else { - dirbuf.dotdot_ino = newparent; - (void) vn_rdwr(UIO_WRITE, fvp, - (caddr_t)&dirbuf, - sizeof (struct dirtemplate), - (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, - tcnp->cn_cred, (int *)0, - (struct proc *)0); - cache_purge(fdvp); - } - } + xp->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); + cache_purge(fdvp); } - error = ufs_dirremove(fdvp, fcnp); - if (!error) { - xp->i_nlink--; - xp->i_flag |= IN_CHANGE; - } + error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); xp->i_flag &= ~IN_RENAME; } if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { + ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; vput(fvp); } else vrele(fvp); return (error); } /* - * A virgin directory (no blushing please). - */ -static struct dirtemplate mastertemplate = { - 0, 12, DT_DIR, 1, { '.', 0 }, - 0, DIRBLKSIZ - 12, DT_DIR, 2, { '.', '.', 0 } -}; -static struct odirtemplate omastertemplate = { - 0, 12, 1, { '.', 0 }, - 0, DIRBLKSIZ - 12, 2, { '.', '.', 0 } -}; - -/* * Mkdir system call */ int ufs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct inode *ip, *dp; struct vnode *tvp; + struct buf *bp; struct dirtemplate dirtemplate, *dtp; + struct direct newdir; struct timeval tv; int error, dmode; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok.
*/ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); vput(dvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); vput(dvp); return (error); } #endif #endif /* !SUIDDIR */ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ + ip->i_effnlink = 2; ip->i_nlink = 2; + if (DOINGSOFTDEP(tvp)) + softdep_increase_linkcnt(ip); if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; - gettime(&tv); - error = UFS_UPDATE(tvp, &tv, &tv, 1); /* - * Bump link count in parent directory - * to reflect work done below. Should - * be done before reference is created - * so reparation is possible if we crash. + * Bump link count in parent directory to reflect work done below. + * Should be done before reference is created so cleanup is + * possible if we crash. */ + dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; - error = UFS_UPDATE(dvp, &tv, &tv, 1); + if (DOINGSOFTDEP(dvp)) + softdep_increase_linkcnt(dp); + gettime(&tv); + error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(dvp)); if (error) goto bad; - /* Initialize directory with "." and ".." from static template. */ + /* + * Initialize directory with "." and ".." from static template. + */ if (dvp->v_mount->mnt_maxsymlinklen > 0 ) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; - error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, - sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0); - if (error) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; + if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, + B_CLRBUF, &bp)) != 0) goto bad; + ip->i_size = DIRBLKSIZ; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(tvp, (u_long)ip->i_size); + bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); + if ((error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(tvp))) != 0) { + (void)VOP_BWRITE(bp); + goto bad; } - if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ - else { - ip->i_size = DIRBLKSIZ; - ip->i_flag |= IN_CHANGE; - } - - /* Directory set up, now install it's entry in the parent directory. */ - error = ufs_direnter(ip, dvp, cnp); - if (error) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - } - VN_POLLEVENT(dvp, POLLWRITE); -bad: + VN_POLLEVENT(dvp, POLLWRITE); /* XXX right place? */ /* - * No need to do an explicit VOP_TRUNCATE here, vrele will do this - * for us because we set the link count to 0. + * Directory set up, now install it's entry in the parent directory. + * + * If we are not doing soft dependencies, then we must write out the + * buffer containing the new directory body before entering the new + * name in the parent. 
If we are doing soft dependencies, then the + * buffer containing the new directory body will be passed to and + * released in the soft dependency code after the code has attached + * an appropriate ordering dependency to the buffer which ensures that + * the buffer is written before the new name is written in the parent. */ - if (error) { + if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0)) + goto bad; + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); + +bad: + if (error == 0) { + *ap->a_vpp = tvp; + } else { + dp->i_effnlink--; + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + /* + * No need to do an explicit VOP_TRUNCATE here, vrele will + * do this for us because we set the link count to 0. + */ + ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); - } else - *ap->a_vpp = tvp; + } out: zfree(namei_zone, cnp->cn_pnbuf); vput(dvp); return (error); } /* * Rmdir system call. */ int ufs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* - * Verify the directory is empty (and valid). - * (Rmdir ".." won't be valid since - * ".." will contain a reference to - * the current directory and thus be - * non-empty.) + * Do not remove a directory that is in the process of being renamed. + * Verify the directory is empty (and valid). Rmdir ".." will not be + * valid since ".." will contain a reference to the current directory + * and thus be non-empty. */ error = 0; - if (ip->i_nlink != 2 || + if (ip->i_flag & IN_RENAME) { + error = EINVAL; + goto out; + } + if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ - error = ufs_dirremove(dvp, cnp); + error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) goto out; VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK); - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; cache_purge(dvp); - vput(dvp); - dvp = NULL; /* - * Truncate inode. The only stuff left - * in the directory is "." and "..". The - * "." reference is inconsequential since - * we're quashing it. The ".." reference - * has already been adjusted above. We've - * removed the "." reference and the reference - * in the parent directory, but there may be - * other hard links so decrement by 2 and - * worry about them later. + * Truncate inode. The only stuff left in the directory is "." and + * "..". The "." reference is inconsequential since we are quashing + * it. We have removed the "." reference and the reference in the + * parent directory, but there may be other hard links. So, + * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no + * new entries are made. The soft dependency code will arrange to + * do these operations after the parent directory entry has been + * deleted on disk, so when running with that code we avoid doing + * them now. 
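The effect of the new i_effnlink field is easiest to see as an ordering question: in-core counts drop as soon as the operation logically happens, while with soft updates the on-disk i_nlink changes and the truncation wait until the parent's directory block is known to be on disk. A toy walk-through of the two schedules; the step names are purely illustrative, not the softdep interface:

#include <stdio.h>

static void step(const char *s) { printf("  %s\n", s); }

static void
mkdir_order(int softdep)
{
	printf("mkdir (%s):\n", softdep ? "softdep" : "sync");
	step("allocate inode, effnlink = nlink = 2");
	/*
	 * The block holding "." and ".." must reach disk before any
	 * name makes the new directory findable.
	 */
	step(softdep ? "queue child block, dependency -> parent entry"
		     : "bwrite child \".\"/\"..\" block");
	step("enter name in parent directory");
}

static void
rmdir_order(int softdep)
{
	printf("rmdir (%s):\n", softdep ? "softdep" : "sync");
	step("remove name from parent, effnlink counts drop now");
	if (softdep)
		step("nlink decrements + truncate deferred until "
		    "parent entry is on disk");
	else
		step("nlink decrements, synchronous truncate to 0");
}

int
main(void)
{
	mkdir_order(0); mkdir_order(1);
	rmdir_order(0); rmdir_order(1);
	return (0);
}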
*/ - ip->i_nlink -= 2; - error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, - cnp->cn_proc); - cache_purge(ITOV(ip)); - VN_POLLEVENT(vp, POLLNLINK); + dp->i_effnlink--; + dp->i_flag |= IN_CHANGE; + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + if (!DOINGSOFTDEP(vp)) { + dp->i_nlink--; + ip->i_nlink--; + error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, + cnp->cn_proc); + } + cache_purge(vp); out: - if (dvp) - vput(dvp); + vput(dvp); + VN_POLLEVENT(vp, POLLNLINK); vput(vp); return (error); } /* * symlink -- make a symbolic link */ int ufs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *vp, **vpp = ap->a_vpp; register struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, (char *)ip->i_shortlink, len); ip->i_size = len; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0, (struct proc *)0); vput(vp); return (error); } /* * Vnode op for reading directories. * * The routine below assumes that the on-disk format of a directory * is the same as that defined by . If the on-disk * format changes, then it will be necessary to do a conversion * from the on-disk format that read returns to the format defined * by . */ int ufs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *ncookies; u_long **a_cookies; } */ *ap; { register struct uio *uio = ap->a_uio; int error; size_t count, lost; off_t off; if (ap->a_ncookies != NULL) /* * Ensure that the block is aligned. The caller can use * the cookies to determine where in the block to start. */ uio->uio_offset &= ~(DIRBLKSIZ - 1); off = uio->uio_offset; count = uio->uio_resid; /* Make sure we don't return partial entries. 
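ufs_symlink() above stores short targets directly in the inode: a target shorter than mnt_maxsymlinklen fits in the area normally holding the block pointers (i_shortlink, 60 bytes for the 15 pointers of a UFS1 dinode), so readlink needs no data-block I/O at all; longer targets go through the ordinary write path. The decision in miniature:

#include <stdio.h>
#include <string.h>

#define MAXSYMLINKLEN 60	/* space of 15 32-bit block pointers */

struct slink {
	char shortlink[MAXSYMLINKLEN];	/* overlays di_db/di_ib */
	long size;
	int  uses_data_block;
};

static void
make_symlink(struct slink *ip, const char *target)
{
	size_t len = strlen(target);

	if (len < MAXSYMLINKLEN) {	/* fast path: no data block */
		memcpy(ip->shortlink, target, len);
		ip->size = (long)len;
		ip->uses_data_block = 0;
	} else {			/* slow path: vn_rdwr() write */
		ip->size = (long)len;
		ip->uses_data_block = 1;
	}
}

int
main(void)
{
	struct slink ip;

	make_symlink(&ip, "/usr/local");
	printf("inline: %d, size %ld\n", !ip.uses_data_block, ip.size);
	return (0);
}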
*/ count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); if (count <= 0) return (EINVAL); lost = uio->uio_resid - count; uio->uio_resid = count; uio->uio_iov->iov_len = count; # if (BYTE_ORDER == LITTLE_ENDIAN) if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); } else { struct dirent *dp, *edp; struct uio auio; struct iovec aiov; caddr_t dirbuf; int readcnt; u_char tmp; auio = *uio; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; aiov.iov_len = count; MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); aiov.iov_base = dirbuf; error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); if (error == 0) { readcnt = count - auio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { tmp = dp->d_namlen; dp->d_namlen = dp->d_type; dp->d_type = tmp; if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, uio); } FREE(dirbuf, M_TEMP); } # else error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); # endif if (!error && ap->a_ncookies != NULL) { struct dirent* dpStart; struct dirent* dpEnd; struct dirent* dp; int ncookies; u_long *cookies; u_long *cookiep; if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ufs_readdir: unexpected uio from NFS server"); dpStart = (struct dirent *) (uio->uio_iov->iov_base - (uio->uio_offset - off)); dpEnd = (struct dirent *) uio->uio_iov->iov_base; for (dp = dpStart, ncookies = 0; dp < dpEnd; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) ncookies++; MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); for (dp = dpStart, cookiep = cookies; dp < dpEnd; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) { off += dp->d_reclen; *cookiep++ = (u_long) off; } *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } uio->uio_resid += lost; if (ap->a_eofflag) *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ int ufs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); int isize; isize = ip->i_size; if ((isize < vp->v_mount->mnt_maxsymlinklen) || (ip->i_din.di_blocks == 0)) { /* XXX - for old fastlink support */ uiomove((char *)ip->i_shortlink, isize, ap->a_uio); return (0); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually * done. If a buffer has been saved in anticipation of a CREATE, delete it. */ /* ARGSUSED */ int ufs_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. 
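The cookie pass at the end of ufs_readdir() relies on each entry recording its own length: one walk over the returned block counts the entries, and a second emits, for every entry, the directory offset just past it, which is what an NFS client hands back to resume the listing. Reduced to plain structs:

#include <stdio.h>

struct dent {
	unsigned short reclen;	/* d_reclen: bytes this entry occupies */
};

/* Emit one cookie (the offset just past each entry); returns count. */
static int
gen_cookies(const struct dent *dp, int n, long off, unsigned long *cookies)
{
	int i;

	for (i = 0; i < n; i++) {
		off += dp[i].reclen;	/* offset just past entry i */
		cookies[i] = (unsigned long)off;
	}
	return (n);
}

int
main(void)
{
	struct dent block[] = { { 12 }, { 16 }, { 484 } };	/* 512 total */
	unsigned long cookies[3];
	int i, n = gen_cookies(block, 3, 0, cookies);

	for (i = 0; i < n; i++)
		printf("cookie[%d] = %lu\n", i, cookies[i]);
	return (0);
}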
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */
int
ufs_strategy(ap)
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = bp->b_vp;
	register struct inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("ufs_strategy: spec");
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
		    NULL, NULL);
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1)
			vfs_bio_clrbuf(bp);
	}
	if ((long)bp->b_blkno == -1) {
		biodone(bp);
		return (0);
	}
	vp = ip->i_devvp;
	bp->b_dev = vp->v_rdev;
	VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
	return (0);
}

/*
 * Print out the contents of an inode.
 */
int
ufs_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct inode *ip = VTOI(vp);

	printf("tag VT_UFS, ino %ld, on dev %d, %d", ip->i_number,
	    major(ip->i_dev), minor(ip->i_dev));
	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);
	lockmgr_printinfo(&ip->i_lock);
	printf("\n");
	return (0);
}

/*
 * Read wrapper for special devices.
 */
int
ufsspec_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/*
	 * Set access flag.
	 */
	if (!(ap->a_vp->v_mount->mnt_flag & MNT_NOATIME))
		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
}

/*
 * Write wrapper for special devices.
 */
int
ufsspec_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/*
	 * Set update and change flags.
	 */
	VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
}

/*
 * Close wrapper for special devices.
 *
 * Update the times on the inode then do device close.
 */
int
ufsspec_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);

	simple_lock(&vp->v_interlock);
	if (ap->a_vp->v_usecount > 1)
		ITIMES(ip, &time, &time);
	simple_unlock(&vp->v_interlock);
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Read wrapper for fifos.
 */
int
ufsfifo_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/*
	 * Set access flag.
	 */
	if (!(ap->a_vp->v_mount->mnt_flag & MNT_NOATIME))
		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap));
}

/*
 * Write wrapper for fifos.
 */
int
ufsfifo_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/*
	 * Set update and change flags.
	 */
	VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap));
}

/*
 * Close wrapper for fifos.
 *
 * Update the times on the inode then do the fifo close.
 */
int
ufsfifo_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);

	simple_lock(&vp->v_interlock);
	if (ap->a_vp->v_usecount > 1)
		ITIMES(ip, &time, &time);
	simple_unlock(&vp->v_interlock);
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
}
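The ufsspec_*() and ufsfifo_*() routines above all follow one shape: touch the
inode flags, then forward the call unchanged to the generic layer. A hedged
userland sketch of that wrapper pattern; the names (struct node, generic_read,
wrapped_read) and the flag value are invented for illustration.

#include <stdio.h>

#define IN_ACCESS 0x01			/* invented flag value */

struct node {
	int flags;
};

/* The "generic" layer the wrapper delegates to. */
static int
generic_read(struct node *np, char *buf, int len)
{
	(void)np; (void)buf;
	printf("generic_read: %d bytes requested\n", len);
	return (0);
}

/*
 * Wrapper: record the access, then forward the call unchanged,
 * mirroring ufsspec_read() -> VOCALL(spec_vnodeop_p, ...).
 */
static int
wrapped_read(struct node *np, char *buf, int len)
{
	np->flags |= IN_ACCESS;		/* side effect added by the wrapper */
	return (generic_read(np, buf, len));
}

int
main(void)
{
	struct node n = { 0 };
	char buf[16];

	wrapped_read(&n, buf, sizeof(buf));
	printf("flags after read: %#x\n", n.flags);
	return (0);
}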
/*
 * Return POSIX pathconf information applicable to ufs filesystems.
 */
int
ufs_pathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
	case _PC_PATH_MAX:
		*ap->a_retval = PATH_MAX;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_NO_TRUNC:
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Advisory record locking support
 */
int
ufs_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap;
{
	register struct inode *ip = VTOI(ap->a_vp);

	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
}

/*
 * Initialize the vnode associated with a new inode, handle aliased
 * vnodes.
 */
int
ufs_vinit(mntp, specops, fifoops, vpp)
	struct mount *mntp;
	vop_t **specops;
	vop_t **fifoops;
	struct vnode **vpp;
{
	struct inode *ip;
	struct vnode *vp, *nvp;

	vp = *vpp;
	ip = VTOI(vp);
	switch(vp->v_type = IFTOVT(ip->i_mode)) {
	case VCHR:
	case VBLK:
		vp->v_op = specops;
		nvp = checkalias(vp, ip->i_rdev, mntp);
		if (nvp) {
			/*
			 * Discard unneeded vnode, but save its inode.
			 * Note that the lock is carried over in the inode
			 * to the replacement vnode.
			 */
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;
			vp->v_op = spec_vnodeop_p;
			vrele(vp);
			vgone(vp);
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			ip->i_vnode = vp;
		}
		break;
	case VFIFO:
		vp->v_op = fifoops;
		break;
	default:
		break;
	}
	if (ip->i_number == ROOTINO)
-	        vp->v_flag |= VROOT;
+		vp->v_flag |= VROOT;
	/*
	 * Initialize modrev times
	 */
	SETHIGH(ip->i_modrev, mono_time.tv_sec);
	SETLOW(ip->i_modrev, mono_time.tv_usec * 4294);
	*vpp = vp;
	return (0);
}
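ufs_vinit() above keys off IFTOVT(ip->i_mode) to pick the vnode type and the
matching op vector. A minimal sketch of that mode-bits-to-type mapping,
assuming the standard S_IFMT encodings from <sys/stat.h>; the enum values
here are invented stand-ins for the kernel's vtype.

#include <stdio.h>
#include <sys/stat.h>	/* S_IFMT, S_IFCHR, ... */

enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };

/* Map the IFMT bits of an inode mode to a vnode type, as IFTOVT() does. */
static enum vtype
mode_to_vtype(mode_t mode)
{
	switch (mode & S_IFMT) {
	case S_IFREG:	return (VREG);
	case S_IFDIR:	return (VDIR);
	case S_IFBLK:	return (VBLK);
	case S_IFCHR:	return (VCHR);
	case S_IFLNK:	return (VLNK);
	case S_IFSOCK:	return (VSOCK);
	case S_IFIFO:	return (VFIFO);
	default:	return (VBAD);
	}
}

int
main(void)
{
	printf("S_IFCHR -> %d (VCHR)\n", mode_to_vtype(S_IFCHR));
	printf("S_IFIFO -> %d (VFIFO)\n", mode_to_vtype(S_IFIFO));
	return (0);
}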
/*
 * Allocate a new inode.
 */
int
ufs_makeinode(mode, dvp, vpp, cnp)
	int mode;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	register struct inode *ip, *pdir;
+	struct direct newdir;
	struct timeval tv;
	struct vnode *tvp;
	int error;

	pdir = VTOI(dvp);
#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("ufs_makeinode: no name");
#endif
	*vpp = NULL;
	if ((mode & IFMT) == 0)
		mode |= IFREG;

	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
	if (error) {
		zfree(namei_zone, cnp->cn_pnbuf);
		vput(dvp);
		return (error);
	}
	ip = VTOI(tvp);
	ip->i_gid = pdir->i_gid;
#ifdef SUIDDIR
	{
#ifdef QUOTA
		struct ucred ucred, *ucp;
		ucp = cnp->cn_cred;
#endif
		/*
		 * If we are not the owner of the directory,
		 * and we are hacking owners here, (only do this where told to)
		 * and we are not giving it TO root, (would subvert quotas)
		 * then go ahead and give it to the other user.
		 * Note that this drops off the execute bits for security.
		 */
		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
		    (pdir->i_mode & ISUID) &&
		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
			ip->i_uid = pdir->i_uid;
			mode &= ~07111;
#ifdef QUOTA
			/*
			 * Make sure the correct user gets charged
			 * for the space.
			 * Quickly knock up a dummy credential for the victim.
			 * XXX This seems to never be accessed out of our
			 * context so a stack variable is ok.
			 */
			ucred.cr_ref = 1;
			ucred.cr_uid = ip->i_uid;
			ucred.cr_ngroups = 1;
			ucred.cr_groups[0] = pdir->i_gid;
			ucp = &ucred;
#endif
		} else
			ip->i_uid = cnp->cn_cred->cr_uid;

#ifdef QUOTA
		if ((error = getinoquota(ip)) ||
		    (error = chkiq(ip, 1, ucp, 0))) {
			zfree(namei_zone, cnp->cn_pnbuf);
			UFS_VFREE(tvp, ip->i_number, mode);
			vput(tvp);
			vput(dvp);
			return (error);
		}
#endif
	}
#else	/* !SUIDDIR */
	ip->i_uid = cnp->cn_cred->cr_uid;
#ifdef QUOTA
	if ((error = getinoquota(ip)) ||
	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
		zfree(namei_zone, cnp->cn_pnbuf);
		UFS_VFREE(tvp, ip->i_number, mode);
		vput(tvp);
		vput(dvp);
		return (error);
	}
#endif
#endif	/* !SUIDDIR */
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_mode = mode;
	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
+	ip->i_effnlink = 1;
	ip->i_nlink = 1;
+	if (DOINGSOFTDEP(tvp))
+		softdep_increase_linkcnt(ip);
	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
	    suser(cnp->cn_cred, NULL))
		ip->i_mode &= ~ISGID;

	if (cnp->cn_flags & ISWHITEOUT)
		ip->i_flags |= UF_OPAQUE;

	/*
	 * Make sure inode goes to disk before directory entry.
	 */
	gettime(&tv);
-	error = UFS_UPDATE(tvp, &tv, &tv, 1);
+	error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(tvp));
	if (error)
		goto bad;
-	error = ufs_direnter(ip, dvp, cnp);
+	ufs_makedirentry(ip, cnp, &newdir);
+	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
	if (error)
		goto bad;
	if ((cnp->cn_flags & SAVESTART) == 0)
		zfree(namei_zone, cnp->cn_pnbuf);
	vput(dvp);
	*vpp = tvp;
	return (0);

bad:
	/*
	 * Write error occurred trying to update the inode
	 * or the directory so must deallocate the inode.
	 */
	zfree(namei_zone, cnp->cn_pnbuf);
	vput(dvp);
+	ip->i_effnlink = 0;
	ip->i_nlink = 0;
	ip->i_flag |= IN_CHANGE;
	vput(tvp);
	return (error);
}

static int
ufs_missingop(ap)
	struct vop_generic_args *ap;
{

	panic("no vop function for %s in ufs child", ap->a_desc->vdesc_name);
	return (EOPNOTSUPP);
}
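The vnodeopv_entry_desc tables that follow pair an operation descriptor with a
handler, with vop_default_desc catching everything not listed; ufs_missingop()
is wired in where a child filesystem must supply its own routine. A hedged
sketch of that lookup-with-default scheme; the table layout and all names
(struct op_entry, op_lookup) are invented for illustration.

#include <stdio.h>
#include <string.h>

typedef int (*op_fn)(void);

static int op_default(void) { printf("default handler\n"); return (0); }
static int op_read(void)    { printf("read handler\n");    return (0); }

struct op_entry {
	const char *name;	/* stands in for a vnodeop_desc pointer */
	op_fn	    fn;
};

/* Table in the style of ufs_vnodeop_entries[]: explicit ops, NULL sentinel. */
static struct op_entry ops[] = {
	{ "read",	op_read },
	{ NULL,		NULL },
};

static op_fn
op_lookup(const char *name)
{
	struct op_entry *ep;

	for (ep = ops; ep->name != NULL; ep++)
		if (strcmp(ep->name, name) == 0)
			return (ep->fn);
	return (op_default);	/* the vop_default_desc equivalent */
}

int
main(void)
{
	op_lookup("read")();	/* hits the explicit entry */
	op_lookup("write")();	/* falls through to the default */
	return (0);
}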
/*
 * Global vfs data structures for ufs.
 */
static vop_t **ufs_vnodeop_p;
static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_read_desc,		(vop_t *) ufs_missingop },
	{ &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
	{ &vop_write_desc,		(vop_t *) ufs_missingop },
	{ &vop_abortop_desc,		(vop_t *) ufs_abortop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_advlock_desc,		(vop_t *) ufs_advlock },
	{ &vop_bmap_desc,		(vop_t *) ufs_bmap },
	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
	{ &vop_close_desc,		(vop_t *) ufs_close },
	{ &vop_create_desc,		(vop_t *) ufs_create },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_link_desc,		(vop_t *) ufs_link },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
	{ &vop_mkdir_desc,		(vop_t *) ufs_mkdir },
	{ &vop_mknod_desc,		(vop_t *) ufs_mknod },
	{ &vop_mmap_desc,		(vop_t *) ufs_mmap },
	{ &vop_open_desc,		(vop_t *) ufs_open },
	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_remove_desc,		(vop_t *) ufs_remove },
	{ &vop_rename_desc,		(vop_t *) ufs_rename },
	{ &vop_rmdir_desc,		(vop_t *) ufs_rmdir },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_strategy_desc,		(vop_t *) ufs_strategy },
	{ &vop_symlink_desc,		(vop_t *) ufs_symlink },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_whiteout_desc,		(vop_t *) ufs_whiteout },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_vnodeop_opv_desc =
	{ &ufs_vnodeop_p, ufs_vnodeop_entries };

static vop_t **ufs_specop_p;
static struct vnodeopv_entry_desc ufs_specop_entries[] = {
	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_close_desc,		(vop_t *) ufsspec_close },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_read_desc,		(vop_t *) ufsspec_read },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_write_desc,		(vop_t *) ufsspec_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_specop_opv_desc =
	{ &ufs_specop_p, ufs_specop_entries };

static vop_t **ufs_fifoop_p;
static struct vnodeopv_entry_desc ufs_fifoop_entries[] = {
	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_close_desc,		(vop_t *) ufsfifo_close },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_read_desc,		(vop_t *) ufsfifo_read },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_write_desc,		(vop_t *) ufsfifo_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_fifoop_opv_desc =
	{ &ufs_fifoop_p, ufs_fifoop_entries };

VNODEOP_SET(ufs_vnodeop_opv_desc);
VNODEOP_SET(ufs_specop_opv_desc);
VNODEOP_SET(ufs_fifoop_opv_desc);

int
ufs_vnoperate(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
}

int
ufs_vnoperatefifo(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap));
}

int
ufs_vnoperatespec(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap));
}
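ufs_vnoperate() and its spec/fifo siblings are trampolines: each call carries
a descriptor whose vdesc_offset indexes the op vector compiled from the tables
above. A minimal sketch of that offset-based dispatch under those assumptions;
the enum, the vector contents, and vnoperate() here are all invented.

#include <stdio.h>

typedef int (*vop_t)(void);

enum { VOFF_OPEN, VOFF_CLOSE, VOFF_MAX };	/* stand-ins for vdesc_offset */

static int my_open(void)  { printf("open\n");  return (0); }
static int my_close(void) { printf("close\n"); return (0); }

/* The compiled op vector, as the opv tables would be flattened at init time. */
static vop_t opv[VOFF_MAX] = { my_open, my_close };

/* Trampoline in the style of ufs_vnoperate(): index the vector and call. */
static int
vnoperate(int vdesc_offset)
{
	return (opv[vdesc_offset]());	/* VOCALL(opv, offset, ap) analogue */
}

int
main(void)
{
	vnoperate(VOFF_OPEN);
	vnoperate(VOFF_CLOSE);
	return (0);
}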