Index: head/lib/libstand/ufs.c =================================================================== --- head/lib/libstand/ufs.c (revision 313474) +++ head/lib/libstand/ufs.c (revision 313475) @@ -1,861 +1,861 @@ /* $NetBSD: ufs.c,v 1.20 1998/03/01 07:15:39 ross Exp $ */ /*- * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * * Copyright (c) 1990, 1991 Carnegie Mellon University * All Rights Reserved. * * Author: David Golub * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone file reading package. 
*/

#include
#include
#include
#include
#include
#include
#include "stand.h"
#include "string.h"

/*
 * File system entry points.  They are only reachable from outside this
 * file through the ufs_fsops table below.
 */
static int	ufs_open(const char *path, struct open_file *f);
static int	ufs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
static int	ufs_close(struct open_file *f);
static int	ufs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
static off_t	ufs_seek(struct open_file *f, off_t offset, int where);
static int	ufs_stat(struct open_file *f, struct stat *sb);
static int	ufs_readdir(struct open_file *f, struct dirent *d);

/*
 * Operations vector for this file system.  The initializer is positional:
 * name, open, close, read, write, seek, stat, readdir.
 */
struct fs_ops ufs_fsops = {
	"ufs",
	ufs_open,
	ufs_close,
	ufs_read,
	ufs_write,
	ufs_seek,
	ufs_stat,
	ufs_readdir
};

/*
 * In-core open file.
 *
 * One of these is allocated by ufs_open(), hung off open_file.f_fsdata and
 * released by ufs_close().  read_inode() resets f_blkno[], f_buf_blkno and
 * f_seekp whenever a different inode is loaded into f_di.
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	struct fs	*f_fs;		/* pointer to super-block */
	union dinode {
		struct ufs1_dinode di1;
		struct ufs2_dinode di2;
	}		f_di;		/* copy of on-disk inode */
	int		f_nindir[NIADDR];
					/* number of blocks mapped by
					   indirect block at level i */
	char		*f_blk[NIADDR];	/* buffer for indirect block at
					   level i */
	size_t		f_blksize[NIADDR];
					/* size of buffer */
	ufs2_daddr_t	f_blkno[NIADDR];/* disk address of block in buffer */
	ufs2_daddr_t	f_buf_blkno;	/* block number of data block */
	char		*f_buf;		/* buffer for data block */
	size_t		f_buf_size;	/* size of data block */
};

/*
 * Fetch an inode field from whichever on-disk inode layout the mounted
 * file system uses, chosen by the superblock magic number.
 */
#define DIP(fp, field) \
	((fp)->f_fs->fs_magic == FS_UFS1_MAGIC ? \
	(fp)->f_di.di1.field : (fp)->f_di.di2.field)

/* Internal helpers, defined below. */
static int	read_inode(ino_t, struct open_file *);
static int	block_map(struct open_file *, ufs2_daddr_t, ufs2_daddr_t *);
static int	buf_read_file(struct open_file *, char **, size_t *);
static int	buf_write_file(struct open_file *, char *, size_t *);
static int	search_directory(char *, struct open_file *, ino_t *);

/*
 * Read a new inode into a file structure.
*/ static int read_inode(inumber, f) ino_t inumber; struct open_file *f; { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; char *buf; size_t rsize; int rc; if (fs == NULL) panic("fs == NULL"); /* * Read inode and save it. */ buf = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize, buf, &rsize); if (rc) goto out; if (rsize != fs->fs_bsize) { rc = EIO; goto out; } if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) fp->f_di.di1 = ((struct ufs1_dinode *)buf) [ino_to_fsbo(fs, inumber)]; else fp->f_di.di2 = ((struct ufs2_dinode *)buf) [ino_to_fsbo(fs, inumber)]; /* * Clear out the old buffers */ { int level; for (level = 0; level < NIADDR; level++) fp->f_blkno[level] = -1; fp->f_buf_blkno = -1; } fp->f_seekp = 0; out: free(buf); return (rc); } /* * Given an offset in a file, find the disk block number that * contains that block. */ static int block_map(f, file_block, disk_block_p) struct open_file *f; ufs2_daddr_t file_block; ufs2_daddr_t *disk_block_p; /* out */ { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; int level; int idx; ufs2_daddr_t ind_block_num; int rc; /* * Index structure of an inode: * * di_db[0..NDADDR-1] hold block numbers for blocks * 0..NDADDR-1 * * di_ib[0] index block 0 is the single indirect block * holds block numbers for blocks * NDADDR .. NDADDR + NINDIR(fs)-1 * * di_ib[1] index block 1 is the double indirect block * holds block numbers for INDEX blocks for blocks * NDADDR + NINDIR(fs) .. * NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1 * * di_ib[2] index block 2 is the triple indirect block * holds block numbers for double-indirect * blocks for blocks * NDADDR + NINDIR(fs) + NINDIR(fs)**2 .. * NDADDR + NINDIR(fs) + NINDIR(fs)**2 * + NINDIR(fs)**3 - 1 */ if (file_block < NDADDR) { /* Direct block. 
*/ *disk_block_p = DIP(fp, di_db[file_block]); return (0); } file_block -= NDADDR; /* * nindir[0] = NINDIR * nindir[1] = NINDIR**2 * nindir[2] = NINDIR**3 * etc */ for (level = 0; level < NIADDR; level++) { if (file_block < fp->f_nindir[level]) break; file_block -= fp->f_nindir[level]; } if (level == NIADDR) { /* Block number too high */ return (EFBIG); } ind_block_num = DIP(fp, di_ib[level]); for (; level >= 0; level--) { if (ind_block_num == 0) { *disk_block_p = 0; /* missing */ return (0); } if (fp->f_blkno[level] != ind_block_num) { if (fp->f_blk[level] == (char *)0) fp->f_blk[level] = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fp->f_fs, ind_block_num), fs->fs_bsize, fp->f_blk[level], &fp->f_blksize[level]); if (rc) return (rc); if (fp->f_blksize[level] != fs->fs_bsize) return (EIO); fp->f_blkno[level] = ind_block_num; } if (level > 0) { idx = file_block / fp->f_nindir[level - 1]; file_block %= fp->f_nindir[level - 1]; } else idx = file_block; if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) ind_block_num = ((ufs1_daddr_t *)fp->f_blk[level])[idx]; else ind_block_num = ((ufs2_daddr_t *)fp->f_blk[level])[idx]; } *disk_block_p = ind_block_num; return (0); } /* * Write a portion of a file from an internal buffer. */ static int buf_write_file(f, buf_p, size_p) struct open_file *f; char *buf_p; size_t *size_p; /* out */ { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; long off; ufs_lbn_t file_block; ufs2_daddr_t disk_block; size_t block_size; int rc; /* * Calculate the starting block address and offset. */ off = blkoff(fs, fp->f_seekp); file_block = lblkno(fs, fp->f_seekp); block_size = sblksize(fs, DIP(fp, di_size), file_block); rc = block_map(f, file_block, &disk_block); if (rc) return (rc); if (disk_block == 0) /* Because we can't allocate space on the drive */ return (EFBIG); /* * Truncate buffer at end of file, and at the end of * this block. 
*/ if (*size_p > DIP(fp, di_size) - fp->f_seekp) *size_p = DIP(fp, di_size) - fp->f_seekp; if (*size_p > block_size - off) *size_p = block_size - off; /* * If we don't entirely occlude the block and it's not * in memory already, read it in first. */ if (((off > 0) || (*size_p + off < block_size)) && (file_block != fp->f_buf_blkno)) { if (fp->f_buf == (char *)0) fp->f_buf = malloc(fs->fs_bsize); twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); fp->f_buf_blkno = file_block; } /* * Copy the user data into the cached block. */ bcopy(buf_p, fp->f_buf + off, *size_p); /* * Write the block out to storage. */ twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); return (rc); } /* * Read a portion of a file into an internal buffer. Return * the location in the buffer and the amount in the buffer. */ static int buf_read_file(f, buf_p, size_p) struct open_file *f; char **buf_p; /* out */ size_t *size_p; /* out */ { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; long off; ufs_lbn_t file_block; ufs2_daddr_t disk_block; size_t block_size; int rc; off = blkoff(fs, fp->f_seekp); file_block = lblkno(fs, fp->f_seekp); block_size = sblksize(fs, DIP(fp, di_size), file_block); if (file_block != fp->f_buf_blkno) { if (fp->f_buf == (char *)0) fp->f_buf = malloc(fs->fs_bsize); rc = block_map(f, file_block, &disk_block); if (rc) return (rc); if (disk_block == 0) { bzero(fp->f_buf, block_size); fp->f_buf_size = block_size; } else { twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); } fp->f_buf_blkno = file_block; } /* * Return address of byte in buffer corresponding to * offset, and size of remainder of buffer after that * byte. 
*/ *buf_p = fp->f_buf + off; *size_p = block_size - off; /* * But truncate buffer at end of file. */ if (*size_p > DIP(fp, di_size) - fp->f_seekp) *size_p = DIP(fp, di_size) - fp->f_seekp; return (0); } /* * Search a directory for a name and return its * i_number. */ static int search_directory(name, f, inumber_p) char *name; struct open_file *f; ino_t *inumber_p; /* out */ { struct file *fp = (struct file *)f->f_fsdata; struct direct *dp; struct direct *edp; char *buf; size_t buf_size; int namlen, length; int rc; length = strlen(name); fp->f_seekp = 0; while (fp->f_seekp < DIP(fp, di_size)) { rc = buf_read_file(f, &buf, &buf_size); if (rc) return (rc); dp = (struct direct *)buf; edp = (struct direct *)(buf + buf_size); while (dp < edp) { if (dp->d_ino == (ino_t)0) goto next; #if BYTE_ORDER == LITTLE_ENDIAN if (fp->f_fs->fs_maxsymlinklen <= 0) namlen = dp->d_type; else #endif namlen = dp->d_namlen; if (namlen == length && !strcmp(name, dp->d_name)) { /* found entry */ *inumber_p = dp->d_ino; return (0); } next: dp = (struct direct *)((char *)dp + dp->d_reclen); } fp->f_seekp += buf_size; } return (ENOENT); } static int sblock_try[] = SBLOCKSEARCH; /* * Open a file. */ static int ufs_open(upath, f) const char *upath; struct open_file *f; { char *cp, *ncp; int c; ino_t inumber, parent_inumber; struct file *fp; struct fs *fs; int i, rc; size_t buf_size; int nlinks = 0; char namebuf[MAXPATHLEN+1]; char *buf = NULL; char *path = NULL; /* allocate file system specific data structure */ fp = malloc(sizeof(struct file)); bzero(fp, sizeof(struct file)); f->f_fsdata = (void *)fp; /* allocate space and read super block */ fs = malloc(SBLOCKSIZE); fp->f_fs = fs; twiddle(1); /* * Try reading the superblock in each of its possible locations. 
*/ for (i = 0; sblock_try[i] != -1; i++) { rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, (char *)fs, &buf_size); if (rc) goto out; if ((fs->fs_magic == FS_UFS1_MAGIC || (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc == sblock_try[i])) && buf_size == SBLOCKSIZE && fs->fs_bsize <= MAXBSIZE && fs->fs_bsize >= sizeof(struct fs)) break; } if (sblock_try[i] == -1) { rc = EINVAL; goto out; } /* * Calculate indirect block levels. */ { ufs2_daddr_t mult; int level; mult = 1; for (level = 0; level < NIADDR; level++) { mult *= NINDIR(fs); fp->f_nindir[level] = mult; } } inumber = ROOTINO; if ((rc = read_inode(inumber, f)) != 0) goto out; cp = path = strdup(upath); if (path == NULL) { rc = ENOMEM; goto out; } while (*cp) { /* * Remove extra separators */ while (*cp == '/') cp++; if (*cp == '\0') break; /* * Check that current node is a directory. */ if ((DIP(fp, di_mode) & IFMT) != IFDIR) { rc = ENOTDIR; goto out; } /* * Get next component of path name. */ { int len = 0; ncp = cp; while ((c = *cp) != '\0' && c != '/') { - if (++len > MAXNAMLEN) { + if (++len > UFS_MAXNAMLEN) { rc = ENOENT; goto out; } cp++; } *cp = '\0'; } /* * Look up component in current directory. * Save directory inumber in case we find a * symbolic link. */ parent_inumber = inumber; rc = search_directory(ncp, f, &inumber); *cp = c; if (rc) goto out; /* * Open next component. */ if ((rc = read_inode(inumber, f)) != 0) goto out; /* * Check for symbolic link. 
*/ if ((DIP(fp, di_mode) & IFMT) == IFLNK) { int link_len = DIP(fp, di_size); int len; len = strlen(cp); if (link_len + len > MAXPATHLEN || ++nlinks > MAXSYMLINKS) { rc = ENOENT; goto out; } bcopy(cp, &namebuf[link_len], len + 1); if (link_len < fs->fs_maxsymlinklen) { if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) cp = (caddr_t)(fp->f_di.di1.di_db); else cp = (caddr_t)(fp->f_di.di2.di_db); bcopy(cp, namebuf, (unsigned) link_len); } else { /* * Read file for symbolic link */ size_t buf_size; ufs2_daddr_t disk_block; struct fs *fs = fp->f_fs; if (!buf) buf = malloc(fs->fs_bsize); rc = block_map(f, (ufs2_daddr_t)0, &disk_block); if (rc) goto out; twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), fs->fs_bsize, buf, &buf_size); if (rc) goto out; bcopy((char *)buf, namebuf, (unsigned)link_len); } /* * If relative pathname, restart at parent directory. * If absolute pathname, restart at root. */ cp = namebuf; if (*cp != '/') inumber = parent_inumber; else inumber = (ino_t)ROOTINO; if ((rc = read_inode(inumber, f)) != 0) goto out; } } /* * Found terminal component. */ rc = 0; fp->f_seekp = 0; out: if (buf) free(buf); if (path) free(path); if (rc) { if (fp->f_buf) free(fp->f_buf); free(fp->f_fs); free(fp); } return (rc); } static int ufs_close(f) struct open_file *f; { struct file *fp = (struct file *)f->f_fsdata; int level; f->f_fsdata = (void *)0; if (fp == (struct file *)0) return (0); for (level = 0; level < NIADDR; level++) { if (fp->f_blk[level]) free(fp->f_blk[level]); } if (fp->f_buf) free(fp->f_buf); free(fp->f_fs); free(fp); return (0); } /* * Copy a portion of a file into kernel memory. * Cross block boundaries when necessary. 
*/ static int ufs_read(f, start, size, resid) struct open_file *f; void *start; size_t size; size_t *resid; /* out */ { struct file *fp = (struct file *)f->f_fsdata; size_t csize; char *buf; size_t buf_size; int rc = 0; char *addr = start; while (size != 0) { if (fp->f_seekp >= DIP(fp, di_size)) break; rc = buf_read_file(f, &buf, &buf_size); if (rc) break; csize = size; if (csize > buf_size) csize = buf_size; bcopy(buf, addr, csize); fp->f_seekp += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } /* * Write to a portion of an already allocated file. * Cross block boundaries when necessary. Can not * extend the file. */ static int ufs_write(f, start, size, resid) struct open_file *f; void *start; size_t size; size_t *resid; /* out */ { struct file *fp = (struct file *)f->f_fsdata; size_t csize; int rc = 0; char *addr = start; csize = size; while ((size != 0) && (csize != 0)) { if (fp->f_seekp >= DIP(fp, di_size)) break; if (csize >= 512) csize = 512; /* XXX */ rc = buf_write_file(f, addr, &csize); if (rc) break; fp->f_seekp += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } static off_t ufs_seek(f, offset, where) struct open_file *f; off_t offset; int where; { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_seekp = offset; break; case SEEK_CUR: fp->f_seekp += offset; break; case SEEK_END: fp->f_seekp = DIP(fp, di_size) - offset; break; default: errno = EINVAL; return (-1); } return (fp->f_seekp); } static int ufs_stat(f, sb) struct open_file *f; struct stat *sb; { struct file *fp = (struct file *)f->f_fsdata; /* only important stuff */ sb->st_mode = DIP(fp, di_mode); sb->st_uid = DIP(fp, di_uid); sb->st_gid = DIP(fp, di_gid); sb->st_size = DIP(fp, di_size); return (0); } static int ufs_readdir(struct open_file *f, struct dirent *d) { struct file *fp = (struct file *)f->f_fsdata; struct direct *dp; char *buf; size_t buf_size; int error; /* * assume that a directory 
entry will not be split across blocks */ again: if (fp->f_seekp >= DIP(fp, di_size)) return (ENOENT); error = buf_read_file(f, &buf, &buf_size); if (error) return (error); dp = (struct direct *)buf; fp->f_seekp += dp->d_reclen; if (dp->d_ino == (ino_t)0) goto again; d->d_type = dp->d_type; strcpy(d->d_name, dp->d_name); return (0); } Index: head/sbin/fsck_ffs/fsutil.c =================================================================== --- head/sbin/fsck_ffs/fsutil.c (revision 313474) +++ head/sbin/fsck_ffs/fsutil.c (revision 313475) @@ -1,1044 +1,1044 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" static void slowio_start(void); static void slowio_end(void); static void printIOstats(void); static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ static struct timespec startpass, finishpass; struct timeval slowio_starttime; int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ int slowio_pollcnt; static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ static TAILQ_HEAD(buflist, bufarea) bufhead; /* head of buffer cache list */ static int numbufs; /* size of buffer cache */ static char *buftype[BT_NUMBUFTYPES] = BT_NAMES; static struct bufarea *cgbufs; /* header for cylinder group cache */ static int flushtries; /* number of tries to reclaim memory */ void fsutilinit(void) { diskreads = totaldiskreads = totalreads = 0; bzero(&startpass, sizeof(struct timespec)); bzero(&finishpass, sizeof(struct timespec)); bzero(&slowio_starttime, sizeof(struct timeval)); slowio_delay_usec = 10000; slowio_pollcnt = 0; bzero(&cgblk, sizeof(struct bufarea)); TAILQ_INIT(&bufhead); numbufs = 0; /* buftype ? 
*/ cgbufs = NULL; flushtries = 0; } int ftypeok(union dinode *dp) { switch (DIP(dp, di_mode) & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", DIP(dp, di_mode)); return (0); } } int reply(const char *question) { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = !strcmp(question, "CONTINUE"); printf("\n"); if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { printf("%s? no\n\n", question); resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); while (c != '\n' && getc(stdin) != '\n') { if (feof(stdin)) { resolved = 0; return (0); } } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); resolved = 0; return (0); } /* * Look up state information for an inode. */ struct inostat * inoinfo(ino_t inum) { static struct inostat unallocated = { USTATE, 0, 0 }; struct inostatlist *ilp; int iloff; if (inum > maxino) errx(EEXIT, "inoinfo: inumber %ju out of range", (uintmax_t)inum); ilp = &inostathead[inum / sblock.fs_ipg]; iloff = inum % sblock.fs_ipg; if (iloff >= ilp->il_numalloced) return (&unallocated); return (&ilp->il_stat[iloff]); } /* * Malloc buffers and set up cache. 
*/ void bufinit(void) { struct bufarea *bp; long bufcnt, i; char *bufp; pbp = pdirbp = (struct bufarea *)0; bufp = Malloc((unsigned int)sblock.fs_bsize); if (bufp == NULL) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk, BT_CYLGRP); TAILQ_INIT(&bufhead); bufcnt = MAXBUFS; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); bufp = Malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; TAILQ_INSERT_HEAD(&bufhead, bp, b_list); initbarea(bp, BT_UNKNOWN); } numbufs = i; /* save number of buffers */ for (i = 0; i < BT_NUMBUFTYPES; i++) { readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; readcnt[i] = totalreadcnt[i] = 0; } } /* * Manage cylinder group buffers. */ static struct bufarea *cgbufs; /* header for cylinder group cache */ static int flushtries; /* number of tries to reclaim memory */ struct bufarea * cgget(int cg) { struct bufarea *cgbp; struct cg *cgp; if (cgbufs == NULL) { cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea)); if (cgbufs == NULL) errx(EEXIT, "cannot allocate cylinder group buffers"); } cgbp = &cgbufs[cg]; if (cgbp->b_un.b_cg != NULL) return (cgbp); cgp = NULL; if (flushtries == 0) cgp = malloc((unsigned int)sblock.fs_cgsize); if (cgp == NULL) { getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); return (&cgblk); } cgbp->b_un.b_cg = cgp; initbarea(cgbp, BT_CYLGRP); getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); return (cgbp); } /* * Attempt to flush a cylinder group cache entry. * Return whether the flush was successful. 
*/ int flushentry(void) { struct bufarea *cgbp; if (flushtries == sblock.fs_ncg || cgbufs == NULL) return (0); cgbp = &cgbufs[flushtries++]; if (cgbp->b_un.b_cg == NULL) return (0); flush(fswritefd, cgbp); free(cgbp->b_un.b_buf); cgbp->b_un.b_buf = NULL; return (1); } /* * Manage a cache of directory blocks. */ struct bufarea * getdatablk(ufs2_daddr_t blkno, long size, int type) { struct bufarea *bp; TAILQ_FOREACH(bp, &bufhead, b_list) if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; TAILQ_FOREACH_REVERSE(bp, &bufhead, buflist, b_list) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == NULL) errx(EEXIT, "deadlocked buffer pool"); bp->b_type = type; getblk(bp, blkno, size); /* fall through */ foundit: if (debug && bp->b_type != type) printf("Buffer type changed from %s to %s\n", buftype[bp->b_type], buftype[type]); TAILQ_REMOVE(&bufhead, bp, b_list); TAILQ_INSERT_HEAD(&bufhead, bp, b_list); bp->b_flags |= B_INUSE; return (bp); } /* * Timespec operations (from ). */ #define timespecsub(vvp, uvp) \ do { \ (vvp)->tv_sec -= (uvp)->tv_sec; \ (vvp)->tv_nsec -= (uvp)->tv_nsec; \ if ((vvp)->tv_nsec < 0) { \ (vvp)->tv_sec--; \ (vvp)->tv_nsec += 1000000000; \ } \ } while (0) #define timespecadd(vvp, uvp) \ do { \ (vvp)->tv_sec += (uvp)->tv_sec; \ (vvp)->tv_nsec += (uvp)->tv_nsec; \ if ((vvp)->tv_nsec >= 1000000000) { \ (vvp)->tv_sec++; \ (vvp)->tv_nsec -= 1000000000; \ } \ } while (0) void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) { ufs2_daddr_t dblk; struct timespec start, finish; dblk = fsbtodb(&sblock, blk); if (bp->b_bno == dblk) { totalreads++; } else { flush(fswritefd, bp); if (debug) { readcnt[bp->b_type]++; clock_gettime(CLOCK_REALTIME_PRECISE, &start); } bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); if (debug) { clock_gettime(CLOCK_REALTIME_PRECISE, &finish); timespecsub(&finish, &start); timespecadd(&readtime[bp->b_type], &finish); } bp->b_bno = dblk; bp->b_size = size; } } void flush(int fd, struct bufarea *bp) { int i, j; if 
(!bp->b_dirty) return; bp->b_dirty = 0; if (fswritefd < 0) { pfatal("WRITING IN READ_ONLY MODE.\n"); return; } if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", (long long)bp->b_bno); bp->b_errs = 0; blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { blwrite(fswritefd, (char *)sblock.fs_csp + i, fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), MIN(sblock.fs_cssize - i, sblock.fs_bsize)); } } void rwerror(const char *mesg, ufs2_daddr_t blk) { if (bkgrdcheck) exit(EEXIT); if (preen == 0) printf("\n"); pfatal("CANNOT %s: %ld", mesg, (long)blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(int markclean) { struct bufarea *bp, *nbp; int ofsmodified, cnt; if (bkgrdflag) { unlink(snapname); if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) { cmd.value = FS_UNCLEAN; cmd.size = markclean ? -1 : 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) rwerror("SET FILE SYSTEM FLAGS", FS_UNCLEAN); if (!preen) { printf("\n***** FILE SYSTEM MARKED %s *****\n", markclean ? 
"CLEAN" : "DIRTY"); if (!markclean) rerun = 1; } } else if (!preen && !markclean) { printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); rerun = 1; } } if (debug && totalreads > 0) printf("cache with %d buffers missed %ld of %ld (%d%%)\n", numbufs, totaldiskreads, totalreads, (int)(totaldiskreads * 100 / totalreads)); if (fswritefd < 0) { (void)close(fsreadfd); return; } flush(fswritefd, &sblk); if (havesb && cursnapshot == 0 && sblock.fs_magic == FS_UFS2_MAGIC && sblk.b_bno != sblock.fs_sblockloc / dev_bsize && !preen && reply("UPDATE STANDARD SUPERBLOCK")) { sblk.b_bno = sblock.fs_sblockloc / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); cnt = 0; TAILQ_FOREACH_REVERSE_SAFE(bp, &bufhead, buflist, b_list, nbp) { TAILQ_REMOVE(&bufhead, bp, b_list); cnt++; flush(fswritefd, bp); free(bp->b_un.b_buf); free((char *)bp); } if (numbufs != cnt) errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); if (cgbufs != NULL) { for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { if (cgbufs[cnt].b_un.b_cg == NULL) continue; flush(fswritefd, &cgbufs[cnt]); free(cgbufs[cnt].b_un.b_cg); } free(cgbufs); } pbp = pdirbp = (struct bufarea *)0; if (cursnapshot == 0 && sblock.fs_clean != markclean) { if ((sblock.fs_clean = markclean) != 0) { sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); sblock.fs_pendingblocks = 0; sblock.fs_pendinginodes = 0; } sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) { printf("\n***** FILE SYSTEM MARKED %s *****\n", markclean ? "CLEAN" : "DIRTY"); if (!markclean) rerun = 1; } } else if (!preen) { if (markclean) { printf("\n***** FILE SYSTEM IS CLEAN *****\n"); } else { printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); rerun = 1; } } (void)close(fsreadfd); (void)close(fswritefd); } /* * Print out I/O statistics. 
*/ void IOstats(char *what) { int i; if (debug == 0) return; if (diskreads == 0) { printf("%s: no I/O\n\n", what); return; } if (startpass.tv_sec == 0) startpass = startprog; printf("%s: I/O statistics\n", what); printIOstats(); totaldiskreads += diskreads; diskreads = 0; for (i = 0; i < BT_NUMBUFTYPES; i++) { timespecadd(&totalreadtime[i], &readtime[i]); totalreadcnt[i] += readcnt[i]; readtime[i].tv_sec = readtime[i].tv_nsec = 0; readcnt[i] = 0; } clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); } void finalIOstats(void) { int i; if (debug == 0) return; printf("Final I/O statistics\n"); totaldiskreads += diskreads; diskreads = totaldiskreads; startpass = startprog; for (i = 0; i < BT_NUMBUFTYPES; i++) { timespecadd(&totalreadtime[i], &readtime[i]); totalreadcnt[i] += readcnt[i]; readtime[i] = totalreadtime[i]; readcnt[i] = totalreadcnt[i]; } printIOstats(); } static void printIOstats(void) { long long msec, totalmsec; int i; clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); timespecsub(&finishpass, &startpass); printf("Running time: %jd.%03ld sec\n", (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); printf("buffer reads by type:\n"); for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) totalmsec += readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; if (totalmsec == 0) totalmsec = 1; for (i = 0; i < BT_NUMBUFTYPES; i++) { if (readcnt[i] == 0) continue; msec = readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, (readcnt[i] * 1000 / diskreads) % 10, (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000, msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); } printf("\n"); } int blread(int fd, char *buf, ufs2_daddr_t blk, long size) { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (bkgrdflag) slowio_start(); totalreads++; diskreads++; if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); 
else if (read(fd, buf, (int)size) == size) { if (bkgrdflag) slowio_end(); return (0); } /* * This is handled specially here instead of in rwerror because * rwerror is used for all sorts of errors, not just true read/write * errors. It should be refactored and fixed. */ if (surrender) { pfatal("CANNOT READ_BLK: %ld", (long)blk); errx(EEXIT, "ABORTING DUE TO READ ERRORS"); } else rwerror("READ BLK", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (read(fd, cp, (int)secsize) != secsize) { (void)lseek(fd, offset + i + secsize, 0); if (secsize != dev_bsize && dev_bsize != 1) printf(" %jd (%jd),", (intmax_t)(blk * dev_bsize + i) / secsize, (intmax_t)blk + i / dev_bsize); else printf(" %jd,", (intmax_t)blk + i / dev_bsize); errs++; } } printf("\n"); if (errs) resolved = 0; return (errs); } void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); else if (write(fd, buf, size) == size) { fsmodified = 1; return; } resolved = 0; rwerror("WRITE BLK", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (write(fd, cp, dev_bsize) != dev_bsize) { (void)lseek(fd, offset + i + dev_bsize, 0); printf(" %jd,", (intmax_t)blk + i / dev_bsize); } printf("\n"); return; } void blerase(int fd, ufs2_daddr_t blk, long size) { off_t ioarg[2]; if (fd < 0) return; ioarg[0] = blk * dev_bsize; ioarg[1] = size; ioctl(fd, DIOCGDELETE, ioarg); /* we don't really care if we succeed or not */ return; } /* * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by * definition a multiple of dev_bsize. 
*/ void blzero(int fd, ufs2_daddr_t blk, long size) { static char *zero; off_t offset, len; if (fd < 0) return; if (zero == NULL) { zero = calloc(ZEROBUFSIZE, 1); if (zero == NULL) errx(EEXIT, "cannot allocate buffer pool"); } offset = blk * dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); while (size > 0) { len = MIN(ZEROBUFSIZE, size); if (write(fd, zero, len) != len) rwerror("WRITE BLK", blk); blk += len / dev_bsize; size -= len; } } /* * Verify cylinder group's magic number and other parameters. If the * test fails, offer an option to rebuild the whole cylinder group. */ int check_cgmagic(int cg, struct bufarea *cgbp) { struct cg *cgp = cgbp->b_un.b_cg; /* * Extended cylinder group checks. */ if (cg_chkmagic(cgp) && ((sblock.fs_magic == FS_UFS1_MAGIC && cgp->cg_old_niblk == sblock.fs_ipg && cgp->cg_ndblk <= sblock.fs_fpg && cgp->cg_old_ncyl <= sblock.fs_old_cpg) || (sblock.fs_magic == FS_UFS2_MAGIC && cgp->cg_niblk == sblock.fs_ipg && cgp->cg_ndblk <= sblock.fs_fpg && cgp->cg_initediblk <= sblock.fs_ipg))) { return (1); } pfatal("CYLINDER GROUP %d: BAD MAGIC NUMBER", cg); if (!reply("REBUILD CYLINDER GROUP")) { printf("YOU WILL NEED TO RERUN FSCK.\n"); rerun = 1; return (1); } /* * Zero out the cylinder group and then initialize critical fields. * Bit maps and summaries will be recalculated by later passes. 
*/ memset(cgp, 0, (size_t)sblock.fs_cgsize); cgp->cg_magic = CG_MAGIC; cgp->cg_cgx = cg; cgp->cg_niblk = sblock.fs_ipg; cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) cgp->cg_ndblk = sblock.fs_fpg; else cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg); cgp->cg_iusedoff = &cgp->cg_space[0] - (u_char *)(&cgp->cg_firstfield); if (sblock.fs_magic == FS_UFS1_MAGIC) { cgp->cg_niblk = 0; cgp->cg_initediblk = 0; cgp->cg_old_ncyl = sblock.fs_old_cpg; cgp->cg_old_niblk = sblock.fs_ipg; cgp->cg_old_btotoff = cgp->cg_iusedoff; cgp->cg_old_boff = cgp->cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t); cgp->cg_iusedoff = cgp->cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t); } cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT); if (sblock.fs_contigsumsize > 0) { cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag; cgp->cg_clustersumoff = roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t)); cgp->cg_clustersumoff -= sizeof(u_int32_t); cgp->cg_clusteroff = cgp->cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); cgp->cg_nextfreeoff = cgp->cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); } dirty(cgbp); return (0); } /* * allocate a data block with the specified number of fragments */ ufs2_daddr_t allocblk(long frags) { int i, j, k, cg, baseblk; struct bufarea *cgbp; struct cg *cgp; if (frags <= 0 || frags > sblock.fs_frag) return (0); for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(i + j)) continue; for (k = 1; k < frags; k++) if (testbmap(i + j + k)) break; if (k < frags) { j += k; continue; } cg = dtog(&sblock, i + j); cgbp = cgget(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) return (0); baseblk = dtogd(&sblock, i + j); for (k = 0; k < frags; k++) { setbmap(i + j + k); 
clrbit(cg_blksfree(cgp), baseblk + k); } n_blks += frags; if (frags == sblock.fs_frag) cgp->cg_cs.cs_nbfree--; else cgp->cg_cs.cs_nffree -= frags; dirty(cgbp); return (i + j); } } return (0); } /* * Free a previously allocated block */ void freeblk(ufs2_daddr_t blkno, long frags) { struct inodesc idesc; idesc.id_blkno = blkno; idesc.id_numfrags = frags; (void)pass4check(&idesc); } /* Slow down IO so as to leave some disk bandwidth for other processes */ void slowio_start() { /* Delay one in every 8 operations */ slowio_pollcnt = (slowio_pollcnt + 1) & 7; if (slowio_pollcnt == 0) { gettimeofday(&slowio_starttime, NULL); } } void slowio_end() { struct timeval tv; int delay_usec; if (slowio_pollcnt != 0) return; /* Update the slowdown interval. */ gettimeofday(&tv, NULL); delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + (tv.tv_usec - slowio_starttime.tv_usec); if (delay_usec < 64) delay_usec = 64; if (delay_usec > 2500000) delay_usec = 2500000; slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; /* delay by 8 times the average IO delay */ if (slowio_delay_usec > 64) usleep(slowio_delay_usec * 8); } /* * Find a pathname */ void getpathname(char *namebuf, ino_t curdir, ino_t ino) { int len; char *cp; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || !INO_IS_DVALID(curdir)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = strdup(".."); if ((ckinode(ginode(ino), &idesc) & FOUND) == 0) break; namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(idesc.id_number), &idesc)&FOUND) == 0) break; len = 
strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; - if (cp < &namebuf[MAXNAMLEN]) + if (cp < &namebuf[UFS_MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(int sig __unused) { ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after file system checks complete * so that reboot sequence may be interrupted. */ void catchquit(int sig __unused) { printf("returning to single-user after file system check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(struct inodesc *idesc, const char *msg) { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn("%s", msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #include /* * An unexpected inconsistency occurred. * Die if preening or file system is running with soft dependency protocol, * otherwise just print message and continue. */ void pfatal(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (!preen) { (void)vfprintf(stdout, fmt, ap); va_end(ap); if (usedsoftdep) (void)fprintf(stdout, "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); /* * Force foreground fsck to clean up inconsistency. 
*/ if (bkgrdflag) { cmd.value = FS_NEEDSFSCK; cmd.size = 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); ckfini(0); exit(EEXIT); } return; } if (cdevname == NULL) cdevname = strdup("fsck"); (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); (void)fprintf(stdout, "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", cdevname, usedsoftdep ? " SOFT UPDATE " : " "); /* * Force foreground fsck to clean up inconsistency. */ if (bkgrdflag) { cmd.value = FS_NEEDSFSCK; cmd.size = 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); } ckfini(0); exit(EEXIT); } /* * Pwarn just prints a message when not preening or running soft dependency * protocol, or a warning (preceded by filename) when preening. */ void pwarn(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (preen) (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stdout, fmt, ap); va_end(ap); exit(EEXIT); } Index: head/sbin/fsck_ffs/pass3.c =================================================================== --- head/sbin/fsck_ffs/pass3.c (revision 313474) +++ head/sbin/fsck_ffs/pass3.c (revision 313475) @@ -1,127 +1,127 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if 0 #ifndef lint static const char sccsid[] = "@(#)pass3.c 8.2 (Berkeley) 4/27/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "fsck.h" void pass3(void) { struct inoinfo *inp; int loopcnt, inpindex, state; ino_t orphan; struct inodesc idesc; - char namebuf[MAXNAMLEN+1]; + char namebuf[UFS_MAXNAMLEN+1]; for (inpindex = inplast - 1; inpindex >= 0; inpindex--) { if (got_siginfo) { printf("%s: phase 3: dir %d of %d (%d%%)\n", cdevname, (int)(inplast - inpindex - 1), (int)inplast, (int)((inplast - inpindex - 1) * 100 / inplast)); got_siginfo = 0; } if (got_sigalarm) { setproctitle("%s p3 %d%%", cdevname, (int)((inplast - inpindex - 1) * 100 / inplast)); got_sigalarm = 0; } inp = inpsort[inpindex]; state = inoinfo(inp->i_number)->ino_state; if (inp->i_number == ROOTINO || (inp->i_parent != 0 && !S_IS_DUNFOUND(state))) continue; if (state == DCLEAR) continue; /* * If we are running with soft updates and we come * across unreferenced directories, we just leave * them in DSTATE which will cause them to be pitched * in pass 4. 
*/ if ((preen || bkgrdflag) && resolved && usedsoftdep && S_IS_DUNFOUND(state)) { if (inp->i_dotdot >= ROOTINO) inoinfo(inp->i_dotdot)->ino_linkcnt++; continue; } for (loopcnt = 0; ; loopcnt++) { orphan = inp->i_number; if (inp->i_parent == 0 || !INO_IS_DUNFOUND(inp->i_parent) || loopcnt > countdirs) break; inp = getinoinfo(inp->i_parent); } if (loopcnt <= countdirs) { if (linkup(orphan, inp->i_dotdot, NULL)) { inp->i_parent = inp->i_dotdot = lfdir; inoinfo(lfdir)->ino_linkcnt--; } inoinfo(orphan)->ino_state = DFOUND; propagate(); continue; } pfatal("ORPHANED DIRECTORY LOOP DETECTED I=%lu", (u_long)orphan); if (reply("RECONNECT") == 0) continue; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_number = inp->i_parent; idesc.id_parent = orphan; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(inp->i_parent), &idesc) & FOUND) == 0) pfatal("COULD NOT FIND NAME IN PARENT DIRECTORY"); if (linkup(orphan, inp->i_parent, namebuf)) { idesc.id_func = clearentry; if (ckinode(ginode(inp->i_parent), &idesc) & FOUND) inoinfo(orphan)->ino_linkcnt++; inp->i_parent = inp->i_dotdot = lfdir; inoinfo(lfdir)->ino_linkcnt--; } inoinfo(orphan)->ino_state = DFOUND; propagate(); } } Index: head/sys/ufs/ufs/dir.h =================================================================== --- head/sys/ufs/ufs/dir.h (revision 313474) +++ head/sys/ufs/ufs/dir.h (revision 313475) @@ -1,155 +1,156 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)dir.h 8.2 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _UFS_UFS_DIR_H_ #define _UFS_UFS_DIR_H_ /* * Theoretically, directories can be more than 2Gb in length, however, in * practice this seems unlikely. So, we define the type doff_t as a 32-bit * quantity to keep down the cost of doing lookup on a 32-bit machine. */ #define doff_t int32_t #define MAXDIRSIZE (0x7fffffff) /* * A directory consists of some number of blocks of DIRBLKSIZ * bytes, where DIRBLKSIZ is chosen such that it can be transferred * to disk in a single atomic operation (e.g. 512 bytes on most machines). * * Each DIRBLKSIZ byte block contains some number of directory entry * structures, which are of variable length. 
Each directory entry has * a struct direct at the front of it, containing its inode number, * the length of the entry, and the length of the name contained in * the entry. These are followed by the name padded to a 4 byte boundary * with null bytes. All names are guaranteed null terminated. - * The maximum length of a name in a directory is MAXNAMLEN. + * The maximum length of a name in a directory is UFS_MAXNAMLEN. * * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent * a directory entry. Free space in a directory is represented by * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes * in a directory block are claimed by the directory entries. This * usually results in the last entry in a directory having a large * dp->d_reclen. When entries are deleted from a directory, the * space is returned to the previous entry in the same directory * block by increasing its dp->d_reclen. If the first entry of * a directory block is free, then its dp->d_ino is set to 0. * Entries other than the first in a directory do not normally have * dp->d_ino set to 0. */ #define DIRBLKSIZ DEV_BSIZE -#define MAXNAMLEN 255 +#define UFS_MAXNAMLEN 255 struct direct { u_int32_t d_ino; /* inode number of entry */ u_int16_t d_reclen; /* length of this record */ u_int8_t d_type; /* file type, see below */ u_int8_t d_namlen; /* length of string in d_name */ - char d_name[MAXNAMLEN + 1];/* name with length <= MAXNAMLEN */ + char d_name[UFS_MAXNAMLEN + 1]; + /* name with length <= UFS_MAXNAMLEN */ }; /* * File types */ #define DT_UNKNOWN 0 #define DT_FIFO 1 #define DT_CHR 2 #define DT_DIR 4 #define DT_BLK 6 #define DT_REG 8 #define DT_LNK 10 #define DT_SOCK 12 #define DT_WHT 14 /* * Convert between stat structure types and directory types. */ #define IFTODT(mode) (((mode) & 0170000) >> 12) #define DTTOIF(dirtype) ((dirtype) << 12) /* * The DIRSIZ macro gives the minimum record length which will hold * the directory entry. 
This requires the amount of space in struct direct * without the d_name field, plus enough space for the name with a terminating * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. * * */ #define DIRECTSIZ(namlen) \ ((__offsetof(struct direct, d_name) + \ ((namlen)+1)*sizeof(((struct direct *)0)->d_name[0]) + 3) & ~3) #if (BYTE_ORDER == LITTLE_ENDIAN) #define DIRSIZ(oldfmt, dp) \ ((oldfmt) ? DIRECTSIZ((dp)->d_type) : DIRECTSIZ((dp)->d_namlen)) #else #define DIRSIZ(oldfmt, dp) \ DIRECTSIZ((dp)->d_namlen) #endif #define OLDDIRFMT 1 #define NEWDIRFMT 0 /* * Template for manipulating directories. Should use struct direct's, - * but the name field is MAXNAMLEN - 1, and this just won't do. + * but the name field is UFS_MAXNAMLEN - 1, and this just won't do. */ struct dirtemplate { u_int32_t dot_ino; int16_t dot_reclen; u_int8_t dot_type; u_int8_t dot_namlen; char dot_name[4]; /* must be multiple of 4 */ u_int32_t dotdot_ino; int16_t dotdot_reclen; u_int8_t dotdot_type; u_int8_t dotdot_namlen; char dotdot_name[4]; /* ditto */ }; /* * This is the old format of directories, sans type element. */ struct odirtemplate { u_int32_t dot_ino; int16_t dot_reclen; u_int16_t dot_namlen; char dot_name[4]; /* must be multiple of 4 */ u_int32_t dotdot_ino; int16_t dotdot_reclen; u_int16_t dotdot_namlen; char dotdot_name[4]; /* ditto */ }; #endif /* !_UFS_UFS_DIR_H_ */ Index: head/sys/ufs/ufs/dirhash.h =================================================================== --- head/sys/ufs/ufs/dirhash.h (revision 313474) +++ head/sys/ufs/ufs/dirhash.h (revision 313475) @@ -1,133 +1,133 @@ /*- * Copyright (c) 2001 Ian Dowse. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _UFS_UFS_DIRHASH_H_ #define _UFS_UFS_DIRHASH_H_ #include #include /* * For fast operations on large directories, we maintain a hash * that maps the file name to the offset of the directory entry within * the directory file. * * The hashing uses a dumb spillover to the next free slot on * collisions, so we must keep the utilisation low to avoid * long linear searches. Deleted entries that are not the last * in a chain must be marked DIRHASH_DEL. * * We also maintain information about free space in each block * to speed up creations. */ #define DIRHASH_EMPTY (-1) /* entry unused */ #define DIRHASH_DEL (-2) /* deleted entry; may be part of chain */ #define DIRALIGN 4 -#define DH_NFSTATS (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN) +#define DH_NFSTATS (DIRECTSIZ(UFS_MAXNAMLEN + 1) / DIRALIGN) /* max DIRALIGN words in a directory entry */ /* * Dirhash uses a score mechanism to achieve a hybrid between a * least-recently-used and a least-often-used algorithm for entry * recycling. 
The score is incremented when a directory is used, and * decremented when the directory is a candidate for recycling. When * the score reaches zero, the hash is recycled. Hashes are linked * together on a TAILQ list, and hashes with higher scores filter * towards the tail (most recently used) end of the list. * * New hash entries are given an initial score of DH_SCOREINIT and are * placed at the most-recently-used end of the list. This helps a lot * in the worst-case scenario where every directory access is * to a directory that is not hashed (i.e. the working set of hash * candidates is much larger than the configured memory limit). In this * case it limits the number of hash builds to 1/DH_SCOREINIT of the * number of accesses. */ #define DH_SCOREINIT 8 /* initial dh_score when dirhash built */ #define DH_SCOREMAX 64 /* max dh_score value */ /* * The main hash table has 2 levels. It is an array of pointers to * blocks of DH_NBLKOFF offsets. */ #define DH_BLKOFFSHIFT 8 #define DH_NBLKOFF (1 << DH_BLKOFFSHIFT) #define DH_BLKOFFMASK (DH_NBLKOFF - 1) #define DH_ENTRY(dh, slot) \ ((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK]) struct dirhash { struct sx dh_lock; /* protects all fields except list & score */ int dh_refcount; doff_t **dh_hash; /* the hash array (2-level) */ int dh_narrays; /* number of entries in dh_hash */ int dh_hlen; /* total slots in the 2-level hash array */ int dh_hused; /* entries in use */ int dh_memreq; /* Memory used. */ /* Free space statistics. XXX assumes DIRBLKSIZ is 512.
*/ u_int8_t *dh_blkfree; /* free DIRALIGN words in each dir block */ int dh_nblk; /* size of dh_blkfree array */ int dh_dirblks; /* number of DIRBLKSIZ blocks in dir */ int dh_firstfree[DH_NFSTATS + 1]; /* first blk with N words free */ doff_t dh_seqoff; /* sequential access optimisation offset */ int dh_score; /* access count for this dirhash */ int dh_onlist; /* true if on the ufsdirhash_list chain */ time_t dh_lastused; /* time the dirhash was last read or written*/ /* Protected by ufsdirhash_mtx. */ TAILQ_ENTRY(dirhash) dh_list; /* chain of all dirhashes */ }; /* * Dirhash functions. */ void ufsdirhash_init(void); void ufsdirhash_uninit(void); int ufsdirhash_build(struct inode *); doff_t ufsdirhash_findfree(struct inode *, int, int *); doff_t ufsdirhash_enduseful(struct inode *); int ufsdirhash_lookup(struct inode *, char *, int, doff_t *, struct buf **, doff_t *); void ufsdirhash_newblk(struct inode *, doff_t); void ufsdirhash_add(struct inode *, struct direct *, doff_t); void ufsdirhash_remove(struct inode *, struct direct *, doff_t); void ufsdirhash_move(struct inode *, struct direct *, doff_t, doff_t); void ufsdirhash_dirtrunc(struct inode *, doff_t); void ufsdirhash_free(struct inode *); void ufsdirhash_checkblock(struct inode *, char *, doff_t); #endif /* !_UFS_UFS_DIRHASH_H_ */ Index: head/sys/ufs/ufs/ufs_lookup.c =================================================================== --- head/sys/ufs/ufs/ufs_lookup.c (revision 313474) +++ head/sys/ufs/ufs/ufs_lookup.c (revision 313475) @@ -1,1485 +1,1485 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ufs.h" #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #include #include #ifdef DIAGNOSTIC static int dirchk = 1; #else static int dirchk = 0; #endif SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); /* true if old FS format...*/ #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) static int ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred, struct thread *td) { int error; #ifdef UFS_ACL /* * NFSv4 Minor Version 1, draft-ietf-nfsv4-minorversion1-03.txt * * 3.16.2.1. ACE4_DELETE vs. ACE4_DELETE_CHILD */ /* * XXX: Is this check required? */ error = VOP_ACCESS(vdp, VEXEC, cred, td); if (error) return (error); error = VOP_ACCESSX(tdp, VDELETE, cred, td); if (error == 0) return (0); error = VOP_ACCESSX(vdp, VDELETE_CHILD, cred, td); if (error == 0) return (0); error = VOP_ACCESSX(vdp, VEXPLICIT_DENY | VDELETE_CHILD, cred, td); if (error) return (error); #endif /* !UFS_ACL */ /* * Standard Unix access control - delete access requires VWRITE. */ error = VOP_ACCESS(vdp, VWRITE, cred, td); if (error) return (error); /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ if ((VTOI(vdp)->i_mode & ISVTX) && VOP_ACCESS(vdp, VADMIN, cred, td) && VOP_ACCESS(tdp, VADMIN, cred, td)) return (EPERM); return (0); } /* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. * If the filesystem is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). 
* * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending * on whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be ".", but the caller must check to ensure it does a vrele and vput * instead of two vputs. * * This routine is actually used as the VOP_CACHEDLOOKUP method, and the * filesystem employs the generic vfs_cache_lookup() as the VOP_LOOKUP * method. * * vfs_cache_lookup() performs the following for us: * check that it is a directory * check accessibility of directory * check for modification attempts on read-only mounts * if name found in cache * if at end of path and deleting or creating * drop it * else * return name.
* return VOP_CACHEDLOOKUP() * * Overall outline of ufs_lookup: * * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache */ int ufs_lookup(ap) struct vop_cachedlookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); } int ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, ino_t *dd_ino) { struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ doff_t i_diroff; /* cached i_diroff value. */ doff_t i_offset; /* cached i_offset value. 
*/ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ int namlen, error; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; ino_t ino, ino1; int ltype; if (vpp != NULL) *vpp = NULL; dp = VTOI(vdp); if (dp->i_effnlink == 0) return (ENOENT); /* * Create a vm object if vmiodirenable is enabled. * Alternatively we could call vnode_create_vobject * in VFS_VGET but we could end up creating objects * that are never used. */ vnode_create_vobject(vdp, DIP(dp, i_size), cnp->cn_thread); bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; #ifdef DEBUG_VFS_LOCKS /* * Assert that the directory vnode is locked, and locked * exclusively for the last component lookup for modifying * operations. * * The directory-modifying operations need to save * intermediate state in the inode between namei() call and * actual directory manipulations. See fields in the struct * inode marked as 'used during directory lookup'. We must * ensure that upgrade in namei() does not happen, since * upgrade might need to unlock vdp. If quotas are enabled, * getinoquota() also requires exclusive lock to modify inode. */ ASSERT_VOP_LOCKED(vdp, "ufs_lookup1"); if ((nameiop == CREATE || nameiop == DELETE || nameiop == RENAME) && (flags & (LOCKPARENT | ISLASTCN)) == (LOCKPARENT | ISLASTCN)) ASSERT_VOP_ELOCKED(vdp, "ufs_lookup2"); #endif restart: bp = NULL; slotoffset = -1; /* * We now have a segment name to search for, and a directory to search. 
* * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ ino = 0; i_diroff = dp->i_diroff; slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slotstatus = NONE; slotneeded = DIRECTSIZ(cnp->cn_namelen); } #ifdef UFS_DIRHASH /* * Use dirhash for fast operations on large directories. The logic * to determine whether to hash the directory is contained within * ufsdirhash_build(); a zero return means that it decided to hash * this directory and it successfully built up the hash table. */ if (ufsdirhash_build(dp) == 0) { /* Look for a free slot if needed. */ enduseful = dp->i_size; if (slotstatus != FOUND) { slotoffset = ufsdirhash_findfree(dp, slotneeded, &slotsize); if (slotoffset >= 0) { slotstatus = COMPACT; enduseful = ufsdirhash_enduseful(dp); if (enduseful < 0) enduseful = dp->i_size; } } /* Look up the component. */ numdirpasses = 1; entryoffsetinblock = 0; /* silence compiler warning */ switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, &i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { case 0: ep = (struct direct *)((char *)bp->b_data + (i_offset & bmask)); goto foundentry; case ENOENT: i_offset = roundup2(dp->i_size, DIRBLKSIZ); goto notfound; default: /* Something failed; just do a linear search. */ break; } } #endif /* UFS_DIRHASH */ /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. 
*/ if (nameiop != LOOKUP || i_diroff == 0 || i_diroff >= dp->i_size) { entryoffsetinblock = 0; i_offset = 0; numdirpasses = 1; } else { i_offset = i_diroff; if ((entryoffsetinblock = i_offset & bmask) && (error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp))) return (error); numdirpasses = 2; nchstats.ncs_2passes++; } prevoff = i_offset; endsearch = roundup2(dp->i_size, DIRBLKSIZ); enduseful = 0; searchloop: while (i_offset < endsearch) { /* * If necessary, get the next directory block. */ if ((i_offset & bmask) == 0) { if (bp != NULL) brelse(bp); error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp); if (error) return (error); entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZE * boundary, have to start looking for free space again. */ if (slotstatus == NONE && (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); if (ep->d_reclen == 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; ufs_dirbad(dp, i_offset, "mangled entry"); i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); i_offset += i; entryoffsetinblock += i; continue; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. Also accumulate space * in the current block so that we can determine if * compaction is viable. 
*/ if (slotstatus != FOUND) { int size = ep->d_reclen; if (ep->d_ino != 0) size -= DIRSIZ(OFSFMT(vdp), ep); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; slotoffset = i_offset; slotsize = ep->d_reclen; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) slotoffset = i_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = i_offset + ep->d_reclen - slotoffset; } } } } /* * Check for a name match. */ if (ep->d_ino) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vdp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (namlen == cnp->cn_namelen && (cnp->cn_nameptr[0] == ep->d_name[0]) && !bcmp(cnp->cn_nameptr, ep->d_name, (unsigned)namlen)) { #ifdef UFS_DIRHASH foundentry: #endif /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release * directory buffer. */ if (vdp->v_mount->mnt_maxsymlinklen > 0 && ep->d_type == DT_WHT) { slotstatus = FOUND; slotoffset = i_offset; slotsize = ep->d_reclen; enduseful = dp->i_size; cnp->cn_flags |= ISWHITEOUT; numdirpasses--; goto notfound; } ino = ep->d_ino; goto found; } } prevoff = i_offset; i_offset += ep->d_reclen; entryoffsetinblock += ep->d_reclen; if (ep->d_ino) enduseful = i_offset; } notfound: /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; i_offset = 0; endsearch = i_diroff; goto searchloop; } if (bp != NULL) brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. */ if ((nameiop == CREATE || nameiop == RENAME || (nameiop == DELETE && (cnp->cn_flags & DOWHITEOUT) && (cnp->cn_flags & ISWHITEOUT))) && (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. * * XXX: Fix the comment above. 
*/ if (flags & WILLBEDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, * then set dp->i_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry * can be put in the range from dp->i_offset to * dp->i_offset + dp->i_count. */ if (slotstatus == NONE) { dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); dp->i_count = 0; enduseful = dp->i_offset; } else if (nameiop == DELETE) { dp->i_offset = slotoffset; if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; } else { dp->i_offset = slotoffset; dp->i_count = slotsize; if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } /* * Insert name into cache (as non-existent) if appropriate. */ if ((cnp->cn_flags & MAKEENTRY) != 0) cache_enter(vdp, NULL, cnp); return (ENOENT); found: if (dd_ino != NULL) *dd_ino = ino; if (numdirpasses == 2) nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. 
*/ if (i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { ufs_dirbad(dp, i_offset, "i_size too small"); dp->i_size = i_offset + DIRSIZ(OFSFMT(vdp), ep); DIP_SET(dp, i_size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; } brelse(bp); /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = rounddown2(i_offset, DIRBLKSIZ); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. */ if (nameiop == DELETE && (flags & ISLASTCN)) { if (flags & LOCKPARENT) ASSERT_VOP_ELOCKED(vdp, __FUNCTION__); /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there * is a previous entry in this block) in dp->i_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). * * Technically we shouldn't be setting these in the * WANTPARENT case (first lookup in rename()), but any * lookups that will result in directory changes will * overwrite these. */ dp->i_offset = i_offset; if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; if (dd_ino != NULL) return (0); if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } if (dp->i_number == ino) { VREF(vdp); *vpp = vdp; vput(tdp); return (0); } *vpp = tdp; return (0); } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == RENAME && (flags & ISLASTCN)) { if (flags & WILLBEDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* * Careful about locking second inode. 
* This can only occur if the target is ".". */ dp->i_offset = i_offset; if (dp->i_number == ino) return (EISDIR); if (dd_ino != NULL) return (0); if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } #ifdef SunOS_doesnt_do_that /* * The only purpose of this check is to return the correct * error. Assume that we want to rename directory "a" * to a file "b", and that we have no ACL_WRITE_DATA on * a containing directory, but we _do_ have ACL_APPEND_DATA. * In that case, the VOP_ACCESS check above will return 0, * and the operation will fail with ENOTDIR instead * of EACCESS. */ if (tdp->v_type == VDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } #endif *vpp = tdp; cnp->cn_flags |= SAVENAME; return (0); } if (dd_ino != NULL) return (0); /* * Step through the translation in the name. We do not `vput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the VFS_VGET for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the filesystem has any hard links other than ".." 
* that point backwards in the directory structure. */ pdp = vdp; if (flags & ISDOTDOT) { error = vn_vget_ino(pdp, ino, cnp->cn_lkflags, &tdp); if (error) return (error); /* * Recheck that ".." entry in the vdp directory points * to the inode we looked up before vdp lock was * dropped. */ error = ufs_lookup_ino(pdp, NULL, cnp, &ino1); if (error) { vput(tdp); return (error); } if (ino1 != ino) { vput(tdp); goto restart; } *vpp = tdp; } else if (dp->i_number == ino) { VREF(vdp); /* we want ourself, ie "." */ /* * When we lookup "." we still can be asked to lock it * differently. */ ltype = cnp->cn_lkflags & LK_TYPE_MASK; if (ltype != VOP_ISLOCKED(vdp)) { if (ltype == LK_EXCLUSIVE) vn_lock(vdp, LK_UPGRADE | LK_RETRY); else /* if (ltype == LK_SHARED) */ vn_lock(vdp, LK_DOWNGRADE | LK_RETRY); /* * Relock for the "." case may left us with * reclaimed vnode. */ if (vdp->v_iflag & VI_DOOMED) { vrele(vdp); return (ENOENT); } } *vpp = vdp; } else { error = VFS_VGET(pdp->v_mount, ino, cnp->cn_lkflags, &tdp); if (error) return (error); *vpp = tdp; } /* * Insert name into cache if appropriate. 
*/ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); return (0); } void ufs_dirbad(ip, offset, how) struct inode *ip; doff_t offset; char *how; { struct mount *mp; mp = ITOV(ip)->v_mount; if ((mp->mnt_flag & MNT_RDONLY) == 0) panic("ufs_dirbad: %s: bad dir ino %ju at offset %ld: %s", mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, (long)offset, how); else (void)printf("%s: bad dir ino %ju at offset %ld: %s\n", mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, (long)offset, how); } /* * Do consistency checking on a directory entry: * record length must be multiple of 4 * entry must fit in rest of its DIRBLKSIZ block * record must be large enough to contain entry - * name is not longer than MAXNAMLEN + * name is not longer than UFS_MAXNAMLEN * name must be as long as advertised, and null terminated */ int ufs_dirbadentry(dp, ep, entryoffsetinblock) struct vnode *dp; struct direct *ep; int entryoffsetinblock; { int i, namlen; # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || - ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { + ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > UFS_MAXNAMLEN) { /*return (1); */ printf("First bad\n"); goto bad; } if (ep->d_ino == 0) return (0); for (i = 0; i < namlen; i++) if (ep->d_name[i] == '\0') { /*return (1); */ printf("Second bad\n"); goto bad; } if (ep->d_name[i]) goto bad; return (0); bad: return (1); } /* * Construct a new directory entry after a call to namei, using the * parameters that it left in the componentname argument cnp. The * argument ip is the inode to which the new directory entry will refer. 
*/ void ufs_makedirentry(ip, cnp, newdirp) struct inode *ip; struct componentname *cnp; struct direct *newdirp; { #ifdef INVARIANTS if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_makedirentry: missing name"); #endif newdirp->d_ino = ip->i_number; newdirp->d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) newdirp->d_type = IFTODT(ip->i_mode); else { newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) { u_char tmp = newdirp->d_namlen; newdirp->d_namlen = newdirp->d_type; newdirp->d_type = tmp; } # endif } } /* * Write a directory entry after a call to namei, using the parameters * that it left in nameidata. The argument dirp is the new directory * entry contents. Dvp is a pointer to the directory to be written, * which was left locked by namei. Remaining parameters (dp->i_offset, * dp->i_count) indicate how the space for the new entry is to be obtained. * Non-null bp indicates that a directory is being created (for the * soft dependency code). */ int ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) struct vnode *dvp; struct vnode *tvp; struct direct *dirp; struct componentname *cnp; struct buf *newdirbp; int isrename; { struct ucred *cr; struct thread *td; int newentrysize; struct inode *dp; struct buf *bp; u_int dsize; struct direct *ep, *nep; u_int64_t old_isize; int error, ret, blkoff, loc, spacefree, flags, namlen; char *dirbuf; td = curthread; /* XXX */ cr = td->td_ucred; dp = VTOI(dvp); newentrysize = DIRSIZ(OFSFMT(dvp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. 
*/ if (dp->i_offset & (DIRBLKSIZ - 1)) panic("ufs_direnter: newblk"); flags = BA_CLRBUF; if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) flags |= IO_SYNC; #ifdef QUOTA if ((error = getinoquota(dp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } #endif old_isize = dp->i_size; vnode_pager_setsize(dvp, (u_long)dp->i_offset + DIRBLKSIZ); if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, cr, flags, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); vnode_pager_setsize(dvp, (u_long)old_isize); return (error); } dp->i_size = dp->i_offset + DIRBLKSIZ; DIP_SET(dp, i_size, dp->i_size); dp->i_endoff = dp->i_size; dp->i_flag |= IN_CHANGE | IN_UPDATE; dirp->d_reclen = DIRBLKSIZ; blkoff = dp->i_offset & (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { ufsdirhash_newblk(dp, dp->i_offset); ufsdirhash_add(dp, dirp, dp->i_offset); ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, dp->i_offset); } #endif if (DOINGSOFTDEP(dvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff += DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } if (softdep_setup_directory_add(bp, dp, dp->i_offset, dirp->d_ino, newdirbp, 1)) dp->i_flag |= IN_NEEDSYNC; if (newdirbp) bdwrite(newdirbp); bdwrite(bp); if ((dp->i_flag & IN_NEEDSYNC) == 0) return (UFS_UPDATE(dvp, 0)); /* * We have just allocated a directory block in an * indirect block. We must prevent holes in the * directory created if directory entries are * written out of order. To accomplish this we * fsync when we extend a directory into indirects. 
* During rename it's not safe to drop the tvp lock * so sync must be delayed until it is. * * This synchronous step could be removed if fsck and * the kernel were taught to fill in sparse * directories rather than panic. */ if (isrename) return (0); if (tvp != NULL) VOP_UNLOCK(tvp, 0); (void) VOP_FSYNC(dvp, MNT_WAIT, td); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); return (error); } if (DOINGASYNC(dvp)) { bdwrite(bp); return (UFS_UPDATE(dvp, 0)); } error = bwrite(bp); ret = UFS_UPDATE(dvp, 1); if (error == 0) return (ret); return (error); } /* * If dp->i_count is non-zero, then namei found space for the new * entry in the range dp->i_offset to dp->i_offset + dp->i_count * in the directory. To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable * chunk at the end. */ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (dp->i_offset + dp->i_count > dp->i_size) { dp->i_size = dp->i_offset + dp->i_count; DIP_SET(dp, i_size, dp->i_size); } /* * Get the block containing the space for the new directory entry. */ error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); if (error) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; spacefree = ep->d_reclen - dsize; for (loc = ep->d_reclen; loc < dp->i_count; ) { nep = (struct direct *)(dirbuf + loc); /* Trim the existing slot (NB: dsize may be zero). 
*/ ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); /* Read nep->d_reclen now as the bcopy() may clobber it. */ loc += nep->d_reclen; if (nep->d_ino == 0) { /* * A mid-block unused entry. Such entries are * never created by the kernel, but fsck_ffs * can create them (and it doesn't fix them). * * Add up the free space, and initialise the * relocated entry since we don't bcopy it. */ spacefree += nep->d_reclen; ep->d_ino = 0; dsize = 0; continue; } dsize = DIRSIZ(OFSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_move(dp, nep, dp->i_offset + ((char *)nep - dirbuf), dp->i_offset + ((char *)ep - dirbuf)); #endif if (DOINGSOFTDEP(dvp)) softdep_change_directoryentry_offset(bp, dp, dirbuf, (caddr_t)nep, (caddr_t)ep, dsize); else bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Here, `ep' points to a directory entry containing `dsize' in-use * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, * then the entry is completely unused (dsize == 0). The value * of ep->d_reclen is always indeterminate. * * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. 
*/ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dvp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (ep->d_ino == 0 || (ep->d_ino == WINO && namlen == dirp->d_namlen && bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); #endif bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, dirbuf - (dp->i_offset & (DIRBLKSIZ - 1)), rounddown2(dp->i_offset, DIRBLKSIZ)); #endif if (DOINGSOFTDEP(dvp)) { (void) softdep_setup_directory_add(bp, dp, dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); if (newdirbp != NULL) bdwrite(newdirbp); bdwrite(bp); } else { if (DOINGASYNC(dvp)) { bdwrite(bp); error = 0; } else { error = bwrite(bp); } } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If all went well, and the directory can be shortened, proceed * with the truncation. Note that we have to unlock the inode for * the entry that we just entered, as the truncation may need to * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ if (isrename == 0 && error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { if (tvp != NULL) VOP_UNLOCK(tvp, 0); error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_NORMAL | (DOINGASYNC(dvp) ? 
0 : IO_SYNC), cr); if (error != 0) vn_printf(dvp, "ufs_direnter: failed to truncate " "err %d", error); #ifdef UFS_DIRHASH if (error == 0 && dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); #endif error = 0; if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); } return (error); } /* * Remove a directory entry after a call to namei, using * the parameters which it left in nameidata. The entry * dp->i_offset contains the offset into the directory of the * entry to be eliminated. The dp->i_count field contains the * size of the previous record in the directory. If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ int ufs_dirremove(dvp, ip, flags, isrmdir) struct vnode *dvp; struct inode *ip; int flags; int isrmdir; { struct inode *dp; struct direct *ep, *rep; struct buf *bp; int error; dp = VTOI(dvp); /* * Adjust the link count early so softdep can block if necessary. */ if (ip) { ip->i_effnlink--; if (DOINGSOFTDEP(dvp)) { softdep_setup_unlink(dp, ip); } else { ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; } } if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to WINO. */ if ((error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) return (error); ep->d_ino = WINO; ep->d_type = DT_WHT; goto out; } if ((error = UFS_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) return (error); /* Set 'rep' to the entry being removed. */ if (dp->i_count == 0) rep = ep; else rep = (struct direct *)((char *)ep + ep->d_reclen); #ifdef UFS_DIRHASH /* * Remove the dirhash entry. This is complicated by the fact * that `ep' is the previous entry when dp->i_count != 0. 
*/ if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, rep, dp->i_offset); #endif if (ip && rep->d_ino != ip->i_number) panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n", (uintmax_t)ip->i_number, (uintmax_t)rep->d_ino); if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ ep->d_ino = 0; } else { /* * Collapse new free space into previous entry. */ ep->d_reclen += rep->d_reclen; } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, (char *)ep - ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), rounddown2(dp->i_offset, DIRBLKSIZ)); #endif out: error = 0; if (DOINGSOFTDEP(dvp)) { if (ip) softdep_setup_remove(bp, dp, ip, isrmdir); if (softdep_slowdown(dvp)) error = bwrite(bp); else bdwrite(bp); } else { if (flags & DOWHITEOUT) error = bwrite(bp); else if (DOINGASYNC(dvp)) bdwrite(bp); else error = bwrite(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if (ip != NULL && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_effnlink == 0) UFS_SNAPGONE(ip); return (error); } /* * Rewrite an existing directory entry to point at the inode * supplied. The parameters describing the directory entry are * set up by a call to namei. */ int ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) struct inode *dp, *oip; ino_t newinum; int newtype; int isrmdir; { struct buf *bp; struct direct *ep; struct vnode *vdp = ITOV(dp); int error; /* * Drop the link before we lock the buf so softdep can block if * necessary. */ oip->i_effnlink--; if (DOINGSOFTDEP(vdp)) { softdep_setup_unlink(dp, oip); } else { oip->i_nlink--; DIP_SET(oip, i_nlink, oip->i_nlink); oip->i_flag |= IN_CHANGE; } error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); if (ep->d_namlen == 2 && ep->d_name[1] == '.' && ep->d_name[0] == '.' 
&& ep->d_ino != oip->i_number) { brelse(bp); return (EIDRM); } ep->d_ino = newinum; if (!OFSFMT(vdp)) ep->d_type = newtype; if (DOINGSOFTDEP(vdp)) { softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); } else { if (DOINGASYNC(vdp)) { bdwrite(bp); error = 0; } else { error = bwrite(bp); } } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_effnlink == 0) UFS_SNAPGONE(oip); return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. * * Using a struct dirtemplate here is not precisely * what we want, but better than using a struct direct. * * NB: does not handle corrupted directories. */ int ufs_dirempty(ip, parentino, cred) struct inode *ip; ino_t parentino; struct ucred *cred; { doff_t off; struct dirtemplate dbuf; struct direct *dp = (struct direct *)&dbuf; int error, namlen; ssize_t count; #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) for (off = 0; off < ip->i_size; off += dp->d_reclen) { error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, &count, (struct thread *)0); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. */ if (error || count != 0) return (0); /* avoid infinite loops */ if (dp->d_reclen == 0) return (0); /* skip empty entries */ if (dp->d_ino == 0 || dp->d_ino == WINO) continue; /* accept only "." and ".." */ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(ITOV(ip))) namlen = dp->d_type; else namlen = dp->d_namlen; # else namlen = dp->d_namlen; # endif if (namlen > 2) return (0); if (dp->d_name[0] != '.') return (0); /* * At this point namlen must be 1 or 2. * 1 implies ".", 2 implies ".." if second * char is also "." */ if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' 
&& dp->d_ino == parentino) continue; return (0); } return (1); } static int ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino, struct vnode **dd_vp) { struct dirtemplate dirbuf; struct vnode *ddvp; int error, namlen; ASSERT_VOP_LOCKED(vp, "ufs_dir_dd_ino"); if (vp->v_type != VDIR) return (ENOTDIR); /* * First check to see if we have it in the name cache. */ if ((ddvp = vn_dir_dd_ino(vp)) != NULL) { KASSERT(ddvp->v_mount == vp->v_mount, ("ufs_dir_dd_ino: Unexpected mount point crossing")); *dd_ino = VTOI(ddvp)->i_number; *dd_vp = ddvp; return (0); } /* * Have to read the directory. */ error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL, NULL); if (error != 0) return (error); #if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vp)) namlen = dirbuf.dotdot_type; else namlen = dirbuf.dotdot_namlen; #else namlen = dirbuf.dotdot_namlen; #endif if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') return (ENOTDIR); *dd_ino = dirbuf.dotdot_ino; *dd_vp = NULL; return (0); } /* * Check if source directory is in the path of the target directory. 
*/
/*
 * Starting at `target', follow ".." entries upward until one of the
 * stop conditions below is met.
 *
 * Returns:
 *	EEXIST	target itself is the source directory;
 *	0	target is the parent or the root, or the walk reached
 *		ROOTINO or parent_ino without meeting source_ino;
 *	EINVAL	source_ino was found among target's ancestors;
 *	other	error from ufs_dir_dd_ino() or VFS_VGET().  When the
 *		non-blocking VFS_VGET() fails, *wait_ino is set to the
 *		inode number that could not be obtained (it is 0 otherwise).
 *
 * Panics if ufs_dir_dd_ino() reports ENOTDIR.
 *
 * Every vnode picked up during the walk is vput() before returning;
 * target's own vnode is never released here.
 */
int
ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target,
    struct ucred *cred, ino_t *wait_ino)
{
	struct mount *mp;
	struct vnode *tvp, *vp, *vp1;
	int error;
	ino_t dd_ino;

	vp = tvp = ITOV(target);
	mp = vp->v_mount;
	*wait_ino = 0;
	if (target->i_number == source_ino)
		return (EEXIST);
	if (target->i_number == parent_ino)
		return (0);
	if (target->i_number == ROOTINO)
		return (0);
	for (;;) {
		/*
		 * ufs_dir_dd_ino() can fail without storing to vp1.
		 * Reset it each time round so the cleanup below never
		 * sees an uninitialized pointer (first iteration) or a
		 * stale alias of vp (later iterations), which would
		 * release the same vnode twice.
		 */
		vp1 = NULL;
		error = ufs_dir_dd_ino(vp, cred, &dd_ino, &vp1);
		if (error != 0)
			break;
		if (dd_ino == source_ino) {
			error = EINVAL;
			break;
		}
		if (dd_ino == ROOTINO)
			break;
		if (dd_ino == parent_ino)
			break;
		if (vp1 == NULL) {
			error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT,
			    &vp1);
			if (error != 0) {
				/* Nothing was obtained; tell the caller what to wait for. */
				vp1 = NULL;
				*wait_ino = dd_ino;
				break;
			}
		}
		KASSERT(dd_ino == VTOI(vp1)->i_number,
		    ("directory %ju reparented\n",
		    (uintmax_t)VTOI(vp1)->i_number));
		if (vp != tvp)
			vput(vp);
		vp = vp1;
	}

	if (error == ENOTDIR)
		panic("checkpath: .. not a directory\n");
	/* vp1 is non-NULL only if the last lookup returned a vnode not yet adopted as vp. */
	if (vp1 != NULL)
		vput(vp1);
	if (vp != tvp)
		vput(vp);
	return (error);
}