Index: head/lib/libufs/Makefile =================================================================== --- head/lib/libufs/Makefile (revision 328425) +++ head/lib/libufs/Makefile (revision 328426) @@ -1,34 +1,36 @@ # $FreeBSD$ PACKAGE=lib${LIB} LIB= ufs SHLIBDIR?= /lib SHLIB_MAJOR= 6 SRCS= block.c cgroup.c crc32.c inode.c sblock.c type.c ffs_subr.c ffs_tables.c INCS= libufs.h MAN= bread.3 cgread.3 libufs.3 sbread.3 ufs_disk_close.3 MLINKS+= bread.3 bwrite.3 MLINKS+= bread.3 berase.3 MLINKS+= cgread.3 cgread1.3 MLINKS+= cgread.3 cgget.3 MLINKS+= cgread.3 cgwrite.3 MLINKS+= cgread.3 cgwrite1.3 MLINKS+= cgread.3 cgput.3 MLINKS+= sbread.3 sbwrite.3 +MLINKS+= sbread.3 sbget.3 +MLINKS+= sbread.3 sbput.3 MLINKS+= ufs_disk_close.3 ufs_disk_fillout.3 MLINKS+= ufs_disk_close.3 ufs_disk_fillout_blank.3 MLINKS+= ufs_disk_close.3 ufs_disk_write.3 .PATH: ${SRCTOP}/sys/libkern ${SRCTOP}/sys/ufs/ffs WARNS?= 2 CFLAGS+= -D_LIBUFS .if defined(LIBUFS_DEBUG) CFLAGS+= -D_LIBUFS_DEBUGGING .endif CFLAGS+= -I${.CURDIR} .include Index: head/lib/libufs/libufs.h =================================================================== --- head/lib/libufs/libufs.h (revision 328425) +++ head/lib/libufs/libufs.h (revision 328426) @@ -1,158 +1,165 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __LIBUFS_H__ #define __LIBUFS_H__ /* * libufs structures. */ /* * userland ufs disk. */ struct uufsd { const char *d_name; /* disk name */ int d_ufs; /* decimal UFS version */ int d_fd; /* raw device file descriptor */ long d_bsize; /* device bsize */ ufs2_daddr_t d_sblock; /* superblock location */ struct csum *d_sbcsum; /* Superblock summary info */ caddr_t d_inoblock; /* inode block */ uint32_t d_inomin; /* low inode (not ino_t for ABI compat) */ uint32_t d_inomax; /* high inode (not ino_t for ABI compat) */ union { struct fs d_fs; /* filesystem information */ char d_sb[MAXBSIZE]; /* superblock as buffer */ } d_sbunion; union { struct cg d_cg; /* cylinder group */ char d_buf[MAXBSIZE]; /* cylinder group storage */ } d_cgunion; int d_ccg; /* current cylinder group */ int d_lcg; /* last cylinder group (in d_cg) */ const char *d_error; /* human readable disk error */ int d_mine; /* internal flags */ #define d_fs d_sbunion.d_fs #define d_sb d_sbunion.d_sb #define d_cg d_cgunion.d_cg }; /* * libufs macros (internal, non-exported). */ #ifdef _LIBUFS /* * Trace steps through libufs, to be used at entry and erroneous return. */ static inline void ERROR(struct uufsd *u, const char *str) { #ifdef _LIBUFS_DEBUGGING if (str != NULL) { fprintf(stderr, "libufs: %s", str); if (errno != 0) fprintf(stderr, ": %s", strerror(errno)); fprintf(stderr, "\n"); } #endif if (u != NULL) u->d_error = str; } #endif /* _LIBUFS */ __BEGIN_DECLS /* * libufs prototypes. */ /* + * ffs_subr.c + */ +void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); +void ffs_fragacct(struct fs *, int, int32_t [], int); +int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_sbget(void *, struct fs **, off_t, char *, + int (*)(void *, off_t, void **, int)); +int ffs_sbput(void *, struct fs *, off_t, + int (*)(void *, off_t, void *, int)); + +/* * block.c */ ssize_t bread(struct uufsd *, ufs2_daddr_t, void *, size_t); ssize_t bwrite(struct uufsd *, ufs2_daddr_t, const void *, size_t); int berase(struct uufsd *, ufs2_daddr_t, ufs2_daddr_t); /* * cgroup.c */ ufs2_daddr_t cgballoc(struct uufsd *); int cgbfree(struct uufsd *, ufs2_daddr_t, long); ino_t cgialloc(struct uufsd *); int cgget(struct uufsd *, int, struct cg *); int cgput(struct uufsd *, struct cg *); int cgread(struct uufsd *); int cgread1(struct uufsd *, int); int cgwrite(struct uufsd *); int cgwrite1(struct uufsd *, int); /* * inode.c */ int getino(struct uufsd *, void **, ino_t, int *); int putino(struct uufsd *); /* * sblock.c */ int sbread(struct uufsd *); int sbwrite(struct uufsd *, int); +/* low level superblock read/write functions */ +int sbget(int, struct fs **, off_t); +int sbput(int, struct fs *, int); /* * type.c */ int ufs_disk_close(struct uufsd *); int ufs_disk_fillout(struct uufsd *, const char *); int ufs_disk_fillout_blank(struct uufsd *, const char *); int ufs_disk_write(struct uufsd *); - -/* - * ffs_subr.c - */ -void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); -void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); -void ffs_fragacct(struct fs *, int, int32_t [], int); -int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); -int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); -void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); /* * crc32c.c */ uint32_t calculate_crc32c(uint32_t, const void *, size_t); __END_DECLS #endif /* __LIBUFS_H__ */ Index: head/lib/libufs/sblock.c =================================================================== --- head/lib/libufs/sblock.c (revision 328425) +++ head/lib/libufs/sblock.c (revision 328426) @@ -1,171 +1,202 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include -static int superblocks[] = SBLOCKSEARCH; - int sbread(struct uufsd *disk) { - uint8_t block[MAXBSIZE]; struct fs *fs; - int sb, superblock; - int i, size, blks; - uint8_t *space; ERROR(disk, NULL); - fs = &disk->d_fs; - superblock = superblocks[0]; - - for (sb = 0; (superblock = superblocks[sb]) != -1; sb++) { - if (bread(disk, superblock, disk->d_sb, SBLOCKSIZE) == -1) { + if ((errno = sbget(disk->d_fd, &fs, -1)) != 0) { + switch (errno) { + case EIO: ERROR(disk, "non-existent or truncated superblock"); - return (-1); + break; + case ENOENT: + ERROR(disk, "no usable known superblock found"); + break; + case ENOSPC: + ERROR(disk, "failed to allocate space for superblock " + "information"); + break; + case EINVAL: + ERROR(disk, "The previous newfs operation on this " + "volume did not complete.\nYou must complete " + "newfs before using this volume."); + break; + default: + ERROR(disk, "unknown superblock read error"); + errno = EIO; + break; } - if (fs->fs_magic == FS_UFS1_MAGIC) - disk->d_ufs = 1; - if (fs->fs_magic == FS_UFS2_MAGIC && - fs->fs_sblockloc == superblock) - disk->d_ufs = 2; - if (fs->fs_bsize <= MAXBSIZE && - (size_t)fs->fs_bsize >= sizeof(*fs)) { - if (disk->d_ufs) - break; - } disk->d_ufs = 0; - } - if (superblock == -1 || disk->d_ufs == 0) { - /* - * Other error cases will result in errno being set, here we - * must set it to indicate no superblock could be found with - * which to associate this disk/filesystem. - */ - ERROR(disk, "no usable known superblock found"); - errno = ENOENT; return (-1); } + memcpy(&disk->d_fs, fs, fs->fs_sbsize); + free(fs); + fs = &disk->d_fs; + if (fs->fs_magic == FS_UFS1_MAGIC) + disk->d_ufs = 1; + if (fs->fs_magic == FS_UFS2_MAGIC) + disk->d_ufs = 2; disk->d_bsize = fs->fs_fsize / fsbtodb(fs, 1); - disk->d_sblock = superblock / disk->d_bsize; - /* - * Read in the superblock summary information. - */ - size = fs->fs_cssize; - blks = howmany(size, fs->fs_fsize); - size += fs->fs_ncg * sizeof(int32_t); - space = malloc(size); - if (space == NULL) { - ERROR(disk, "failed to allocate space for summary information"); - return (-1); - } - fs->fs_csp = (struct csum *)space; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - if (bread(disk, fsbtodb(fs, fs->fs_csaddr + i), block, size) - == -1) { - ERROR(disk, "Failed to read sb summary information"); - free(fs->fs_csp); - return (-1); - } - bcopy(block, space, size); - space += size; - } - fs->fs_maxcluster = (uint32_t *)space; + disk->d_sblock = fs->fs_sblockloc / disk->d_bsize; disk->d_sbcsum = fs->fs_csp; - return (0); } int sbwrite(struct uufsd *disk, int all) { struct fs *fs; - int blks, size; - uint8_t *space; - unsigned i; ERROR(disk, NULL); fs = &disk->d_fs; - - if (!disk->d_sblock) { - disk->d_sblock = disk->d_fs.fs_sblockloc / disk->d_bsize; - } - - if (bwrite(disk, disk->d_sblock, fs, SBLOCKSIZE) == -1) { - ERROR(disk, "failed to write superblock"); + if ((errno = sbput(disk->d_fd, fs, all ? fs->fs_ncg : 0)) != 0) { + switch (errno) { + case EIO: + ERROR(disk, "failed to write superblock"); + break; + default: + ERROR(disk, "unknown superblock write error"); + errno = EIO; + break; + } return (-1); } - /* - * Write superblock summary information. - */ - blks = howmany(fs->fs_cssize, fs->fs_fsize); - space = (uint8_t *)disk->d_sbcsum; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - if (bwrite(disk, fsbtodb(fs, fs->fs_csaddr + i), space, size) - == -1) { - ERROR(disk, "Failed to write sb summary information"); + return (0); +} + +/* + * These are the low-level functions that actually read and write + * the superblock and its associated data. The actual work is done by + * the functions ffs_sbget and ffs_sbput in /sys/ufs/ffs/ffs_subr.c. + */ +static int use_pread(void *devfd, off_t loc, void **bufp, int size); +static int use_pwrite(void *devfd, off_t loc, void *buf, int size); + +/* + * Read a superblock from the devfd device allocating memory returned + * in fsp. Also read the superblock summary information. + */ +int +sbget(int devfd, struct fs **fsp, off_t sblockloc) +{ + + return (ffs_sbget(&devfd, fsp, sblockloc, "user", use_pread)); +} + +/* + * A read function for use by user-level programs using libufs. + */ +static int +use_pread(void *devfd, off_t loc, void **bufp, int size) +{ + int fd; + + fd = *(int *)devfd; + if ((*bufp = malloc(size)) == NULL) + return (ENOSPC); + if (pread(fd, *bufp, size, loc) != size) + return (EIO); + return (0); +} + +/* + * Write a superblock to the devfd device from the memory pointed to by fs. + * Also write out the superblock summary information but do not free the + * summary information memory. + * + * Additionally write out numaltwrite of the alternate superblocks. Use + * fs->fs_ncg to write out all of the alternate superblocks. + */ +int +sbput(int devfd, struct fs *fs, int numaltwrite) +{ + struct csum *savedcsp; + off_t savedactualloc; + int i, error; + + if ((error = ffs_sbput(&devfd, fs, fs->fs_sblockactualloc, + use_pwrite)) != 0) + return (error); + if (numaltwrite == 0) + return (0); + savedactualloc = fs->fs_sblockactualloc; + savedcsp = fs->fs_csp; + fs->fs_csp = NULL; + for (i = 0; i < numaltwrite; i++) { + fs->fs_sblockactualloc = dbtob(fsbtodb(fs, cgsblock(fs, i))); + if ((error = ffs_sbput(&devfd, fs, fs->fs_sblockactualloc, + use_pwrite)) != 0) { + fs->fs_sblockactualloc = savedactualloc; + fs->fs_csp = savedcsp; return (-1); } - space += size; } - if (all) { - for (i = 0; i < fs->fs_ncg; i++) - if (bwrite(disk, fsbtodb(fs, cgsblock(fs, i)), - fs, SBLOCKSIZE) == -1) { - ERROR(disk, "failed to update a superblock"); - return (-1); - } - } + fs->fs_sblockactualloc = savedactualloc; + fs->fs_csp = savedcsp; + return (0); +} + +/* + * A write function for use by user-level programs using sbput in libufs. + */ +static int +use_pwrite(void *devfd, off_t loc, void *buf, int size) +{ + int fd; + + fd = *(int *)devfd; + if (pwrite(fd, buf, size, loc) != size) + return (EIO); return (0); } Index: head/lib/libufs/sbread.3 =================================================================== --- head/lib/libufs/sbread.3 (revision 328425) +++ head/lib/libufs/sbread.3 (revision 328426) @@ -1,81 +1,145 @@ .\" Author: Juli Mallett .\" Date: June 04, 2003 .\" Description: .\" Manual page for libufs functions: +.\" sbget(3) +.\" sbput(3) .\" sbread(3) .\" sbwrite(3) .\" .\" This file is in the public domain. .\" .\" $FreeBSD$ .\" -.Dd June 4, 2003 +.Dd January 19, 2018 .Dt SBREAD 3 .Os .Sh NAME -.Nm sbread , sbwrite +.Nm sbget , sbput , sbread , sbwrite .Nd read and write superblocks of a UFS file system .Sh LIBRARY .Lb libufs .Sh SYNOPSIS .In sys/param.h .In sys/mount.h .In ufs/ufs/ufsmount.h .In ufs/ufs/dinode.h .In ufs/ffs/fs.h .In libufs.h .Ft int +.Fn sbget "int devfd" "struct fs **fsp" "off_t sblockloc" +.Ft int +.Fn sbput "int devfd" "struct fs *fs" "int numaltwrite" +.Ft int .Fn sbread "struct uufsd *disk" .Ft int .Fn sbwrite "struct uufsd *disk" "int all" .Sh DESCRIPTION The +.Fn sbget +and .Fn sbread +functions provide superblock reads for +.Xr libufs 3 +consumers. +The +.Fn sbput and .Fn sbwrite -functions provide superblock reads and writes for +functions provide superblock writes for .Xr libufs 3 consumers. +.Pp The +.Fn sbget +function first allocates a buffer to hold the superblock. +Using the +.Va devfd +file descriptor that references the filesystem disk, +.Fn sbget +reads the superblock located at the byte offset specified by +.Va sblockloc +into the allocated buffer. +If successful, it returns a pointer to the buffer containing the superblock in +.Va fsp . +The +.Fn sbget +function is safe to use in threaded applications. +.Pp +The +.Fn sbput +function writes the superblock specified by +.Va fs +to the location from which it was read on the disk referenced by the +.Va devfd +file descriptor. +Additionally, the +.Fn sbput +function will update the first +.Va numaltwrite +alternate superblock locations. +To update all the alternate superblocks, +specify a +.Va numaltwrite +value of +.Va fs->fs_ncg . +The +.Fn sbput +function is safe to use in threaded applications. +Note that the +.Fn sbput +function needs to be called only if the superblock has been +modified and the on-disk copy needs to be updated. +.Pp +The .Fn sbread -and +function reads the standard filesystem superblock into the +.Va d_sb , +structure embedded in the given user-land UFS disk structure. +.Pp +The .Fn sbwrite -functions operate on the superblock field, +function writes the superblock from the .Va d_sb , -associated with a given userland UFS disk structure. +structure embedded in the given user-land UFS disk structure +to the location from which it was read. Additionally, the .Fn sbwrite -function will write to all superblock locations if the +function will write to all the alternate superblock locations if the .Fa all value is non-zero. .Sh RETURN VALUES -.Rv -std sbread sbwrite +.Rv -std sbget sbput sbread sbwrite .Sh ERRORS -The function +The +.Fn sbget +and .Fn sbread -may fail and set +functions may fail and set .Va errno for any of the errors specified for the library function .Xr bread 3 . Additionally, it may follow the .Xr libufs 3 error methodologies in situations where no usable superblock could be found. .Pp -The function +The +.Fn sbput +and .Fn sbwrite -may fail and set +functions may fail and set .Va errno for any of the errors specified for the library function .Xr bwrite 3 . .Sh SEE ALSO .Xr bread 3 , .Xr bwrite 3 , .Xr libufs 3 .Sh HISTORY These functions first appeared as part of .Xr libufs 3 in .Fx 5.0 . .Sh AUTHORS .An Juli Mallett Aq Mt jmallett@FreeBSD.org Index: head/sbin/clri/Makefile =================================================================== --- head/sbin/clri/Makefile (revision 328425) +++ head/sbin/clri/Makefile (revision 328426) @@ -1,9 +1,10 @@ # @(#)Makefile 8.1 (Berkeley) 6/5/93 # $FreeBSD$ PACKAGE=runtime PROG= clri MAN= clri.8 +LIBADD= ufs WARNS?= 2 .include Index: head/sbin/clri/clri.c =================================================================== --- head/sbin/clri/clri.c (revision 328425) +++ head/sbin/clri/clri.c (revision 328426) @@ -1,161 +1,152 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rich $alz of BBN Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1990, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)clri.c 8.2 (Berkeley) 9/23/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include +#include #include #include #include #include -/* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; - static void usage(void) { (void)fprintf(stderr, "usage: clri special_device inode_number ...\n"); exit(1); } int main(int argc, char *argv[]) { - struct fs *sbp; + struct fs *fs; struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; char *ibuf[MAXBSIZE]; long generation, bsize; off_t offset; - int i, fd, inonum; - char *fs, sblock[SBLOCKSIZE]; + int fd, ret, inonum; + char *fsname; void *v = ibuf; if (argc < 3) usage(); - fs = *++argv; - sbp = NULL; + fsname = *++argv; /* get the superblock. */ - if ((fd = open(fs, O_RDWR, 0)) < 0) - err(1, "%s", fs); - for (i = 0; sblock_try[i] != -1; i++) { - if (lseek(fd, (off_t)(sblock_try[i]), SEEK_SET) < 0) - err(1, "%s", fs); - if (read(fd, sblock, sizeof(sblock)) != sizeof(sblock)) - errx(1, "%s: can't read superblock", fs); - sbp = (struct fs *)sblock; - if ((sbp->fs_magic == FS_UFS1_MAGIC || - (sbp->fs_magic == FS_UFS2_MAGIC && - sbp->fs_sblockloc == sblock_try[i])) && - sbp->fs_bsize <= MAXBSIZE && - sbp->fs_bsize >= (int)sizeof(struct fs)) - break; + if ((fd = open(fsname, O_RDWR, 0)) < 0) + err(1, "%s", fsname); + if ((ret = sbget(fd, &fs, -1)) != 0) { + switch (ret) { + case ENOENT: + warn("Cannot find file system superblock"); + return (1); + default: + warn("Unable to read file system superblock"); + return (1); + } } - if (sblock_try[i] == -1) - errx(2, "cannot find file system superblock"); - bsize = sbp->fs_bsize; + bsize = fs->fs_bsize; /* remaining arguments are inode numbers. */ while (*++argv) { /* get the inode number. */ if ((inonum = atoi(*argv)) <= 0) errx(1, "%s is not a valid inode number", *argv); (void)printf("clearing %d\n", inonum); /* read in the appropriate block. */ - offset = ino_to_fsba(sbp, inonum); /* inode to fs blk */ - offset = fsbtodb(sbp, offset); /* fs blk disk blk */ + offset = ino_to_fsba(fs, inonum); /* inode to fs blk */ + offset = fsbtodb(fs, offset); /* fs blk disk blk */ offset *= DEV_BSIZE; /* disk blk to bytes */ /* seek and read the block */ if (lseek(fd, offset, SEEK_SET) < 0) - err(1, "%s", fs); + err(1, "%s", fsname); if (read(fd, ibuf, bsize) != bsize) - err(1, "%s", fs); + err(1, "%s", fsname); - if (sbp->fs_magic == FS_UFS2_MAGIC) { + if (fs->fs_magic == FS_UFS2_MAGIC) { /* get the inode within the block. */ dp2 = &(((struct ufs2_dinode *)v) - [ino_to_fsbo(sbp, inonum)]); + [ino_to_fsbo(fs, inonum)]); /* clear the inode, and bump the generation count. */ generation = dp2->di_gen + 1; memset(dp2, 0, sizeof(*dp2)); dp2->di_gen = generation; } else { /* get the inode within the block. */ dp1 = &(((struct ufs1_dinode *)v) - [ino_to_fsbo(sbp, inonum)]); + [ino_to_fsbo(fs, inonum)]); /* clear the inode, and bump the generation count. */ generation = dp1->di_gen + 1; memset(dp1, 0, sizeof(*dp1)); dp1->di_gen = generation; } /* backup and write the block */ if (lseek(fd, (off_t)-bsize, SEEK_CUR) < 0) - err(1, "%s", fs); + err(1, "%s", fsname); if (write(fd, ibuf, bsize) != bsize) - err(1, "%s", fs); + err(1, "%s", fsname); (void)fsync(fd); } (void)close(fd); exit(0); } Index: head/sbin/dump/Makefile =================================================================== --- head/sbin/dump/Makefile (revision 328425) +++ head/sbin/dump/Makefile (revision 328426) @@ -1,25 +1,26 @@ # @(#)Makefile 8.1 (Berkeley) 6/5/93 # $FreeBSD$ # dump.h header file # itime.c reads /etc/dumpdates # main.c driver # optr.c operator interface # dumprmt.c handles remote tape via rmt(8) # tape.c handles the mag tape and opening/closing # traverse.c traverses the file system # unctime.c undo ctime # # DEBUG use local directory to find ddate and dumpdates # TDEBUG trace out the process forking PACKAGE=runtime PROG= dump LINKS= ${BINDIR}/dump ${BINDIR}/rdump CFLAGS+=-DRDUMP SRCS= itime.c main.c optr.c dumprmt.c tape.c traverse.c unctime.c cache.c MAN= dump.8 +LIBADD= ufs MLINKS= dump.8 rdump.8 WARNS?= 2 .include Index: head/sbin/dump/dump.h =================================================================== --- head/sbin/dump/dump.h (revision 328425) +++ head/sbin/dump/dump.h (revision 328426) @@ -1,185 +1,184 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)dump.h 8.2 (Berkeley) 4/28/95 * * $FreeBSD$ */ /* * Dump maps used to describe what is to be dumped. */ int mapsize; /* size of the state maps */ char *usedinomap; /* map of allocated inodes */ char *dumpdirmap; /* map of directories to be dumped */ char *dumpinomap; /* map of files to be dumped */ /* * Map manipulation macros. */ #define SETINO(ino, map) \ map[(u_int)((ino) - 1) / CHAR_BIT] |= \ 1 << ((u_int)((ino) - 1) % CHAR_BIT) #define CLRINO(ino, map) \ map[(u_int)((ino) - 1) / CHAR_BIT] &= \ ~(1 << ((u_int)((ino) - 1) % CHAR_BIT)) #define TSTINO(ino, map) \ (map[(u_int)((ino) - 1) / CHAR_BIT] & \ (1 << ((u_int)((ino) - 1) % CHAR_BIT))) /* * All calculations done in 0.1" units! */ char *disk; /* name of the disk file */ char *tape; /* name of the tape file */ char *popenout; /* popen(3) per-"tape" command */ char *dumpdates; /* name of the file containing dump date information*/ char *temp; /* name of the file for doing rewrite of dumpdates */ int lastlevel; /* dump level of previous dump */ int level; /* dump level of this dump */ int uflag; /* update flag */ int diskfd; /* disk file descriptor */ int tapefd; /* tape file descriptor */ int pipeout; /* true => output to standard output */ ino_t curino; /* current inumber; used globally */ int newtape; /* new tape flag */ int density; /* density in 0.1" units */ long tapesize; /* estimated tape size, blocks */ long tsize; /* tape size in 0.1" units */ long asize; /* number of 0.1" units written on current tape */ int etapes; /* estimated number of tapes */ int nonodump; /* if set, do not honor UF_NODUMP user flags */ int unlimited; /* if set, write to end of medium */ int cachesize; /* size of block cache in bytes */ int rsync_friendly; /* be friendly with rsync */ int notify; /* notify operator flag */ int blockswritten; /* number of blocks written on current tape */ int tapeno; /* current tape number */ time_t tstart_writing; /* when started writing the first tape block */ time_t tend_writing; /* after writing the last tape block */ int passno; /* current dump pass number */ struct fs *sblock; /* the file system super block */ -char sblock_buf[MAXBSIZE]; long dev_bsize; /* block size of underlying disk device */ int dev_bshift; /* log2(dev_bsize) */ int tp_bshift; /* log2(TP_BSIZE) */ /* operator interface functions */ void broadcast(const char *message); void infosch(int); void lastdump(int arg); /* int should be char */ void msg(const char *fmt, ...) __printflike(1, 2); void msgtail(const char *fmt, ...) __printflike(1, 2); int query(const char *question); void quit(const char *fmt, ...) __printflike(1, 2); void timeest(void); time_t unctime(char *str); /* mapping rouintes */ union dinode; int mapfiles(ino_t maxino, long *tapesize); int mapdirs(ino_t maxino, long *tapesize); /* file dumping routines */ void blkread(ufs2_daddr_t blkno, char *buf, int size); ssize_t cread(int fd, void *buf, size_t nbytes, off_t offset); void dumpino(union dinode *dp, ino_t ino); void dumpmap(char *map, int type, ino_t ino); void writeheader(ino_t ino); /* tape writing routines */ int alloctape(void); void close_rewind(void); void dumpblock(ufs2_daddr_t blkno, int size); void startnewtape(int top); void trewind(void); void writerec(char *dp, int isspcl); void Exit(int status) __dead2; void dumpabort(int signo) __dead2; void dump_getfstab(void); char *rawname(char *cp); union dinode *getinode(ino_t inum, int *mode); /* rdump routines */ #ifdef RDUMP void rmtclose(void); int rmthost(const char *host); int rmtopen(const char *tape, int mode); int rmtwrite(const char *buf, int count); #endif /* RDUMP */ void interrupt(int signo); /* in case operator bangs on console */ /* * Exit status codes */ #define X_FINOK 0 /* normal exit */ #define X_STARTUP 1 /* startup error */ #define X_REWRITE 2 /* restart writing from the check point */ #define X_ABORT 3 /* abort dump; don't attempt checkpointing */ #define OPGRENT "operator" /* group entry to notify */ struct fstab *fstabsearch(const char *key); /* search fs_file and fs_spec */ #ifndef NAME_MAX #define NAME_MAX 255 #endif /* * The contents of the file _PATH_DUMPDATES is maintained both on * a linked list, and then (eventually) arrayified. */ struct dumpdates { char dd_name[NAME_MAX+3]; int dd_level; time_t dd_ddate; }; int nddates; /* number of records (might be zero) */ struct dumpdates **ddatev; /* the arrayfied version */ void initdumptimes(void); void getdumptime(void); void putdumptime(void); #define ITITERATE(i, ddp) \ if (ddatev != NULL) \ for (ddp = ddatev[i = 0]; i < nddates; ddp = ddatev[++i]) #define DUMPFMTLEN 53 /* max device pathname length */ #define DUMPOUTFMT "%-*s %d %s" /* for printf */ /* name, level, ctime(date) */ #define DUMPINFMT "%s %d %[^\n]\n" /* inverse for scanf */ void sig(int signo); #ifndef _PATH_FSTAB #define _PATH_FSTAB "/etc/fstab" #endif Index: head/sbin/dump/main.c =================================================================== --- head/sbin/dump/main.c (revision 328425) +++ head/sbin/dump/main.c (revision 328426) @@ -1,780 +1,773 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1991, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/1/95"; #endif static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include "dump.h" #include "pathnames.h" int notify = 0; /* notify operator flag */ int snapdump = 0; /* dumping live filesystem, so use snapshot */ int blockswritten = 0; /* number of blocks written on current tape */ int tapeno = 0; /* current tape number */ int density = 0; /* density in bytes/0.1" " <- this is for hilit19 */ int ntrec = NTREC; /* # tape blocks in each tape record */ int cartridge = 0; /* Assume non-cartridge tape */ int cachesize = 0; /* block cache size (in bytes), defaults to 0 */ long dev_bsize = 1; /* recalculated below */ long blocksperfile; /* output blocks per file */ char *host = NULL; /* remote host (if any) */ -/* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; - static char *getmntpt(char *, int *); static long numarg(const char *, long, long); static void obsolete(int *, char **[]); static void usage(void) __dead2; int main(int argc, char *argv[]) { struct stat sb; ino_t ino; int dirty; union dinode *dp; struct fstab *dt; char *map, *mntpt; int ch, mode, mntflags; - int i, anydirskipped, bflag = 0, Tflag = 0, honorlevel = 1; + int i, ret, anydirskipped, bflag = 0, Tflag = 0, honorlevel = 1; int just_estimate = 0; ino_t maxino; char *tmsg; spcl.c_date = _time_to_time64(time(NULL)); tsize = 0; /* Default later, based on 'c' option for cart tapes */ dumpdates = _PATH_DUMPDATES; popenout = NULL; tape = NULL; temp = _PATH_DTMP; if (TP_BSIZE / DEV_BSIZE == 0 || TP_BSIZE % DEV_BSIZE != 0) quit("TP_BSIZE must be a multiple of DEV_BSIZE\n"); level = 0; rsync_friendly = 0; if (argc < 2) usage(); obsolete(&argc, &argv); while ((ch = getopt(argc, argv, "0123456789aB:b:C:cD:d:f:h:LnP:RrSs:T:uWw")) != -1) switch (ch) { /* dump level */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': level = 10 * level + ch - '0'; break; case 'a': /* `auto-size', Write to EOM. */ unlimited = 1; break; case 'B': /* blocks per output file */ blocksperfile = numarg("number of blocks per file", 1L, 0L); break; case 'b': /* blocks per tape write */ ntrec = numarg("number of blocks per write", 1L, 1000L); break; case 'C': cachesize = numarg("cachesize", 0, 0) * 1024 * 1024; break; case 'c': /* Tape is cart. not 9-track */ cartridge = 1; break; case 'D': dumpdates = optarg; break; case 'd': /* density, in bits per inch */ density = numarg("density", 10L, 327670L) / 10; if (density >= 625 && !bflag) ntrec = HIGHDENSITYTREC; break; case 'f': /* output file */ if (popenout != NULL) errx(X_STARTUP, "You cannot use the P and f " "flags together.\n"); tape = optarg; break; case 'h': honorlevel = numarg("honor level", 0L, 10L); break; case 'L': snapdump = 1; break; case 'n': /* notify operators */ notify = 1; break; case 'P': if (tape != NULL) errx(X_STARTUP, "You cannot use the P and f " "flags together.\n"); popenout = optarg; break; case 'r': /* store slightly less data to be friendly to rsync */ if (rsync_friendly < 1) rsync_friendly = 1; break; case 'R': /* store even less data to be friendlier to rsync */ if (rsync_friendly < 2) rsync_friendly = 2; break; case 'S': /* exit after estimating # of tapes */ just_estimate = 1; break; case 's': /* tape size, feet */ tsize = numarg("tape size", 1L, 0L) * 12 * 10; break; case 'T': /* time of last dump */ spcl.c_ddate = unctime(optarg); if (spcl.c_ddate < 0) { (void)fprintf(stderr, "bad time \"%s\"\n", optarg); exit(X_STARTUP); } Tflag = 1; lastlevel = -1; break; case 'u': /* update /etc/dumpdates */ uflag = 1; break; case 'W': /* what to do */ case 'w': lastdump(ch); exit(X_FINOK); /* do nothing else */ default: usage(); } argc -= optind; argv += optind; if (argc < 1) { (void)fprintf(stderr, "Must specify disk or file system\n"); exit(X_STARTUP); } disk = *argv++; argc--; if (argc >= 1) { (void)fprintf(stderr, "Unknown arguments to dump:"); while (argc--) (void)fprintf(stderr, " %s", *argv++); (void)fprintf(stderr, "\n"); exit(X_STARTUP); } if (rsync_friendly && (level > 0)) { (void)fprintf(stderr, "%s %s\n", "rsync friendly options", "can be used only with level 0 dumps."); exit(X_STARTUP); } if (Tflag && uflag) { (void)fprintf(stderr, "You cannot use the T and u flags together.\n"); exit(X_STARTUP); } if (popenout) { tape = "child pipeline process"; } else if (tape == NULL && (tape = getenv("TAPE")) == NULL) tape = _PATH_DEFTAPE; if (strcmp(tape, "-") == 0) { pipeout++; tape = "standard output"; } if (blocksperfile) blocksperfile = rounddown(blocksperfile, ntrec); else if (!unlimited) { /* * Determine how to default tape size and density * * density tape size * 9-track 1600 bpi (160 bytes/.1") 2300 ft. * 9-track 6250 bpi (625 bytes/.1") 2300 ft. * cartridge 8000 bpi (100 bytes/.1") 1700 ft. * (450*4 - slop) * hilit19 hits again: " */ if (density == 0) density = cartridge ? 100 : 160; if (tsize == 0) tsize = cartridge ? 1700L*120L : 2300L*120L; } if (strchr(tape, ':')) { host = tape; tape = strchr(host, ':'); *tape++ = '\0'; #ifdef RDUMP if (strchr(tape, '\n')) { (void)fprintf(stderr, "invalid characters in tape\n"); exit(X_STARTUP); } if (rmthost(host) == 0) exit(X_STARTUP); #else (void)fprintf(stderr, "remote dump not enabled\n"); exit(X_STARTUP); #endif } (void)setuid(getuid()); /* rmthost() is the only reason to be setuid */ if (signal(SIGHUP, SIG_IGN) != SIG_IGN) signal(SIGHUP, sig); if (signal(SIGTRAP, SIG_IGN) != SIG_IGN) signal(SIGTRAP, sig); if (signal(SIGFPE, SIG_IGN) != SIG_IGN) signal(SIGFPE, sig); if (signal(SIGBUS, SIG_IGN) != SIG_IGN) signal(SIGBUS, sig); if (signal(SIGSEGV, SIG_IGN) != SIG_IGN) signal(SIGSEGV, sig); if (signal(SIGTERM, SIG_IGN) != SIG_IGN) signal(SIGTERM, sig); if (signal(SIGINT, interrupt) == SIG_IGN) signal(SIGINT, SIG_IGN); dump_getfstab(); /* /etc/fstab snarfed */ /* * disk can be either the full special file name, * the suffix of the special file name, * the special name missing the leading '/', * the file system name with or without the leading '/'. */ dt = fstabsearch(disk); if (dt != NULL) { disk = rawname(dt->fs_spec); if (disk == NULL) errx(X_STARTUP, "%s: unknown file system", dt->fs_spec); (void)strncpy(spcl.c_dev, dt->fs_spec, NAMELEN); (void)strncpy(spcl.c_filesys, dt->fs_file, NAMELEN); } else { (void)strncpy(spcl.c_dev, disk, NAMELEN); (void)strncpy(spcl.c_filesys, "an unlisted file system", NAMELEN); } spcl.c_dev[NAMELEN-1]='\0'; spcl.c_filesys[NAMELEN-1]='\0'; if ((mntpt = getmntpt(disk, &mntflags)) != NULL) { if (mntflags & MNT_RDONLY) { if (snapdump != 0) { msg("WARNING: %s\n", "-L ignored for read-only filesystem."); snapdump = 0; } } else if (snapdump == 0) { msg("WARNING: %s\n", "should use -L when dumping live read-write " "filesystems!"); } else { char snapname[BUFSIZ], snapcmd[BUFSIZ]; snprintf(snapname, sizeof snapname, "%s/.snap", mntpt); if ((stat(snapname, &sb) < 0) || !S_ISDIR(sb.st_mode)) { msg("WARNING: %s %s\n", "-L requested but snapshot location", snapname); msg(" %s: %s\n", "is not a directory", "dump downgraded, -L ignored"); snapdump = 0; } else { snprintf(snapname, sizeof snapname, "%s/.snap/dump_snapshot", mntpt); snprintf(snapcmd, sizeof snapcmd, "%s %s %s", _PATH_MKSNAP_FFS, mntpt, snapname); unlink(snapname); if (system(snapcmd) != 0) errx(X_STARTUP, "Cannot create %s: %s\n", snapname, strerror(errno)); if ((diskfd = open(snapname, O_RDONLY)) < 0) { unlink(snapname); errx(X_STARTUP, "Cannot open %s: %s\n", snapname, strerror(errno)); } unlink(snapname); if (fstat(diskfd, &sb) != 0) err(X_STARTUP, "%s: stat", snapname); spcl.c_date = _time_to_time64(sb.st_mtime); } } } else if (snapdump != 0) { msg("WARNING: Cannot use -L on an unmounted filesystem.\n"); snapdump = 0; } if (snapdump == 0) { if ((diskfd = open(disk, O_RDONLY)) < 0) err(X_STARTUP, "Cannot open %s", disk); if (fstat(diskfd, &sb) != 0) err(X_STARTUP, "%s: stat", disk); if (S_ISDIR(sb.st_mode)) errx(X_STARTUP, "%s: unknown file system", disk); } (void)strcpy(spcl.c_label, "none"); (void)gethostname(spcl.c_host, NAMELEN); spcl.c_level = level; spcl.c_type = TS_TAPE; if (rsync_friendly) { /* don't store real dump times */ spcl.c_date = 0; spcl.c_ddate = 0; } if (spcl.c_date == 0) { tmsg = "the epoch\n"; } else { time_t t = _time64_to_time(spcl.c_date); tmsg = ctime(&t); } msg("Date of this level %d dump: %s", level, tmsg); if (!Tflag && (!rsync_friendly)) getdumptime(); /* /etc/dumpdates snarfed */ if (spcl.c_ddate == 0) { tmsg = "the epoch\n"; } else { time_t t = _time64_to_time(spcl.c_ddate); tmsg = ctime(&t); } if (lastlevel < 0) msg("Date of last (level unknown) dump: %s", tmsg); else msg("Date of last level %d dump: %s", lastlevel, tmsg); msg("Dumping %s%s ", snapdump ? "snapshot of ": "", disk); if (dt != NULL) msgtail("(%s) ", dt->fs_file); if (host) msgtail("to %s on host %s\n", tape, host); else msgtail("to %s\n", tape); sync(); - sblock = (struct fs *)sblock_buf; - for (i = 0; sblock_try[i] != -1; i++) { - sblock->fs_fsize = SBLOCKSIZE; /* needed in blkread */ - blkread(sblock_try[i]>>dev_bshift, (char *) sblock, SBLOCKSIZE); - if ((sblock->fs_magic == FS_UFS1_MAGIC || - (sblock->fs_magic == FS_UFS2_MAGIC && - sblock->fs_sblockloc == sblock_try[i])) && - sblock->fs_bsize <= MAXBSIZE && - sblock->fs_bsize >= sizeof(struct fs)) - break; + if ((ret = sbget(diskfd, &sblock, -1)) != 0) { + switch (ret) { + case ENOENT: + warn("Cannot find file system superblock"); + return (1); + default: + warn("Unable to read file system superblock"); + return (1); + } } - if (sblock_try[i] == -1) - quit("Cannot find file system superblock\n"); dev_bsize = sblock->fs_fsize / fsbtodb(sblock, 1); dev_bshift = ffs(dev_bsize) - 1; if (dev_bsize != (1 << dev_bshift)) quit("dev_bsize (%ld) is not a power of 2", dev_bsize); tp_bshift = ffs(TP_BSIZE) - 1; if (TP_BSIZE != (1 << tp_bshift)) quit("TP_BSIZE (%d) is not a power of 2", TP_BSIZE); maxino = sblock->fs_ipg * sblock->fs_ncg; mapsize = roundup(howmany(maxino, CHAR_BIT), TP_BSIZE); usedinomap = (char *)calloc((unsigned) mapsize, sizeof(char)); dumpdirmap = (char *)calloc((unsigned) mapsize, sizeof(char)); dumpinomap = (char *)calloc((unsigned) mapsize, sizeof(char)); tapesize = 3 * (howmany(mapsize * sizeof(char), TP_BSIZE) + 1); nonodump = spcl.c_level < honorlevel; passno = 1; setproctitle("%s: pass 1: regular files", disk); msg("mapping (Pass I) [regular files]\n"); anydirskipped = mapfiles(maxino, &tapesize); passno = 2; setproctitle("%s: pass 2: directories", disk); msg("mapping (Pass II) [directories]\n"); while (anydirskipped) { anydirskipped = mapdirs(maxino, &tapesize); } if (pipeout || unlimited) { tapesize += 10; /* 10 trailer blocks */ msg("estimated %ld tape blocks.\n", tapesize); } else { double fetapes; if (blocksperfile) fetapes = (double) tapesize / blocksperfile; else if (cartridge) { /* Estimate number of tapes, assuming streaming stops at the end of each block written, and not in mid-block. Assume no erroneous blocks; this can be compensated for with an artificially low tape size. */ fetapes = ( (double) tapesize /* blocks */ * TP_BSIZE /* bytes/block */ * (1.0/density) /* 0.1" / byte " */ + (double) tapesize /* blocks */ * (1.0/ntrec) /* streaming-stops per block */ * 15.48 /* 0.1" / streaming-stop " */ ) * (1.0 / tsize ); /* tape / 0.1" " */ } else { /* Estimate number of tapes, for old fashioned 9-track tape */ int tenthsperirg = (density == 625) ? 3 : 7; fetapes = ( (double) tapesize /* blocks */ * TP_BSIZE /* bytes / block */ * (1.0/density) /* 0.1" / byte " */ + (double) tapesize /* blocks */ * (1.0/ntrec) /* IRG's / block */ * tenthsperirg /* 0.1" / IRG " */ ) * (1.0 / tsize ); /* tape / 0.1" " */ } etapes = fetapes; /* truncating assignment */ etapes++; /* count the dumped inodes map on each additional tape */ tapesize += (etapes - 1) * (howmany(mapsize * sizeof(char), TP_BSIZE) + 1); tapesize += etapes + 10; /* headers + 10 trailer blks */ msg("estimated %ld tape blocks on %3.2f tape(s).\n", tapesize, fetapes); } /* * If the user only wants an estimate of the number of * tapes, exit now. */ if (just_estimate) exit(0); /* * Allocate tape buffer. */ if (!alloctape()) quit( "can't allocate tape buffers - try a smaller blocking factor.\n"); startnewtape(1); (void)time((time_t *)&(tstart_writing)); dumpmap(usedinomap, TS_CLRI, maxino - 1); passno = 3; setproctitle("%s: pass 3: directories", disk); msg("dumping (Pass III) [directories]\n"); dirty = 0; /* XXX just to get gcc to shut up */ for (map = dumpdirmap, ino = 1; ino < maxino; ino++) { if (((ino - 1) % CHAR_BIT) == 0) /* map is offset by 1 */ dirty = *map++; else dirty >>= 1; if ((dirty & 1) == 0) continue; /* * Skip directory inodes deleted and maybe reallocated */ dp = getinode(ino, &mode); if (mode != IFDIR) continue; (void)dumpino(dp, ino); } passno = 4; setproctitle("%s: pass 4: regular files", disk); msg("dumping (Pass IV) [regular files]\n"); for (map = dumpinomap, ino = 1; ino < maxino; ino++) { if (((ino - 1) % CHAR_BIT) == 0) /* map is offset by 1 */ dirty = *map++; else dirty >>= 1; if ((dirty & 1) == 0) continue; /* * Skip inodes deleted and reallocated as directories. */ dp = getinode(ino, &mode); if (mode == IFDIR) continue; (void)dumpino(dp, ino); } (void)time((time_t *)&(tend_writing)); spcl.c_type = TS_END; for (i = 0; i < ntrec; i++) writeheader(maxino - 1); if (pipeout) msg("DUMP: %jd tape blocks\n", (intmax_t)spcl.c_tapea); else msg("DUMP: %jd tape blocks on %d volume%s\n", (intmax_t)spcl.c_tapea, spcl.c_volume, (spcl.c_volume == 1) ? "" : "s"); /* report dump performance, avoid division through zero */ if (tend_writing - tstart_writing == 0) msg("finished in less than a second\n"); else msg("finished in %jd seconds, throughput %jd KBytes/sec\n", (intmax_t)tend_writing - tstart_writing, (intmax_t)(spcl.c_tapea / (tend_writing - tstart_writing))); putdumptime(); trewind(); broadcast("DUMP IS DONE!\a\a\n"); msg("DUMP IS DONE\n"); Exit(X_FINOK); /* NOTREACHED */ } static void usage(void) { fprintf(stderr, "usage: dump [-0123456789acLnSu] [-B records] [-b blocksize] [-C cachesize]\n" " [-D dumpdates] [-d density] [-f file | -P pipecommand] [-h level]\n" " [-s feet] [-T date] filesystem\n" " dump -W | -w\n"); exit(X_STARTUP); } /* * Check to see if a disk is currently mounted. */ static char * getmntpt(char *name, int *mntflagsp) { long mntsize, i; struct statfs *mntbuf; mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) { if (!strcmp(mntbuf[i].f_mntfromname, name)) { *mntflagsp = mntbuf[i].f_flags; return (mntbuf[i].f_mntonname); } } return (0); } /* * Pick up a numeric argument. It must be nonnegative and in the given * range (except that a vmax of 0 means unlimited). */ static long numarg(const char *meaning, long vmin, long vmax) { char *p; long val; val = strtol(optarg, &p, 10); if (*p) errx(1, "illegal %s -- %s", meaning, optarg); if (val < vmin || (vmax && val > vmax)) errx(1, "%s must be between %ld and %ld", meaning, vmin, vmax); return (val); } void sig(int signo) { switch(signo) { case SIGALRM: case SIGBUS: case SIGFPE: case SIGHUP: case SIGTERM: case SIGTRAP: if (pipeout) quit("Signal on pipe: cannot recover\n"); msg("Rewriting attempted as response to unknown signal.\n"); (void)fflush(stderr); (void)fflush(stdout); close_rewind(); exit(X_REWRITE); /* NOTREACHED */ case SIGSEGV: msg("SIGSEGV: ABORTING!\n"); (void)signal(SIGSEGV, SIG_DFL); (void)kill(0, SIGSEGV); /* NOTREACHED */ } } char * rawname(char *cp) { struct stat sb; /* * Ensure that the device passed in is a raw device. */ if (stat(cp, &sb) == 0 && (sb.st_mode & S_IFMT) == S_IFCHR) return (cp); /* * Since there's only one device type now, we can't construct any * better name, so we have to return NULL. */ return (NULL); } /* * obsolete -- * Change set of key letters and ordered arguments into something * getopt(3) will like. */ static void obsolete(int *argcp, char **argvp[]) { int argc, flags; char *ap, **argv, *flagsp, **nargv, *p; /* Setup. */ argv = *argvp; argc = *argcp; /* * Return if no arguments or first argument has leading * dash or slash. */ ap = argv[1]; if (argc == 1 || *ap == '-' || *ap == '/') return; /* Allocate space for new arguments. */ if ((*argvp = nargv = malloc((argc + 1) * sizeof(char *))) == NULL || (p = flagsp = malloc(strlen(ap) + 2)) == NULL) err(1, NULL); *nargv++ = *argv; argv += 2; for (flags = 0; *ap; ++ap) { switch (*ap) { case 'B': case 'b': case 'd': case 'f': case 'D': case 'C': case 'h': case 's': case 'T': if (*argv == NULL) { warnx("option requires an argument -- %c", *ap); usage(); } if ((nargv[0] = malloc(strlen(*argv) + 2 + 1)) == NULL) err(1, NULL); nargv[0][0] = '-'; nargv[0][1] = *ap; (void)strcpy(&nargv[0][2], *argv); ++argv; ++nargv; break; default: if (!flags) { *p++ = '-'; flags = 1; } *p++ = *ap; break; } } /* Terminate flags. */ if (flags) { *p = '\0'; *nargv++ = flagsp; } else free(flagsp); /* Copy remaining arguments. */ while ((*nargv++ = *argv++)); /* Update argument count. */ *argcp = nargv - *argvp - 1; } Index: head/sbin/fsck_ffs/fsck.h =================================================================== --- head/sbin/fsck_ffs/fsck.h (revision 328425) +++ head/sbin/fsck_ffs/fsck.h (revision 328426) @@ -1,479 +1,479 @@ /*- * SPDX-License-Identifier: BSD-3-Clause and BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fsck.h 8.4 (Berkeley) 5/9/95 * $FreeBSD$ */ #ifndef _FSCK_H_ #define _FSCK_H_ #include #include #include #include #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MINBUFS 10 /* minimum number of buffers required */ #define MAXBUFS 40 /* maximum space to allocate to buffers */ #define INOBUFSIZE 64*1024 /* size of buffer to read inodes in pass1 */ #define ZEROBUFSIZE (dev_bsize * 128) /* size of zero buffer used by -Z */ union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(dp, field) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) #define DIP_SET(dp, field, val) do { \ if (sblock.fs_magic == FS_UFS1_MAGIC) \ (dp)->dp1.field = (val); \ else \ (dp)->dp2.field = (val); \ } while (0) /* * Each inode on the file system is described by the following structure. * The linkcnt is initially set to the value in the inode. Each time it * is found during the descent in passes 2, 3, and 4 the count is * decremented. Any inodes whose count is non-zero after pass 4 needs to * have its link count adjusted by the value remaining in ino_linkcnt. */ struct inostat { char ino_state; /* state of inode, see below */ char ino_type; /* type of inode */ short ino_linkcnt; /* number of links not found */ }; /* * Inode states. */ #define USTATE 0x1 /* inode not allocated */ #define FSTATE 0x2 /* inode is file */ #define FZLINK 0x3 /* inode is file with a link count of zero */ #define DSTATE 0x4 /* inode is directory */ #define DZLINK 0x5 /* inode is directory with a zero link count */ #define DFOUND 0x6 /* directory found during descent */ /* 0x7 UNUSED - see S_IS_DVALID() definition */ #define DCLEAR 0x8 /* directory is to be cleared */ #define FCLEAR 0x9 /* file is to be cleared */ /* DUNFOUND === (state == DSTATE || state == DZLINK) */ #define S_IS_DUNFOUND(state) (((state) & ~0x1) == DSTATE) /* DVALID === (state == DSTATE || state == DZLINK || state == DFOUND) */ #define S_IS_DVALID(state) (((state) & ~0x3) == DSTATE) #define INO_IS_DUNFOUND(ino) S_IS_DUNFOUND(inoinfo(ino)->ino_state) #define INO_IS_DVALID(ino) S_IS_DVALID(inoinfo(ino)->ino_state) /* * Inode state information is contained on per cylinder group lists * which are described by the following structure. */ struct inostatlist { long il_numalloced; /* number of inodes allocated in this cg */ struct inostat *il_stat;/* inostat info for this cylinder group */ } *inostathead; /* * buffer cache structure. */ struct bufarea { TAILQ_ENTRY(bufarea) b_list; /* buffer list */ ufs2_daddr_t b_bno; int b_size; int b_errs; int b_flags; int b_type; union { char *b_buf; /* buffer space */ ufs1_daddr_t *b_indir1; /* UFS1 indirect block */ ufs2_daddr_t *b_indir2; /* UFS2 indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct ufs1_dinode *b_dinode1; /* UFS1 inode block */ struct ufs2_dinode *b_dinode2; /* UFS2 inode block */ } b_un; char b_dirty; }; #define IBLK(bp, i) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (bp)->b_un.b_indir1[i] : (bp)->b_un.b_indir2[i]) #define IBLK_SET(bp, i, val) do { \ if (sblock.fs_magic == FS_UFS1_MAGIC) \ (bp)->b_un.b_indir1[i] = (val); \ else \ (bp)->b_un.b_indir2[i] = (val); \ } while (0) /* * Buffer flags */ #define B_INUSE 0x00000001 /* Buffer is in use */ /* * Type of data in buffer */ #define BT_UNKNOWN 0 /* Buffer holds a superblock */ #define BT_SUPERBLK 1 /* Buffer holds a superblock */ #define BT_CYLGRP 2 /* Buffer holds a cylinder group map */ #define BT_LEVEL1 3 /* Buffer holds single level indirect */ #define BT_LEVEL2 4 /* Buffer holds double level indirect */ #define BT_LEVEL3 5 /* Buffer holds triple level indirect */ #define BT_EXTATTR 6 /* Buffer holds external attribute data */ #define BT_INODES 7 /* Buffer holds external attribute data */ #define BT_DIRDATA 8 /* Buffer holds directory data */ #define BT_DATA 9 /* Buffer holds user data */ #define BT_NUMBUFTYPES 10 #define BT_NAMES { \ "unknown", \ "Superblock", \ "Cylinder Group", \ "Single Level Indirect", \ "Double Level Indirect", \ "Triple Level Indirect", \ "External Attribute", \ "Inode Block", \ "Directory Contents", \ "User Data" } extern long readcnt[BT_NUMBUFTYPES]; extern long totalreadcnt[BT_NUMBUFTYPES]; extern struct timespec readtime[BT_NUMBUFTYPES]; extern struct timespec totalreadtime[BT_NUMBUFTYPES]; extern struct timespec startprog; extern struct bufarea sblk; /* file system superblock */ extern struct bufarea *pdirbp; /* current directory contents */ extern struct bufarea *pbp; /* current inode block */ #define dirty(bp) do { \ if (fswritefd < 0) \ pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n"); \ else \ (bp)->b_dirty = 1; \ } while (0) #define initbarea(bp, type) do { \ (bp)->b_dirty = 0; \ (bp)->b_bno = (ufs2_daddr_t)-1; \ (bp)->b_flags = 0; \ (bp)->b_type = type; \ } while (0) #define sbdirty() dirty(&sblk) #define sblock (*sblk.b_un.b_fs) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; extern ino_t cursnapshot; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(struct inodesc *); /* function to be applied to blocks of inode */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_lbn_t id_lbn; /* logical block number of current block */ ufs2_daddr_t id_blkno; /* current block number being examined */ int id_numfrags; /* number of frags contained in block */ off_t id_filesize; /* for DATA nodes, the size of the directory */ ufs2_daddr_t id_entryno;/* for DATA nodes, current entry number */ int id_loc; /* for DATA nodes, current location in dir */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA or ADDR */ }; /* file types */ #define DATA 1 /* a directory */ #define SNAP 2 /* a snapshot */ #define ADDR 3 /* anything but a directory or a snapshot */ /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup. To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurrences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs2_daddr_t dup; }; struct dups *duplist; /* head of dup list */ struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Inode cache data structures. */ struct inoinfo { struct inoinfo *i_nexthash; /* next entry in hash chain */ ino_t i_number; /* inode number of this entry */ ino_t i_parent; /* inode number of parent */ ino_t i_dotdot; /* inode number of `..' */ size_t i_isize; /* size of inode */ u_int i_numblks; /* size of block array in bytes */ ufs2_daddr_t i_blks[1]; /* actually longer */ } **inphead, **inpsort; extern long dirhash, inplast; extern unsigned long numdirs, listmax; extern long countdirs; /* number of directories we actually found */ #define MIBSIZE 3 /* size of fsck sysctl MIBs */ extern int adjrefcnt[MIBSIZE]; /* MIB command to adjust inode reference cnt */ extern int adjblkcnt[MIBSIZE]; /* MIB command to adjust inode block count */ extern int adjndir[MIBSIZE]; /* MIB command to adjust number of directories */ extern int adjnbfree[MIBSIZE]; /* MIB command to adjust number of free blocks */ extern int adjnifree[MIBSIZE]; /* MIB command to adjust number of free inodes */ extern int adjnffree[MIBSIZE]; /* MIB command to adjust number of free frags */ extern int adjnumclusters[MIBSIZE]; /* MIB command to adjust number of free clusters */ extern int freefiles[MIBSIZE]; /* MIB command to free a set of files */ extern int freedirs[MIBSIZE]; /* MIB command to free a set of directories */ extern int freeblks[MIBSIZE]; /* MIB command to free a set of data blocks */ extern struct fsck_cmd cmd; /* sysctl file system update commands */ extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ extern char *cdevname; /* name of device being checked */ extern long dev_bsize; /* computed value of DEV_BSIZE */ extern long secsize; /* actual disk sector size */ extern u_int real_dev_bsize; /* actual disk sector size, not overridden */ extern char nflag; /* assume a no response */ extern char yflag; /* assume a yes response */ extern int bkgrdflag; /* use a snapshot to run on an active system */ -extern ufs2_daddr_t bflag; /* location of alternate super block */ +extern off_t bflag; /* location of alternate super block */ extern int debug; /* output debugging info */ extern int Eflag; /* delete empty data blocks */ extern int Zflag; /* zero empty data blocks */ extern int inoopt; /* trim out unused inodes */ extern char ckclean; /* only do work if not cleanly unmounted */ extern int cvtlevel; /* convert to newer file system format */ extern int bkgrdcheck; /* determine if background check is possible */ extern int bkgrdsumadj; /* whether the kernel have ability to adjust superblock summary */ extern char usedsoftdep; /* just fix soft dependency inconsistencies */ extern char preen; /* just fix normal inconsistencies */ extern char rerun; /* rerun fsck. Only used in non-preen mode */ extern int returntosingle; /* 1 => return to single user mode on exit */ extern char resolved; /* cleared if unresolved changes => not clean */ extern char havesb; /* superblock has been read */ extern char skipclean; /* skip clean file systems if preening */ extern int fsmodified; /* 1 => write done to file system */ extern int fsreadfd; /* file descriptor for reading file system */ extern int fswritefd; /* file descriptor for writing file system */ extern struct uufsd disk; /* libufs user-ufs disk structure */ extern int surrender; /* Give up if reads fail */ extern int wantrestart; /* Restart fsck on early termination */ extern ufs2_daddr_t maxfsblock; /* number of blocks in the file system */ extern char *blockmap; /* ptr to primary blk allocation map */ extern ino_t maxino; /* number of inodes in file system */ extern ino_t lfdir; /* lost & found directory inode number */ extern const char *lfname; /* lost & found directory name */ extern int lfmode; /* lost & found directory creation mode */ extern ufs2_daddr_t n_blks; /* number of blocks in use */ extern ino_t n_files; /* number of files in use */ extern volatile sig_atomic_t got_siginfo; /* received a SIGINFO */ extern volatile sig_atomic_t got_sigalarm; /* received a SIGALRM */ #define clearinode(dp) \ if (sblock.fs_magic == FS_UFS1_MAGIC) { \ (dp)->dp1 = ufs1_zino; \ } else { \ (dp)->dp2 = ufs2_zino; \ } extern struct ufs1_dinode ufs1_zino; extern struct ufs2_dinode ufs2_zino; #define setbmap(blkno) setbit(blockmap, blkno) #define testbmap(blkno) isset(blockmap, blkno) #define clrbmap(blkno) clrbit(blockmap, blkno) #define STOP 0x01 #define SKIP 0x02 #define KEEPON 0x04 #define ALTERED 0x08 #define FOUND 0x10 #define EEXIT 8 /* Standard error exit. */ #define ERERUN 16 /* fsck needs to be re-run. */ #define ERESTART -1 int flushentry(void); /* * Wrapper for malloc() that flushes the cylinder group cache to try * to get space. */ static inline void* Malloc(size_t size) { void *retval; while ((retval = malloc(size)) == NULL) if (flushentry() == 0) break; return (retval); } /* * Wrapper for calloc() that flushes the cylinder group cache to try * to get space. */ static inline void* Calloc(size_t cnt, size_t size) { void *retval; while ((retval = calloc(cnt, size)) == NULL) if (flushentry() == 0) break; return (retval); } struct fstab; void adjust(struct inodesc *, int lcnt); ufs2_daddr_t allocblk(long frags); ino_t allocdir(ino_t parent, ino_t request, int mode); ino_t allocino(ino_t request, int type); void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk); char *blockcheck(char *name); int blread(int fd, char *buf, ufs2_daddr_t blk, long size); void bufinit(void); void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size); void blerase(int fd, ufs2_daddr_t blk, long size); void blzero(int fd, ufs2_daddr_t blk, long size); void cacheino(union dinode *dp, ino_t inumber); void catch(int); void catchquit(int); int changeino(ino_t dir, const char *name, ino_t newnum); int check_cgmagic(int cg, struct bufarea *cgbp); int chkrange(ufs2_daddr_t blk, int cnt); void ckfini(int markclean); int ckinode(union dinode *dp, struct inodesc *); void clri(struct inodesc *, const char *type, int flag); int clearentry(struct inodesc *); void direrror(ino_t ino, const char *errmesg); int dirscan(struct inodesc *); int dofix(struct inodesc *, const char *msg); int eascan(struct inodesc *, struct ufs2_dinode *dp); void fileerror(ino_t cwd, ino_t ino, const char *errmesg); void finalIOstats(void); int findino(struct inodesc *); int findname(struct inodesc *); void flush(int fd, struct bufarea *bp); void freeblk(ufs2_daddr_t blkno, long frags); void freeino(ino_t ino); void freeinodebuf(void); void fsutilinit(void); int ftypeok(union dinode *dp); void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size); struct bufarea *cglookup(int cg); struct bufarea *getdatablk(ufs2_daddr_t blkno, long size, int type); struct inoinfo *getinoinfo(ino_t inumber); union dinode *getnextinode(ino_t inumber, int rebuildcg); void getpathname(char *namebuf, ino_t curdir, ino_t ino); union dinode *ginode(ino_t inumber); void infohandler(int sig); void alarmhandler(int sig); void inocleanup(void); void inodirty(void); struct inostat *inoinfo(ino_t inum); void IOstats(char *what); int linkup(ino_t orphan, ino_t parentdir, char *name); int makeentry(ino_t parent, ino_t ino, const char *name); void panic(const char *fmt, ...) __printflike(1, 2); void pass1(void); void pass1b(void); int pass1check(struct inodesc *); void pass2(void); void pass3(void); void pass4(void); int pass4check(struct inodesc *); void pass5(void); void pfatal(const char *fmt, ...) __printflike(1, 2); void pinode(ino_t ino); void propagate(void); void pwarn(const char *fmt, ...) __printflike(1, 2); int readsb(int listerr); int reply(const char *question); void rwerror(const char *mesg, ufs2_daddr_t blk); void sblock_init(void); void setinodebuf(ino_t); int setup(char *dev); void gjournal_check(const char *filesys); int suj_check(const char *filesys); void update_maps(struct cg *, struct cg*, int); void fsckinit(void); #endif /* !_FSCK_H_ */ Index: head/sbin/fsck_ffs/fsutil.c =================================================================== --- head/sbin/fsck_ffs/fsutil.c (revision 328425) +++ head/sbin/fsck_ffs/fsutil.c (revision 328426) @@ -1,1064 +1,1059 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" static void slowio_start(void); static void slowio_end(void); static void printIOstats(void); static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ static struct timespec startpass, finishpass; struct timeval slowio_starttime; int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ int slowio_pollcnt; static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ static TAILQ_HEAD(buflist, bufarea) bufhead; /* head of buffer cache list */ static int numbufs; /* size of buffer cache */ static char *buftype[BT_NUMBUFTYPES] = BT_NAMES; static struct bufarea *cgbufs; /* header for cylinder group cache */ static int flushtries; /* number of tries to reclaim memory */ void fsutilinit(void) { diskreads = totaldiskreads = totalreads = 0; bzero(&startpass, sizeof(struct timespec)); bzero(&finishpass, sizeof(struct timespec)); bzero(&slowio_starttime, sizeof(struct timeval)); slowio_delay_usec = 10000; slowio_pollcnt = 0; bzero(&cgblk, sizeof(struct bufarea)); TAILQ_INIT(&bufhead); numbufs = 0; /* buftype ? */ cgbufs = NULL; flushtries = 0; } int ftypeok(union dinode *dp) { switch (DIP(dp, di_mode) & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", DIP(dp, di_mode)); return (0); } } int reply(const char *question) { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = !strcmp(question, "CONTINUE"); printf("\n"); if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { printf("%s? no\n\n", question); resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); while (c != '\n' && getc(stdin) != '\n') { if (feof(stdin)) { resolved = 0; return (0); } } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); resolved = 0; return (0); } /* * Look up state information for an inode. */ struct inostat * inoinfo(ino_t inum) { static struct inostat unallocated = { USTATE, 0, 0 }; struct inostatlist *ilp; int iloff; if (inum > maxino) errx(EEXIT, "inoinfo: inumber %ju out of range", (uintmax_t)inum); ilp = &inostathead[inum / sblock.fs_ipg]; iloff = inum % sblock.fs_ipg; if (iloff >= ilp->il_numalloced) return (&unallocated); return (&ilp->il_stat[iloff]); } /* * Malloc buffers and set up cache. */ void bufinit(void) { struct bufarea *bp; long bufcnt, i; char *bufp; pbp = pdirbp = (struct bufarea *)0; bufp = Malloc((unsigned int)sblock.fs_bsize); if (bufp == NULL) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk, BT_CYLGRP); TAILQ_INIT(&bufhead); bufcnt = MAXBUFS; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); bufp = Malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; TAILQ_INSERT_HEAD(&bufhead, bp, b_list); initbarea(bp, BT_UNKNOWN); } numbufs = i; /* save number of buffers */ for (i = 0; i < BT_NUMBUFTYPES; i++) { readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; readcnt[i] = totalreadcnt[i] = 0; } } /* * Manage cylinder group buffers. */ static struct bufarea *cgbufs; /* header for cylinder group cache */ static int flushtries; /* number of tries to reclaim memory */ struct bufarea * cglookup(int cg) { struct bufarea *cgbp; struct cg *cgp; if (cgbufs == NULL) { cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea)); if (cgbufs == NULL) errx(EEXIT, "cannot allocate cylinder group buffers"); } cgbp = &cgbufs[cg]; if (cgbp->b_un.b_cg != NULL) return (cgbp); cgp = NULL; if (flushtries == 0) cgp = malloc((unsigned int)sblock.fs_cgsize); if (cgp == NULL) { getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); return (&cgblk); } cgbp->b_un.b_cg = cgp; initbarea(cgbp, BT_CYLGRP); getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); return (cgbp); } /* * Attempt to flush a cylinder group cache entry. * Return whether the flush was successful. */ int flushentry(void) { struct bufarea *cgbp; if (flushtries == sblock.fs_ncg || cgbufs == NULL) return (0); cgbp = &cgbufs[flushtries++]; if (cgbp->b_un.b_cg == NULL) return (0); flush(fswritefd, cgbp); free(cgbp->b_un.b_buf); cgbp->b_un.b_buf = NULL; return (1); } /* * Manage a cache of directory blocks. */ struct bufarea * getdatablk(ufs2_daddr_t blkno, long size, int type) { struct bufarea *bp; TAILQ_FOREACH(bp, &bufhead, b_list) if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; TAILQ_FOREACH_REVERSE(bp, &bufhead, buflist, b_list) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == NULL) errx(EEXIT, "deadlocked buffer pool"); bp->b_type = type; getblk(bp, blkno, size); /* fall through */ foundit: if (debug && bp->b_type != type) printf("Buffer type changed from %s to %s\n", buftype[bp->b_type], buftype[type]); TAILQ_REMOVE(&bufhead, bp, b_list); TAILQ_INSERT_HEAD(&bufhead, bp, b_list); bp->b_flags |= B_INUSE; return (bp); } /* * Timespec operations (from ). */ #define timespecsub(vvp, uvp) \ do { \ (vvp)->tv_sec -= (uvp)->tv_sec; \ (vvp)->tv_nsec -= (uvp)->tv_nsec; \ if ((vvp)->tv_nsec < 0) { \ (vvp)->tv_sec--; \ (vvp)->tv_nsec += 1000000000; \ } \ } while (0) #define timespecadd(vvp, uvp) \ do { \ (vvp)->tv_sec += (uvp)->tv_sec; \ (vvp)->tv_nsec += (uvp)->tv_nsec; \ if ((vvp)->tv_nsec >= 1000000000) { \ (vvp)->tv_sec++; \ (vvp)->tv_nsec -= 1000000000; \ } \ } while (0) void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) { ufs2_daddr_t dblk; struct timespec start, finish; dblk = fsbtodb(&sblock, blk); if (bp->b_bno == dblk) { totalreads++; } else { flush(fswritefd, bp); if (debug) { readcnt[bp->b_type]++; clock_gettime(CLOCK_REALTIME_PRECISE, &start); } bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); if (debug) { clock_gettime(CLOCK_REALTIME_PRECISE, &finish); timespecsub(&finish, &start); timespecadd(&readtime[bp->b_type], &finish); } bp->b_bno = dblk; bp->b_size = size; } } void flush(int fd, struct bufarea *bp) { - int i, j; if (!bp->b_dirty) return; bp->b_dirty = 0; if (fswritefd < 0) { pfatal("WRITING IN READ_ONLY MODE.\n"); return; } if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", (long long)bp->b_bno); bp->b_errs = 0; /* * Write using the appropriate function. */ switch (bp->b_type) { case BT_SUPERBLK: if (bp != &sblk) pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", bp, &sblk); - blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); - for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, - j++) { - blwrite(fswritefd, (char *)sblock.fs_csp + i, - fsbtodb(&sblock, - sblock.fs_csaddr + j * sblock.fs_frag), - MIN(sblock.fs_cssize - i, sblock.fs_bsize)); - } + if (sbput(fd, (struct fs *)bp->b_un.b_buf, 0) == 0) + fsmodified = 1; break; case BT_CYLGRP: if (cgput(&disk, (struct cg *)bp->b_un.b_buf) == 0) fsmodified = 1; break; default: blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); break; } } void rwerror(const char *mesg, ufs2_daddr_t blk) { if (bkgrdcheck) exit(EEXIT); if (preen == 0) printf("\n"); pfatal("CANNOT %s: %ld", mesg, (long)blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(int markclean) { struct bufarea *bp, *nbp; int ofsmodified, cnt; if (bkgrdflag) { unlink(snapname); if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) { cmd.value = FS_UNCLEAN; cmd.size = markclean ? -1 : 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) rwerror("SET FILE SYSTEM FLAGS", FS_UNCLEAN); if (!preen) { printf("\n***** FILE SYSTEM MARKED %s *****\n", markclean ? "CLEAN" : "DIRTY"); if (!markclean) rerun = 1; } } else if (!preen && !markclean) { printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); rerun = 1; } } if (debug && totalreads > 0) printf("cache with %d buffers missed %ld of %ld (%d%%)\n", numbufs, totaldiskreads, totalreads, (int)(totaldiskreads * 100 / totalreads)); if (fswritefd < 0) { (void)close(fsreadfd); return; } flush(fswritefd, &sblk); if (havesb && cursnapshot == 0 && sblock.fs_magic == FS_UFS2_MAGIC && sblk.b_bno != sblock.fs_sblockloc / dev_bsize && !preen && reply("UPDATE STANDARD SUPERBLOCK")) { + /* Change the write destination to standard superblock */ + sblock.fs_sblockactualloc = sblock.fs_sblockloc; sblk.b_bno = sblock.fs_sblockloc / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); cnt = 0; TAILQ_FOREACH_REVERSE_SAFE(bp, &bufhead, buflist, b_list, nbp) { TAILQ_REMOVE(&bufhead, bp, b_list); cnt++; flush(fswritefd, bp); free(bp->b_un.b_buf); free((char *)bp); } if (numbufs != cnt) errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); if (cgbufs != NULL) { for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { if (cgbufs[cnt].b_un.b_cg == NULL) continue; flush(fswritefd, &cgbufs[cnt]); free(cgbufs[cnt].b_un.b_cg); } free(cgbufs); } pbp = pdirbp = (struct bufarea *)0; if (cursnapshot == 0 && sblock.fs_clean != markclean) { if ((sblock.fs_clean = markclean) != 0) { sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); sblock.fs_pendingblocks = 0; sblock.fs_pendinginodes = 0; } sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) { printf("\n***** FILE SYSTEM MARKED %s *****\n", markclean ? "CLEAN" : "DIRTY"); if (!markclean) rerun = 1; } } else if (!preen) { if (markclean) { printf("\n***** FILE SYSTEM IS CLEAN *****\n"); } else { printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); rerun = 1; } } (void)close(fsreadfd); (void)close(fswritefd); } /* * Print out I/O statistics. */ void IOstats(char *what) { int i; if (debug == 0) return; if (diskreads == 0) { printf("%s: no I/O\n\n", what); return; } if (startpass.tv_sec == 0) startpass = startprog; printf("%s: I/O statistics\n", what); printIOstats(); totaldiskreads += diskreads; diskreads = 0; for (i = 0; i < BT_NUMBUFTYPES; i++) { timespecadd(&totalreadtime[i], &readtime[i]); totalreadcnt[i] += readcnt[i]; readtime[i].tv_sec = readtime[i].tv_nsec = 0; readcnt[i] = 0; } clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); } void finalIOstats(void) { int i; if (debug == 0) return; printf("Final I/O statistics\n"); totaldiskreads += diskreads; diskreads = totaldiskreads; startpass = startprog; for (i = 0; i < BT_NUMBUFTYPES; i++) { timespecadd(&totalreadtime[i], &readtime[i]); totalreadcnt[i] += readcnt[i]; readtime[i] = totalreadtime[i]; readcnt[i] = totalreadcnt[i]; } printIOstats(); } static void printIOstats(void) { long long msec, totalmsec; int i; clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); timespecsub(&finishpass, &startpass); printf("Running time: %jd.%03ld sec\n", (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); printf("buffer reads by type:\n"); for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) totalmsec += readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; if (totalmsec == 0) totalmsec = 1; for (i = 0; i < BT_NUMBUFTYPES; i++) { if (readcnt[i] == 0) continue; msec = readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, (readcnt[i] * 1000 / diskreads) % 10, (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000, msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); } printf("\n"); } int blread(int fd, char *buf, ufs2_daddr_t blk, long size) { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (bkgrdflag) slowio_start(); totalreads++; diskreads++; if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); else if (read(fd, buf, (int)size) == size) { if (bkgrdflag) slowio_end(); return (0); } /* * This is handled specially here instead of in rwerror because * rwerror is used for all sorts of errors, not just true read/write * errors. It should be refactored and fixed. */ if (surrender) { pfatal("CANNOT READ_BLK: %ld", (long)blk); errx(EEXIT, "ABORTING DUE TO READ ERRORS"); } else rwerror("READ BLK", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (read(fd, cp, (int)secsize) != secsize) { (void)lseek(fd, offset + i + secsize, 0); if (secsize != dev_bsize && dev_bsize != 1) printf(" %jd (%jd),", (intmax_t)(blk * dev_bsize + i) / secsize, (intmax_t)blk + i / dev_bsize); else printf(" %jd,", (intmax_t)blk + i / dev_bsize); errs++; } } printf("\n"); if (errs) resolved = 0; return (errs); } void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); else if (write(fd, buf, size) == size) { fsmodified = 1; return; } resolved = 0; rwerror("WRITE BLK", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (write(fd, cp, dev_bsize) != dev_bsize) { (void)lseek(fd, offset + i + dev_bsize, 0); printf(" %jd,", (intmax_t)blk + i / dev_bsize); } printf("\n"); return; } void blerase(int fd, ufs2_daddr_t blk, long size) { off_t ioarg[2]; if (fd < 0) return; ioarg[0] = blk * dev_bsize; ioarg[1] = size; ioctl(fd, DIOCGDELETE, ioarg); /* we don't really care if we succeed or not */ return; } /* * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by * definition a multiple of dev_bsize. */ void blzero(int fd, ufs2_daddr_t blk, long size) { static char *zero; off_t offset, len; if (fd < 0) return; if (zero == NULL) { zero = calloc(ZEROBUFSIZE, 1); if (zero == NULL) errx(EEXIT, "cannot allocate buffer pool"); } offset = blk * dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); while (size > 0) { len = MIN(ZEROBUFSIZE, size); if (write(fd, zero, len) != len) rwerror("WRITE BLK", blk); blk += len / dev_bsize; size -= len; } } /* * Verify cylinder group's magic number and other parameters. If the * test fails, offer an option to rebuild the whole cylinder group. */ int check_cgmagic(int cg, struct bufarea *cgbp) { struct cg *cgp = cgbp->b_un.b_cg; /* * Extended cylinder group checks. */ if (cg_chkmagic(cgp) && ((sblock.fs_magic == FS_UFS1_MAGIC && cgp->cg_old_niblk == sblock.fs_ipg && cgp->cg_ndblk <= sblock.fs_fpg && cgp->cg_old_ncyl <= sblock.fs_old_cpg) || (sblock.fs_magic == FS_UFS2_MAGIC && cgp->cg_niblk == sblock.fs_ipg && cgp->cg_ndblk <= sblock.fs_fpg && cgp->cg_initediblk <= sblock.fs_ipg))) { return (1); } pfatal("CYLINDER GROUP %d: BAD MAGIC NUMBER", cg); if (!reply("REBUILD CYLINDER GROUP")) { printf("YOU WILL NEED TO RERUN FSCK.\n"); rerun = 1; return (1); } /* * Zero out the cylinder group and then initialize critical fields. * Bit maps and summaries will be recalculated by later passes. */ memset(cgp, 0, (size_t)sblock.fs_cgsize); cgp->cg_magic = CG_MAGIC; cgp->cg_cgx = cg; cgp->cg_niblk = sblock.fs_ipg; cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) cgp->cg_ndblk = sblock.fs_fpg; else cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg); cgp->cg_iusedoff = &cgp->cg_space[0] - (u_char *)(&cgp->cg_firstfield); if (sblock.fs_magic == FS_UFS1_MAGIC) { cgp->cg_niblk = 0; cgp->cg_initediblk = 0; cgp->cg_old_ncyl = sblock.fs_old_cpg; cgp->cg_old_niblk = sblock.fs_ipg; cgp->cg_old_btotoff = cgp->cg_iusedoff; cgp->cg_old_boff = cgp->cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t); cgp->cg_iusedoff = cgp->cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t); } cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT); if (sblock.fs_contigsumsize > 0) { cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag; cgp->cg_clustersumoff = roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t)); cgp->cg_clustersumoff -= sizeof(u_int32_t); cgp->cg_clusteroff = cgp->cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); cgp->cg_nextfreeoff = cgp->cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); } dirty(cgbp); return (0); } /* * allocate a data block with the specified number of fragments */ ufs2_daddr_t allocblk(long frags) { int i, j, k, cg, baseblk; struct bufarea *cgbp; struct cg *cgp; if (frags <= 0 || frags > sblock.fs_frag) return (0); for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(i + j)) continue; for (k = 1; k < frags; k++) if (testbmap(i + j + k)) break; if (k < frags) { j += k; continue; } cg = dtog(&sblock, i + j); cgbp = cglookup(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) return (0); baseblk = dtogd(&sblock, i + j); for (k = 0; k < frags; k++) { setbmap(i + j + k); clrbit(cg_blksfree(cgp), baseblk + k); } n_blks += frags; if (frags == sblock.fs_frag) cgp->cg_cs.cs_nbfree--; else cgp->cg_cs.cs_nffree -= frags; dirty(cgbp); return (i + j); } } return (0); } /* * Free a previously allocated block */ void freeblk(ufs2_daddr_t blkno, long frags) { struct inodesc idesc; idesc.id_blkno = blkno; idesc.id_numfrags = frags; (void)pass4check(&idesc); } /* Slow down IO so as to leave some disk bandwidth for other processes */ void slowio_start() { /* Delay one in every 8 operations */ slowio_pollcnt = (slowio_pollcnt + 1) & 7; if (slowio_pollcnt == 0) { gettimeofday(&slowio_starttime, NULL); } } void slowio_end() { struct timeval tv; int delay_usec; if (slowio_pollcnt != 0) return; /* Update the slowdown interval. */ gettimeofday(&tv, NULL); delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + (tv.tv_usec - slowio_starttime.tv_usec); if (delay_usec < 64) delay_usec = 64; if (delay_usec > 2500000) delay_usec = 2500000; slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; /* delay by 8 times the average IO delay */ if (slowio_delay_usec > 64) usleep(slowio_delay_usec * 8); } /* * Find a pathname */ void getpathname(char *namebuf, ino_t curdir, ino_t ino) { int len; char *cp; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == UFS_ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || !INO_IS_DVALID(curdir)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != UFS_ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = strdup(".."); if ((ckinode(ginode(ino), &idesc) & FOUND) == 0) break; namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(idesc.id_number), &idesc)&FOUND) == 0) break; len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[UFS_MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != UFS_ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(int sig __unused) { ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after file system checks complete * so that reboot sequence may be interrupted. */ void catchquit(int sig __unused) { printf("returning to single-user after file system check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(struct inodesc *idesc, const char *msg) { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn("%s", msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #include /* * An unexpected inconsistency occurred. * Die if preening or file system is running with soft dependency protocol, * otherwise just print message and continue. */ void pfatal(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (!preen) { (void)vfprintf(stdout, fmt, ap); va_end(ap); if (usedsoftdep) (void)fprintf(stdout, "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); /* * Force foreground fsck to clean up inconsistency. */ if (bkgrdflag) { cmd.value = FS_NEEDSFSCK; cmd.size = 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); ckfini(0); exit(EEXIT); } return; } if (cdevname == NULL) cdevname = strdup("fsck"); (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); (void)fprintf(stdout, "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", cdevname, usedsoftdep ? " SOFT UPDATE " : " "); /* * Force foreground fsck to clean up inconsistency. */ if (bkgrdflag) { cmd.value = FS_NEEDSFSCK; cmd.size = 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); } ckfini(0); exit(EEXIT); } /* * Pwarn just prints a message when not preening or running soft dependency * protocol, or a warning (preceded by filename) when preening. */ void pwarn(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (preen) (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stdout, fmt, ap); va_end(ap); exit(EEXIT); } Index: head/sbin/fsck_ffs/globs.c =================================================================== --- head/sbin/fsck_ffs/globs.c (revision 328425) +++ head/sbin/fsck_ffs/globs.c (revision 328426) @@ -1,168 +1,168 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "fsck.h" long readcnt[BT_NUMBUFTYPES]; long totalreadcnt[BT_NUMBUFTYPES]; struct timespec readtime[BT_NUMBUFTYPES]; struct timespec totalreadtime[BT_NUMBUFTYPES]; struct timespec startprog; struct bufarea sblk; /* file system superblock */ struct bufarea *pdirbp; /* current directory contents */ struct bufarea *pbp; /* current inode block */ ino_t cursnapshot; long dirhash, inplast; unsigned long numdirs, listmax; long countdirs; /* number of directories we actually found */ int adjrefcnt[MIBSIZE]; /* MIB command to adjust inode reference cnt */ int adjblkcnt[MIBSIZE]; /* MIB command to adjust inode block count */ int adjndir[MIBSIZE]; /* MIB command to adjust number of directories */ int adjnbfree[MIBSIZE]; /* MIB command to adjust number of free blocks */ int adjnifree[MIBSIZE]; /* MIB command to adjust number of free inodes */ int adjnffree[MIBSIZE]; /* MIB command to adjust number of free frags */ int adjnumclusters[MIBSIZE]; /* MIB command to adjust number of free clusters */ int freefiles[MIBSIZE]; /* MIB command to free a set of files */ int freedirs[MIBSIZE]; /* MIB command to free a set of directories */ int freeblks[MIBSIZE]; /* MIB command to free a set of data blocks */ struct fsck_cmd cmd; /* sysctl file system update commands */ char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ char *cdevname; /* name of device being checked */ long dev_bsize; /* computed value of DEV_BSIZE */ long secsize; /* actual disk sector size */ u_int real_dev_bsize; /* actual disk sector size, not overridden */ char nflag; /* assume a no response */ char yflag; /* assume a yes response */ int bkgrdflag; /* use a snapshot to run on an active system */ -ufs2_daddr_t bflag; /* location of alternate super block */ +off_t bflag; /* location of alternate super block */ int debug; /* output debugging info */ int Eflag; /* delete empty data blocks */ int Zflag; /* zero empty data blocks */ int inoopt; /* trim out unused inodes */ char ckclean; /* only do work if not cleanly unmounted */ int cvtlevel; /* convert to newer file system format */ int bkgrdcheck; /* determine if background check is possible */ int bkgrdsumadj; /* whether the kernel have ability to adjust superblock summary */ char usedsoftdep; /* just fix soft dependency inconsistencies */ char preen; /* just fix normal inconsistencies */ char rerun; /* rerun fsck. Only used in non-preen mode */ int returntosingle; /* 1 => return to single user mode on exit */ char resolved; /* cleared if unresolved changes => not clean */ char havesb; /* superblock has been read */ char skipclean; /* skip clean file systems if preening */ int fsmodified; /* 1 => write done to file system */ int fsreadfd; /* file descriptor for reading file system */ int fswritefd; /* file descriptor for writing file system */ int surrender; /* Give up if reads fail */ int wantrestart; /* Restart fsck on early termination */ ufs2_daddr_t maxfsblock; /* number of blocks in the file system */ char *blockmap; /* ptr to primary blk allocation map */ ino_t maxino; /* number of inodes in file system */ ino_t lfdir; /* lost & found directory inode number */ const char *lfname; /* lost & found directory name */ int lfmode; /* lost & found directory creation mode */ ufs2_daddr_t n_blks; /* number of blocks in use */ ino_t n_files; /* number of files in use */ volatile sig_atomic_t got_siginfo; /* received a SIGINFO */ volatile sig_atomic_t got_sigalarm; /* received a SIGALRM */ struct ufs1_dinode ufs1_zino; struct ufs2_dinode ufs2_zino; void fsckinit(void) { bzero(readcnt, sizeof(long) * BT_NUMBUFTYPES); bzero(totalreadcnt, sizeof(long) * BT_NUMBUFTYPES); bzero(readtime, sizeof(struct timespec) * BT_NUMBUFTYPES); bzero(totalreadtime, sizeof(struct timespec) * BT_NUMBUFTYPES); bzero(&startprog, sizeof(struct timespec)); bzero(&sblk, sizeof(struct bufarea)); pdirbp = NULL; pbp = NULL; cursnapshot = 0; listmax = numdirs = dirhash = inplast = 0; countdirs = 0; bzero(adjrefcnt, sizeof(int) * MIBSIZE); bzero(adjblkcnt, sizeof(int) * MIBSIZE); bzero(adjndir, sizeof(int) * MIBSIZE); bzero(adjnbfree, sizeof(int) * MIBSIZE); bzero(adjnifree, sizeof(int) * MIBSIZE); bzero(adjnffree, sizeof(int) * MIBSIZE); bzero(adjnumclusters, sizeof(int) * MIBSIZE); bzero(freefiles, sizeof(int) * MIBSIZE); bzero(freedirs, sizeof(int) * MIBSIZE); bzero(freeblks, sizeof(int) * MIBSIZE); bzero(&cmd, sizeof(struct fsck_cmd)); bzero(snapname, sizeof(char) * BUFSIZ); cdevname = NULL; dev_bsize = 0; secsize = 0; real_dev_bsize = 0; bkgrdsumadj = 0; usedsoftdep = 0; rerun = 0; returntosingle = 0; resolved = 0; havesb = 0; fsmodified = 0; fsreadfd = 0; fswritefd = 0; maxfsblock = 0; blockmap = NULL; maxino = 0; lfdir = 0; lfname = "lost+found"; lfmode = 0700; n_blks = 0; n_files = 0; got_siginfo = 0; got_sigalarm = 0; bzero(&ufs1_zino, sizeof(struct ufs1_dinode)); bzero(&ufs2_zino, sizeof(struct ufs2_dinode)); } Index: head/sbin/fsck_ffs/setup.c =================================================================== --- head/sbin/fsck_ffs/setup.c (revision 328425) +++ head/sbin/fsck_ffs/setup.c (revision 328426) @@ -1,573 +1,551 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #define FSTYPENAMES #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct uufsd disk; struct bufarea asblk; #define altsblock (*asblk.b_un.b_fs) #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static int calcsb(char *dev, int devfd, struct fs *fs); static void saverecovery(int readfd, int writefd); static int chkrecovery(int devfd); /* * Read in a superblock finding an alternate if necessary. * Return 1 if successful, 0 if unsuccessful, -1 if file system * is already clean (ckclean and preen mode only). */ int setup(char *dev) { long cg, asked, i, j; long bmapsize; struct stat statb; struct fs proto; size_t size; havesb = 0; fswritefd = -1; cursnapshot = 0; if (stat(dev, &statb) < 0) { printf("Can't stat %s: %s\n", dev, strerror(errno)); if (bkgrdflag) { unlink(snapname); bkgrdflag = 0; } return (0); } if ((statb.st_mode & S_IFMT) != S_IFCHR && (statb.st_mode & S_IFMT) != S_IFBLK) { if (bkgrdflag != 0 && (statb.st_flags & SF_SNAPSHOT) == 0) { unlink(snapname); printf("background fsck lacks a snapshot\n"); exit(EEXIT); } if ((statb.st_flags & SF_SNAPSHOT) != 0 && cvtlevel == 0) { cursnapshot = statb.st_ino; } else { if (cvtlevel == 0 || (statb.st_flags & SF_SNAPSHOT) == 0) { if (preen && bkgrdflag) { unlink(snapname); bkgrdflag = 0; } pfatal("%s is not a disk device", dev); if (reply("CONTINUE") == 0) { if (bkgrdflag) { unlink(snapname); bkgrdflag = 0; } return (0); } } else { if (bkgrdflag) { unlink(snapname); bkgrdflag = 0; } pfatal("cannot convert a snapshot"); exit(EEXIT); } } } if ((fsreadfd = open(dev, O_RDONLY)) < 0 || ufs_disk_fillout(&disk, dev) < 0) { if (bkgrdflag) { unlink(snapname); bkgrdflag = 0; } printf("Can't open %s: %s\n", dev, strerror(errno)); return (0); } if (bkgrdflag) { unlink(snapname); size = MIBSIZE; if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0|| sysctlnametomib("vfs.ffs.adjblkcnt", adjblkcnt, &size) < 0|| sysctlnametomib("vfs.ffs.freefiles", freefiles, &size) < 0|| sysctlnametomib("vfs.ffs.freedirs", freedirs, &size) < 0 || sysctlnametomib("vfs.ffs.freeblks", freeblks, &size) < 0) { pfatal("kernel lacks background fsck support\n"); exit(EEXIT); } /* * When kernel is lack of runtime bgfsck superblock summary * adjustment functionality, it does not mean we can not * continue, as old kernels will recompute the summary at * mount time. However, it will be an unexpected softupdates * inconsistency if it turns out that the summary is still * incorrect. Set a flag so subsequent operation can know * this. */ bkgrdsumadj = 1; if (sysctlnametomib("vfs.ffs.adjndir", adjndir, &size) < 0 || sysctlnametomib("vfs.ffs.adjnbfree", adjnbfree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnifree", adjnifree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnffree", adjnffree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnumclusters", adjnumclusters, &size) < 0) { bkgrdsumadj = 0; pwarn("kernel lacks runtime superblock summary adjustment support"); } cmd.version = FFS_CMD_VERSION; cmd.handle = fsreadfd; fswritefd = -1; } if (preen == 0) printf("** %s", dev); if (bkgrdflag == 0 && (nflag || ufs_disk_write(&disk) < 0 || (fswritefd = dup(disk.d_fd)) < 0)) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if (preen == 0) printf("\n"); /* * Read in the superblock, looking for alternates if necessary */ if (readsb(1) == 0) { skipclean = 0; if (bflag || preen || calcsb(dev, fsreadfd, &proto) == 0) return(0); if (reply("LOOK FOR ALTERNATE SUPERBLOCKS") == 0) return (0); for (cg = 0; cg < proto.fs_ncg; cg++) { bflag = fsbtodb(&proto, cgsblock(&proto, cg)); if (readsb(0) != 0) break; } if (cg >= proto.fs_ncg) { printf("%s %s\n%s %s\n%s %s\n", "SEARCH FOR ALTERNATE SUPER-BLOCK", "FAILED. YOU MUST USE THE", "-b OPTION TO FSCK TO SPECIFY THE", "LOCATION OF AN ALTERNATE", "SUPER-BLOCK TO SUPPLY NEEDED", "INFORMATION; SEE fsck_ffs(8)."); bflag = 0; return(0); } pwarn("USING ALTERNATE SUPERBLOCK AT %jd\n", bflag); bflag = 0; } if (skipclean && ckclean && sblock.fs_clean) { pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n"); return (-1); } maxfsblock = sblock.fs_size; maxino = sblock.fs_ncg * sblock.fs_ipg; /* * Check and potentially fix certain fields in the super block. */ if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) { pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); if (reply("SET TO DEFAULT") == 1) { sblock.fs_optim = FS_OPTTIME; sbdirty(); } } if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) { pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK", sblock.fs_minfree); if (reply("SET TO DEFAULT") == 1) { sblock.fs_minfree = 10; sbdirty(); } } if (sblock.fs_magic == FS_UFS1_MAGIC && sblock.fs_old_inodefmt < FS_44INODEFMT) { pwarn("Format of file system is too old.\n"); pwarn("Must update to modern format using a version of fsck\n"); pfatal("from before 2002 with the command ``fsck -c 2''\n"); exit(EEXIT); } if (asblk.b_dirty && !bflag) { memmove(&altsblock, &sblock, (size_t)sblock.fs_sbsize); flush(fswritefd, &asblk); } if (preen == 0 && yflag == 0 && sblock.fs_magic == FS_UFS2_MAGIC && fswritefd != -1 && chkrecovery(fsreadfd) == 0 && reply("SAVE DATA TO FIND ALTERNATE SUPERBLOCKS") != 0) saverecovery(fsreadfd, fswritefd); /* * read in the summary info. */ asked = 0; sblock.fs_csp = Calloc(1, sblock.fs_cssize); if (sblock.fs_csp == NULL) { printf("cannot alloc %u bytes for cg summary info\n", (unsigned)sblock.fs_cssize); goto badsb; } for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = MIN(sblock.fs_cssize - i, sblock.fs_bsize); readcnt[sblk.b_type]++; if (blread(fsreadfd, (char *)sblock.fs_csp + i, fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); if (reply("CONTINUE") == 0) { ckfini(0); exit(EEXIT); } asked++; } } /* * allocate and initialize the necessary maps */ bmapsize = roundup(howmany(maxfsblock, CHAR_BIT), sizeof(short)); blockmap = Calloc((unsigned)bmapsize, sizeof (char)); if (blockmap == NULL) { printf("cannot alloc %u bytes for blockmap\n", (unsigned)bmapsize); goto badsb; } inostathead = Calloc(sblock.fs_ncg, sizeof(struct inostatlist)); if (inostathead == NULL) { printf("cannot alloc %u bytes for inostathead\n", (unsigned)(sizeof(struct inostatlist) * (sblock.fs_ncg))); goto badsb; } numdirs = MAX(sblock.fs_cstotal.cs_ndir, 128); dirhash = numdirs; inplast = 0; listmax = numdirs + 10; inpsort = (struct inoinfo **)Calloc(listmax, sizeof(struct inoinfo *)); inphead = (struct inoinfo **)Calloc(numdirs, sizeof(struct inoinfo *)); if (inpsort == NULL || inphead == NULL) { printf("cannot alloc %ju bytes for inphead\n", (uintmax_t)numdirs * sizeof(struct inoinfo *)); goto badsb; } bufinit(); if (sblock.fs_flags & FS_DOSOFTDEP) usedsoftdep = 1; else usedsoftdep = 0; return (1); badsb: ckfini(0); return (0); } /* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; - -#define BAD_MAGIC_MSG \ -"The previous newfs operation on this volume did not complete.\n" \ -"You must complete newfs before mounting this volume.\n" - -/* * Read in the super block and its summary info. */ int readsb(int listerr) { - ufs2_daddr_t super; - int i, bad; + off_t super; + int bad, ret; + struct fs *fs; - if (bflag) { - super = bflag; - readcnt[sblk.b_type]++; - if ((blread(fsreadfd, (char *)&sblock, super, (long)SBLOCKSIZE))) - return (0); - if (sblock.fs_magic == FS_BAD_MAGIC) { - fprintf(stderr, BAD_MAGIC_MSG); + super = bflag ? bflag * dev_bsize : -1; + readcnt[sblk.b_type]++; + if ((ret = sbget(fsreadfd, &fs, super)) != 0) { + switch (ret) { + case EINVAL: + fprintf(stderr, "The previous newfs operation " + "on this volume did not complete.\nYou must " + "complete newfs before using this volume.\n"); exit(11); - } - if (sblock.fs_magic != FS_UFS1_MAGIC && - sblock.fs_magic != FS_UFS2_MAGIC) { - fprintf(stderr, "%jd is not a file system superblock\n", - bflag); + case ENOENT: + if (bflag) + fprintf(stderr, "%jd is not a file system " + "superblock\n", super / dev_bsize); + else + fprintf(stderr, "Cannot find file system " + "superblock\n"); return (0); - } - } else { - for (i = 0; sblock_try[i] != -1; i++) { - super = sblock_try[i] / dev_bsize; - readcnt[sblk.b_type]++; - if ((blread(fsreadfd, (char *)&sblock, super, - (long)SBLOCKSIZE))) - return (0); - if (sblock.fs_magic == FS_BAD_MAGIC) { - fprintf(stderr, BAD_MAGIC_MSG); - exit(11); - } - if ((sblock.fs_magic == FS_UFS1_MAGIC || - (sblock.fs_magic == FS_UFS2_MAGIC && - sblock.fs_sblockloc == sblock_try[i])) && - sblock.fs_ncg >= 1 && - sblock.fs_bsize >= MINBSIZE && - sblock.fs_sbsize >= roundup(sizeof(struct fs), dev_bsize)) - break; - } - if (sblock_try[i] == -1) { - fprintf(stderr, "Cannot find file system superblock\n"); + case EIO: + default: + fprintf(stderr, "I/O error reading %jd\n", + super / dev_bsize); return (0); } } + memcpy(&sblock, fs, fs->fs_sbsize); + free(fs); /* * Compute block size that the file system is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ - super *= dev_bsize; dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); - sblk.b_bno = super / dev_bsize; + sblk.b_bno = sblock.fs_sblockactualloc / dev_bsize; sblk.b_size = SBLOCKSIZE; /* * Compare all fields that should not differ in alternate super block. * When an alternate super-block is specified this check is skipped. */ if (bflag) goto out; getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), sblock.fs_sbsize); if (asblk.b_errs) return (0); bad = 0; #define CHK(x, y) \ if (altsblock.x != sblock.x) { \ bad++; \ if (listerr && debug) \ printf("SUPER BLOCK VS ALTERNATE MISMATCH %s: " y " vs " y "\n", \ #x, (intmax_t)sblock.x, (intmax_t)altsblock.x); \ } CHK(fs_sblkno, "%jd"); CHK(fs_cblkno, "%jd"); CHK(fs_iblkno, "%jd"); CHK(fs_dblkno, "%jd"); CHK(fs_ncg, "%jd"); CHK(fs_bsize, "%jd"); CHK(fs_fsize, "%jd"); CHK(fs_frag, "%jd"); CHK(fs_bmask, "%#jx"); CHK(fs_fmask, "%#jx"); CHK(fs_bshift, "%jd"); CHK(fs_fshift, "%jd"); CHK(fs_fragshift, "%jd"); CHK(fs_fsbtodb, "%jd"); CHK(fs_sbsize, "%jd"); CHK(fs_nindir, "%jd"); CHK(fs_inopb, "%jd"); CHK(fs_cssize, "%jd"); CHK(fs_ipg, "%jd"); CHK(fs_fpg, "%jd"); CHK(fs_magic, "%#jx"); #undef CHK if (bad) { if (listerr == 0) return (0); if (preen) printf("%s: ", cdevname); printf( "VALUES IN SUPER BLOCK LSB=%jd DISAGREE WITH THOSE IN\n" "LAST ALTERNATE LSB=%jd\n", sblk.b_bno, asblk.b_bno); if (reply("IGNORE ALTERNATE SUPER BLOCK") == 0) return (0); } out: /* * If not yet done, update UFS1 superblock with new wider fields. */ if (sblock.fs_magic == FS_UFS1_MAGIC && sblock.fs_maxbsize != sblock.fs_bsize) { sblock.fs_maxbsize = sblock.fs_bsize; sblock.fs_time = sblock.fs_old_time; sblock.fs_size = sblock.fs_old_size; sblock.fs_dsize = sblock.fs_old_dsize; sblock.fs_csaddr = sblock.fs_old_csaddr; sblock.fs_cstotal.cs_ndir = sblock.fs_old_cstotal.cs_ndir; sblock.fs_cstotal.cs_nbfree = sblock.fs_old_cstotal.cs_nbfree; sblock.fs_cstotal.cs_nifree = sblock.fs_old_cstotal.cs_nifree; sblock.fs_cstotal.cs_nffree = sblock.fs_old_cstotal.cs_nffree; } havesb = 1; return (1); } void sblock_init(void) { fswritefd = -1; fsmodified = 0; lfdir = 0; initbarea(&sblk, BT_SUPERBLK); initbarea(&asblk, BT_SUPERBLK); sblk.b_un.b_buf = Malloc(SBLOCKSIZE); asblk.b_un.b_buf = Malloc(SBLOCKSIZE); if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL) errx(EEXIT, "cannot allocate space for superblock"); dev_bsize = secsize = DEV_BSIZE; } /* * Calculate a prototype superblock based on information in the boot area. * When done the cgsblock macro can be calculated and the fs_ncg field * can be used. Do NOT attempt to use other macros without verifying that * their needed information is available! */ static int calcsb(char *dev, int devfd, struct fs *fs) { struct fsrecovery *fsr; char *fsrbuf; u_int secsize; /* * We need fragments-per-group and the partition-size. * * Newfs stores these details at the end of the boot block area * at the start of the filesystem partition. If they have been * overwritten by a boot block, we fail. But usually they are * there and we can use them. */ if (ioctl(devfd, DIOCGSECTORSIZE, &secsize) == -1) return (0); fsrbuf = Malloc(secsize); if (fsrbuf == NULL) errx(EEXIT, "calcsb: cannot allocate recovery buffer"); if (blread(devfd, fsrbuf, (SBLOCK_UFS2 - secsize) / dev_bsize, secsize) != 0) return (0); fsr = (struct fsrecovery *)&fsrbuf[secsize - sizeof *fsr]; if (fsr->fsr_magic != FS_UFS2_MAGIC) return (0); memset(fs, 0, sizeof(struct fs)); fs->fs_fpg = fsr->fsr_fpg; fs->fs_fsbtodb = fsr->fsr_fsbtodb; fs->fs_sblkno = fsr->fsr_sblkno; fs->fs_magic = fsr->fsr_magic; fs->fs_ncg = fsr->fsr_ncg; free(fsrbuf); return (1); } /* * Check to see if recovery information exists. * Return 1 if it exists or cannot be created. * Return 0 if it does not exist and can be created. */ static int chkrecovery(int devfd) { struct fsrecovery *fsr; char *fsrbuf; u_int secsize; /* * Could not determine if backup material exists, so do not * offer to create it. */ if (ioctl(devfd, DIOCGSECTORSIZE, &secsize) == -1 || (fsrbuf = Malloc(secsize)) == NULL || blread(devfd, fsrbuf, (SBLOCK_UFS2 - secsize) / dev_bsize, secsize) != 0) return (1); /* * Recovery material has already been created, so do not * need to create it again. */ fsr = (struct fsrecovery *)&fsrbuf[secsize - sizeof *fsr]; if (fsr->fsr_magic == FS_UFS2_MAGIC) { free(fsrbuf); return (1); } /* * Recovery material has not been created and can be if desired. */ free(fsrbuf); return (0); } /* * Read the last sector of the boot block, replace the last * 20 bytes with the recovery information, then write it back. * The recovery information only works for UFS2 filesystems. */ static void saverecovery(int readfd, int writefd) { struct fsrecovery *fsr; char *fsrbuf; u_int secsize; if (sblock.fs_magic != FS_UFS2_MAGIC || ioctl(readfd, DIOCGSECTORSIZE, &secsize) == -1 || (fsrbuf = Malloc(secsize)) == NULL || blread(readfd, fsrbuf, (SBLOCK_UFS2 - secsize) / dev_bsize, secsize) != 0) { printf("RECOVERY DATA COULD NOT BE CREATED\n"); return; } fsr = (struct fsrecovery *)&fsrbuf[secsize - sizeof *fsr]; fsr->fsr_magic = sblock.fs_magic; fsr->fsr_fpg = sblock.fs_fpg; fsr->fsr_fsbtodb = sblock.fs_fsbtodb; fsr->fsr_sblkno = sblock.fs_sblkno; fsr->fsr_ncg = sblock.fs_ncg; blwrite(writefd, fsrbuf, (SBLOCK_UFS2 - secsize) / secsize, secsize); free(fsrbuf); } Index: head/sbin/fsirand/Makefile =================================================================== --- head/sbin/fsirand/Makefile (revision 328425) +++ head/sbin/fsirand/Makefile (revision 328426) @@ -1,9 +1,10 @@ # $OpenBSD: Makefile,v 1.1 1997/01/26 02:23:20 millert Exp $ # $FreeBSD$ PACKAGE=runtime PROG= fsirand MAN= fsirand.8 +LIBADD= ufs WARNS?= 3 .include Index: head/sbin/fsirand/fsirand.c =================================================================== --- head/sbin/fsirand/fsirand.c (revision 328425) +++ head/sbin/fsirand/fsirand.c (revision 328426) @@ -1,305 +1,234 @@ /* $OpenBSD: fsirand.c,v 1.9 1997/02/28 00:46:33 millert Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997 Todd C. Miller * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Todd C. Miller. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include +#include #include #include #include #include #include #include static void usage(void) __dead2; int fsirand(char *); -/* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; - static int printonly = 0, force = 0, ignorelabel = 0; int main(int argc, char *argv[]) { int n, ex = 0; struct rlimit rl; while ((n = getopt(argc, argv, "bfp")) != -1) { switch (n) { case 'b': ignorelabel++; break; case 'p': printonly++; break; case 'f': force++; break; default: usage(); } } if (argc - optind < 1) usage(); srandomdev(); /* Increase our data size to the max */ if (getrlimit(RLIMIT_DATA, &rl) == 0) { rl.rlim_cur = rl.rlim_max; if (setrlimit(RLIMIT_DATA, &rl) < 0) warn("can't get resource limit to max data size"); } else warn("can't get resource limit for data size"); for (n = optind; n < argc; n++) { if (argc - optind != 1) (void)puts(argv[n]); ex += fsirand(argv[n]); if (n < argc - 1) putchar('\n'); } exit(ex); } int fsirand(char *device) { struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; caddr_t inodebuf; ssize_t ibufsize; struct fs *sblock; ino_t inumber; - ufs2_daddr_t sblockloc, dblk; - char sbuf[SBLOCKSIZE], sbuftmp[SBLOCKSIZE]; - int i, devfd, n, cg; + ufs2_daddr_t dblk; + int devfd, n, cg, ret; u_int32_t bsize = DEV_BSIZE; if ((devfd = open(device, printonly ? O_RDONLY : O_RDWR)) < 0) { warn("can't open %s", device); return (1); } dp1 = NULL; dp2 = NULL; /* Read in master superblock */ - (void)memset(&sbuf, 0, sizeof(sbuf)); - sblock = (struct fs *)&sbuf; - for (i = 0; sblock_try[i] != -1; i++) { - sblockloc = sblock_try[i]; - if (lseek(devfd, sblockloc, SEEK_SET) == -1) { - warn("can't seek to superblock (%jd) on %s", - (intmax_t)sblockloc, device); + if ((ret = sbget(devfd, &sblock, -1)) != 0) { + switch (ret) { + case ENOENT: + warn("Cannot find file system superblock"); return (1); - } - if ((n = read(devfd, (void *)sblock, SBLOCKSIZE))!=SBLOCKSIZE) { - warnx("can't read superblock on %s: %s", device, - (n < SBLOCKSIZE) ? "short read" : strerror(errno)); + default: + warn("Unable to read file system superblock"); return (1); } - if ((sblock->fs_magic == FS_UFS1_MAGIC || - (sblock->fs_magic == FS_UFS2_MAGIC && - sblock->fs_sblockloc == sblock_try[i])) && - sblock->fs_bsize <= MAXBSIZE && - sblock->fs_bsize >= (ssize_t)sizeof(struct fs)) - break; } - if (sblock_try[i] == -1) { - fprintf(stderr, "Cannot find file system superblock\n"); - return (1); - } if (sblock->fs_magic == FS_UFS1_MAGIC && sblock->fs_old_inodefmt < FS_44INODEFMT) { warnx("file system format is too old, sorry"); return (1); } if (!force && !printonly && sblock->fs_clean != 1) { warnx("file system is not clean, fsck %s first", device); return (1); } - /* Make sure backup superblocks are sane. */ - sblock = (struct fs *)&sbuftmp; - for (cg = 0; cg < (int)sblock->fs_ncg; cg++) { - dblk = fsbtodb(sblock, cgsblock(sblock, cg)); - if (lseek(devfd, (off_t)dblk * bsize, SEEK_SET) < 0) { - warn("can't seek to %jd", (intmax_t)dblk * bsize); - return (1); - } else if ((n = write(devfd, (void *)sblock, SBLOCKSIZE)) != SBLOCKSIZE) { - warn("can't read backup superblock %d on %s: %s", - cg + 1, device, (n < SBLOCKSIZE) ? "short write" - : strerror(errno)); - return (1); - } - if (sblock->fs_magic != FS_UFS1_MAGIC && - sblock->fs_magic != FS_UFS2_MAGIC) { - warnx("bad magic number in backup superblock %d on %s", - cg + 1, device); - return (1); - } - if (sblock->fs_sbsize > SBLOCKSIZE) { - warnx("size of backup superblock %d on %s is preposterous", - cg + 1, device); - return (1); - } - } - sblock = (struct fs *)&sbuf; - /* XXX - should really cap buffer at 512kb or so */ if (sblock->fs_magic == FS_UFS1_MAGIC) ibufsize = sizeof(struct ufs1_dinode) * sblock->fs_ipg; else ibufsize = sizeof(struct ufs2_dinode) * sblock->fs_ipg; if ((inodebuf = malloc(ibufsize)) == NULL) errx(1, "can't allocate memory for inode buffer"); if (printonly && (sblock->fs_id[0] || sblock->fs_id[1])) { if (sblock->fs_id[0]) (void)printf("%s was randomized on %s", device, ctime((void *)&(sblock->fs_id[0]))); (void)printf("fsid: %x %x\n", sblock->fs_id[0], sblock->fs_id[1]); } /* Randomize fs_id unless old 4.2BSD file system */ if (!printonly) { /* Randomize fs_id and write out new sblock and backups */ sblock->fs_id[0] = (u_int32_t)time(NULL); sblock->fs_id[1] = random(); - - if (lseek(devfd, sblockloc, SEEK_SET) == -1) { - warn("can't seek to superblock (%jd) on %s", - (intmax_t)sblockloc, device); + if (sbput(devfd, sblock, sblock->fs_ncg) != 0) { + warn("could not write updated superblock"); return (1); } - if ((n = write(devfd, (void *)sblock, SBLOCKSIZE)) != - SBLOCKSIZE) { - warn("can't write superblock on %s: %s", device, - (n < SBLOCKSIZE) ? "short write" : strerror(errno)); - return (1); - } } /* For each cylinder group, randomize inodes and update backup sblock */ for (cg = 0, inumber = 0; cg < (int)sblock->fs_ncg; cg++) { - /* Update superblock if appropriate */ - if (!printonly) { - dblk = fsbtodb(sblock, cgsblock(sblock, cg)); - if (lseek(devfd, (off_t)dblk * bsize, SEEK_SET) < 0) { - warn("can't seek to %jd", - (intmax_t)dblk * bsize); - return (1); - } else if ((n = write(devfd, (void *)sblock, - SBLOCKSIZE)) != SBLOCKSIZE) { - warn("can't write backup superblock %d on %s: %s", - cg + 1, device, (n < SBLOCKSIZE) ? - "short write" : strerror(errno)); - return (1); - } - } - /* Read in inodes, then print or randomize generation nums */ dblk = fsbtodb(sblock, ino_to_fsba(sblock, inumber)); if (lseek(devfd, (off_t)dblk * bsize, SEEK_SET) < 0) { warn("can't seek to %jd", (intmax_t)dblk * bsize); return (1); } else if ((n = read(devfd, inodebuf, ibufsize)) != ibufsize) { warnx("can't read inodes: %s", (n < ibufsize) ? "short read" : strerror(errno)); return (1); } for (n = 0; n < (int)sblock->fs_ipg; n++, inumber++) { if (sblock->fs_magic == FS_UFS1_MAGIC) dp1 = &((struct ufs1_dinode *)inodebuf)[n]; else dp2 = &((struct ufs2_dinode *)inodebuf)[n]; if (inumber >= UFS_ROOTINO) { if (printonly) (void)printf("ino %ju gen %08x\n", (uintmax_t)inumber, sblock->fs_magic == FS_UFS1_MAGIC ? dp1->di_gen : dp2->di_gen); else if (sblock->fs_magic == FS_UFS1_MAGIC) dp1->di_gen = random(); else dp2->di_gen = random(); } } /* Write out modified inodes */ if (!printonly) { if (lseek(devfd, (off_t)dblk * bsize, SEEK_SET) < 0) { warn("can't seek to %jd", (intmax_t)dblk * bsize); return (1); } else if ((n = write(devfd, inodebuf, ibufsize)) != ibufsize) { warnx("can't write inodes: %s", (n != ibufsize) ? "short write" : strerror(errno)); return (1); } } } (void)close(devfd); return(0); } static void usage(void) { (void)fprintf(stderr, "usage: fsirand [-b] [-f] [-p] special [special ...]\n"); exit(1); } Index: head/sbin/growfs/growfs.c =================================================================== --- head/sbin/growfs/growfs.c (revision 328425) +++ head/sbin/growfs/growfs.c (revision 328426) @@ -1,1760 +1,1737 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1980, 1989, 1993 The Regents of the University of California. * Copyright (c) 2000 Christoph Herrmann, Thomas-Henning von Kamptz * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by * Christoph Herrmann and Thomas-Henning von Kamptz, Munich and Frankfurt. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgment: * This product includes software developed by the University of * California, Berkeley and its contributors, as well as Christoph * Herrmann and Thomas-Henning von Kamptz. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $TSHeader: src/sbin/growfs/growfs.c,v 1.5 2000/12/12 19:31:00 tomsoft Exp $ * */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 2000 Christoph Herrmann, Thomas-Henning von Kamptz\n\ Copyright (c) 1980, 1989, 1993 The Regents of the University of California.\n\ All rights reserved.\n"; #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "debug.h" #ifdef FS_DEBUG int _dbg_lvl_ = (DL_INFO); /* DL_TRC */ #endif /* FS_DEBUG */ static union { struct fs fs; char pad[SBLOCKSIZE]; } fsun1, fsun2; #define sblock fsun1.fs /* the new superblock */ #define osblock fsun2.fs /* the old superblock */ -/* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; -static ufs2_daddr_t sblockloc; - static union { struct cg cg; char pad[MAXBSIZE]; } cgun1, cgun2; #define acg cgun1.cg /* a cylinder cgroup (new) */ #define aocg cgun2.cg /* an old cylinder group */ static struct csum *fscs; /* cylinder summary */ static void growfs(int, int, unsigned int); static void rdfs(ufs2_daddr_t, size_t, void *, int); static void wtfs(ufs2_daddr_t, size_t, void *, int, unsigned int); static int charsperline(void); static void usage(void); static int isblock(struct fs *, unsigned char *, int); static void clrblock(struct fs *, unsigned char *, int); static void setblock(struct fs *, unsigned char *, int); static void initcg(int, time_t, int, unsigned int); static void updjcg(int, time_t, int, int, unsigned int); static void updcsloc(time_t, int, int, unsigned int); static void frag_adjust(ufs2_daddr_t, int); static void updclst(int); static void mount_reload(const struct statfs *stfs); static void cgckhash(struct cg *); /* * Here we actually start growing the file system. We basically read the * cylinder summary from the first cylinder group as we want to update * this on the fly during our various operations. First we handle the * changes in the former last cylinder group. Afterwards we create all new * cylinder groups. Now we handle the cylinder group containing the * cylinder summary which might result in a relocation of the whole * structure. In the end we write back the updated cylinder summary, the * new superblock, and slightly patched versions of the super block * copies. */ static void growfs(int fsi, int fso, unsigned int Nflag) { DBG_FUNC("growfs") time_t modtime; uint cylno; int i, j, width; char tmpbuf[100]; DBG_ENTER; time(&modtime); /* * Get the cylinder summary into the memory. */ fscs = (struct csum *)calloc((size_t)1, (size_t)sblock.fs_cssize); if (fscs == NULL) errx(1, "calloc failed"); - for (i = 0; i < osblock.fs_cssize; i += osblock.fs_bsize) { - rdfs(fsbtodb(&osblock, osblock.fs_csaddr + - numfrags(&osblock, i)), (size_t)MIN(osblock.fs_cssize - i, - osblock.fs_bsize), (void *)(((char *)fscs) + i), fsi); - } + memcpy(fscs, osblock.fs_csp, osblock.fs_cssize); + free(osblock.fs_csp); + osblock.fs_csp = NULL; + sblock.fs_csp = fscs; #ifdef FS_DEBUG { struct csum *dbg_csp; u_int32_t dbg_csc; char dbg_line[80]; dbg_csp = fscs; for (dbg_csc = 0; dbg_csc < osblock.fs_ncg; dbg_csc++) { snprintf(dbg_line, sizeof(dbg_line), "%d. old csum in old location", dbg_csc); DBG_DUMP_CSUM(&osblock, dbg_line, dbg_csp++); } } #endif /* FS_DEBUG */ DBG_PRINT0("fscs read\n"); /* * Do all needed changes in the former last cylinder group. */ updjcg(osblock.fs_ncg - 1, modtime, fsi, fso, Nflag); /* * Dump out summary information about file system. */ #ifdef FS_DEBUG #define B2MBFACTOR (1 / (1024.0 * 1024.0)) printf("growfs: %.1fMB (%jd sectors) block size %d, fragment size %d\n", (float)sblock.fs_size * sblock.fs_fsize * B2MBFACTOR, (intmax_t)fsbtodb(&sblock, sblock.fs_size), sblock.fs_bsize, sblock.fs_fsize); printf("\tusing %d cylinder groups of %.2fMB, %d blks, %d inodes.\n", sblock.fs_ncg, (float)sblock.fs_fpg * sblock.fs_fsize * B2MBFACTOR, sblock.fs_fpg / sblock.fs_frag, sblock.fs_ipg); if (sblock.fs_flags & FS_DOSOFTDEP) printf("\twith soft updates\n"); #undef B2MBFACTOR #endif /* FS_DEBUG */ /* * Now build the cylinders group blocks and * then print out indices of cylinder groups. */ printf("super-block backups (for fsck_ffs -b #) at:\n"); i = 0; width = charsperline(); /* * Iterate for only the new cylinder groups. */ for (cylno = osblock.fs_ncg; cylno < sblock.fs_ncg; cylno++) { initcg(cylno, modtime, fso, Nflag); j = sprintf(tmpbuf, " %jd%s", (intmax_t)fsbtodb(&sblock, cgsblock(&sblock, cylno)), cylno < (sblock.fs_ncg - 1) ? "," : "" ); if (i + j >= width) { printf("\n"); i = 0; } i += j; printf("%s", tmpbuf); fflush(stdout); } printf("\n"); /* * Do all needed changes in the first cylinder group. * allocate blocks in new location */ updcsloc(modtime, fsi, fso, Nflag); /* - * Now write the cylinder summary back to disk. - */ - for (i = 0; i < sblock.fs_cssize; i += sblock.fs_bsize) { - wtfs(fsbtodb(&sblock, sblock.fs_csaddr + numfrags(&sblock, i)), - (size_t)MIN(sblock.fs_cssize - i, sblock.fs_bsize), - (void *)(((char *)fscs) + i), fso, Nflag); - } - DBG_PRINT0("fscs written\n"); - -#ifdef FS_DEBUG - { - struct csum *dbg_csp; - u_int32_t dbg_csc; - char dbg_line[80]; - - dbg_csp = fscs; - for (dbg_csc = 0; dbg_csc < sblock.fs_ncg; dbg_csc++) { - snprintf(dbg_line, sizeof(dbg_line), - "%d. new csum in new location", dbg_csc); - DBG_DUMP_CSUM(&sblock, dbg_line, dbg_csp++); - } - } -#endif /* FS_DEBUG */ - - /* - * Now write the new superblock back to disk. - */ - sblock.fs_time = modtime; - wtfs(sblockloc, (size_t)SBLOCKSIZE, (void *)&sblock, fso, Nflag); - DBG_PRINT0("sblock written\n"); - DBG_DUMP_FS(&sblock, "new initial sblock"); - - /* - * Clean up the dynamic fields in our superblock copies. - */ - sblock.fs_fmod = 0; - sblock.fs_clean = 1; - sblock.fs_ronly = 0; - sblock.fs_cgrotor = 0; - sblock.fs_state = 0; - memset((void *)&sblock.fs_fsmnt, 0, sizeof(sblock.fs_fsmnt)); - sblock.fs_flags &= FS_DOSOFTDEP; - - /* + * Clean up the dynamic fields in our superblock. + * * XXX * The following fields are currently distributed from the superblock * to the copies: * fs_minfree * fs_rotdelay * fs_maxcontig * fs_maxbpg * fs_minfree, * fs_optim - * fs_flags regarding SOFTPDATES + * fs_flags * * We probably should rather change the summary for the cylinder group * statistics here to the value of what would be in there, if the file * system were created initially with the new size. Therefor we still * need to find an easy way of calculating that. * Possibly we can try to read the first superblock copy and apply the * "diffed" stats between the old and new superblock by still copying * certain parameters onto that. */ + sblock.fs_time = modtime; + sblock.fs_fmod = 0; + sblock.fs_clean = 1; + sblock.fs_ronly = 0; + sblock.fs_cgrotor = 0; + sblock.fs_state = 0; + memset((void *)&sblock.fs_fsmnt, 0, sizeof(sblock.fs_fsmnt)); /* - * Write out the duplicate super blocks. + * Now write the new superblock, its summary information, + * and all the alternates back to disk. */ - for (cylno = 0; cylno < sblock.fs_ncg; cylno++) { - wtfs(fsbtodb(&sblock, cgsblock(&sblock, cylno)), - (size_t)SBLOCKSIZE, (void *)&sblock, fso, Nflag); + if (!Nflag && sbput(fso, &sblock, sblock.fs_ncg) != 0) + errc(2, EIO, "could not write updated superblock"); + DBG_PRINT0("fscs written\n"); + +#ifdef FS_DEBUG + { + struct csum *dbg_csp; + u_int32_t dbg_csc; + char dbg_line[80]; + + dbg_csp = fscs; + for (dbg_csc = 0; dbg_csc < sblock.fs_ncg; dbg_csc++) { + snprintf(dbg_line, sizeof(dbg_line), + "%d. new csum in new location", dbg_csc); + DBG_DUMP_CSUM(&sblock, dbg_line, dbg_csp++); + } } +#endif /* FS_DEBUG */ + + DBG_PRINT0("sblock written\n"); + DBG_DUMP_FS(&sblock, "new initial sblock"); + DBG_PRINT0("sblock copies written\n"); DBG_DUMP_FS(&sblock, "new other sblocks"); DBG_LEAVE; return; } /* * This creates a new cylinder group structure, for more details please see * the source of newfs(8), as this function is taken over almost unchanged. * As this is never called for the first cylinder group, the special * provisions for that case are removed here. */ static void initcg(int cylno, time_t modtime, int fso, unsigned int Nflag) { DBG_FUNC("initcg") static caddr_t iobuf; long blkno, start; ino_t ino; ufs2_daddr_t i, cbase, dmax; struct ufs1_dinode *dp1; struct csum *cs; uint j, d, dupper, dlower; if (iobuf == NULL && (iobuf = malloc(sblock.fs_bsize * 3)) == NULL) errx(37, "panic: cannot allocate I/O buffer"); /* * Determine block bounds for cylinder group. * Allow space for super block summary information in first * cylinder group. */ cbase = cgbase(&sblock, cylno); dmax = cbase + sblock.fs_fpg; if (dmax > sblock.fs_size) dmax = sblock.fs_size; dlower = cgsblock(&sblock, cylno) - cbase; dupper = cgdmin(&sblock, cylno) - cbase; if (cylno == 0) /* XXX fscs may be relocated */ dupper += howmany(sblock.fs_cssize, sblock.fs_fsize); cs = &fscs[cylno]; memset(&acg, 0, sblock.fs_cgsize); acg.cg_time = modtime; acg.cg_magic = CG_MAGIC; acg.cg_cgx = cylno; acg.cg_niblk = sblock.fs_ipg; acg.cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); acg.cg_ndblk = dmax - cbase; if (sblock.fs_contigsumsize > 0) acg.cg_nclusterblks = acg.cg_ndblk / sblock.fs_frag; start = &acg.cg_space[0] - (u_char *)(&acg.cg_firstfield); if (sblock.fs_magic == FS_UFS2_MAGIC) { acg.cg_iusedoff = start; } else { acg.cg_old_ncyl = sblock.fs_old_cpg; acg.cg_old_time = acg.cg_time; acg.cg_time = 0; acg.cg_old_niblk = acg.cg_niblk; acg.cg_niblk = 0; acg.cg_initediblk = 0; acg.cg_old_btotoff = start; acg.cg_old_boff = acg.cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t); acg.cg_iusedoff = acg.cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t); } acg.cg_freeoff = acg.cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); acg.cg_nextfreeoff = acg.cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT); if (sblock.fs_contigsumsize > 0) { acg.cg_clustersumoff = roundup(acg.cg_nextfreeoff, sizeof(u_int32_t)); acg.cg_clustersumoff -= sizeof(u_int32_t); acg.cg_clusteroff = acg.cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); acg.cg_nextfreeoff = acg.cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); } if (acg.cg_nextfreeoff > (unsigned)sblock.fs_cgsize) { /* * This should never happen as we would have had that panic * already on file system creation */ errx(37, "panic: cylinder group too big"); } acg.cg_cs.cs_nifree += sblock.fs_ipg; if (cylno == 0) for (ino = 0; ino < UFS_ROOTINO; ino++) { setbit(cg_inosused(&acg), ino); acg.cg_cs.cs_nifree--; } /* * For the old file system, we have to initialize all the inodes. */ if (sblock.fs_magic == FS_UFS1_MAGIC) { bzero(iobuf, sblock.fs_bsize); for (i = 0; i < sblock.fs_ipg / INOPF(&sblock); i += sblock.fs_frag) { dp1 = (struct ufs1_dinode *)(void *)iobuf; for (j = 0; j < INOPB(&sblock); j++) { dp1->di_gen = arc4random(); dp1++; } wtfs(fsbtodb(&sblock, cgimin(&sblock, cylno) + i), sblock.fs_bsize, iobuf, fso, Nflag); } } if (cylno > 0) { /* * In cylno 0, beginning space is reserved * for boot and super blocks. */ for (d = 0; d < dlower; d += sblock.fs_frag) { blkno = d / sblock.fs_frag; setblock(&sblock, cg_blksfree(&acg), blkno); if (sblock.fs_contigsumsize > 0) setbit(cg_clustersfree(&acg), blkno); acg.cg_cs.cs_nbfree++; } sblock.fs_dsize += dlower; } sblock.fs_dsize += acg.cg_ndblk - dupper; if ((i = dupper % sblock.fs_frag)) { acg.cg_frsum[sblock.fs_frag - i]++; for (d = dupper + sblock.fs_frag - i; dupper < d; dupper++) { setbit(cg_blksfree(&acg), dupper); acg.cg_cs.cs_nffree++; } } for (d = dupper; d + sblock.fs_frag <= acg.cg_ndblk; d += sblock.fs_frag) { blkno = d / sblock.fs_frag; setblock(&sblock, cg_blksfree(&acg), blkno); if (sblock.fs_contigsumsize > 0) setbit(cg_clustersfree(&acg), blkno); acg.cg_cs.cs_nbfree++; } if (d < acg.cg_ndblk) { acg.cg_frsum[acg.cg_ndblk - d]++; for (; d < acg.cg_ndblk; d++) { setbit(cg_blksfree(&acg), d); acg.cg_cs.cs_nffree++; } } if (sblock.fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(&acg); u_char *mapp = cg_clustersfree(&acg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < acg.cg_nclusterblks; i++) { if ((map & bit) != 0) run++; else if (run != 0) { if (run > sblock.fs_contigsumsize) run = sblock.fs_contigsumsize; sump[run]++; run = 0; } if ((i & (CHAR_BIT - 1)) != CHAR_BIT - 1) bit <<= 1; else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > sblock.fs_contigsumsize) run = sblock.fs_contigsumsize; sump[run]++; } } sblock.fs_cstotal.cs_ndir += acg.cg_cs.cs_ndir; sblock.fs_cstotal.cs_nffree += acg.cg_cs.cs_nffree; sblock.fs_cstotal.cs_nbfree += acg.cg_cs.cs_nbfree; sblock.fs_cstotal.cs_nifree += acg.cg_cs.cs_nifree; *cs = acg.cg_cs; cgckhash(&acg); memcpy(iobuf, &acg, sblock.fs_cgsize); memset(iobuf + sblock.fs_cgsize, '\0', sblock.fs_bsize * 3 - sblock.fs_cgsize); wtfs(fsbtodb(&sblock, cgtod(&sblock, cylno)), sblock.fs_bsize * 3, iobuf, fso, Nflag); DBG_DUMP_CG(&sblock, "new cg", &acg); DBG_LEAVE; return; } /* * Here we add or subtract (sign +1/-1) the available fragments in a given * block to or from the fragment statistics. By subtracting before and adding * after an operation on the free frag map we can easy update the fragment * statistic, which seems to be otherwise a rather complex operation. */ static void frag_adjust(ufs2_daddr_t frag, int sign) { DBG_FUNC("frag_adjust") int fragsize; int f; DBG_ENTER; fragsize = 0; /* * Here frag only needs to point to any fragment in the block we want * to examine. */ for (f = rounddown(frag, sblock.fs_frag); f < roundup(frag + 1, sblock.fs_frag); f++) { /* * Count contiguous free fragments. */ if (isset(cg_blksfree(&acg), f)) { fragsize++; } else { if (fragsize && fragsize < sblock.fs_frag) { /* * We found something in between. */ acg.cg_frsum[fragsize] += sign; DBG_PRINT2("frag_adjust [%d]+=%d\n", fragsize, sign); } fragsize = 0; } } if (fragsize && fragsize < sblock.fs_frag) { /* * We found something. */ acg.cg_frsum[fragsize] += sign; DBG_PRINT2("frag_adjust [%d]+=%d\n", fragsize, sign); } DBG_PRINT2("frag_adjust [[%d]]+=%d\n", fragsize, sign); DBG_LEAVE; return; } /* * Here we do all needed work for the former last cylinder group. It has to be * changed in any case, even if the file system ended exactly on the end of * this group, as there is some slightly inconsistent handling of the number * of cylinders in the cylinder group. We start again by reading the cylinder * group from disk. If the last block was not fully available, we first handle * the missing fragments, then we handle all new full blocks in that file * system and finally we handle the new last fragmented block in the file * system. We again have to handle the fragment statistics rotational layout * tables and cluster summary during all those operations. */ static void updjcg(int cylno, time_t modtime, int fsi, int fso, unsigned int Nflag) { DBG_FUNC("updjcg") ufs2_daddr_t cbase, dmax, dupper; struct csum *cs; int i, k; int j = 0; DBG_ENTER; /* * Read the former last (joining) cylinder group from disk, and make * a copy. */ rdfs(fsbtodb(&osblock, cgtod(&osblock, cylno)), (size_t)osblock.fs_cgsize, (void *)&aocg, fsi); DBG_PRINT0("jcg read\n"); DBG_DUMP_CG(&sblock, "old joining cg", &aocg); memcpy((void *)&cgun1, (void *)&cgun2, sizeof(cgun2)); /* * If the cylinder group had already its new final size almost * nothing is to be done ... except: * For some reason the value of cg_ncyl in the last cylinder group has * to be zero instead of fs_cpg. As this is now no longer the last * cylinder group we have to change that value now to fs_cpg. */ if (cgbase(&osblock, cylno + 1) == osblock.fs_size) { if (sblock.fs_magic == FS_UFS1_MAGIC) acg.cg_old_ncyl = sblock.fs_old_cpg; wtfs(fsbtodb(&sblock, cgtod(&sblock, cylno)), (size_t)sblock.fs_cgsize, (void *)&acg, fso, Nflag); DBG_PRINT0("jcg written\n"); DBG_DUMP_CG(&sblock, "new joining cg", &acg); DBG_LEAVE; return; } /* * Set up some variables needed later. */ cbase = cgbase(&sblock, cylno); dmax = cbase + sblock.fs_fpg; if (dmax > sblock.fs_size) dmax = sblock.fs_size; dupper = cgdmin(&sblock, cylno) - cbase; if (cylno == 0) /* XXX fscs may be relocated */ dupper += howmany(sblock.fs_cssize, sblock.fs_fsize); /* * Set pointer to the cylinder summary for our cylinder group. */ cs = fscs + cylno; /* * Touch the cylinder group, update all fields in the cylinder group as * needed, update the free space in the superblock. */ acg.cg_time = modtime; if ((unsigned)cylno == sblock.fs_ncg - 1) { /* * This is still the last cylinder group. */ if (sblock.fs_magic == FS_UFS1_MAGIC) acg.cg_old_ncyl = sblock.fs_old_ncyl % sblock.fs_old_cpg; } else { acg.cg_old_ncyl = sblock.fs_old_cpg; } DBG_PRINT2("jcg dbg: %d %u", cylno, sblock.fs_ncg); #ifdef FS_DEBUG if (sblock.fs_magic == FS_UFS1_MAGIC) DBG_PRINT2("%d %u", acg.cg_old_ncyl, sblock.fs_old_cpg); #endif DBG_PRINT0("\n"); acg.cg_ndblk = dmax - cbase; sblock.fs_dsize += acg.cg_ndblk - aocg.cg_ndblk; if (sblock.fs_contigsumsize > 0) acg.cg_nclusterblks = acg.cg_ndblk / sblock.fs_frag; /* * Now we have to update the free fragment bitmap for our new free * space. There again we have to handle the fragmentation and also * the rotational layout tables and the cluster summary. This is * also done per fragment for the first new block if the old file * system end was not on a block boundary, per fragment for the new * last block if the new file system end is not on a block boundary, * and per block for all space in between. * * Handle the first new block here if it was partially available * before. */ if (osblock.fs_size % sblock.fs_frag) { if (roundup(osblock.fs_size, sblock.fs_frag) <= sblock.fs_size) { /* * The new space is enough to fill at least this * block */ j = 0; for (i = roundup(osblock.fs_size - cbase, sblock.fs_frag) - 1; i >= osblock.fs_size - cbase; i--) { setbit(cg_blksfree(&acg), i); acg.cg_cs.cs_nffree++; j++; } /* * Check if the fragment just created could join an * already existing fragment at the former end of the * file system. */ if (isblock(&sblock, cg_blksfree(&acg), ((osblock.fs_size - cgbase(&sblock, cylno)) / sblock.fs_frag))) { /* * The block is now completely available. */ DBG_PRINT0("block was\n"); acg.cg_frsum[osblock.fs_size % sblock.fs_frag]--; acg.cg_cs.cs_nbfree++; acg.cg_cs.cs_nffree -= sblock.fs_frag; k = rounddown(osblock.fs_size - cbase, sblock.fs_frag); updclst((osblock.fs_size - cbase) / sblock.fs_frag); } else { /* * Lets rejoin a possible partially growed * fragment. */ k = 0; while (isset(cg_blksfree(&acg), i) && (i >= rounddown(osblock.fs_size - cbase, sblock.fs_frag))) { i--; k++; } if (k) acg.cg_frsum[k]--; acg.cg_frsum[k + j]++; } } else { /* * We only grow by some fragments within this last * block. */ for (i = sblock.fs_size - cbase - 1; i >= osblock.fs_size - cbase; i--) { setbit(cg_blksfree(&acg), i); acg.cg_cs.cs_nffree++; j++; } /* * Lets rejoin a possible partially growed fragment. */ k = 0; while (isset(cg_blksfree(&acg), i) && (i >= rounddown(osblock.fs_size - cbase, sblock.fs_frag))) { i--; k++; } if (k) acg.cg_frsum[k]--; acg.cg_frsum[k + j]++; } } /* * Handle all new complete blocks here. */ for (i = roundup(osblock.fs_size - cbase, sblock.fs_frag); i + sblock.fs_frag <= dmax - cbase; /* XXX <= or only < ? */ i += sblock.fs_frag) { j = i / sblock.fs_frag; setblock(&sblock, cg_blksfree(&acg), j); updclst(j); acg.cg_cs.cs_nbfree++; } /* * Handle the last new block if there are stll some new fragments left. * Here we don't have to bother about the cluster summary or the even * the rotational layout table. */ if (i < (dmax - cbase)) { acg.cg_frsum[dmax - cbase - i]++; for (; i < dmax - cbase; i++) { setbit(cg_blksfree(&acg), i); acg.cg_cs.cs_nffree++; } } sblock.fs_cstotal.cs_nffree += (acg.cg_cs.cs_nffree - aocg.cg_cs.cs_nffree); sblock.fs_cstotal.cs_nbfree += (acg.cg_cs.cs_nbfree - aocg.cg_cs.cs_nbfree); /* * The following statistics are not changed here: * sblock.fs_cstotal.cs_ndir * sblock.fs_cstotal.cs_nifree * As the statistics for this cylinder group are ready, copy it to * the summary information array. */ *cs = acg.cg_cs; /* * Write the updated "joining" cylinder group back to disk. */ cgckhash(&acg); wtfs(fsbtodb(&sblock, cgtod(&sblock, cylno)), (size_t)sblock.fs_cgsize, (void *)&acg, fso, Nflag); DBG_PRINT0("jcg written\n"); DBG_DUMP_CG(&sblock, "new joining cg", &acg); DBG_LEAVE; return; } /* * Here we update the location of the cylinder summary. We have two possible * ways of growing the cylinder summary: * (1) We can try to grow the summary in the current location, and relocate * possibly used blocks within the current cylinder group. * (2) Alternatively we can relocate the whole cylinder summary to the first * new completely empty cylinder group. Once the cylinder summary is no * longer in the beginning of the first cylinder group you should never * use a version of fsck which is not aware of the possibility to have * this structure in a non standard place. * Option (2) is considered to be less intrusive to the structure of the file- * system, so that's the one being used. */ static void updcsloc(time_t modtime, int fsi, int fso, unsigned int Nflag) { DBG_FUNC("updcsloc") struct csum *cs; int ocscg, ncscg; ufs2_daddr_t d; int lcs = 0; int block; DBG_ENTER; if (howmany(sblock.fs_cssize, sblock.fs_fsize) == howmany(osblock.fs_cssize, osblock.fs_fsize)) { /* * No new fragment needed. */ DBG_LEAVE; return; } ocscg = dtog(&osblock, osblock.fs_csaddr); cs = fscs + ocscg; /* * Read original cylinder group from disk, and make a copy. * XXX If Nflag is set in some very rare cases we now miss * some changes done in updjcg by reading the unmodified * block from disk. */ rdfs(fsbtodb(&osblock, cgtod(&osblock, ocscg)), (size_t)osblock.fs_cgsize, (void *)&aocg, fsi); DBG_PRINT0("oscg read\n"); DBG_DUMP_CG(&sblock, "old summary cg", &aocg); memcpy((void *)&cgun1, (void *)&cgun2, sizeof(cgun2)); /* * Touch the cylinder group, set up local variables needed later * and update the superblock. */ acg.cg_time = modtime; /* * XXX In the case of having active snapshots we may need much more * blocks for the copy on write. We need each block twice, and * also up to 8*3 blocks for indirect blocks for all possible * references. */ /* * There is not enough space in the old cylinder group to * relocate all blocks as needed, so we relocate the whole * cylinder group summary to a new group. We try to use the * first complete new cylinder group just created. Within the * cylinder group we align the area immediately after the * cylinder group information location in order to be as * close as possible to the original implementation of ffs. * * First we have to make sure we'll find enough space in the * new cylinder group. If not, then we currently give up. * We start with freeing everything which was used by the * fragments of the old cylinder summary in the current group. * Now we write back the group meta data, read in the needed * meta data from the new cylinder group, and start allocating * within that group. Here we can assume, the group to be * completely empty. Which makes the handling of fragments and * clusters a lot easier. */ DBG_TRC; if (sblock.fs_ncg - osblock.fs_ncg < 2) errx(2, "panic: not enough space"); /* * Point "d" to the first fragment not used by the cylinder * summary. */ d = osblock.fs_csaddr + (osblock.fs_cssize / osblock.fs_fsize); /* * Set up last cluster size ("lcs") already here. Calculate * the size for the trailing cluster just behind where "d" * points to. */ if (sblock.fs_contigsumsize > 0) { for (block = howmany(d % sblock.fs_fpg, sblock.fs_frag), lcs = 0; lcs < sblock.fs_contigsumsize; block++, lcs++) { if (isclr(cg_clustersfree(&acg), block)) break; } } /* * Point "d" to the last frag used by the cylinder summary. */ d--; DBG_PRINT1("d=%jd\n", (intmax_t)d); if ((d + 1) % sblock.fs_frag) { /* * The end of the cylinder summary is not a complete * block. */ DBG_TRC; frag_adjust(d % sblock.fs_fpg, -1); for (; (d + 1) % sblock.fs_frag; d--) { DBG_PRINT1("d=%jd\n", (intmax_t)d); setbit(cg_blksfree(&acg), d % sblock.fs_fpg); acg.cg_cs.cs_nffree++; sblock.fs_cstotal.cs_nffree++; } /* * Point "d" to the last fragment of the last * (incomplete) block of the cylinder summary. */ d++; frag_adjust(d % sblock.fs_fpg, 1); if (isblock(&sblock, cg_blksfree(&acg), (d % sblock.fs_fpg) / sblock.fs_frag)) { DBG_PRINT1("d=%jd\n", (intmax_t)d); acg.cg_cs.cs_nffree -= sblock.fs_frag; acg.cg_cs.cs_nbfree++; sblock.fs_cstotal.cs_nffree -= sblock.fs_frag; sblock.fs_cstotal.cs_nbfree++; if (sblock.fs_contigsumsize > 0) { setbit(cg_clustersfree(&acg), (d % sblock.fs_fpg) / sblock.fs_frag); if (lcs < sblock.fs_contigsumsize) { if (lcs) cg_clustersum(&acg)[lcs]--; lcs++; cg_clustersum(&acg)[lcs]++; } } } /* * Point "d" to the first fragment of the block before * the last incomplete block. */ d--; } DBG_PRINT1("d=%jd\n", (intmax_t)d); for (d = rounddown(d, sblock.fs_frag); d >= osblock.fs_csaddr; d -= sblock.fs_frag) { DBG_TRC; DBG_PRINT1("d=%jd\n", (intmax_t)d); setblock(&sblock, cg_blksfree(&acg), (d % sblock.fs_fpg) / sblock.fs_frag); acg.cg_cs.cs_nbfree++; sblock.fs_cstotal.cs_nbfree++; if (sblock.fs_contigsumsize > 0) { setbit(cg_clustersfree(&acg), (d % sblock.fs_fpg) / sblock.fs_frag); /* * The last cluster size is already set up. */ if (lcs < sblock.fs_contigsumsize) { if (lcs) cg_clustersum(&acg)[lcs]--; lcs++; cg_clustersum(&acg)[lcs]++; } } } *cs = acg.cg_cs; /* * Now write the former cylinder group containing the cylinder * summary back to disk. */ wtfs(fsbtodb(&sblock, cgtod(&sblock, ocscg)), (size_t)sblock.fs_cgsize, (void *)&acg, fso, Nflag); DBG_PRINT0("oscg written\n"); DBG_DUMP_CG(&sblock, "old summary cg", &acg); /* * Find the beginning of the new cylinder group containing the * cylinder summary. */ sblock.fs_csaddr = cgdmin(&sblock, osblock.fs_ncg); ncscg = dtog(&sblock, sblock.fs_csaddr); cs = fscs + ncscg; /* * If Nflag is specified, we would now read random data instead * of an empty cg structure from disk. So we can't simulate that * part for now. */ if (Nflag) { DBG_PRINT0("nscg update skipped\n"); DBG_LEAVE; return; } /* * Read the future cylinder group containing the cylinder * summary from disk, and make a copy. */ rdfs(fsbtodb(&sblock, cgtod(&sblock, ncscg)), (size_t)sblock.fs_cgsize, (void *)&aocg, fsi); DBG_PRINT0("nscg read\n"); DBG_DUMP_CG(&sblock, "new summary cg", &aocg); memcpy((void *)&cgun1, (void *)&cgun2, sizeof(cgun2)); /* * Allocate all complete blocks used by the new cylinder * summary. */ for (d = sblock.fs_csaddr; d + sblock.fs_frag <= sblock.fs_csaddr + (sblock.fs_cssize / sblock.fs_fsize); d += sblock.fs_frag) { clrblock(&sblock, cg_blksfree(&acg), (d % sblock.fs_fpg) / sblock.fs_frag); acg.cg_cs.cs_nbfree--; sblock.fs_cstotal.cs_nbfree--; if (sblock.fs_contigsumsize > 0) { clrbit(cg_clustersfree(&acg), (d % sblock.fs_fpg) / sblock.fs_frag); } } /* * Allocate all fragments used by the cylinder summary in the * last block. */ if (d < sblock.fs_csaddr + (sblock.fs_cssize / sblock.fs_fsize)) { for (; d - sblock.fs_csaddr < sblock.fs_cssize/sblock.fs_fsize; d++) { clrbit(cg_blksfree(&acg), d % sblock.fs_fpg); acg.cg_cs.cs_nffree--; sblock.fs_cstotal.cs_nffree--; } acg.cg_cs.cs_nbfree--; acg.cg_cs.cs_nffree += sblock.fs_frag; sblock.fs_cstotal.cs_nbfree--; sblock.fs_cstotal.cs_nffree += sblock.fs_frag; if (sblock.fs_contigsumsize > 0) clrbit(cg_clustersfree(&acg), (d % sblock.fs_fpg) / sblock.fs_frag); frag_adjust(d % sblock.fs_fpg, 1); } /* * XXX Handle the cluster statistics here in the case this * cylinder group is now almost full, and the remaining * space is less then the maximum cluster size. This is * probably not needed, as you would hardly find a file * system which has only MAXCSBUFS+FS_MAXCONTIG of free * space right behind the cylinder group information in * any new cylinder group. */ /* * Update our statistics in the cylinder summary. */ *cs = acg.cg_cs; /* * Write the new cylinder group containing the cylinder summary * back to disk. */ wtfs(fsbtodb(&sblock, cgtod(&sblock, ncscg)), (size_t)sblock.fs_cgsize, (void *)&acg, fso, Nflag); DBG_PRINT0("nscg written\n"); DBG_DUMP_CG(&sblock, "new summary cg", &acg); DBG_LEAVE; return; } /* * Here we read some block(s) from disk. */ static void rdfs(ufs2_daddr_t bno, size_t size, void *bf, int fsi) { DBG_FUNC("rdfs") ssize_t n; DBG_ENTER; if (bno < 0) err(32, "rdfs: attempting to read negative block number"); if (lseek(fsi, (off_t)bno * DEV_BSIZE, 0) < 0) err(33, "rdfs: seek error: %jd", (intmax_t)bno); n = read(fsi, bf, size); if (n != (ssize_t)size) err(34, "rdfs: read error: %jd", (intmax_t)bno); DBG_LEAVE; return; } /* * Here we write some block(s) to disk. */ static void wtfs(ufs2_daddr_t bno, size_t size, void *bf, int fso, unsigned int Nflag) { DBG_FUNC("wtfs") ssize_t n; DBG_ENTER; if (Nflag) { DBG_LEAVE; return; } if (lseek(fso, (off_t)bno * DEV_BSIZE, SEEK_SET) < 0) err(35, "wtfs: seek error: %ld", (long)bno); n = write(fso, bf, size); if (n != (ssize_t)size) err(36, "wtfs: write error: %ld", (long)bno); DBG_LEAVE; return; } /* * Here we check if all frags of a block are free. For more details again * please see the source of newfs(8), as this function is taken over almost * unchanged. */ static int isblock(struct fs *fs, unsigned char *cp, int h) { DBG_FUNC("isblock") unsigned char mask; DBG_ENTER; switch (fs->fs_frag) { case 8: DBG_LEAVE; return (cp[h] == 0xff); case 4: mask = 0x0f << ((h & 0x1) << 2); DBG_LEAVE; return ((cp[h >> 1] & mask) == mask); case 2: mask = 0x03 << ((h & 0x3) << 1); DBG_LEAVE; return ((cp[h >> 2] & mask) == mask); case 1: mask = 0x01 << (h & 0x7); DBG_LEAVE; return ((cp[h >> 3] & mask) == mask); default: fprintf(stderr, "isblock bad fs_frag %d\n", fs->fs_frag); DBG_LEAVE; return (0); } } /* * Here we allocate a complete block in the block map. For more details again * please see the source of newfs(8), as this function is taken over almost * unchanged. */ static void clrblock(struct fs *fs, unsigned char *cp, int h) { DBG_FUNC("clrblock") DBG_ENTER; switch ((fs)->fs_frag) { case 8: cp[h] = 0; break; case 4: cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); break; case 2: cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); break; case 1: cp[h >> 3] &= ~(0x01 << (h & 0x7)); break; default: warnx("clrblock bad fs_frag %d", fs->fs_frag); break; } DBG_LEAVE; return; } /* * Here we free a complete block in the free block map. For more details again * please see the source of newfs(8), as this function is taken over almost * unchanged. */ static void setblock(struct fs *fs, unsigned char *cp, int h) { DBG_FUNC("setblock") DBG_ENTER; switch (fs->fs_frag) { case 8: cp[h] = 0xff; break; case 4: cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); break; case 2: cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); break; case 1: cp[h >> 3] |= (0x01 << (h & 0x7)); break; default: warnx("setblock bad fs_frag %d", fs->fs_frag); break; } DBG_LEAVE; return; } /* * Figure out how many lines our current terminal has. For more details again * please see the source of newfs(8), as this function is taken over almost * unchanged. */ static int charsperline(void) { DBG_FUNC("charsperline") int columns; char *cp; struct winsize ws; DBG_ENTER; columns = 0; if (ioctl(0, TIOCGWINSZ, &ws) != -1) columns = ws.ws_col; if (columns == 0 && (cp = getenv("COLUMNS"))) columns = atoi(cp); if (columns == 0) columns = 80; /* last resort */ DBG_LEAVE; return (columns); } static int is_dev(const char *name) { struct stat devstat; if (stat(name, &devstat) != 0) return (0); if (!S_ISCHR(devstat.st_mode)) return (0); return (1); } /* * Return mountpoint on which the device is currently mounted. */ static const struct statfs * dev_to_statfs(const char *dev) { struct stat devstat, mntdevstat; struct statfs *mntbuf, *statfsp; char device[MAXPATHLEN]; char *mntdevname; int i, mntsize; /* * First check the mounted filesystems. */ if (stat(dev, &devstat) != 0) return (NULL); if (!S_ISCHR(devstat.st_mode) && !S_ISBLK(devstat.st_mode)) return (NULL); mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) { statfsp = &mntbuf[i]; mntdevname = statfsp->f_mntfromname; if (*mntdevname != '/') { strcpy(device, _PATH_DEV); strcat(device, mntdevname); mntdevname = device; } if (stat(mntdevname, &mntdevstat) == 0 && mntdevstat.st_rdev == devstat.st_rdev) return (statfsp); } return (NULL); } static const char * mountpoint_to_dev(const char *mountpoint) { struct statfs *mntbuf, *statfsp; struct fstab *fs; int i, mntsize; /* * First check the mounted filesystems. */ mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) { statfsp = &mntbuf[i]; if (strcmp(statfsp->f_mntonname, mountpoint) == 0) return (statfsp->f_mntfromname); } /* * Check the fstab. */ fs = getfsfile(mountpoint); if (fs != NULL) return (fs->fs_spec); return (NULL); } static const char * getdev(const char *name) { static char device[MAXPATHLEN]; const char *cp, *dev; if (is_dev(name)) return (name); cp = strrchr(name, '/'); if (cp == NULL) { snprintf(device, sizeof(device), "%s%s", _PATH_DEV, name); if (is_dev(device)) return (device); } dev = mountpoint_to_dev(name); if (dev != NULL && is_dev(dev)) return (dev); return (NULL); } /* * growfs(8) is a utility which allows to increase the size of an existing * ufs file system. Currently this can only be done on unmounted file system. * It recognizes some command line options to specify the new desired size, * and it does some basic checkings. The old file system size is determined * and after some more checks like we can really access the new last block * on the disk etc. we calculate the new parameters for the superblock. After * having done this we just call growfs() which will do the work. * We still have to provide support for snapshots. Therefore we first have to * understand what data structures are always replicated in the snapshot on * creation, for all other blocks we touch during our procedure, we have to * keep the old blocks unchanged somewhere available for the snapshots. If we * are lucky, then we only have to handle our blocks to be relocated in that * way. * Also we have to consider in what order we actually update the critical * data structures of the file system to make sure, that in case of a disaster * fsck(8) is still able to restore any lost data. * The foreseen last step then will be to provide for growing even mounted * file systems. There we have to extend the mount() system call to provide * userland access to the file system locking facility. */ int main(int argc, char **argv) { DBG_FUNC("main") + struct fs *fs; const char *device; const struct statfs *statfsp; uint64_t size = 0; off_t mediasize; - int error, i, j, fsi, fso, ch, Nflag = 0, yflag = 0; + int error, j, fsi, fso, ch, ret, Nflag = 0, yflag = 0; char *p, reply[5], oldsizebuf[6], newsizebuf[6]; void *testbuf; DBG_ENTER; while ((ch = getopt(argc, argv, "Ns:vy")) != -1) { switch(ch) { case 'N': Nflag = 1; break; case 's': size = (off_t)strtoumax(optarg, &p, 0); if (p == NULL || *p == '\0') size *= DEV_BSIZE; else if (*p == 'b' || *p == 'B') ; /* do nothing */ else if (*p == 'k' || *p == 'K') size <<= 10; else if (*p == 'm' || *p == 'M') size <<= 20; else if (*p == 'g' || *p == 'G') size <<= 30; else if (*p == 't' || *p == 'T') { size <<= 30; size <<= 10; } else errx(1, "unknown suffix on -s argument"); break; case 'v': /* for compatibility to newfs */ break; case 'y': yflag = 1; break; case '?': /* FALLTHROUGH */ default: usage(); } } argc -= optind; argv += optind; if (argc != 1) usage(); /* * Now try to guess the device name. */ device = getdev(*argv); if (device == NULL) errx(1, "cannot find special device for %s", *argv); statfsp = dev_to_statfs(device); fsi = open(device, O_RDONLY); if (fsi < 0) err(1, "%s", device); /* * Try to guess the slice size if not specified. */ if (ioctl(fsi, DIOCGMEDIASIZE, &mediasize) == -1) err(1,"DIOCGMEDIASIZE"); /* * Check if that partition is suitable for growing a file system. */ if (mediasize < 1) errx(1, "partition is unavailable"); /* * Read the current superblock, and take a backup. */ - for (i = 0; sblock_try[i] != -1; i++) { - sblockloc = sblock_try[i] / DEV_BSIZE; - rdfs(sblockloc, (size_t)SBLOCKSIZE, (void *)&(osblock), fsi); - if ((osblock.fs_magic == FS_UFS1_MAGIC || - (osblock.fs_magic == FS_UFS2_MAGIC && - osblock.fs_sblockloc == sblock_try[i])) && - osblock.fs_bsize <= MAXBSIZE && - osblock.fs_bsize >= (int32_t) sizeof(struct fs)) - break; + if ((ret = sbget(fsi, &fs, -1)) != 0) { + switch (ret) { + case ENOENT: + errx(1, "superblock not recognized"); + default: + errc(1, ret, "unable to read superblock"); + } } - if (sblock_try[i] == -1) - errx(1, "superblock not recognized"); - memcpy((void *)&fsun1, (void *)&fsun2, sizeof(fsun2)); + memcpy(&osblock, fs, fs->fs_sbsize); + free(fs); + memcpy((void *)&fsun1, (void *)&fsun2, osblock.fs_sbsize); DBG_OPEN("/tmp/growfs.debug"); /* already here we need a superblock */ DBG_DUMP_FS(&sblock, "old sblock"); /* * Determine size to grow to. Default to the device size. */ if (size == 0) size = mediasize; else { if (size > (uint64_t)mediasize) { humanize_number(oldsizebuf, sizeof(oldsizebuf), size, "B", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); humanize_number(newsizebuf, sizeof(newsizebuf), mediasize, "B", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); errx(1, "requested size %s is larger " "than the available %s", oldsizebuf, newsizebuf); } } /* * Make sure the new size is a multiple of fs_fsize; /dev/ufssuspend * only supports fragment-aligned IO requests. */ size -= size % osblock.fs_fsize; if (size <= (uint64_t)(osblock.fs_size * osblock.fs_fsize)) { humanize_number(oldsizebuf, sizeof(oldsizebuf), osblock.fs_size * osblock.fs_fsize, "B", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); humanize_number(newsizebuf, sizeof(newsizebuf), size, "B", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); errx(1, "requested size %s is not larger than the current " "filesystem size %s", newsizebuf, oldsizebuf); } sblock.fs_size = dbtofsb(&osblock, size / DEV_BSIZE); sblock.fs_providersize = dbtofsb(&osblock, mediasize / DEV_BSIZE); /* * Are we really growing? */ if (osblock.fs_size >= sblock.fs_size) { errx(1, "we are not growing (%jd->%jd)", (intmax_t)osblock.fs_size, (intmax_t)sblock.fs_size); } /* * Check if we find an active snapshot. */ if (yflag == 0) { for (j = 0; j < FSMAXSNAP; j++) { if (sblock.fs_snapinum[j]) { errx(1, "active snapshot found in file system; " "please remove all snapshots before " "using growfs"); } if (!sblock.fs_snapinum[j]) /* list is dense */ break; } } if (yflag == 0 && Nflag == 0) { if (statfsp != NULL && (statfsp->f_flags & MNT_RDONLY) == 0) printf("Device is mounted read-write; resizing will " "result in temporary write suspension for %s.\n", statfsp->f_mntonname); printf("It's strongly recommended to make a backup " "before growing the file system.\n" "OK to grow filesystem on %s", device); if (statfsp != NULL) printf(", mounted on %s,", statfsp->f_mntonname); humanize_number(oldsizebuf, sizeof(oldsizebuf), osblock.fs_size * osblock.fs_fsize, "B", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); humanize_number(newsizebuf, sizeof(newsizebuf), sblock.fs_size * sblock.fs_fsize, "B", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); printf(" from %s to %s? [yes/no] ", oldsizebuf, newsizebuf); fflush(stdout); fgets(reply, (int)sizeof(reply), stdin); if (strcasecmp(reply, "yes\n")){ printf("Response other than \"yes\"; aborting\n"); exit(0); } } /* * Try to access our device for writing. If it's not mounted, * or mounted read-only, simply open it; otherwise, use UFS * suspension mechanism. */ if (Nflag) { fso = -1; } else { if (statfsp != NULL && (statfsp->f_flags & MNT_RDONLY) == 0) { fso = open(_PATH_UFSSUSPEND, O_RDWR); if (fso == -1) err(1, "unable to open %s", _PATH_UFSSUSPEND); error = ioctl(fso, UFSSUSPEND, &statfsp->f_fsid); if (error != 0) err(1, "UFSSUSPEND"); } else { fso = open(device, O_WRONLY); if (fso < 0) err(1, "%s", device); } } /* * Try to access our new last block in the file system. */ testbuf = malloc(sblock.fs_fsize); if (testbuf == NULL) err(1, "malloc"); rdfs((ufs2_daddr_t)((size - sblock.fs_fsize) / DEV_BSIZE), sblock.fs_fsize, testbuf, fsi); wtfs((ufs2_daddr_t)((size - sblock.fs_fsize) / DEV_BSIZE), sblock.fs_fsize, testbuf, fso, Nflag); free(testbuf); /* * Now calculate new superblock values and check for reasonable * bound for new file system size: * fs_size: is derived from user input * fs_dsize: should get updated in the routines creating or * updating the cylinder groups on the fly * fs_cstotal: should get updated in the routines creating or * updating the cylinder groups */ /* * Update the number of cylinders and cylinder groups in the file system. */ if (sblock.fs_magic == FS_UFS1_MAGIC) { sblock.fs_old_ncyl = sblock.fs_size * sblock.fs_old_nspf / sblock.fs_old_spc; if (sblock.fs_size * sblock.fs_old_nspf > sblock.fs_old_ncyl * sblock.fs_old_spc) sblock.fs_old_ncyl++; } sblock.fs_ncg = howmany(sblock.fs_size, sblock.fs_fpg); /* * Allocate last cylinder group only if there is enough room * for at least one data block. */ if (sblock.fs_size % sblock.fs_fpg != 0 && sblock.fs_size <= cgdmin(&sblock, sblock.fs_ncg - 1)) { humanize_number(oldsizebuf, sizeof(oldsizebuf), (sblock.fs_size % sblock.fs_fpg) * sblock.fs_fsize, "B", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); warnx("no room to allocate last cylinder group; " "leaving %s unused", oldsizebuf); sblock.fs_ncg--; if (sblock.fs_magic == FS_UFS1_MAGIC) sblock.fs_old_ncyl = sblock.fs_ncg * sblock.fs_old_cpg; sblock.fs_size = sblock.fs_ncg * sblock.fs_fpg; } /* * Update the space for the cylinder group summary information in the * respective cylinder group data area. */ sblock.fs_cssize = fragroundup(&sblock, sblock.fs_ncg * sizeof(struct csum)); if (osblock.fs_size >= sblock.fs_size) errx(1, "not enough new space"); DBG_PRINT0("sblock calculated\n"); /* * Ok, everything prepared, so now let's do the tricks. */ growfs(fsi, fso, Nflag); close(fsi); if (fso > -1) { if (statfsp != NULL && (statfsp->f_flags & MNT_RDONLY) == 0) { error = ioctl(fso, UFSRESUME); if (error != 0) err(1, "UFSRESUME"); } error = close(fso); if (error != 0) err(1, "close"); if (statfsp != NULL && (statfsp->f_flags & MNT_RDONLY) != 0) mount_reload(statfsp); } DBG_CLOSE; DBG_LEAVE; return (0); } /* * Dump a line of usage. */ static void usage(void) { DBG_FUNC("usage") DBG_ENTER; fprintf(stderr, "usage: growfs [-Ny] [-s size] special | filesystem\n"); DBG_LEAVE; exit(1); } /* * This updates most parameters and the bitmap related to cluster. We have to * assume that sblock, osblock, acg are set up. */ static void updclst(int block) { DBG_FUNC("updclst") static int lcs = 0; DBG_ENTER; if (sblock.fs_contigsumsize < 1) /* no clustering */ return; /* * update cluster allocation map */ setbit(cg_clustersfree(&acg), block); /* * update cluster summary table */ if (!lcs) { /* * calculate size for the trailing cluster */ for (block--; lcs < sblock.fs_contigsumsize; block--, lcs++ ) { if (isclr(cg_clustersfree(&acg), block)) break; } } if (lcs < sblock.fs_contigsumsize) { if (lcs) cg_clustersum(&acg)[lcs]--; lcs++; cg_clustersum(&acg)[lcs]++; } DBG_LEAVE; return; } static void mount_reload(const struct statfs *stfs) { char errmsg[255]; struct iovec *iov; int iovlen; iov = NULL; iovlen = 0; *errmsg = '\0'; build_iovec(&iov, &iovlen, "fstype", __DECONST(char *, "ffs"), 4); build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, stfs->f_mntonname), (size_t)-1); build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); build_iovec(&iov, &iovlen, "update", NULL, 0); build_iovec(&iov, &iovlen, "reload", NULL, 0); if (nmount(iov, iovlen, stfs->f_flags) < 0) { errmsg[sizeof(errmsg) - 1] = '\0'; err(9, "%s: cannot reload filesystem%s%s", stfs->f_mntonname, *errmsg != '\0' ? ": " : "", errmsg); } } /* * Calculate the check-hash of the cylinder group. */ static void cgckhash(struct cg *cgp) { if ((sblock.fs_metackhash & CK_CYLGRP) == 0) return; cgp->cg_ckhash = 0; cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); } Index: head/sbin/newfs/mkfs.c =================================================================== --- head/sbin/newfs/mkfs.c (revision 328425) +++ head/sbin/newfs/mkfs.c (revision 328426) @@ -1,1209 +1,1202 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program. * * Copyright (c) 1980, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)mkfs.c 8.11 (Berkeley) 5/3/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #define IN_RTLD /* So we pickup the P_OSREL defines */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "newfs.h" /* * make file system for cylinder-group style file systems */ #define UMASK 0755 #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static struct csum *fscs; #define sblock disk.d_fs #define acg disk.d_cg union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(dp, field) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) static caddr_t iobuf; static long iobufsize; static ufs2_daddr_t alloc(int size, int mode); static int charsperline(void); static void clrblock(struct fs *, unsigned char *, int); static void fsinit(time_t); static int ilog2(int); static void initcg(int, time_t); static int isblock(struct fs *, unsigned char *, int); static void iput(union dinode *, ino_t); static int makedir(struct direct *, int); static void setblock(struct fs *, unsigned char *, int); static void wtfs(ufs2_daddr_t, int, char *); static void cgckhash(struct cg *); static u_int32_t newfs_random(void); -static int -do_sbwrite(struct uufsd *disk) -{ - if (!disk->d_sblock) - disk->d_sblock = disk->d_fs.fs_sblockloc / disk->d_bsize; - return (pwrite(disk->d_fd, &disk->d_fs, SBLOCKSIZE, (off_t)((part_ofs + - disk->d_sblock) * disk->d_bsize))); -} - void mkfs(struct partition *pp, char *fsys) { int fragsperinode, optimalfpg, origdensity, minfpg, lastminfpg; long i, j, csfrags; uint cg; time_t utime; quad_t sizepb; int width; ino_t maxinum; int minfragsperinode; /* minimum ratio of frags to inodes */ char tmpbuf[100]; /* XXX this will break in about 2,500 years */ struct fsrecovery *fsr; char *fsrbuf; union { struct fs fdummy; char cdummy[SBLOCKSIZE]; } dummy; #define fsdummy dummy.fdummy #define chdummy dummy.cdummy /* * Our blocks == sector size, and the version of UFS we are using is * specified by Oflag. */ disk.d_bsize = sectorsize; disk.d_ufs = Oflag; if (Rflag) utime = 1000000000; else time(&utime); sblock.fs_old_flags = FS_FLAGS_UPDATED; sblock.fs_flags = 0; if (Uflag) sblock.fs_flags |= FS_DOSOFTDEP; if (Lflag) strlcpy(sblock.fs_volname, volumelabel, MAXVOLLEN); if (Jflag) sblock.fs_flags |= FS_GJOURNAL; if (lflag) sblock.fs_flags |= FS_MULTILABEL; if (tflag) sblock.fs_flags |= FS_TRIM; /* * Validate the given file system size. * Verify that its last block can actually be accessed. * Convert to file system fragment sized units. */ if (fssize <= 0) { printf("preposterous size %jd\n", (intmax_t)fssize); exit(13); } wtfs(fssize - (realsectorsize / DEV_BSIZE), realsectorsize, (char *)&sblock); /* * collect and verify the file system density info */ sblock.fs_avgfilesize = avgfilesize; sblock.fs_avgfpdir = avgfilesperdir; if (sblock.fs_avgfilesize <= 0) printf("illegal expected average file size %d\n", sblock.fs_avgfilesize), exit(14); if (sblock.fs_avgfpdir <= 0) printf("illegal expected number of files per directory %d\n", sblock.fs_avgfpdir), exit(15); restart: /* * collect and verify the block and fragment sizes */ sblock.fs_bsize = bsize; sblock.fs_fsize = fsize; if (!POWEROF2(sblock.fs_bsize)) { printf("block size must be a power of 2, not %d\n", sblock.fs_bsize); exit(16); } if (!POWEROF2(sblock.fs_fsize)) { printf("fragment size must be a power of 2, not %d\n", sblock.fs_fsize); exit(17); } if (sblock.fs_fsize < sectorsize) { printf("increasing fragment size from %d to sector size (%d)\n", sblock.fs_fsize, sectorsize); sblock.fs_fsize = sectorsize; } if (sblock.fs_bsize > MAXBSIZE) { printf("decreasing block size from %d to maximum (%d)\n", sblock.fs_bsize, MAXBSIZE); sblock.fs_bsize = MAXBSIZE; } if (sblock.fs_bsize < MINBSIZE) { printf("increasing block size from %d to minimum (%d)\n", sblock.fs_bsize, MINBSIZE); sblock.fs_bsize = MINBSIZE; } if (sblock.fs_fsize > MAXBSIZE) { printf("decreasing fragment size from %d to maximum (%d)\n", sblock.fs_fsize, MAXBSIZE); sblock.fs_fsize = MAXBSIZE; } if (sblock.fs_bsize < sblock.fs_fsize) { printf("increasing block size from %d to fragment size (%d)\n", sblock.fs_bsize, sblock.fs_fsize); sblock.fs_bsize = sblock.fs_fsize; } if (sblock.fs_fsize * MAXFRAG < sblock.fs_bsize) { printf( "increasing fragment size from %d to block size / %d (%d)\n", sblock.fs_fsize, MAXFRAG, sblock.fs_bsize / MAXFRAG); sblock.fs_fsize = sblock.fs_bsize / MAXFRAG; } if (maxbsize == 0) maxbsize = bsize; if (maxbsize < bsize || !POWEROF2(maxbsize)) { sblock.fs_maxbsize = sblock.fs_bsize; printf("Extent size set to %d\n", sblock.fs_maxbsize); } else if (sblock.fs_maxbsize > FS_MAXCONTIG * sblock.fs_bsize) { sblock.fs_maxbsize = FS_MAXCONTIG * sblock.fs_bsize; printf("Extent size reduced to %d\n", sblock.fs_maxbsize); } else { sblock.fs_maxbsize = maxbsize; } /* * Maxcontig sets the default for the maximum number of blocks * that may be allocated sequentially. With file system clustering * it is possible to allocate contiguous blocks up to the maximum * transfer size permitted by the controller or buffering. */ if (maxcontig == 0) maxcontig = MAX(1, MAXPHYS / bsize); sblock.fs_maxcontig = maxcontig; if (sblock.fs_maxcontig < sblock.fs_maxbsize / sblock.fs_bsize) { sblock.fs_maxcontig = sblock.fs_maxbsize / sblock.fs_bsize; printf("Maxcontig raised to %d\n", sblock.fs_maxbsize); } if (sblock.fs_maxcontig > 1) sblock.fs_contigsumsize = MIN(sblock.fs_maxcontig,FS_MAXCONTIG); sblock.fs_bmask = ~(sblock.fs_bsize - 1); sblock.fs_fmask = ~(sblock.fs_fsize - 1); sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; sblock.fs_bshift = ilog2(sblock.fs_bsize); sblock.fs_fshift = ilog2(sblock.fs_fsize); sblock.fs_frag = numfrags(&sblock, sblock.fs_bsize); sblock.fs_fragshift = ilog2(sblock.fs_frag); if (sblock.fs_frag > MAXFRAG) { printf("fragment size %d is still too small (can't happen)\n", sblock.fs_bsize / MAXFRAG); exit(21); } sblock.fs_fsbtodb = ilog2(sblock.fs_fsize / sectorsize); sblock.fs_size = fssize = dbtofsb(&sblock, fssize); sblock.fs_providersize = dbtofsb(&sblock, mediasize / sectorsize); /* * Before the filesystem is finally initialized, mark it * as incompletely initialized. */ sblock.fs_magic = FS_BAD_MAGIC; if (Oflag == 1) { sblock.fs_sblockloc = SBLOCK_UFS1; + sblock.fs_sblockactualloc = SBLOCK_UFS1; sblock.fs_nindir = sblock.fs_bsize / sizeof(ufs1_daddr_t); sblock.fs_inopb = sblock.fs_bsize / sizeof(struct ufs1_dinode); sblock.fs_maxsymlinklen = ((UFS_NDADDR + UFS_NIADDR) * sizeof(ufs1_daddr_t)); sblock.fs_old_inodefmt = FS_44INODEFMT; sblock.fs_old_cgoffset = 0; sblock.fs_old_cgmask = 0xffffffff; sblock.fs_old_size = sblock.fs_size; sblock.fs_old_rotdelay = 0; sblock.fs_old_rps = 60; sblock.fs_old_nspf = sblock.fs_fsize / sectorsize; sblock.fs_old_cpg = 1; sblock.fs_old_interleave = 1; sblock.fs_old_trackskew = 0; sblock.fs_old_cpc = 0; sblock.fs_old_postblformat = 1; sblock.fs_old_nrpos = 1; } else { sblock.fs_sblockloc = SBLOCK_UFS2; + sblock.fs_sblockactualloc = SBLOCK_UFS2; sblock.fs_nindir = sblock.fs_bsize / sizeof(ufs2_daddr_t); sblock.fs_inopb = sblock.fs_bsize / sizeof(struct ufs2_dinode); sblock.fs_maxsymlinklen = ((UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t)); } sblock.fs_sblkno = roundup(howmany(sblock.fs_sblockloc + SBLOCKSIZE, sblock.fs_fsize), sblock.fs_frag); sblock.fs_cblkno = sblock.fs_sblkno + roundup(howmany(SBLOCKSIZE, sblock.fs_fsize), sblock.fs_frag); sblock.fs_iblkno = sblock.fs_cblkno + sblock.fs_frag; sblock.fs_maxfilesize = sblock.fs_bsize * UFS_NDADDR - 1; for (sizepb = sblock.fs_bsize, i = 0; i < UFS_NIADDR; i++) { sizepb *= NINDIR(&sblock); sblock.fs_maxfilesize += sizepb; } /* * It's impossible to create a snapshot in case that fs_maxfilesize * is smaller than the fssize. */ if (sblock.fs_maxfilesize < (u_quad_t)fssize) { warnx("WARNING: You will be unable to create snapshots on this " "file system. Correct by using a larger blocksize."); } /* * Calculate the number of blocks to put into each cylinder group. * * This algorithm selects the number of blocks per cylinder * group. The first goal is to have at least enough data blocks * in each cylinder group to meet the density requirement. Once * this goal is achieved we try to expand to have at least * MINCYLGRPS cylinder groups. Once this goal is achieved, we * pack as many blocks into each cylinder group map as will fit. * * We start by calculating the smallest number of blocks that we * can put into each cylinder group. If this is too big, we reduce * the density until it fits. */ maxinum = (((int64_t)(1)) << 32) - INOPB(&sblock); minfragsperinode = 1 + fssize / maxinum; if (density == 0) { density = MAX(NFPI, minfragsperinode) * fsize; } else if (density < minfragsperinode * fsize) { origdensity = density; density = minfragsperinode * fsize; fprintf(stderr, "density increased from %d to %d\n", origdensity, density); } origdensity = density; for (;;) { fragsperinode = MAX(numfrags(&sblock, density), 1); if (fragsperinode < minfragsperinode) { bsize <<= 1; fsize <<= 1; printf("Block size too small for a file system %s %d\n", "of this size. Increasing blocksize to", bsize); goto restart; } minfpg = fragsperinode * INOPB(&sblock); if (minfpg > sblock.fs_size) minfpg = sblock.fs_size; sblock.fs_ipg = INOPB(&sblock); sblock.fs_fpg = roundup(sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock), sblock.fs_frag); if (sblock.fs_fpg < minfpg) sblock.fs_fpg = minfpg; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); sblock.fs_fpg = roundup(sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock), sblock.fs_frag); if (sblock.fs_fpg < minfpg) sblock.fs_fpg = minfpg; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); if (CGSIZE(&sblock) < (unsigned long)sblock.fs_bsize) break; density -= sblock.fs_fsize; } if (density != origdensity) printf("density reduced from %d to %d\n", origdensity, density); /* * Start packing more blocks into the cylinder group until * it cannot grow any larger, the number of cylinder groups * drops below MINCYLGRPS, or we reach the size requested. * For UFS1 inodes per cylinder group are stored in an int16_t * so fs_ipg is limited to 2^15 - 1. */ for ( ; sblock.fs_fpg < maxblkspercg; sblock.fs_fpg += sblock.fs_frag) { sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); if (Oflag > 1 || (Oflag == 1 && sblock.fs_ipg <= 0x7fff)) { if (sblock.fs_size / sblock.fs_fpg < MINCYLGRPS) break; if (CGSIZE(&sblock) < (unsigned long)sblock.fs_bsize) continue; if (CGSIZE(&sblock) == (unsigned long)sblock.fs_bsize) break; } sblock.fs_fpg -= sblock.fs_frag; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); break; } /* * Check to be sure that the last cylinder group has enough blocks * to be viable. If it is too small, reduce the number of blocks * per cylinder group which will have the effect of moving more * blocks into the last cylinder group. */ optimalfpg = sblock.fs_fpg; for (;;) { sblock.fs_ncg = howmany(sblock.fs_size, sblock.fs_fpg); lastminfpg = roundup(sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock), sblock.fs_frag); if (sblock.fs_size < lastminfpg) { printf("Filesystem size %jd < minimum size of %d\n", (intmax_t)sblock.fs_size, lastminfpg); exit(28); } if (sblock.fs_size % sblock.fs_fpg >= lastminfpg || sblock.fs_size % sblock.fs_fpg == 0) break; sblock.fs_fpg -= sblock.fs_frag; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); } if (optimalfpg != sblock.fs_fpg) printf("Reduced frags per cylinder group from %d to %d %s\n", optimalfpg, sblock.fs_fpg, "to enlarge last cyl group"); sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); sblock.fs_dblkno = sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock); if (Oflag == 1) { sblock.fs_old_spc = sblock.fs_fpg * sblock.fs_old_nspf; sblock.fs_old_nsect = sblock.fs_old_spc; sblock.fs_old_npsect = sblock.fs_old_spc; sblock.fs_old_ncyl = sblock.fs_ncg; } /* * fill in remaining fields of the super block */ sblock.fs_csaddr = cgdmin(&sblock, 0); sblock.fs_cssize = fragroundup(&sblock, sblock.fs_ncg * sizeof(struct csum)); fscs = (struct csum *)calloc(1, sblock.fs_cssize); if (fscs == NULL) errx(31, "calloc failed"); sblock.fs_sbsize = fragroundup(&sblock, sizeof(struct fs)); if (sblock.fs_sbsize > SBLOCKSIZE) sblock.fs_sbsize = SBLOCKSIZE; if (sblock.fs_sbsize < realsectorsize) sblock.fs_sbsize = realsectorsize; sblock.fs_minfree = minfree; if (metaspace > 0 && metaspace < sblock.fs_fpg / 2) sblock.fs_metaspace = blknum(&sblock, metaspace); else if (metaspace != -1) /* reserve half of minfree for metadata blocks */ sblock.fs_metaspace = blknum(&sblock, (sblock.fs_fpg * minfree) / 200); if (maxbpg == 0) sblock.fs_maxbpg = MAXBLKPG(sblock.fs_bsize); else sblock.fs_maxbpg = maxbpg; sblock.fs_optim = opt; sblock.fs_cgrotor = 0; sblock.fs_pendingblocks = 0; sblock.fs_pendinginodes = 0; sblock.fs_fmod = 0; sblock.fs_ronly = 0; sblock.fs_state = 0; sblock.fs_clean = 1; sblock.fs_id[0] = (long)utime; sblock.fs_id[1] = newfs_random(); sblock.fs_fsmnt[0] = '\0'; csfrags = howmany(sblock.fs_cssize, sblock.fs_fsize); sblock.fs_dsize = sblock.fs_size - sblock.fs_sblkno - sblock.fs_ncg * (sblock.fs_dblkno - sblock.fs_sblkno); sblock.fs_cstotal.cs_nbfree = fragstoblks(&sblock, sblock.fs_dsize) - howmany(csfrags, sblock.fs_frag); sblock.fs_cstotal.cs_nffree = fragnum(&sblock, sblock.fs_size) + (fragnum(&sblock, csfrags) > 0 ? sblock.fs_frag - fragnum(&sblock, csfrags) : 0); sblock.fs_cstotal.cs_nifree = sblock.fs_ncg * sblock.fs_ipg - UFS_ROOTINO; sblock.fs_cstotal.cs_ndir = 0; sblock.fs_dsize -= csfrags; sblock.fs_time = utime; if (Oflag == 1) { sblock.fs_old_time = utime; sblock.fs_old_dsize = sblock.fs_dsize; sblock.fs_old_csaddr = sblock.fs_csaddr; sblock.fs_old_cstotal.cs_ndir = sblock.fs_cstotal.cs_ndir; sblock.fs_old_cstotal.cs_nbfree = sblock.fs_cstotal.cs_nbfree; sblock.fs_old_cstotal.cs_nifree = sblock.fs_cstotal.cs_nifree; sblock.fs_old_cstotal.cs_nffree = sblock.fs_cstotal.cs_nffree; } /* * Set flags for metadata that is being check-hashed. */ if (Oflag > 1 && getosreldate() >= P_OSREL_CK_CYLGRP) sblock.fs_metackhash = CK_CYLGRP; /* * Dump out summary information about file system. */ # define B2MBFACTOR (1 / (1024.0 * 1024.0)) printf("%s: %.1fMB (%jd sectors) block size %d, fragment size %d\n", fsys, (float)sblock.fs_size * sblock.fs_fsize * B2MBFACTOR, (intmax_t)fsbtodb(&sblock, sblock.fs_size), sblock.fs_bsize, sblock.fs_fsize); printf("\tusing %d cylinder groups of %.2fMB, %d blks, %d inodes.\n", sblock.fs_ncg, (float)sblock.fs_fpg * sblock.fs_fsize * B2MBFACTOR, sblock.fs_fpg / sblock.fs_frag, sblock.fs_ipg); if (sblock.fs_flags & FS_DOSOFTDEP) printf("\twith soft updates\n"); # undef B2MBFACTOR if (Eflag && !Nflag) { printf("Erasing sectors [%jd...%jd]\n", sblock.fs_sblockloc / disk.d_bsize, fsbtodb(&sblock, sblock.fs_size) - 1); berase(&disk, sblock.fs_sblockloc / disk.d_bsize, sblock.fs_size * sblock.fs_fsize - sblock.fs_sblockloc); } /* * Wipe out old UFS1 superblock(s) if necessary. */ if (!Nflag && Oflag != 1 && realsectorsize <= SBLOCK_UFS1) { i = bread(&disk, part_ofs + SBLOCK_UFS1 / disk.d_bsize, chdummy, SBLOCKSIZE); if (i == -1) err(1, "can't read old UFS1 superblock: %s", disk.d_error); if (fsdummy.fs_magic == FS_UFS1_MAGIC) { fsdummy.fs_magic = 0; bwrite(&disk, part_ofs + SBLOCK_UFS1 / disk.d_bsize, chdummy, SBLOCKSIZE); for (cg = 0; cg < fsdummy.fs_ncg; cg++) { if (fsbtodb(&fsdummy, cgsblock(&fsdummy, cg)) > fssize) break; bwrite(&disk, part_ofs + fsbtodb(&fsdummy, cgsblock(&fsdummy, cg)), chdummy, SBLOCKSIZE); } } } if (!Nflag) - do_sbwrite(&disk); + sbput(disk.d_fd, &disk.d_fs, 0); if (Xflag == 1) { printf("** Exiting on Xflag 1\n"); exit(0); } if (Xflag == 2) printf("** Leaving BAD MAGIC on Xflag 2\n"); else sblock.fs_magic = (Oflag != 1) ? FS_UFS2_MAGIC : FS_UFS1_MAGIC; /* * Now build the cylinders group blocks and * then print out indices of cylinder groups. */ printf("super-block backups (for fsck_ffs -b #) at:\n"); i = 0; width = charsperline(); /* - * allocate space for superblock, cylinder group map, and + * Allocate space for cylinder group map and * two sets of inode blocks. */ - if (sblock.fs_bsize < SBLOCKSIZE) - iobufsize = SBLOCKSIZE + 3 * sblock.fs_bsize; - else - iobufsize = 4 * sblock.fs_bsize; + iobufsize = 3 * sblock.fs_bsize; if ((iobuf = calloc(1, iobufsize)) == 0) { printf("Cannot allocate I/O buffer\n"); exit(38); } /* - * Make a copy of the superblock into the buffer that we will be - * writing out in each cylinder group. + * Write out all the cylinder groups and backup superblocks. */ - bcopy((char *)&sblock, iobuf, SBLOCKSIZE); for (cg = 0; cg < sblock.fs_ncg; cg++) { - initcg(cg, utime); + if (!Nflag) + initcg(cg, utime); j = snprintf(tmpbuf, sizeof(tmpbuf), " %jd%s", (intmax_t)fsbtodb(&sblock, cgsblock(&sblock, cg)), cg < (sblock.fs_ncg-1) ? "," : ""); if (j < 0) tmpbuf[j = 0] = '\0'; if (i + j >= width) { printf("\n"); i = 0; } i += j; printf("%s", tmpbuf); fflush(stdout); } printf("\n"); if (Nflag) exit(0); /* * Now construct the initial file system, * then write out the super-block. */ fsinit(utime); if (Oflag == 1) { sblock.fs_old_cstotal.cs_ndir = sblock.fs_cstotal.cs_ndir; sblock.fs_old_cstotal.cs_nbfree = sblock.fs_cstotal.cs_nbfree; sblock.fs_old_cstotal.cs_nifree = sblock.fs_cstotal.cs_nifree; sblock.fs_old_cstotal.cs_nffree = sblock.fs_cstotal.cs_nffree; } if (Xflag == 3) { printf("** Exiting on Xflag 3\n"); exit(0); } - if (!Nflag) { - do_sbwrite(&disk); - /* - * For UFS1 filesystems with a blocksize of 64K, the first - * alternate superblock resides at the location used for - * the default UFS2 superblock. As there is a valid - * superblock at this location, the boot code will use - * it as its first choice. Thus we have to ensure that - * all of its statistcs on usage are correct. - */ - if (Oflag == 1 && sblock.fs_bsize == 65536) - wtfs(fsbtodb(&sblock, cgsblock(&sblock, 0)), - sblock.fs_bsize, (char *)&sblock); - } - for (i = 0; i < sblock.fs_cssize; i += sblock.fs_bsize) - wtfs(fsbtodb(&sblock, sblock.fs_csaddr + numfrags(&sblock, i)), - MIN(sblock.fs_cssize - i, sblock.fs_bsize), - ((char *)fscs) + i); /* + * Reference the summary information so it will also be written. + */ + sblock.fs_csp = fscs; + sbput(disk.d_fd, &disk.d_fs, 0); + /* + * For UFS1 filesystems with a blocksize of 64K, the first + * alternate superblock resides at the location used for + * the default UFS2 superblock. As there is a valid + * superblock at this location, the boot code will use + * it as its first choice. Thus we have to ensure that + * all of its statistcs on usage are correct. + */ + if (Oflag == 1 && sblock.fs_bsize == 65536) + wtfs(fsbtodb(&sblock, cgsblock(&sblock, 0)), + sblock.fs_bsize, (char *)&sblock); + /* * Read the last sector of the boot block, replace the last * 20 bytes with the recovery information, then write it back. * The recovery information only works for UFS2 filesystems. */ if (sblock.fs_magic == FS_UFS2_MAGIC) { if ((fsrbuf = malloc(realsectorsize)) == NULL || bread(&disk, part_ofs + (SBLOCK_UFS2 - realsectorsize) / disk.d_bsize, fsrbuf, realsectorsize) == -1) err(1, "can't read recovery area: %s", disk.d_error); fsr = (struct fsrecovery *)&fsrbuf[realsectorsize - sizeof *fsr]; fsr->fsr_magic = sblock.fs_magic; fsr->fsr_fpg = sblock.fs_fpg; fsr->fsr_fsbtodb = sblock.fs_fsbtodb; fsr->fsr_sblkno = sblock.fs_sblkno; fsr->fsr_ncg = sblock.fs_ncg; wtfs((SBLOCK_UFS2 - realsectorsize) / disk.d_bsize, realsectorsize, fsrbuf); free(fsrbuf); } /* * Update information about this partition in pack * label, to that it may be updated on disk. */ if (pp != NULL) { pp->p_fstype = FS_BSDFFS; pp->p_fsize = sblock.fs_fsize; pp->p_frag = sblock.fs_frag; pp->p_cpg = sblock.fs_fpg; } } /* * Initialize a cylinder group. */ void initcg(int cylno, time_t utime) { long blkno, start; + off_t savedactualloc; uint i, j, d, dlower, dupper; ufs2_daddr_t cbase, dmax; struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; struct csum *cs; /* * Determine block bounds for cylinder group. * Allow space for super block summary information in first * cylinder group. */ cbase = cgbase(&sblock, cylno); dmax = cbase + sblock.fs_fpg; if (dmax > sblock.fs_size) dmax = sblock.fs_size; dlower = cgsblock(&sblock, cylno) - cbase; dupper = cgdmin(&sblock, cylno) - cbase; if (cylno == 0) dupper += howmany(sblock.fs_cssize, sblock.fs_fsize); cs = &fscs[cylno]; memset(&acg, 0, sblock.fs_cgsize); acg.cg_time = utime; acg.cg_magic = CG_MAGIC; acg.cg_cgx = cylno; acg.cg_niblk = sblock.fs_ipg; acg.cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); acg.cg_ndblk = dmax - cbase; if (sblock.fs_contigsumsize > 0) acg.cg_nclusterblks = acg.cg_ndblk / sblock.fs_frag; start = &acg.cg_space[0] - (u_char *)(&acg.cg_firstfield); if (Oflag == 2) { acg.cg_iusedoff = start; } else { acg.cg_old_ncyl = sblock.fs_old_cpg; acg.cg_old_time = acg.cg_time; acg.cg_time = 0; acg.cg_old_niblk = acg.cg_niblk; acg.cg_niblk = 0; acg.cg_initediblk = 0; acg.cg_old_btotoff = start; acg.cg_old_boff = acg.cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t); acg.cg_iusedoff = acg.cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t); } acg.cg_freeoff = acg.cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); acg.cg_nextfreeoff = acg.cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT); if (sblock.fs_contigsumsize > 0) { acg.cg_clustersumoff = roundup(acg.cg_nextfreeoff, sizeof(u_int32_t)); acg.cg_clustersumoff -= sizeof(u_int32_t); acg.cg_clusteroff = acg.cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); acg.cg_nextfreeoff = acg.cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); } if (acg.cg_nextfreeoff > (unsigned)sblock.fs_cgsize) { printf("Panic: cylinder group too big\n"); exit(37); } acg.cg_cs.cs_nifree += sblock.fs_ipg; if (cylno == 0) for (i = 0; i < (long)UFS_ROOTINO; i++) { setbit(cg_inosused(&acg), i); acg.cg_cs.cs_nifree--; } if (cylno > 0) { /* * In cylno 0, beginning space is reserved * for boot and super blocks. */ for (d = 0; d < dlower; d += sblock.fs_frag) { blkno = d / sblock.fs_frag; setblock(&sblock, cg_blksfree(&acg), blkno); if (sblock.fs_contigsumsize > 0) setbit(cg_clustersfree(&acg), blkno); acg.cg_cs.cs_nbfree++; } } if ((i = dupper % sblock.fs_frag)) { acg.cg_frsum[sblock.fs_frag - i]++; for (d = dupper + sblock.fs_frag - i; dupper < d; dupper++) { setbit(cg_blksfree(&acg), dupper); acg.cg_cs.cs_nffree++; } } for (d = dupper; d + sblock.fs_frag <= acg.cg_ndblk; d += sblock.fs_frag) { blkno = d / sblock.fs_frag; setblock(&sblock, cg_blksfree(&acg), blkno); if (sblock.fs_contigsumsize > 0) setbit(cg_clustersfree(&acg), blkno); acg.cg_cs.cs_nbfree++; } if (d < acg.cg_ndblk) { acg.cg_frsum[acg.cg_ndblk - d]++; for (; d < acg.cg_ndblk; d++) { setbit(cg_blksfree(&acg), d); acg.cg_cs.cs_nffree++; } } if (sblock.fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(&acg); u_char *mapp = cg_clustersfree(&acg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < acg.cg_nclusterblks; i++) { if ((map & bit) != 0) run++; else if (run != 0) { if (run > sblock.fs_contigsumsize) run = sblock.fs_contigsumsize; sump[run]++; run = 0; } if ((i & (CHAR_BIT - 1)) != CHAR_BIT - 1) bit <<= 1; else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > sblock.fs_contigsumsize) run = sblock.fs_contigsumsize; sump[run]++; } } *cs = acg.cg_cs; cgckhash(&acg); /* - * Write out the duplicate super block, the cylinder group map - * and two blocks worth of inodes in a single write. + * Write out the duplicate super block. Then write the cylinder + * group map and two blocks worth of inodes in a single write. */ - start = MAX(sblock.fs_bsize, SBLOCKSIZE); + savedactualloc = sblock.fs_sblockactualloc; + sblock.fs_sblockactualloc = + dbtob(fsbtodb(&sblock, cgsblock(&sblock, cylno))); + sbput(disk.d_fd, &disk.d_fs, 0); + sblock.fs_sblockactualloc = savedactualloc; + start = 0; bcopy((char *)&acg, &iobuf[start], sblock.fs_cgsize); start += sblock.fs_bsize; dp1 = (struct ufs1_dinode *)(&iobuf[start]); dp2 = (struct ufs2_dinode *)(&iobuf[start]); for (i = 0; i < acg.cg_initediblk; i++) { if (sblock.fs_magic == FS_UFS1_MAGIC) { dp1->di_gen = newfs_random(); dp1++; } else { dp2->di_gen = newfs_random(); dp2++; } } - wtfs(fsbtodb(&sblock, cgsblock(&sblock, cylno)), iobufsize, iobuf); + wtfs(fsbtodb(&sblock, cgtod(&sblock, cylno)), iobufsize, iobuf); /* * For the old file system, we have to initialize all the inodes. */ if (Oflag == 1) { for (i = 2 * sblock.fs_frag; i < sblock.fs_ipg / INOPF(&sblock); i += sblock.fs_frag) { dp1 = (struct ufs1_dinode *)(&iobuf[start]); for (j = 0; j < INOPB(&sblock); j++) { dp1->di_gen = newfs_random(); dp1++; } wtfs(fsbtodb(&sblock, cgimin(&sblock, cylno) + i), sblock.fs_bsize, &iobuf[start]); } } } /* * initialize the file system */ #define ROOTLINKCNT 3 static struct direct root_dir[] = { { UFS_ROOTINO, sizeof(struct direct), DT_DIR, 1, "." }, { UFS_ROOTINO, sizeof(struct direct), DT_DIR, 2, ".." }, { UFS_ROOTINO + 1, sizeof(struct direct), DT_DIR, 5, ".snap" }, }; #define SNAPLINKCNT 2 static struct direct snap_dir[] = { { UFS_ROOTINO + 1, sizeof(struct direct), DT_DIR, 1, "." }, { UFS_ROOTINO, sizeof(struct direct), DT_DIR, 2, ".." }, }; void fsinit(time_t utime) { union dinode node; struct group *grp; gid_t gid; int entries; memset(&node, 0, sizeof node); if ((grp = getgrnam("operator")) != NULL) { gid = grp->gr_gid; } else { warnx("Cannot retrieve operator gid, using gid 0."); gid = 0; } entries = (nflag) ? ROOTLINKCNT - 1: ROOTLINKCNT; if (sblock.fs_magic == FS_UFS1_MAGIC) { /* * initialize the node */ node.dp1.di_atime = utime; node.dp1.di_mtime = utime; node.dp1.di_ctime = utime; /* * create the root directory */ node.dp1.di_mode = IFDIR | UMASK; node.dp1.di_nlink = entries; node.dp1.di_size = makedir(root_dir, entries); node.dp1.di_db[0] = alloc(sblock.fs_fsize, node.dp1.di_mode); node.dp1.di_blocks = btodb(fragroundup(&sblock, node.dp1.di_size)); wtfs(fsbtodb(&sblock, node.dp1.di_db[0]), sblock.fs_fsize, iobuf); iput(&node, UFS_ROOTINO); if (!nflag) { /* * create the .snap directory */ node.dp1.di_mode |= 020; node.dp1.di_gid = gid; node.dp1.di_nlink = SNAPLINKCNT; node.dp1.di_size = makedir(snap_dir, SNAPLINKCNT); node.dp1.di_db[0] = alloc(sblock.fs_fsize, node.dp1.di_mode); node.dp1.di_blocks = btodb(fragroundup(&sblock, node.dp1.di_size)); wtfs(fsbtodb(&sblock, node.dp1.di_db[0]), sblock.fs_fsize, iobuf); iput(&node, UFS_ROOTINO + 1); } } else { /* * initialize the node */ node.dp2.di_atime = utime; node.dp2.di_mtime = utime; node.dp2.di_ctime = utime; node.dp2.di_birthtime = utime; /* * create the root directory */ node.dp2.di_mode = IFDIR | UMASK; node.dp2.di_nlink = entries; node.dp2.di_size = makedir(root_dir, entries); node.dp2.di_db[0] = alloc(sblock.fs_fsize, node.dp2.di_mode); node.dp2.di_blocks = btodb(fragroundup(&sblock, node.dp2.di_size)); wtfs(fsbtodb(&sblock, node.dp2.di_db[0]), sblock.fs_fsize, iobuf); iput(&node, UFS_ROOTINO); if (!nflag) { /* * create the .snap directory */ node.dp2.di_mode |= 020; node.dp2.di_gid = gid; node.dp2.di_nlink = SNAPLINKCNT; node.dp2.di_size = makedir(snap_dir, SNAPLINKCNT); node.dp2.di_db[0] = alloc(sblock.fs_fsize, node.dp2.di_mode); node.dp2.di_blocks = btodb(fragroundup(&sblock, node.dp2.di_size)); wtfs(fsbtodb(&sblock, node.dp2.di_db[0]), sblock.fs_fsize, iobuf); iput(&node, UFS_ROOTINO + 1); } } } /* * construct a set of directory entries in "iobuf". * return size of directory. */ int makedir(struct direct *protodir, int entries) { char *cp; int i, spcleft; spcleft = DIRBLKSIZ; memset(iobuf, 0, DIRBLKSIZ); for (cp = iobuf, i = 0; i < entries - 1; i++) { protodir[i].d_reclen = DIRSIZ(0, &protodir[i]); memmove(cp, &protodir[i], protodir[i].d_reclen); cp += protodir[i].d_reclen; spcleft -= protodir[i].d_reclen; } protodir[i].d_reclen = spcleft; memmove(cp, &protodir[i], DIRSIZ(0, &protodir[i])); return (DIRBLKSIZ); } /* * allocate a block or frag */ ufs2_daddr_t alloc(int size, int mode) { int i, blkno, frag; uint d; bread(&disk, part_ofs + fsbtodb(&sblock, cgtod(&sblock, 0)), (char *)&acg, sblock.fs_cgsize); if (acg.cg_magic != CG_MAGIC) { printf("cg 0: bad magic number\n"); exit(38); } if (acg.cg_cs.cs_nbfree == 0) { printf("first cylinder group ran out of space\n"); exit(39); } for (d = 0; d < acg.cg_ndblk; d += sblock.fs_frag) if (isblock(&sblock, cg_blksfree(&acg), d / sblock.fs_frag)) goto goth; printf("internal error: can't find block in cyl 0\n"); exit(40); goth: blkno = fragstoblks(&sblock, d); clrblock(&sblock, cg_blksfree(&acg), blkno); if (sblock.fs_contigsumsize > 0) clrbit(cg_clustersfree(&acg), blkno); acg.cg_cs.cs_nbfree--; sblock.fs_cstotal.cs_nbfree--; fscs[0].cs_nbfree--; if (mode & IFDIR) { acg.cg_cs.cs_ndir++; sblock.fs_cstotal.cs_ndir++; fscs[0].cs_ndir++; } if (size != sblock.fs_bsize) { frag = howmany(size, sblock.fs_fsize); fscs[0].cs_nffree += sblock.fs_frag - frag; sblock.fs_cstotal.cs_nffree += sblock.fs_frag - frag; acg.cg_cs.cs_nffree += sblock.fs_frag - frag; acg.cg_frsum[sblock.fs_frag - frag]++; for (i = frag; i < sblock.fs_frag; i++) setbit(cg_blksfree(&acg), d + i); } /* XXX cgwrite(&disk, 0)??? */ cgckhash(&acg); wtfs(fsbtodb(&sblock, cgtod(&sblock, 0)), sblock.fs_cgsize, (char *)&acg); return ((ufs2_daddr_t)d); } /* * Allocate an inode on the disk */ void iput(union dinode *ip, ino_t ino) { ufs2_daddr_t d; bread(&disk, part_ofs + fsbtodb(&sblock, cgtod(&sblock, 0)), (char *)&acg, sblock.fs_cgsize); if (acg.cg_magic != CG_MAGIC) { printf("cg 0: bad magic number\n"); exit(31); } acg.cg_cs.cs_nifree--; setbit(cg_inosused(&acg), ino); cgckhash(&acg); wtfs(fsbtodb(&sblock, cgtod(&sblock, 0)), sblock.fs_cgsize, (char *)&acg); sblock.fs_cstotal.cs_nifree--; fscs[0].cs_nifree--; if (ino >= (unsigned long)sblock.fs_ipg * sblock.fs_ncg) { printf("fsinit: inode value out of range (%ju).\n", (uintmax_t)ino); exit(32); } d = fsbtodb(&sblock, ino_to_fsba(&sblock, ino)); bread(&disk, part_ofs + d, (char *)iobuf, sblock.fs_bsize); if (sblock.fs_magic == FS_UFS1_MAGIC) ((struct ufs1_dinode *)iobuf)[ino_to_fsbo(&sblock, ino)] = ip->dp1; else ((struct ufs2_dinode *)iobuf)[ino_to_fsbo(&sblock, ino)] = ip->dp2; wtfs(d, sblock.fs_bsize, (char *)iobuf); } /* * possibly write to disk */ static void wtfs(ufs2_daddr_t bno, int size, char *bf) { if (Nflag) return; if (bwrite(&disk, part_ofs + bno, bf, size) < 0) err(36, "wtfs: %d bytes at sector %jd", size, (intmax_t)bno); } /* * Calculate the check-hash of the cylinder group. */ static void cgckhash(cgp) struct cg *cgp; { if ((sblock.fs_metackhash & CK_CYLGRP) == 0) return; cgp->cg_ckhash = 0; cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); } /* * check if a block is available */ static int isblock(struct fs *fs, unsigned char *cp, int h) { unsigned char mask; switch (fs->fs_frag) { case 8: return (cp[h] == 0xff); case 4: mask = 0x0f << ((h & 0x1) << 2); return ((cp[h >> 1] & mask) == mask); case 2: mask = 0x03 << ((h & 0x3) << 1); return ((cp[h >> 2] & mask) == mask); case 1: mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: fprintf(stderr, "isblock bad fs_frag %d\n", fs->fs_frag); return (0); } } /* * take a block out of the map */ static void clrblock(struct fs *fs, unsigned char *cp, int h) { switch ((fs)->fs_frag) { case 8: cp[h] = 0; return; case 4: cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: fprintf(stderr, "clrblock bad fs_frag %d\n", fs->fs_frag); return; } } /* * put a block into the map */ static void setblock(struct fs *fs, unsigned char *cp, int h) { switch (fs->fs_frag) { case 8: cp[h] = 0xff; return; case 4: cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: fprintf(stderr, "setblock bad fs_frag %d\n", fs->fs_frag); return; } } /* * Determine the number of characters in a * single line. */ static int charsperline(void) { int columns; char *cp; struct winsize ws; columns = 0; if (ioctl(0, TIOCGWINSZ, &ws) != -1) columns = ws.ws_col; if (columns == 0 && (cp = getenv("COLUMNS"))) columns = atoi(cp); if (columns == 0) columns = 80; /* last resort */ return (columns); } static int ilog2(int val) { u_int n; for (n = 0; n < sizeof(n) * CHAR_BIT; n++) if (1 << n == val) return (n); errx(1, "ilog2: %d is not a power of 2\n", val); } /* * For the regression test, return predictable random values. * Otherwise use a true random number generator. */ static u_int32_t newfs_random(void) { static int nextnum = 1; if (Rflag) return (nextnum++); return (arc4random()); } Index: head/sbin/quotacheck/Makefile =================================================================== --- head/sbin/quotacheck/Makefile (revision 328425) +++ head/sbin/quotacheck/Makefile (revision 328426) @@ -1,13 +1,13 @@ # $FreeBSD$ # @(#)Makefile 8.1 (Berkeley) 6/5/93 PACKAGE=quotacheck PROG= quotacheck SRCS= quotacheck.c preen.c fsutil.c utilities.c WARNS?= 2 MAN= quotacheck.8 -LIBADD= util +LIBADD= util ufs .PATH: ${.CURDIR:H}/fsck ${.CURDIR:H}/fsck_ffs .include Index: head/sbin/quotacheck/quotacheck.c =================================================================== --- head/sbin/quotacheck/quotacheck.c (revision 328425) +++ head/sbin/quotacheck/quotacheck.c (revision 328426) @@ -1,728 +1,723 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Robert Elz at The University of Melbourne. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1990, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)quotacheck.c 8.3 (Berkeley) 1/29/94"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Fix up / report on disk quotas & usage */ #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include "quotacheck.h" char *qfname = QUOTAFILENAME; char *qfextension[] = INITQFNAMES; char *quotagroup = QUOTAGROUP; union { struct fs sblk; char dummy[MAXBSIZE]; } sb_un; #define sblock sb_un.sblk union { struct cg cgblk; char dummy[MAXBSIZE]; } cg_un; #define cgblk cg_un.cgblk long dev_bsize = 1; ino_t maxino; union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(dp, field) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) #define HASUSR 1 #define HASGRP 2 struct fileusage { struct fileusage *fu_next; u_long fu_curinodes; u_long fu_curblocks; u_long fu_id; char fu_name[1]; /* actually bigger */ }; #define FUHASH 1024 /* must be power of two */ struct fileusage *fuhead[MAXQUOTAS][FUHASH]; int aflag; /* all file systems */ int cflag; /* convert format to 32 or 64 bit size */ int gflag; /* check group quotas */ int uflag; /* check user quotas */ int vflag; /* verbose */ int fi; /* open disk file descriptor */ struct fileusage * addid(u_long, int, char *, const char *); -void bread(ufs2_daddr_t, char *, long); +void blkread(ufs2_daddr_t, char *, long); void freeinodebuf(void); union dinode * getnextinode(ino_t); int getquotagid(void); struct fileusage * lookup(u_long, int); int oneof(char *, char*[], int); void printchanges(const char *, int, struct dqblk *, struct fileusage *, u_long); void setinodebuf(ino_t); int update(const char *, struct quotafile *, int); void usage(void); int main(int argc, char *argv[]) { struct fstab *fs; struct passwd *pw; struct group *gr; struct quotafile *qfu, *qfg; int i, argnum, maxrun, errs, ch; long done = 0; char *name; errs = maxrun = 0; while ((ch = getopt(argc, argv, "ac:guvl:")) != -1) { switch(ch) { case 'a': aflag++; break; case 'c': if (cflag) usage(); cflag = atoi(optarg); break; case 'g': gflag++; break; case 'u': uflag++; break; case 'v': vflag++; break; case 'l': maxrun = atoi(optarg); break; default: usage(); } } argc -= optind; argv += optind; if ((argc == 0 && !aflag) || (argc > 0 && aflag)) usage(); if (cflag && cflag != 32 && cflag != 64) usage(); if (!gflag && !uflag) { gflag++; uflag++; } if (gflag) { setgrent(); while ((gr = getgrent()) != NULL) (void) addid((u_long)gr->gr_gid, GRPQUOTA, gr->gr_name, NULL); endgrent(); } if (uflag) { setpwent(); while ((pw = getpwent()) != NULL) (void) addid((u_long)pw->pw_uid, USRQUOTA, pw->pw_name, NULL); endpwent(); } /* * The maxrun (-l) option is now deprecated. */ if (maxrun > 0) warnx("the -l option is now deprecated"); if (aflag) exit(checkfstab(uflag, gflag)); if (setfsent() == 0) errx(1, "%s: can't open", FSTAB); while ((fs = getfsent()) != NULL) { if (((argnum = oneof(fs->fs_file, argv, argc)) >= 0 || (argnum = oneof(fs->fs_spec, argv, argc)) >= 0) && (name = blockcheck(fs->fs_spec))) { done |= 1 << argnum; qfu = NULL; if (uflag) qfu = quota_open(fs, USRQUOTA, O_CREAT|O_RDWR); qfg = NULL; if (gflag) qfg = quota_open(fs, GRPQUOTA, O_CREAT|O_RDWR); if (qfu == NULL && qfg == NULL) continue; errs += chkquota(name, qfu, qfg); if (qfu) quota_close(qfu); if (qfg) quota_close(qfg); } } endfsent(); for (i = 0; i < argc; i++) if ((done & (1 << i)) == 0) fprintf(stderr, "%s not found in %s\n", argv[i], FSTAB); exit(errs); } void usage(void) { (void)fprintf(stderr, "%s\n%s\n", "usage: quotacheck [-guv] [-c 32 | 64] [-l maxrun] -a", " quotacheck [-guv] [-c 32 | 64] filesystem ..."); exit(1); } /* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; - -/* * Scan the specified file system to check quota(s) present on it. */ int chkquota(char *specname, struct quotafile *qfu, struct quotafile *qfg) { struct fileusage *fup; union dinode *dp; - int cg, i, mode, errs = 0; + struct fs *fs; + int cg, i, ret, mode, errs = 0; ino_t ino, inosused, userino = 0, groupino = 0; dev_t dev, userdev = 0, groupdev = 0; struct stat sb; const char *mntpt; char *cp; if (qfu != NULL) mntpt = quota_fsname(qfu); else if (qfg != NULL) mntpt = quota_fsname(qfg); else errx(1, "null quotafile information passed to chkquota()\n"); if (cflag) { if (vflag && qfu != NULL) printf("%s: convert user quota to %d bits\n", mntpt, cflag); if (qfu != NULL && quota_convert(qfu, cflag) < 0) { if (errno == EBADF) errx(1, "%s: cannot convert an active quota file", mntpt); err(1, "user quota conversion to size %d failed", cflag); } if (vflag && qfg != NULL) printf("%s: convert group quota to %d bits\n", mntpt, cflag); if (qfg != NULL && quota_convert(qfg, cflag) < 0) { if (errno == EBADF) errx(1, "%s: cannot convert an active quota file", mntpt); err(1, "group quota conversion to size %d failed", cflag); } } if ((fi = open(specname, O_RDONLY, 0)) < 0) { warn("%s", specname); return (1); } if ((stat(mntpt, &sb)) < 0) { warn("%s", mntpt); return (1); } dev = sb.st_dev; if (vflag) { (void)printf("*** Checking "); if (qfu) (void)printf("user%s", qfg ? " and " : ""); if (qfg) (void)printf("group"); (void)printf(" quotas for %s (%s)\n", specname, mntpt); } if (qfu) { if (stat(quota_qfname(qfu), &sb) == 0) { userino = sb.st_ino; userdev = sb.st_dev; } } if (qfg) { if (stat(quota_qfname(qfg), &sb) == 0) { groupino = sb.st_ino; groupdev = sb.st_dev; } } sync(); - dev_bsize = 1; - for (i = 0; sblock_try[i] != -1; i++) { - bread(sblock_try[i], (char *)&sblock, (long)SBLOCKSIZE); - if ((sblock.fs_magic == FS_UFS1_MAGIC || - (sblock.fs_magic == FS_UFS2_MAGIC && - sblock.fs_sblockloc == sblock_try[i])) && - sblock.fs_bsize <= MAXBSIZE && - sblock.fs_bsize >= sizeof(struct fs)) - break; + if ((ret = sbget(fi, &fs, -1)) != 0) { + switch (ret) { + case ENOENT: + warn("Cannot find file system superblock"); + return (1); + default: + warn("Unable to read file system superblock"); + return (1); + } } - if (sblock_try[i] == -1) { - warn("Cannot find file system superblock"); - return (1); - } + bcopy(fs, &sblock, fs->fs_sbsize); + free(fs); dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); maxino = sblock.fs_ncg * sblock.fs_ipg; for (cg = 0; cg < sblock.fs_ncg; cg++) { ino = cg * sblock.fs_ipg; setinodebuf(ino); - bread(fsbtodb(&sblock, cgtod(&sblock, cg)), (char *)(&cgblk), + blkread(fsbtodb(&sblock, cgtod(&sblock, cg)), (char *)(&cgblk), sblock.fs_cgsize); if (sblock.fs_magic == FS_UFS2_MAGIC) inosused = cgblk.cg_initediblk; else inosused = sblock.fs_ipg; /* * If we are using soft updates, then we can trust the * cylinder group inode allocation maps to tell us which * inodes are allocated. We will scan the used inode map * to find the inodes that are really in use, and then * read only those inodes in from disk. */ if (sblock.fs_flags & FS_DOSOFTDEP) { if (!cg_chkmagic(&cgblk)) errx(1, "CG %d: BAD MAGIC NUMBER\n", cg); cp = &cg_inosused(&cgblk)[(inosused - 1) / CHAR_BIT]; for ( ; inosused > 0; inosused -= CHAR_BIT, cp--) { if (*cp == 0) continue; for (i = 1 << (CHAR_BIT - 1); i > 0; i >>= 1) { if (*cp & i) break; inosused--; } break; } if (inosused <= 0) continue; } for (i = 0; i < inosused; i++, ino++) { if ((dp = getnextinode(ino)) == NULL || ino < UFS_ROOTINO || (mode = DIP(dp, di_mode) & IFMT) == 0) continue; /* * XXX: Do not account for UIDs or GIDs that appear * to be negative to prevent generating 100GB+ * quota files. */ if ((int)DIP(dp, di_uid) < 0 || (int)DIP(dp, di_gid) < 0) { if (vflag) { if (aflag) (void)printf("%s: ", mntpt); (void)printf("out of range UID/GID (%u/%u) ino=%ju\n", DIP(dp, di_uid), DIP(dp,di_gid), (uintmax_t)ino); } continue; } /* * Do not account for file system snapshot files * or the actual quota data files to be consistent * with how they are handled inside the kernel. */ #ifdef SF_SNAPSHOT if (DIP(dp, di_flags) & SF_SNAPSHOT) continue; #endif if ((ino == userino && dev == userdev) || (ino == groupino && dev == groupdev)) continue; if (qfg) { fup = addid((u_long)DIP(dp, di_gid), GRPQUOTA, (char *)0, mntpt); fup->fu_curinodes++; if (mode == IFREG || mode == IFDIR || mode == IFLNK) fup->fu_curblocks += DIP(dp, di_blocks); } if (qfu) { fup = addid((u_long)DIP(dp, di_uid), USRQUOTA, (char *)0, mntpt); fup->fu_curinodes++; if (mode == IFREG || mode == IFDIR || mode == IFLNK) fup->fu_curblocks += DIP(dp, di_blocks); } } } freeinodebuf(); if (qfu) errs += update(mntpt, qfu, USRQUOTA); if (qfg) errs += update(mntpt, qfg, GRPQUOTA); close(fi); (void)fflush(stdout); return (errs); } /* * Update a specified quota file. */ int update(const char *fsname, struct quotafile *qf, int type) { struct fileusage *fup; u_long id, lastid, highid = 0; struct dqblk dqbuf; struct stat sb; static struct dqblk zerodqbuf; static struct fileusage zerofileusage; /* * Scan the on-disk quota file and record any usage changes. */ lastid = quota_maxid(qf); for (id = 0; id <= lastid; id++) { if (quota_read(qf, &dqbuf, id) < 0) dqbuf = zerodqbuf; if ((fup = lookup(id, type)) == NULL) fup = &zerofileusage; if (fup->fu_curinodes || fup->fu_curblocks || dqbuf.dqb_bsoftlimit || dqbuf.dqb_bhardlimit || dqbuf.dqb_isoftlimit || dqbuf.dqb_ihardlimit) highid = id; if (dqbuf.dqb_curinodes == fup->fu_curinodes && dqbuf.dqb_curblocks == fup->fu_curblocks) { fup->fu_curinodes = 0; fup->fu_curblocks = 0; continue; } printchanges(fsname, type, &dqbuf, fup, id); dqbuf.dqb_curinodes = fup->fu_curinodes; dqbuf.dqb_curblocks = fup->fu_curblocks; (void) quota_write_usage(qf, &dqbuf, id); fup->fu_curinodes = 0; fup->fu_curblocks = 0; } /* * Walk the hash table looking for ids with non-zero usage * that are not currently recorded in the quota file. E.g. * ids that are past the end of the current file. */ for (id = 0; id < FUHASH; id++) { for (fup = fuhead[type][id]; fup != NULL; fup = fup->fu_next) { if (fup->fu_id <= lastid) continue; if (fup->fu_curinodes == 0 && fup->fu_curblocks == 0) continue; bzero(&dqbuf, sizeof(struct dqblk)); if (fup->fu_id > highid) highid = fup->fu_id; printchanges(fsname, type, &dqbuf, fup, fup->fu_id); dqbuf.dqb_curinodes = fup->fu_curinodes; dqbuf.dqb_curblocks = fup->fu_curblocks; (void) quota_write_usage(qf, &dqbuf, fup->fu_id); fup->fu_curinodes = 0; fup->fu_curblocks = 0; } } /* * If this is old format file, then size may be smaller, * so ensure that we only truncate when it will make things * smaller, and not if it will grow an old format file. */ if (highid < lastid && stat(quota_qfname(qf), &sb) == 0 && sb.st_size > (((off_t)highid + 2) * sizeof(struct dqblk))) truncate(quota_qfname(qf), (((off_t)highid + 2) * sizeof(struct dqblk))); return (0); } /* * Check to see if target appears in list of size cnt. */ int oneof(char *target, char *list[], int cnt) { int i; for (i = 0; i < cnt; i++) if (strcmp(target, list[i]) == 0) return (i); return (-1); } /* * Determine the group identifier for quota files. */ int getquotagid(void) { struct group *gr; if ((gr = getgrnam(quotagroup)) != NULL) return (gr->gr_gid); return (-1); } /* * Routines to manage the file usage table. * * Lookup an id of a specific type. */ struct fileusage * lookup(u_long id, int type) { struct fileusage *fup; for (fup = fuhead[type][id & (FUHASH-1)]; fup != NULL; fup = fup->fu_next) if (fup->fu_id == id) return (fup); return (NULL); } /* * Add a new file usage id if it does not already exist. */ struct fileusage * addid(u_long id, int type, char *name, const char *fsname) { struct fileusage *fup, **fhp; int len; if ((fup = lookup(id, type)) != NULL) return (fup); if (name) len = strlen(name); else len = 0; if ((fup = calloc(1, sizeof(*fup) + len)) == NULL) errx(1, "calloc failed"); fhp = &fuhead[type][id & (FUHASH - 1)]; fup->fu_next = *fhp; *fhp = fup; fup->fu_id = id; if (name) bcopy(name, fup->fu_name, len + 1); else { (void)sprintf(fup->fu_name, "%lu", id); if (vflag) { if (aflag && fsname != NULL) (void)printf("%s: ", fsname); printf("unknown %cid: %lu\n", type == USRQUOTA ? 'u' : 'g', id); } } return (fup); } /* * Special purpose version of ginode used to optimize pass * over all the inodes in numerical order. */ static ino_t nextino, lastinum, lastvalidinum; static long readcnt, readpercg, fullcnt, inobufsize, partialcnt, partialsize; static caddr_t inodebuf; #define INOBUFSIZE 56*1024 /* size of buffer to read inodes */ union dinode * getnextinode(ino_t inumber) { long size; ufs2_daddr_t dblk; union dinode *dp; static caddr_t nextinop; if (inumber != nextino++ || inumber > lastvalidinum) errx(1, "bad inode number %ju to nextinode", (uintmax_t)inumber); if (inumber >= lastinum) { readcnt++; dblk = fsbtodb(&sblock, ino_to_fsba(&sblock, lastinum)); if (readcnt % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } /* - * If bread returns an error, it will already have zeroed + * If blkread returns an error, it will already have zeroed * out the buffer, so we do not need to do so here. */ - bread(dblk, inodebuf, size); + blkread(dblk, inodebuf, size); nextinop = inodebuf; } dp = (union dinode *)nextinop; if (sblock.fs_magic == FS_UFS1_MAGIC) nextinop += sizeof(struct ufs1_dinode); else nextinop += sizeof(struct ufs2_dinode); return (dp); } /* * Prepare to scan a set of inodes. */ void setinodebuf(ino_t inum) { if (inum % sblock.fs_ipg != 0) errx(1, "bad inode number %ju to setinodebuf", (uintmax_t)inum); lastvalidinum = inum + sblock.fs_ipg - 1; nextino = inum; lastinum = inum; readcnt = 0; if (inodebuf != NULL) return; inobufsize = blkroundup(&sblock, INOBUFSIZE); fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ? sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)); readpercg = sblock.fs_ipg / fullcnt; partialcnt = sblock.fs_ipg % fullcnt; partialsize = partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ? sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } if ((inodebuf = malloc((unsigned)inobufsize)) == NULL) errx(1, "cannot allocate space for inode buffer"); } /* * Free up data structures used to scan inodes. */ void freeinodebuf(void) { if (inodebuf != NULL) free(inodebuf); inodebuf = NULL; } /* * Read specified disk blocks. */ void -bread(ufs2_daddr_t bno, char *buf, long cnt) +blkread(ufs2_daddr_t bno, char *buf, long cnt) { if (lseek(fi, (off_t)bno * dev_bsize, SEEK_SET) < 0 || read(fi, buf, cnt) != cnt) - errx(1, "bread failed on block %ld", (long)bno); + errx(1, "blkread failed on block %ld", (long)bno); } /* * Display updated block and i-node counts. */ void printchanges(const char *fsname, int type, struct dqblk *dp, struct fileusage *fup, u_long id) { if (!vflag) return; if (aflag) (void)printf("%s: ", fsname); if (fup->fu_name[0] == '\0') (void)printf("%-8lu fixed ", id); else (void)printf("%-8s fixed ", fup->fu_name); switch (type) { case GRPQUOTA: (void)printf("(group):"); break; case USRQUOTA: (void)printf("(user): "); break; default: (void)printf("(unknown quota type %d)", type); break; } if (dp->dqb_curinodes != fup->fu_curinodes) (void)printf("\tinodes %lu -> %lu", (u_long)dp->dqb_curinodes, (u_long)fup->fu_curinodes); if (dp->dqb_curblocks != fup->fu_curblocks) (void)printf("\tblocks %lu -> %lu", (u_long)dp->dqb_curblocks, (u_long)fup->fu_curblocks); (void)printf("\n"); } Index: head/stand/libsa/Makefile =================================================================== --- head/stand/libsa/Makefile (revision 328425) +++ head/stand/libsa/Makefile (revision 328426) @@ -1,151 +1,155 @@ # $FreeBSD$ # Originally from $NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $ # # Notes: # - We don't use the libc strerror/sys_errlist because the string table is # quite large. # MK_PROFILE= no MK_SSP= no .include INTERNALLIB= LIBSA_CPUARCH?=${MACHINE_CPUARCH} LIBC_SRC= ${SRCTOP}/lib/libc LIB?= sa NO_PIC= # standalone components and stuff we have modified locally SRCS+= gzguts.h zutil.h __main.c assert.c bcd.c environment.c getopt.c gets.c \ globals.c pager.c panic.c printf.c strdup.c strerror.c strtol.c strtoul.c \ random.c sbrk.c twiddle.c zalloc.c zalloc_malloc.c # private (pruned) versions of libc string functions SRCS+= strcasecmp.c .PATH: ${LIBC_SRC}/net SRCS+= ntoh.c # string functions from libc .PATH: ${LIBC_SRC}/string SRCS+= bcmp.c bcopy.c bzero.c ffs.c fls.c \ memccpy.c memchr.c memcmp.c memcpy.c memmove.c memset.c \ qdivrem.c strcat.c strchr.c strcmp.c strcpy.c stpcpy.c stpncpy.c \ strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \ strnlen.c strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c .if ${MACHINE_CPUARCH} == "arm" .PATH: ${LIBC_SRC}/arm/gen # Do not generate movt/movw, because the relocation fixup for them does not # translate to the -Bsymbolic -pie format required by self_reloc() in loader(8). # Also, the fpu is not available in a standalone environment. .if ${COMPILER_VERSION} < 30800 CFLAGS.clang+= -mllvm -arm-use-movt=0 .else CFLAGS.clang+= -mno-movt .endif CFLAGS.clang+= -mfpu=none # Compiler support functions .PATH: ${SRCTOP}/contrib/compiler-rt/lib/builtins/ # __clzsi2 and ctzsi2 for various builtin functions SRCS+= clzsi2.c ctzsi2.c # Divide and modulus functions called by the compiler SRCS+= divmoddi4.c divmodsi4.c divdi3.c divsi3.c moddi3.c modsi3.c SRCS+= udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c .PATH: ${SRCTOP}/contrib/compiler-rt/lib/builtins/arm/ SRCS+= aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S SRCS+= aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv" .PATH: ${LIBC_SRC}/${MACHINE_CPUARCH}/gen .endif .if ${MACHINE_CPUARCH} == "powerpc" .PATH: ${LIBC_SRC}/quad SRCS+= ashldi3.c ashrdi3.c SRCS+= syncicache.c .endif # uuid functions from libc .PATH: ${LIBC_SRC}/uuid SRCS+= uuid_create_nil.c uuid_equal.c uuid_from_string.c uuid_is_nil.c uuid_to_string.c # _setjmp/_longjmp .PATH: ${SASRC}/${LIBSA_CPUARCH} SRCS+= _setjmp.S # decompression functionality from libbz2 # NOTE: to actually test this functionality after libbz2 upgrade compile # loader(8) with LOADER_BZIP2_SUPPORT defined .PATH: ${SRCTOP}/contrib/bzip2 CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS SRCS+=bzlib.c crctable.c decompress.c huffman.c randtable.c # decompression functionality from zlib .PATH: ${SRCTOP}/contrib/zlib CFLAGS+=-DHAVE_MEMCPY -I${SRCTOP}/contrib/zlib SRCS+= adler32.c crc32.c SRCS+= infback.c inffast.c inflate.c inftrees.c zutil.c # Create a subset of includes that are safe, as well as adjusting those that aren't # The lists may drive people nuts, but they are explicitly opt-in FAKE_DIRS=xlocale arpa SAFE_INCS=a.out.h assert.h elf.h limits.h nlist.h setjmp.h stddef.h stdbool.h string.h strings.h time.h unistd.h uuid.h STAND_H_INC=ctype.h fcntl.h signal.h stdio.h stdlib.h OTHER_INC=stdarg.h errno.h stdint.h beforedepend: echo beforedepend; \ mkdir -p ${FAKE_DIRS}; \ for i in ${SAFE_INCS}; do \ ln -sf ${SRCTOP}/include/$$i $$i; \ done; \ ln -sf ${SYSDIR}/${MACHINE}/include/stdarg.h stdarg.h; \ ln -sf ${SYSDIR}/sys/errno.h errno.h; \ ln -sf ${SYSDIR}/sys/stdint.h stdint.h; \ ln -sf ${SRCTOP}/include/arpa/inet.h arpa/inet.h; \ ln -sf ${SRCTOP}/include/arpa/tftp.h arpa/tftp.h; \ for i in _time.h _strings.h _string.h; do \ [ -f xlocale/$$i ] || cp /dev/null xlocale/$$i; \ done; \ for i in ${STAND_H_INC}; do \ ln -sf ${SASRC}/stand.h $$i; \ done CLEANDIRS+=${FAKE_DIRS} CLEANFILES+= ${SAFE_INCS} ${STAND_H_INC} ${OTHER_INC} # io routines SRCS+= closeall.c dev.c ioctl.c nullfs.c stat.c \ fstat.c close.c lseek.c open.c read.c write.c readdir.c # network routines SRCS+= arp.c ether.c ip.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c # network info services: SRCS+= bootp.c rarp.c bootparam.c # boot filesystems SRCS+= ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c SRCS+= dosfs.c ext2fs.c SRCS+= splitfs.c SRCS+= pkgfs.c .if ${MK_NAND} != "no" SRCS+= nandfs.c .endif +# kernel ufs support +.PATH: ${SRCTOP}/sys/ufs/ffs +SRCS+=ffs_subr.c ffs_tables.c + CFLAGS.bzipfs.c+= -I${SRCTOP}/contrib/bzip2 # explicit_bzero .PATH: ${SYSDIR}/libkern SRCS+= explicit_bzero.c .include .include Index: head/stand/libsa/ufs.c =================================================================== --- head/stand/libsa/ufs.c (revision 328425) +++ head/stand/libsa/ufs.c (revision 328426) @@ -1,861 +1,866 @@ /* $NetBSD: ufs.c,v 1.20 1998/03/01 07:15:39 ross Exp $ */ /*- * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * * Copyright (c) 1990, 1991 Carnegie Mellon University * All Rights Reserved. * * Author: David Golub * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone file reading package. */ #include #include #include #include #include #include #include "stand.h" #include "string.h" static int ufs_open(const char *path, struct open_file *f); static int ufs_write(struct open_file *f, void *buf, size_t size, size_t *resid); static int ufs_close(struct open_file *f); static int ufs_read(struct open_file *f, void *buf, size_t size, size_t *resid); static off_t ufs_seek(struct open_file *f, off_t offset, int where); static int ufs_stat(struct open_file *f, struct stat *sb); static int ufs_readdir(struct open_file *f, struct dirent *d); struct fs_ops ufs_fsops = { "ufs", ufs_open, ufs_close, ufs_read, ufs_write, ufs_seek, ufs_stat, ufs_readdir }; /* * In-core open file. */ struct file { off_t f_seekp; /* seek pointer */ struct fs *f_fs; /* pointer to super-block */ union dinode { struct ufs1_dinode di1; struct ufs2_dinode di2; } f_di; /* copy of on-disk inode */ int f_nindir[UFS_NIADDR]; /* number of blocks mapped by indirect block at level i */ char *f_blk[UFS_NIADDR]; /* buffer for indirect block at level i */ size_t f_blksize[UFS_NIADDR]; /* size of buffer */ ufs2_daddr_t f_blkno[UFS_NIADDR];/* disk address of block in buffer */ ufs2_daddr_t f_buf_blkno; /* block number of data block */ char *f_buf; /* buffer for data block */ size_t f_buf_size; /* size of data block */ }; #define DIP(fp, field) \ ((fp)->f_fs->fs_magic == FS_UFS1_MAGIC ? \ (fp)->f_di.di1.field : (fp)->f_di.di2.field) static int read_inode(ino_t, struct open_file *); static int block_map(struct open_file *, ufs2_daddr_t, ufs2_daddr_t *); static int buf_read_file(struct open_file *, char **, size_t *); static int buf_write_file(struct open_file *, char *, size_t *); static int search_directory(char *, struct open_file *, ino_t *); +static int ufs_use_sa_read(void *, off_t, void **, int); +/* from ffs_subr.c */ +int ffs_sbget(void *, struct fs **, off_t, char *, + int (*)(void *, off_t, void **, int)); + /* * Read a new inode into a file structure. */ static int read_inode(inumber, f) ino_t inumber; struct open_file *f; { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; char *buf; size_t rsize; int rc; if (fs == NULL) panic("fs == NULL"); /* * Read inode and save it. */ buf = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize, buf, &rsize); if (rc) goto out; if (rsize != fs->fs_bsize) { rc = EIO; goto out; } if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) fp->f_di.di1 = ((struct ufs1_dinode *)buf) [ino_to_fsbo(fs, inumber)]; else fp->f_di.di2 = ((struct ufs2_dinode *)buf) [ino_to_fsbo(fs, inumber)]; /* * Clear out the old buffers */ { int level; for (level = 0; level < UFS_NIADDR; level++) fp->f_blkno[level] = -1; fp->f_buf_blkno = -1; } fp->f_seekp = 0; out: free(buf); return (rc); } /* * Given an offset in a file, find the disk block number that * contains that block. */ static int block_map(f, file_block, disk_block_p) struct open_file *f; ufs2_daddr_t file_block; ufs2_daddr_t *disk_block_p; /* out */ { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; int level; int idx; ufs2_daddr_t ind_block_num; int rc; /* * Index structure of an inode: * * di_db[0..UFS_NDADDR-1] hold block numbers for blocks * 0..UFS_NDADDR-1 * * di_ib[0] index block 0 is the single indirect block * holds block numbers for blocks * UFS_NDADDR .. UFS_NDADDR + NINDIR(fs)-1 * * di_ib[1] index block 1 is the double indirect block * holds block numbers for INDEX blocks for blocks * UFS_NDADDR + NINDIR(fs) .. * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1 * * di_ib[2] index block 2 is the triple indirect block * holds block numbers for double-indirect * blocks for blocks * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 .. * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 * + NINDIR(fs)**3 - 1 */ if (file_block < UFS_NDADDR) { /* Direct block. */ *disk_block_p = DIP(fp, di_db[file_block]); return (0); } file_block -= UFS_NDADDR; /* * nindir[0] = NINDIR * nindir[1] = NINDIR**2 * nindir[2] = NINDIR**3 * etc */ for (level = 0; level < UFS_NIADDR; level++) { if (file_block < fp->f_nindir[level]) break; file_block -= fp->f_nindir[level]; } if (level == UFS_NIADDR) { /* Block number too high */ return (EFBIG); } ind_block_num = DIP(fp, di_ib[level]); for (; level >= 0; level--) { if (ind_block_num == 0) { *disk_block_p = 0; /* missing */ return (0); } if (fp->f_blkno[level] != ind_block_num) { if (fp->f_blk[level] == (char *)0) fp->f_blk[level] = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fp->f_fs, ind_block_num), fs->fs_bsize, fp->f_blk[level], &fp->f_blksize[level]); if (rc) return (rc); if (fp->f_blksize[level] != fs->fs_bsize) return (EIO); fp->f_blkno[level] = ind_block_num; } if (level > 0) { idx = file_block / fp->f_nindir[level - 1]; file_block %= fp->f_nindir[level - 1]; } else idx = file_block; if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) ind_block_num = ((ufs1_daddr_t *)fp->f_blk[level])[idx]; else ind_block_num = ((ufs2_daddr_t *)fp->f_blk[level])[idx]; } *disk_block_p = ind_block_num; return (0); } /* * Write a portion of a file from an internal buffer. */ static int buf_write_file(f, buf_p, size_p) struct open_file *f; char *buf_p; size_t *size_p; /* out */ { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; long off; ufs_lbn_t file_block; ufs2_daddr_t disk_block; size_t block_size; int rc; /* * Calculate the starting block address and offset. */ off = blkoff(fs, fp->f_seekp); file_block = lblkno(fs, fp->f_seekp); block_size = sblksize(fs, DIP(fp, di_size), file_block); rc = block_map(f, file_block, &disk_block); if (rc) return (rc); if (disk_block == 0) /* Because we can't allocate space on the drive */ return (EFBIG); /* * Truncate buffer at end of file, and at the end of * this block. */ if (*size_p > DIP(fp, di_size) - fp->f_seekp) *size_p = DIP(fp, di_size) - fp->f_seekp; if (*size_p > block_size - off) *size_p = block_size - off; /* * If we don't entirely occlude the block and it's not * in memory already, read it in first. */ if (((off > 0) || (*size_p + off < block_size)) && (file_block != fp->f_buf_blkno)) { if (fp->f_buf == (char *)0) fp->f_buf = malloc(fs->fs_bsize); twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); fp->f_buf_blkno = file_block; } /* * Copy the user data into the cached block. */ bcopy(buf_p, fp->f_buf + off, *size_p); /* * Write the block out to storage. */ twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); return (rc); } /* * Read a portion of a file into an internal buffer. Return * the location in the buffer and the amount in the buffer. */ static int buf_read_file(f, buf_p, size_p) struct open_file *f; char **buf_p; /* out */ size_t *size_p; /* out */ { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; long off; ufs_lbn_t file_block; ufs2_daddr_t disk_block; size_t block_size; int rc; off = blkoff(fs, fp->f_seekp); file_block = lblkno(fs, fp->f_seekp); block_size = sblksize(fs, DIP(fp, di_size), file_block); if (file_block != fp->f_buf_blkno) { if (fp->f_buf == (char *)0) fp->f_buf = malloc(fs->fs_bsize); rc = block_map(f, file_block, &disk_block); if (rc) return (rc); if (disk_block == 0) { bzero(fp->f_buf, block_size); fp->f_buf_size = block_size; } else { twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); } fp->f_buf_blkno = file_block; } /* * Return address of byte in buffer corresponding to * offset, and size of remainder of buffer after that * byte. */ *buf_p = fp->f_buf + off; *size_p = block_size - off; /* * But truncate buffer at end of file. */ if (*size_p > DIP(fp, di_size) - fp->f_seekp) *size_p = DIP(fp, di_size) - fp->f_seekp; return (0); } /* * Search a directory for a name and return its * i_number. */ static int search_directory(name, f, inumber_p) char *name; struct open_file *f; ino_t *inumber_p; /* out */ { struct file *fp = (struct file *)f->f_fsdata; struct direct *dp; struct direct *edp; char *buf; size_t buf_size; int namlen, length; int rc; length = strlen(name); fp->f_seekp = 0; while (fp->f_seekp < DIP(fp, di_size)) { rc = buf_read_file(f, &buf, &buf_size); if (rc) return (rc); dp = (struct direct *)buf; edp = (struct direct *)(buf + buf_size); while (dp < edp) { if (dp->d_ino == (ino_t)0) goto next; #if BYTE_ORDER == LITTLE_ENDIAN if (fp->f_fs->fs_maxsymlinklen <= 0) namlen = dp->d_type; else #endif namlen = dp->d_namlen; if (namlen == length && !strcmp(name, dp->d_name)) { /* found entry */ *inumber_p = dp->d_ino; return (0); } next: dp = (struct direct *)((char *)dp + dp->d_reclen); } fp->f_seekp += buf_size; } return (ENOENT); } -static int sblock_try[] = SBLOCKSEARCH; - /* * Open a file. */ static int ufs_open(upath, f) const char *upath; struct open_file *f; { char *cp, *ncp; int c; ino_t inumber, parent_inumber; struct file *fp; struct fs *fs; int i, rc; size_t buf_size; int nlinks = 0; char namebuf[MAXPATHLEN+1]; char *buf = NULL; char *path = NULL; /* allocate file system specific data structure */ fp = malloc(sizeof(struct file)); bzero(fp, sizeof(struct file)); f->f_fsdata = (void *)fp; - /* allocate space and read super block */ - fs = malloc(SBLOCKSIZE); - fp->f_fs = fs; + /* read super block */ twiddle(1); - /* - * Try reading the superblock in each of its possible locations. - */ - for (i = 0; sblock_try[i] != -1; i++) { - rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, - (char *)fs, &buf_size); - if (rc) - goto out; - if ((fs->fs_magic == FS_UFS1_MAGIC || - (fs->fs_magic == FS_UFS2_MAGIC && - fs->fs_sblockloc == sblock_try[i])) && - buf_size == SBLOCKSIZE && - fs->fs_bsize <= MAXBSIZE && - fs->fs_bsize >= sizeof(struct fs)) - break; - } - if (sblock_try[i] == -1) { - rc = EINVAL; + if ((rc = ffs_sbget(f, &fs, -1, 0, ufs_use_sa_read)) != 0) goto out; - } + fp->f_fs = fs; /* * Calculate indirect block levels. */ { ufs2_daddr_t mult; int level; mult = 1; for (level = 0; level < UFS_NIADDR; level++) { mult *= NINDIR(fs); fp->f_nindir[level] = mult; } } inumber = UFS_ROOTINO; if ((rc = read_inode(inumber, f)) != 0) goto out; cp = path = strdup(upath); if (path == NULL) { rc = ENOMEM; goto out; } while (*cp) { /* * Remove extra separators */ while (*cp == '/') cp++; if (*cp == '\0') break; /* * Check that current node is a directory. */ if ((DIP(fp, di_mode) & IFMT) != IFDIR) { rc = ENOTDIR; goto out; } /* * Get next component of path name. */ { int len = 0; ncp = cp; while ((c = *cp) != '\0' && c != '/') { if (++len > UFS_MAXNAMLEN) { rc = ENOENT; goto out; } cp++; } *cp = '\0'; } /* * Look up component in current directory. * Save directory inumber in case we find a * symbolic link. */ parent_inumber = inumber; rc = search_directory(ncp, f, &inumber); *cp = c; if (rc) goto out; /* * Open next component. */ if ((rc = read_inode(inumber, f)) != 0) goto out; /* * Check for symbolic link. */ if ((DIP(fp, di_mode) & IFMT) == IFLNK) { int link_len = DIP(fp, di_size); int len; len = strlen(cp); if (link_len + len > MAXPATHLEN || ++nlinks > MAXSYMLINKS) { rc = ENOENT; goto out; } bcopy(cp, &namebuf[link_len], len + 1); if (link_len < fs->fs_maxsymlinklen) { if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) cp = (caddr_t)(fp->f_di.di1.di_db); else cp = (caddr_t)(fp->f_di.di2.di_db); bcopy(cp, namebuf, (unsigned) link_len); } else { /* * Read file for symbolic link */ size_t buf_size; ufs2_daddr_t disk_block; struct fs *fs = fp->f_fs; if (!buf) buf = malloc(fs->fs_bsize); rc = block_map(f, (ufs2_daddr_t)0, &disk_block); if (rc) goto out; twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), fs->fs_bsize, buf, &buf_size); if (rc) goto out; bcopy((char *)buf, namebuf, (unsigned)link_len); } /* * If relative pathname, restart at parent directory. * If absolute pathname, restart at root. */ cp = namebuf; if (*cp != '/') inumber = parent_inumber; else inumber = (ino_t)UFS_ROOTINO; if ((rc = read_inode(inumber, f)) != 0) goto out; } } /* * Found terminal component. */ rc = 0; fp->f_seekp = 0; out: if (buf) free(buf); if (path) free(path); if (rc) { if (fp->f_buf) free(fp->f_buf); free(fp->f_fs); free(fp); } return (rc); +} + +/* + * A read function for use by standalone-layer routines. + */ +static int +ufs_use_sa_read(void *devfd, off_t loc, void **bufp, int size) +{ + struct open_file *f; + size_t buf_size; + int error; + + f = (struct open_file *)devfd; + if ((*bufp = malloc(size)) == NULL) + return (ENOSPC); + error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, loc / DEV_BSIZE, + size, *bufp, &buf_size); + if (error != 0) + return (error); + if (buf_size != size) + return (EIO); + return (0); } static int ufs_close(f) struct open_file *f; { struct file *fp = (struct file *)f->f_fsdata; int level; f->f_fsdata = (void *)0; if (fp == (struct file *)0) return (0); for (level = 0; level < UFS_NIADDR; level++) { if (fp->f_blk[level]) free(fp->f_blk[level]); } if (fp->f_buf) free(fp->f_buf); free(fp->f_fs); free(fp); return (0); } /* * Copy a portion of a file into kernel memory. * Cross block boundaries when necessary. */ static int ufs_read(f, start, size, resid) struct open_file *f; void *start; size_t size; size_t *resid; /* out */ { struct file *fp = (struct file *)f->f_fsdata; size_t csize; char *buf; size_t buf_size; int rc = 0; char *addr = start; while (size != 0) { if (fp->f_seekp >= DIP(fp, di_size)) break; rc = buf_read_file(f, &buf, &buf_size); if (rc) break; csize = size; if (csize > buf_size) csize = buf_size; bcopy(buf, addr, csize); fp->f_seekp += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } /* * Write to a portion of an already allocated file. * Cross block boundaries when necessary. Can not * extend the file. */ static int ufs_write(f, start, size, resid) struct open_file *f; void *start; size_t size; size_t *resid; /* out */ { struct file *fp = (struct file *)f->f_fsdata; size_t csize; int rc = 0; char *addr = start; csize = size; while ((size != 0) && (csize != 0)) { if (fp->f_seekp >= DIP(fp, di_size)) break; if (csize >= 512) csize = 512; /* XXX */ rc = buf_write_file(f, addr, &csize); if (rc) break; fp->f_seekp += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } static off_t ufs_seek(f, offset, where) struct open_file *f; off_t offset; int where; { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_seekp = offset; break; case SEEK_CUR: fp->f_seekp += offset; break; case SEEK_END: fp->f_seekp = DIP(fp, di_size) - offset; break; default: errno = EINVAL; return (-1); } return (fp->f_seekp); } static int ufs_stat(f, sb) struct open_file *f; struct stat *sb; { struct file *fp = (struct file *)f->f_fsdata; /* only important stuff */ sb->st_mode = DIP(fp, di_mode); sb->st_uid = DIP(fp, di_uid); sb->st_gid = DIP(fp, di_gid); sb->st_size = DIP(fp, di_size); return (0); } static int ufs_readdir(struct open_file *f, struct dirent *d) { struct file *fp = (struct file *)f->f_fsdata; struct direct *dp; char *buf; size_t buf_size; int error; /* * assume that a directory entry will not be split across blocks */ again: if (fp->f_seekp >= DIP(fp, di_size)) return (ENOENT); error = buf_read_file(f, &buf, &buf_size); if (error) return (error); dp = (struct direct *)buf; fp->f_seekp += dp->d_reclen; if (dp->d_ino == (ino_t)0) goto again; d->d_type = dp->d_type; strcpy(d->d_name, dp->d_name); return (0); } Index: head/sys/geom/geom.h =================================================================== --- head/sys/geom/geom.h (revision 328425) +++ head/sys/geom/geom.h (revision 328426) @@ -1,431 +1,433 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _GEOM_GEOM_H_ #define _GEOM_GEOM_H_ #include #include #include #include #include #include #include struct g_class; struct g_geom; struct g_consumer; struct g_provider; struct g_stat; struct thread; struct bio; struct sbuf; struct gctl_req; struct g_configargs; struct disk_zone_args; typedef int g_config_t (struct g_configargs *ca); typedef void g_ctl_req_t (struct gctl_req *, struct g_class *cp, char const *verb); typedef int g_ctl_create_geom_t (struct gctl_req *, struct g_class *cp, struct g_provider *pp); typedef int g_ctl_destroy_geom_t (struct gctl_req *, struct g_class *cp, struct g_geom *gp); typedef int g_ctl_config_geom_t (struct gctl_req *, struct g_geom *gp, const char *verb); typedef void g_init_t (struct g_class *mp); typedef void g_fini_t (struct g_class *mp); typedef struct g_geom * g_taste_t (struct g_class *, struct g_provider *, int flags); typedef int g_ioctl_t(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td); #define G_TF_NORMAL 0 #define G_TF_INSIST 1 #define G_TF_TRANSPARENT 2 typedef int g_access_t (struct g_provider *, int, int, int); /* XXX: not sure about the thread arg */ typedef void g_orphan_t (struct g_consumer *); typedef void g_start_t (struct bio *); typedef void g_spoiled_t (struct g_consumer *); typedef void g_attrchanged_t (struct g_consumer *, const char *attr); typedef void g_provgone_t (struct g_provider *); typedef void g_dumpconf_t (struct sbuf *, const char *indent, struct g_geom *, struct g_consumer *, struct g_provider *); typedef void g_resize_t(struct g_consumer *cp); /* * The g_class structure describes a transformation class. In other words * all BSD disklabel handlers share one g_class, all MBR handlers share * one common g_class and so on. * Certain operations are instantiated on the class, most notably the * taste and config_geom functions. */ struct g_class { const char *name; u_int version; u_int spare0; g_taste_t *taste; g_config_t *config; g_ctl_req_t *ctlreq; g_init_t *init; g_fini_t *fini; g_ctl_destroy_geom_t *destroy_geom; /* * Default values for geom methods */ g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare1; void *spare2; /* * The remaining elements are private */ LIST_ENTRY(g_class) class; LIST_HEAD(,g_geom) geom; }; /* * The g_geom_alias is a list node for aliases for the geom name * for device node creation. */ struct g_geom_alias { LIST_ENTRY(g_geom_alias) ga_next; const char *ga_alias; }; #define G_VERSION_00 0x19950323 #define G_VERSION_01 0x20041207 /* add fflag to g_ioctl_t */ #define G_VERSION G_VERSION_01 /* * The g_geom is an instance of a g_class. */ struct g_geom { char *name; struct g_class *class; LIST_ENTRY(g_geom) geom; LIST_HEAD(,g_consumer) consumer; LIST_HEAD(,g_provider) provider; TAILQ_ENTRY(g_geom) geoms; /* XXX: better name */ int rank; g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare0; void *spare1; void *softc; unsigned flags; #define G_GEOM_WITHER 1 #define G_GEOM_VOLATILE_BIO 2 LIST_HEAD(,g_geom_alias) aliases; }; /* * The g_bioq is a queue of struct bio's. * XXX: possibly collection point for statistics. * XXX: should (possibly) be collapsed with sys/bio.h::bio_queue_head. */ struct g_bioq { TAILQ_HEAD(, bio) bio_queue; struct mtx bio_queue_lock; int bio_queue_length; }; /* * A g_consumer is an attachment point for a g_provider. One g_consumer * can only be attached to one g_provider, but multiple g_consumers * can be attached to one g_provider. */ struct g_consumer { struct g_geom *geom; LIST_ENTRY(g_consumer) consumer; struct g_provider *provider; LIST_ENTRY(g_consumer) consumers; /* XXX: better name */ int acr, acw, ace; int flags; #define G_CF_SPOILED 0x1 #define G_CF_ORPHAN 0x4 #define G_CF_DIRECT_SEND 0x10 #define G_CF_DIRECT_RECEIVE 0x20 struct devstat *stat; u_int nstart, nend; /* Two fields for the implementing class to use */ void *private; u_int index; }; /* * A g_provider is a "logical disk". */ struct g_provider { char *name; LIST_ENTRY(g_provider) provider; struct g_geom *geom; LIST_HEAD(,g_consumer) consumers; int acr, acw, ace; int error; TAILQ_ENTRY(g_provider) orphan; off_t mediasize; u_int sectorsize; u_int stripesize; u_int stripeoffset; struct devstat *stat; u_int nstart, nend; u_int flags; #define G_PF_WITHER 0x2 #define G_PF_ORPHAN 0x4 #define G_PF_ACCEPT_UNMAPPED 0x8 #define G_PF_DIRECT_SEND 0x10 #define G_PF_DIRECT_RECEIVE 0x20 /* Two fields for the implementing class to use */ void *private; u_int index; }; /* * Descriptor of a classifier. We can register a function and * an argument, which is called by g_io_request() on bio's * that are not previously classified. */ struct g_classifier_hook { TAILQ_ENTRY(g_classifier_hook) link; int (*func)(void *arg, struct bio *bp); void *arg; }; /* BIO_GETATTR("GEOM::setstate") argument values. */ #define G_STATE_FAILED 0 #define G_STATE_REBUILD 1 #define G_STATE_RESYNC 2 #define G_STATE_ACTIVE 3 /* geom_dev.c */ struct cdev; void g_dev_print(void); void g_dev_physpath_changed(void); struct g_provider *g_dev_getprovider(struct cdev *dev); /* geom_dump.c */ void g_trace(int level, const char *, ...); # define G_T_TOPOLOGY 1 # define G_T_BIO 2 # define G_T_ACCESS 4 /* geom_event.c */ typedef void g_event_t(void *, int flag); #define EV_CANCEL 1 int g_post_event(g_event_t *func, void *arg, int flag, ...); int g_waitfor_event(g_event_t *func, void *arg, int flag, ...); void g_cancel_event(void *ref); int g_attr_changed(struct g_provider *pp, const char *attr, int flag); int g_media_changed(struct g_provider *pp, int flag); int g_media_gone(struct g_provider *pp, int flag); void g_orphan_provider(struct g_provider *pp, int error); void g_waitidlelock(void); /* geom_subr.c */ int g_access(struct g_consumer *cp, int nread, int nwrite, int nexcl); int g_attach(struct g_consumer *cp, struct g_provider *pp); int g_compare_names(const char *namea, const char *nameb); void g_destroy_consumer(struct g_consumer *cp); void g_destroy_geom(struct g_geom *pp); void g_destroy_provider(struct g_provider *pp); void g_detach(struct g_consumer *cp); void g_error_provider(struct g_provider *pp, int error); struct g_provider *g_provider_by_name(char const *arg); void g_geom_add_alias(struct g_geom *gp, const char *alias); int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len); #define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof *(v)) int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len); int g_handleattr_int(struct bio *bp, const char *attribute, int val); int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val); int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val); int g_handleattr_str(struct bio *bp, const char *attribute, const char *str); struct g_consumer * g_new_consumer(struct g_geom *gp); struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) __printflike(2, 3); struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) __printflike(2, 3); void g_resize_provider(struct g_provider *pp, off_t size); int g_retaste(struct g_class *mp); void g_spoil(struct g_provider *pp, struct g_consumer *cp); int g_std_access(struct g_provider *pp, int dr, int dw, int de); void g_std_done(struct bio *bp); void g_std_spoiled(struct g_consumer *cp); void g_wither_geom(struct g_geom *gp, int error); void g_wither_geom_close(struct g_geom *gp, int error); void g_wither_provider(struct g_provider *pp, int error); #if defined(DIAGNOSTIC) || defined(DDB) int g_valid_obj(void const *ptr); #endif #ifdef DIAGNOSTIC #define G_VALID_CLASS(foo) \ KASSERT(g_valid_obj(foo) == 1, ("%p is not a g_class", foo)) #define G_VALID_GEOM(foo) \ KASSERT(g_valid_obj(foo) == 2, ("%p is not a g_geom", foo)) #define G_VALID_CONSUMER(foo) \ KASSERT(g_valid_obj(foo) == 3, ("%p is not a g_consumer", foo)) #define G_VALID_PROVIDER(foo) \ KASSERT(g_valid_obj(foo) == 4, ("%p is not a g_provider", foo)) #else #define G_VALID_CLASS(foo) do { } while (0) #define G_VALID_GEOM(foo) do { } while (0) #define G_VALID_CONSUMER(foo) do { } while (0) #define G_VALID_PROVIDER(foo) do { } while (0) #endif int g_modevent(module_t, int, void *); /* geom_io.c */ struct bio * g_clone_bio(struct bio *); struct bio * g_duplicate_bio(struct bio *); void g_destroy_bio(struct bio *); void g_io_deliver(struct bio *bp, int error); int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr); int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp); int g_io_flush(struct g_consumer *cp); int g_register_classifier(struct g_classifier_hook *hook); void g_unregister_classifier(struct g_classifier_hook *hook); void g_io_request(struct bio *bp, struct g_consumer *cp); struct bio *g_new_bio(void); struct bio *g_alloc_bio(void); void g_reset_bio(struct bio *); void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error); int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length); int g_delete_data(struct g_consumer *cp, off_t offset, off_t length); void g_print_bio(struct bio *bp); +int g_use_g_read_data(void *, off_t, void **, int); +int g_use_g_write_data(void *, off_t, void *, int); /* geom_kern.c / geom_kernsim.c */ #ifdef _KERNEL extern struct sx topology_lock; struct g_kerneldump { off_t offset; off_t length; struct dumperinfo di; }; MALLOC_DECLARE(M_GEOM); static __inline void * g_malloc(int size, int flags) { void *p; p = malloc(size, M_GEOM, flags); return (p); } static __inline void g_free(void *ptr) { #ifdef DIAGNOSTIC if (sx_xlocked(&topology_lock)) { KASSERT(g_valid_obj(ptr) == 0, ("g_free(%p) of live object, type %d", ptr, g_valid_obj(ptr))); } #endif free(ptr, M_GEOM); } #define g_topology_lock() \ do { \ sx_xlock(&topology_lock); \ } while (0) #define g_topology_try_lock() sx_try_xlock(&topology_lock) #define g_topology_unlock() \ do { \ sx_xunlock(&topology_lock); \ } while (0) #define g_topology_assert() \ do { \ sx_assert(&topology_lock, SX_XLOCKED); \ } while (0) #define g_topology_assert_not() \ do { \ sx_assert(&topology_lock, SX_UNLOCKED); \ } while (0) #define g_topology_sleep(chan, timo) \ sx_sleep(chan, &topology_lock, 0, "gtopol", timo) #define DECLARE_GEOM_CLASS(class, name) \ static moduledata_t name##_mod = { \ #name, g_modevent, &class \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); int g_is_geom_thread(struct thread *td); #endif /* _KERNEL */ /* geom_ctl.c */ int gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len); void gctl_set_param_err(struct gctl_req *req, const char *param, void const *ptr, int len); void *gctl_get_param(struct gctl_req *req, const char *param, int *len); char const *gctl_get_asciiparam(struct gctl_req *req, const char *param); void *gctl_get_paraml(struct gctl_req *req, const char *param, int len); int gctl_error(struct gctl_req *req, const char *fmt, ...) __printflike(2, 3); struct g_class *gctl_get_class(struct gctl_req *req, char const *arg); struct g_geom *gctl_get_geom(struct gctl_req *req, struct g_class *mpr, char const *arg); struct g_provider *gctl_get_provider(struct gctl_req *req, char const *arg); #endif /* _GEOM_GEOM_H_ */ Index: head/sys/geom/geom_io.c =================================================================== --- head/sys/geom/geom_io.c (revision 328425) +++ head/sys/geom/geom_io.c (revision 328426) @@ -1,1059 +1,1092 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int g_io_transient_map_bio(struct bio *bp); static struct g_bioq g_bio_run_down; static struct g_bioq g_bio_run_up; /* * Pace is a hint that we've had some trouble recently allocating * bios, so we should back off trying to send I/O down the stack * a bit to let the problem resolve. When pacing, we also turn * off direct dispatch to also reduce memory pressure from I/Os * there, at the expxense of some added latency while the memory * pressures exist. See g_io_schedule_down() for more details * and limitations. */ static volatile u_int pace; static uma_zone_t biozone; /* * The head of the list of classifiers used in g_io_request. * Use g_register_classifier() and g_unregister_classifier() * to add/remove entries to the list. * Classifiers are invoked in registration order. */ static TAILQ_HEAD(g_classifier_tailq, g_classifier_hook) g_classifier_tailq = TAILQ_HEAD_INITIALIZER(g_classifier_tailq); #include static void g_bioq_lock(struct g_bioq *bq) { mtx_lock(&bq->bio_queue_lock); } static void g_bioq_unlock(struct g_bioq *bq) { mtx_unlock(&bq->bio_queue_lock); } #if 0 static void g_bioq_destroy(struct g_bioq *bq) { mtx_destroy(&bq->bio_queue_lock); } #endif static void g_bioq_init(struct g_bioq *bq) { TAILQ_INIT(&bq->bio_queue); mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); } static struct bio * g_bioq_first(struct g_bioq *bq) { struct bio *bp; bp = TAILQ_FIRST(&bq->bio_queue); if (bp != NULL) { KASSERT((bp->bio_flags & BIO_ONQUEUE), ("Bio not on queue bp=%p target %p", bp, bq)); bp->bio_flags &= ~BIO_ONQUEUE; TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); bq->bio_queue_length--; } return (bp); } struct bio * g_new_bio(void) { struct bio *bp; bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO); #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_new_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3, 0); } #endif return (bp); } struct bio * g_alloc_bio(void) { struct bio *bp; bp = uma_zalloc(biozone, M_WAITOK | M_ZERO); #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3, 0); } #endif return (bp); } void g_destroy_bio(struct bio *bp) { #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3, 0); } #endif uma_zfree(biozone, bp); } struct bio * g_clone_bio(struct bio *bp) { struct bio *bp2; bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO); if (bp2 != NULL) { bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; /* * BIO_ORDERED flag may be used by disk drivers to enforce * ordering restrictions, so this flag needs to be cloned. * BIO_UNMAPPED and BIO_VLIST should be inherited, to properly * indicate which way the buffer is passed. * Other bio flags are not suitable for cloning. */ bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST); bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_ma = bp->bio_ma; bp2->bio_ma_n = bp->bio_ma_n; bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; if (bp->bio_cmd == BIO_ZONE) bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone)); /* Inherit classification info from the parent */ bp2->bio_classifier1 = bp->bio_classifier1; bp2->bio_classifier2 = bp->bio_classifier2; #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) bp2->bio_track_bp = bp->bio_track_bp; #endif bp->bio_children++; } #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3, 0); } #endif return(bp2); } struct bio * g_duplicate_bio(struct bio *bp) { struct bio *bp2; bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO); bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST); bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_ma = bp->bio_ma; bp2->bio_ma_n = bp->bio_ma_n; bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; bp->bio_children++; #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3, 0); } #endif return(bp2); } void g_reset_bio(struct bio *bp) { bzero(bp, sizeof(*bp)); } void g_io_init() { g_bioq_init(&g_bio_run_down); g_bioq_init(&g_bio_run_up); biozone = uma_zcreate("g_bio", sizeof (struct bio), NULL, NULL, NULL, NULL, 0, 0); } int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_getattr(%s)", attr); bp = g_alloc_bio(); bp->bio_cmd = BIO_GETATTR; bp->bio_done = NULL; bp->bio_attribute = attr; bp->bio_length = *len; bp->bio_data = ptr; g_io_request(bp, cp); error = biowait(bp, "ggetattr"); *len = bp->bio_completed; g_destroy_bio(bp); return (error); } int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_zone(%d)", zone_args->zone_cmd); bp = g_alloc_bio(); bp->bio_cmd = BIO_ZONE; bp->bio_done = NULL; /* * XXX KDM need to handle report zone data. */ bcopy(zone_args, &bp->bio_zone, sizeof(*zone_args)); if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES) bp->bio_length = zone_args->zone_params.report.entries_allocated * sizeof(struct disk_zone_rep_entry); else bp->bio_length = 0; g_io_request(bp, cp); error = biowait(bp, "gzone"); bcopy(&bp->bio_zone, zone_args, sizeof(*zone_args)); g_destroy_bio(bp); return (error); } int g_io_flush(struct g_consumer *cp) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name); bp = g_alloc_bio(); bp->bio_cmd = BIO_FLUSH; bp->bio_flags |= BIO_ORDERED; bp->bio_done = NULL; bp->bio_attribute = NULL; bp->bio_offset = cp->provider->mediasize; bp->bio_length = 0; bp->bio_data = NULL; g_io_request(bp, cp); error = biowait(bp, "gflush"); g_destroy_bio(bp); return (error); } static int g_io_check(struct bio *bp) { struct g_consumer *cp; struct g_provider *pp; off_t excess; int error; biotrack(bp, __func__); cp = bp->bio_from; pp = bp->bio_to; /* Fail if access counters dont allow the operation */ switch(bp->bio_cmd) { case BIO_READ: case BIO_GETATTR: if (cp->acr == 0) return (EPERM); break; case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: if (cp->acw == 0) return (EPERM); break; case BIO_ZONE: if ((bp->bio_zone.zone_cmd == DISK_ZONE_REPORT_ZONES) || (bp->bio_zone.zone_cmd == DISK_ZONE_GET_PARAMS)) { if (cp->acr == 0) return (EPERM); } else if (cp->acw == 0) return (EPERM); break; default: return (EPERM); } /* if provider is marked for error, don't disturb. */ if (pp->error) return (pp->error); if (cp->flags & G_CF_ORPHAN) return (ENXIO); switch(bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: /* Zero sectorsize or mediasize is probably a lack of media. */ if (pp->sectorsize == 0 || pp->mediasize == 0) return (ENXIO); /* Reject I/O not on sector boundary */ if (bp->bio_offset % pp->sectorsize) return (EINVAL); /* Reject I/O not integral sector long */ if (bp->bio_length % pp->sectorsize) return (EINVAL); /* Reject requests before or past the end of media. */ if (bp->bio_offset < 0) return (EIO); if (bp->bio_offset > pp->mediasize) return (EIO); /* Truncate requests to the end of providers media. */ excess = bp->bio_offset + bp->bio_length; if (excess > bp->bio_to->mediasize) { KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE == bp->bio_ma_n, ("excess bio %p too short", bp)); excess -= bp->bio_to->mediasize; bp->bio_length -= excess; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; } if (excess > 0) CTR3(KTR_GEOM, "g_down truncated bio " "%p provider %s by %d", bp, bp->bio_to->name, excess); } /* Deliver zero length transfers right here. */ if (bp->bio_length == 0) { CTR2(KTR_GEOM, "g_down terminated 0-length " "bp %p provider %s", bp, bp->bio_to->name); return (0); } if ((bp->bio_flags & BIO_UNMAPPED) != 0 && (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { if ((error = g_io_transient_map_bio(bp)) >= 0) return (error); } break; default: break; } return (EJUSTRETURN); } /* * bio classification support. * * g_register_classifier() and g_unregister_classifier() * are used to add/remove a classifier from the list. * The list is protected using the g_bio_run_down lock, * because the classifiers are called in this path. * * g_io_request() passes bio's that are not already classified * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers(). * Classifiers can store their result in the two fields * bio_classifier1 and bio_classifier2. * A classifier that updates one of the fields should * return a non-zero value. * If no classifier updates the field, g_run_classifiers() sets * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls. */ int g_register_classifier(struct g_classifier_hook *hook) { g_bioq_lock(&g_bio_run_down); TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link); g_bioq_unlock(&g_bio_run_down); return (0); } void g_unregister_classifier(struct g_classifier_hook *hook) { struct g_classifier_hook *entry; g_bioq_lock(&g_bio_run_down); TAILQ_FOREACH(entry, &g_classifier_tailq, link) { if (entry == hook) { TAILQ_REMOVE(&g_classifier_tailq, hook, link); break; } } g_bioq_unlock(&g_bio_run_down); } static void g_run_classifiers(struct bio *bp) { struct g_classifier_hook *hook; int classified = 0; biotrack(bp, __func__); TAILQ_FOREACH(hook, &g_classifier_tailq, link) classified |= hook->func(hook->arg, bp); if (!classified) bp->bio_classifier1 = BIO_NOTCLASSIFIED; } void g_io_request(struct bio *bp, struct g_consumer *cp) { struct g_provider *pp; struct mtx *mtxp; int direct, error, first; uint8_t cmd; biotrack(bp, __func__); KASSERT(cp != NULL, ("NULL cp in g_io_request")); KASSERT(bp != NULL, ("NULL bp in g_io_request")); pp = cp->provider; KASSERT(pp != NULL, ("consumer not attached in g_io_request")); #ifdef DIAGNOSTIC KASSERT(bp->bio_driver1 == NULL, ("bio_driver1 used by the consumer (geom %s)", cp->geom->name)); KASSERT(bp->bio_driver2 == NULL, ("bio_driver2 used by the consumer (geom %s)", cp->geom->name)); KASSERT(bp->bio_pflags == 0, ("bio_pflags used by the consumer (geom %s)", cp->geom->name)); /* * Remember consumer's private fields, so we can detect if they were * modified by the provider. */ bp->_bio_caller1 = bp->bio_caller1; bp->_bio_caller2 = bp->bio_caller2; bp->_bio_cflags = bp->bio_cflags; #endif cmd = bp->bio_cmd; if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_GETATTR) { KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd)); } if (cmd == BIO_DELETE || cmd == BIO_FLUSH) { KASSERT(bp->bio_data == NULL, ("non-NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd)); } if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_DELETE) { KASSERT(bp->bio_offset % cp->provider->sectorsize == 0, ("wrong offset %jd for sectorsize %u", bp->bio_offset, cp->provider->sectorsize)); KASSERT(bp->bio_length % cp->provider->sectorsize == 0, ("wrong length %jd for sectorsize %u", bp->bio_length, cp->provider->sectorsize)); } g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); bp->bio_from = cp; bp->bio_to = pp; bp->bio_error = 0; bp->bio_completed = 0; KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); if ((g_collectstats & G_STATS_CONSUMERS) != 0 || ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&bp->bio_t0); else getbinuptime(&bp->bio_t0); #ifdef GET_STACK_USAGE direct = (cp->flags & G_CF_DIRECT_SEND) != 0 && (pp->flags & G_PF_DIRECT_RECEIVE) != 0 && !g_is_geom_thread(curthread) && ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 || (bp->bio_flags & BIO_UNMAPPED) == 0 || THREAD_CAN_SLEEP()) && pace == 0; if (direct) { /* Block direct execution if less then half of stack left. */ size_t st, su; GET_STACK_USAGE(st, su); if (su * 2 > st) direct = 0; } #else direct = 0; #endif if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) { g_bioq_lock(&g_bio_run_down); g_run_classifiers(bp); g_bioq_unlock(&g_bio_run_down); } /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. */ mtxp = mtx_pool_find(mtxpool_sleep, pp); mtx_lock(mtxp); if (g_collectstats & G_STATS_PROVIDERS) devstat_start_transaction(pp->stat, &bp->bio_t0); if (g_collectstats & G_STATS_CONSUMERS) devstat_start_transaction(cp->stat, &bp->bio_t0); pp->nstart++; cp->nstart++; mtx_unlock(mtxp); if (direct) { error = g_io_check(bp); if (error >= 0) { CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p " "provider %s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); return; } bp->bio_to->geom->start(bp); } else { g_bioq_lock(&g_bio_run_down); first = TAILQ_EMPTY(&g_bio_run_down.bio_queue); TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); bp->bio_flags |= BIO_ONQUEUE; g_bio_run_down.bio_queue_length++; g_bioq_unlock(&g_bio_run_down); /* Pass it on down. */ if (first) wakeup(&g_wait_down); } } void g_io_deliver(struct bio *bp, int error) { struct bintime now; struct g_consumer *cp; struct g_provider *pp; struct mtx *mtxp; int direct, first; biotrack(bp, __func__); KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); pp = bp->bio_to; KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); cp = bp->bio_from; if (cp == NULL) { bp->bio_error = error; bp->bio_done(bp); return; } KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); #ifdef DIAGNOSTIC /* * Some classes - GJournal in particular - can modify bio's * private fields while the bio is in transit; G_GEOM_VOLATILE_BIO * flag means it's an expected behaviour for that particular geom. */ if ((cp->geom->flags & G_GEOM_VOLATILE_BIO) == 0) { KASSERT(bp->bio_caller1 == bp->_bio_caller1, ("bio_caller1 used by the provider %s", pp->name)); KASSERT(bp->bio_caller2 == bp->_bio_caller2, ("bio_caller2 used by the provider %s", pp->name)); KASSERT(bp->bio_cflags == bp->_bio_cflags, ("bio_cflags used by the provider %s", pp->name)); } #endif KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0")); KASSERT(bp->bio_completed <= bp->bio_length, ("bio_completed can't be greater than bio_length")); g_trace(G_T_BIO, "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); /* * XXX: next two doesn't belong here */ bp->bio_bcount = bp->bio_length; bp->bio_resid = bp->bio_bcount - bp->bio_completed; #ifdef GET_STACK_USAGE direct = (pp->flags & G_PF_DIRECT_SEND) && (cp->flags & G_CF_DIRECT_RECEIVE) && !g_is_geom_thread(curthread); if (direct) { /* Block direct execution if less then half of stack left. */ size_t st, su; GET_STACK_USAGE(st, su); if (su * 2 > st) direct = 0; } #else direct = 0; #endif /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. */ if ((g_collectstats & G_STATS_CONSUMERS) != 0 || ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&now); mtxp = mtx_pool_find(mtxpool_sleep, cp); mtx_lock(mtxp); if (g_collectstats & G_STATS_PROVIDERS) devstat_end_transaction_bio_bt(pp->stat, bp, &now); if (g_collectstats & G_STATS_CONSUMERS) devstat_end_transaction_bio_bt(cp->stat, bp, &now); cp->nend++; pp->nend++; mtx_unlock(mtxp); if (error != ENOMEM) { bp->bio_error = error; if (direct) { biodone(bp); } else { g_bioq_lock(&g_bio_run_up); first = TAILQ_EMPTY(&g_bio_run_up.bio_queue); TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); bp->bio_flags |= BIO_ONQUEUE; g_bio_run_up.bio_queue_length++; g_bioq_unlock(&g_bio_run_up); if (first) wakeup(&g_wait_up); } return; } if (bootverbose) printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); bp->bio_children = 0; bp->bio_inbed = 0; bp->bio_driver1 = NULL; bp->bio_driver2 = NULL; bp->bio_pflags = 0; g_io_request(bp, cp); pace = 1; return; } SYSCTL_DECL(_kern_geom); static long transient_maps; SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD, &transient_maps, 0, "Total count of the transient mapping requests"); u_int transient_map_retries = 10; SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW, &transient_map_retries, 0, "Max count of retries used before giving up on creating transient map"); int transient_map_hard_failures; SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD, &transient_map_hard_failures, 0, "Failures to establish the transient mapping due to retry attempts " "exhausted"); int transient_map_soft_failures; SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD, &transient_map_soft_failures, 0, "Count of retried failures to establish the transient mapping"); int inflight_transient_maps; SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD, &inflight_transient_maps, 0, "Current count of the active transient maps"); static int g_io_transient_map_bio(struct bio *bp) { vm_offset_t addr; long size; u_int retried; KASSERT(unmapped_buf_allowed, ("unmapped disabled")); size = round_page(bp->bio_ma_offset + bp->bio_length); KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp)); addr = 0; retried = 0; atomic_add_long(&transient_maps, 1); retry: if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) { if (transient_map_retries != 0 && retried >= transient_map_retries) { CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s", bp, bp->bio_to->name); atomic_add_int(&transient_map_hard_failures, 1); return (EDEADLK/* XXXKIB */); } else { /* * Naive attempt to quisce the I/O to get more * in-flight requests completed and defragment * the transient_arena. */ CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d", bp, bp->bio_to->name, retried); pause("g_d_tra", hz / 10); retried++; atomic_add_int(&transient_map_soft_failures, 1); goto retry; } } atomic_add_int(&inflight_transient_maps, 1); pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size)); bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; bp->bio_flags |= BIO_TRANSIENT_MAPPING; bp->bio_flags &= ~BIO_UNMAPPED; return (EJUSTRETURN); } void g_io_schedule_down(struct thread *tp __unused) { struct bio *bp; int error; for(;;) { g_bioq_lock(&g_bio_run_down); bp = g_bioq_first(&g_bio_run_down); if (bp == NULL) { CTR0(KTR_GEOM, "g_down going to sleep"); msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, PRIBIO | PDROP, "-", 0); continue; } CTR0(KTR_GEOM, "g_down has work to do"); g_bioq_unlock(&g_bio_run_down); biotrack(bp, __func__); if (pace != 0) { /* * There has been at least one memory allocation * failure since the last I/O completed. Pause 1ms to * give the system a chance to free up memory. We only * do this once because a large number of allocations * can fail in the direct dispatch case and there's no * relationship between the number of these failures and * the length of the outage. If there's still an outage, * we'll pause again and again until it's * resolved. Older versions paused longer and once per * allocation failure. This was OK for a single threaded * g_down, but with direct dispatch would lead to max of * 10 IOPs for minutes at a time when transient memory * issues prevented allocation for a batch of requests * from the upper layers. * * XXX This pacing is really lame. It needs to be solved * by other methods. This is OK only because the worst * case scenario is so rare. In the worst case scenario * all memory is tied up waiting for I/O to complete * which can never happen since we can't allocate bios * for that I/O. */ CTR0(KTR_GEOM, "g_down pacing self"); pause("g_down", min(hz/1000, 1)); pace = 0; } CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, bp->bio_to->name); error = g_io_check(bp); if (error >= 0) { CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider " "%s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); continue; } THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld " "len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); bp->bio_to->geom->start(bp); THREAD_SLEEPING_OK(); } } void g_io_schedule_up(struct thread *tp __unused) { struct bio *bp; for(;;) { g_bioq_lock(&g_bio_run_up); bp = g_bioq_first(&g_bio_run_up); if (bp == NULL) { CTR0(KTR_GEOM, "g_up going to sleep"); msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, PRIBIO | PDROP, "-", 0); continue; } g_bioq_unlock(&g_bio_run_up); THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off " "%jd len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); biodone(bp); THREAD_SLEEPING_OK(); } } void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) { struct bio *bp; void *ptr; int errorc; KASSERT(length > 0 && length >= cp->provider->sectorsize && length <= MAXPHYS, ("g_read_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_READ; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; ptr = g_malloc(length, M_WAITOK); bp->bio_data = ptr; g_io_request(bp, cp); errorc = biowait(bp, "gread"); if (error != NULL) *error = errorc; g_destroy_bio(bp); if (errorc) { g_free(ptr); ptr = NULL; } return (ptr); } +/* + * A read function for use by ffs_sbget when used by GEOM-layer routines. + */ int +g_use_g_read_data(void *devfd, off_t loc, void **bufp, int size) +{ + struct g_consumer *cp; + + cp = (struct g_consumer *)devfd; + /* + * Take care not to issue an invalid I/O request. The offset of + * the superblock candidate must be multiples of the provider's + * sector size, otherwise an FFS can't exist on the provider + * anyway. + */ + if (loc % cp->provider->sectorsize != 0) + return (ENOENT); + *bufp = g_read_data(cp, loc, size, NULL); + if (*bufp == NULL) + return (ENOENT); + return (0); +} + +int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) { struct bio *bp; int error; KASSERT(length > 0 && length >= cp->provider->sectorsize && length <= MAXPHYS, ("g_write_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_WRITE; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; bp->bio_data = ptr; g_io_request(bp, cp); error = biowait(bp, "gwrite"); g_destroy_bio(bp); return (error); +} + +/* + * A write function for use by ffs_sbput when used by GEOM-layer routines. + */ +int +g_use_g_write_data(void *devfd, off_t loc, void *buf, int size) +{ + + return (g_write_data((struct g_consumer *)devfd, loc, buf, size)); } int g_delete_data(struct g_consumer *cp, off_t offset, off_t length) { struct bio *bp; int error; KASSERT(length > 0 && length >= cp->provider->sectorsize, ("g_delete_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_DELETE; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; bp->bio_data = NULL; g_io_request(bp, cp); error = biowait(bp, "gdelete"); g_destroy_bio(bp); return (error); } void g_print_bio(struct bio *bp) { const char *pname, *cmd = NULL; if (bp->bio_to != NULL) pname = bp->bio_to->name; else pname = "[unknown]"; switch (bp->bio_cmd) { case BIO_GETATTR: cmd = "GETATTR"; printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute); return; case BIO_FLUSH: cmd = "FLUSH"; printf("%s[%s]", pname, cmd); return; case BIO_ZONE: { char *subcmd = NULL; cmd = "ZONE"; switch (bp->bio_zone.zone_cmd) { case DISK_ZONE_OPEN: subcmd = "OPEN"; break; case DISK_ZONE_CLOSE: subcmd = "CLOSE"; break; case DISK_ZONE_FINISH: subcmd = "FINISH"; break; case DISK_ZONE_RWP: subcmd = "RWP"; break; case DISK_ZONE_REPORT_ZONES: subcmd = "REPORT ZONES"; break; case DISK_ZONE_GET_PARAMS: subcmd = "GET PARAMS"; break; default: subcmd = "UNKNOWN"; break; } printf("%s[%s,%s]", pname, cmd, subcmd); return; } case BIO_READ: cmd = "READ"; break; case BIO_WRITE: cmd = "WRITE"; break; case BIO_DELETE: cmd = "DELETE"; break; default: cmd = "UNKNOWN"; printf("%s[%s()]", pname, cmd); return; } printf("%s[%s(offset=%jd, length=%jd)]", pname, cmd, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); } Index: head/sys/geom/journal/g_journal_ufs.c =================================================================== --- head/sys/geom/journal/g_journal_ufs.c (revision 328425) +++ head/sys/geom/journal/g_journal_ufs.c (revision 328426) @@ -1,109 +1,99 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include -static const int superblocks[] = SBLOCKSEARCH; - static int g_journal_ufs_clean(struct mount *mp) { struct ufsmount *ump; struct fs *fs; int flags; ump = VFSTOUFS(mp); fs = ump->um_fs; flags = fs->fs_flags; fs->fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); ffs_sbupdate(ump, MNT_WAIT, 1); fs->fs_flags = flags; return (0); } static void g_journal_ufs_dirty(struct g_consumer *cp) { struct fs *fs; - int error, i, sb; + int error; - if (SBLOCKSIZE % cp->provider->sectorsize != 0) + if (SBLOCKSIZE % cp->provider->sectorsize != 0 || + ffs_sbget(cp, &fs, -1, NULL, g_use_g_read_data) != 0) { + GJ_DEBUG(0, "Cannot find superblock to mark file system %s " + "as dirty.", cp->provider->name); return; - for (i = 0; (sb = superblocks[i]) != -1; i++) { - if (sb % cp->provider->sectorsize != 0) - continue; - fs = g_read_data(cp, sb, SBLOCKSIZE, NULL); - if (fs == NULL) - continue; - if (fs->fs_magic != FS_UFS1_MAGIC && - fs->fs_magic != FS_UFS2_MAGIC) { - g_free(fs); - continue; - } - GJ_DEBUG(0, "clean=%d flags=0x%x", fs->fs_clean, fs->fs_flags); - fs->fs_clean = 0; - fs->fs_flags |= FS_NEEDSFSCK | FS_UNCLEAN; - error = g_write_data(cp, sb, fs, SBLOCKSIZE); - g_free(fs); - if (error != 0) { - GJ_DEBUG(0, "Cannot mark file system %s as dirty " - "(error=%d).", cp->provider->name, error); - } else { - GJ_DEBUG(0, "File system %s marked as dirty.", - cp->provider->name); - } + } + GJ_DEBUG(0, "clean=%d flags=0x%x", fs->fs_clean, fs->fs_flags); + fs->fs_clean = 0; + fs->fs_flags |= FS_NEEDSFSCK | FS_UNCLEAN; + error = ffs_sbput(cp, fs, fs->fs_sblockloc, g_use_g_write_data); + g_free(fs); + if (error != 0) { + GJ_DEBUG(0, "Cannot mark file system %s as dirty " + "(error=%d).", cp->provider->name, error); + } else { + GJ_DEBUG(0, "File system %s marked as dirty.", + cp->provider->name); } } const struct g_journal_desc g_journal_ufs = { .jd_fstype = "ufs", .jd_clean = g_journal_ufs_clean, .jd_dirty = g_journal_ufs_dirty }; MODULE_DEPEND(g_journal, ufs, 1, 1, 1); Index: head/sys/geom/label/g_label_ufs.c =================================================================== --- head/sys/geom/label/g_label_ufs.c (revision 328425) +++ head/sys/geom/label/g_label_ufs.c (revision 328426) @@ -1,188 +1,148 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002, 2003 Gordon Tetlow * Copyright (c) 2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include -#include #include +#include #include #include +#include +#include +#include #include #include #define G_LABEL_UFS_VOLUME_DIR "ufs" #define G_LABEL_UFS_ID_DIR "ufsid" #define G_LABEL_UFS_VOLUME 0 #define G_LABEL_UFS_ID 1 /* * G_LABEL_UFS_CMP returns true if difference between provider mediasize * and filesystem size is less than G_LABEL_UFS_MAXDIFF sectors */ #define G_LABEL_UFS_CMP(prov, fsys, size) \ ( abs( ((fsys)->size) - ( (prov)->mediasize / (fsys)->fs_fsize )) \ < G_LABEL_UFS_MAXDIFF ) #define G_LABEL_UFS_MAXDIFF 0x100 -static const int superblocks[] = SBLOCKSEARCH; - +/* + * Try to find a superblock on the provider. If successful, then + * check that the size in the superblock corresponds to the size + * of the underlying provider. Finally, look for a volume label + * and create an appropriate provider based on that. + */ static void g_label_ufs_taste_common(struct g_consumer *cp, char *label, size_t size, int what) { struct g_provider *pp; - int sb, superblock; struct fs *fs; g_topology_assert_not(); pp = cp->provider; label[0] = '\0'; - if (SBLOCKSIZE % cp->provider->sectorsize != 0) + if (SBLOCKSIZE % pp->sectorsize != 0 || + ffs_sbget(cp, &fs, -1, NULL, g_use_g_read_data) != 0) return; - /* - * Walk through the standard places that superblocks hide and look - * for UFS magic. If we find magic, then check that the size in the - * superblock corresponds to the size of the underlying provider. - * Finally, look for a volume label and create an appropriate - * provider based on that. + * Check for magic. We also need to check if file system size + * is almost equal to providers size, because sysinstall(8) + * used to bogusly put first partition at offset 0 + * instead of 16, and glabel/ufs would find file system on slice + * instead of partition. + * + * In addition, media size can be a bit bigger than file system + * size. For instance, mkuzip can append bytes to align data + * to large sector size (it improves compression rates). */ - for (sb = 0; (superblock = superblocks[sb]) != -1; sb++) { - /* - * Take care not to issue an invalid I/O request. The offset of - * the superblock candidate must be multiples of the provider's - * sector size, otherwise an FFS can't exist on the provider - * anyway. - */ - if (superblock % cp->provider->sectorsize != 0) - continue; - - fs = (struct fs *)g_read_data(cp, superblock, SBLOCKSIZE, NULL); - if (fs == NULL) - continue; - /* - * Check for magic. We also need to check if file system size - * is almost equal to providers size, because sysinstall(8) - * used to bogusly put first partition at offset 0 - * instead of 16, and glabel/ufs would find file system on slice - * instead of partition. - * - * In addition, media size can be a bit bigger than file system - * size. For instance, mkuzip can append bytes to align data - * to large sector size (it improves compression rates). - */ - switch (fs->fs_magic){ - case FS_UFS1_MAGIC: - case FS_UFS2_MAGIC: - G_LABEL_DEBUG(1, "%s %s params: %jd, %d, %d, %jd\n", - fs->fs_magic == FS_UFS1_MAGIC ? "UFS1" : "UFS2", - pp->name, pp->mediasize, fs->fs_fsize, - fs->fs_old_size, fs->fs_providersize); - break; - default: - break; - } - - if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_fsize > 0 && - ( G_LABEL_UFS_CMP(pp, fs, fs_old_size) - || G_LABEL_UFS_CMP(pp, fs, fs_providersize))) { - /* Valid UFS1. */ - } else if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_fsize > 0 && - ( G_LABEL_UFS_CMP(pp, fs, fs_size) - || G_LABEL_UFS_CMP(pp, fs, fs_providersize))) { - /* Valid UFS2. */ - } else { - g_free(fs); - continue; - } - if (fs->fs_sblockloc != superblock || fs->fs_ncg < 1 || - fs->fs_bsize < MINBSIZE || - fs->fs_bsize < sizeof(struct fs)) { - g_free(fs); - continue; - } - G_LABEL_DEBUG(1, "%s file system detected on %s.", - fs->fs_magic == FS_UFS1_MAGIC ? "UFS1" : "UFS2", pp->name); - switch (what) { - case G_LABEL_UFS_VOLUME: - /* Check for volume label */ - if (fs->fs_volname[0] == '\0') { - g_free(fs); - continue; - } + if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_fsize > 0 && + ( G_LABEL_UFS_CMP(pp, fs, fs_old_size) + || G_LABEL_UFS_CMP(pp, fs, fs_providersize))) { + /* Valid UFS1. */ + } else if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_fsize > 0 && + ( G_LABEL_UFS_CMP(pp, fs, fs_size) + || G_LABEL_UFS_CMP(pp, fs, fs_providersize))) { + /* Valid UFS2. */ + } else { + g_free(fs); + return; + } + G_LABEL_DEBUG(1, "%s file system detected on %s.", + fs->fs_magic == FS_UFS1_MAGIC ? "UFS1" : "UFS2", pp->name); + switch (what) { + case G_LABEL_UFS_VOLUME: + /* Check for volume label */ + if (fs->fs_volname[0] != '\0') strlcpy(label, fs->fs_volname, size); - break; - case G_LABEL_UFS_ID: - if (fs->fs_id[0] == 0 && fs->fs_id[1] == 0) { - g_free(fs); - continue; - } + break; + case G_LABEL_UFS_ID: + if (fs->fs_id[0] != 0 || fs->fs_id[1] != 0) snprintf(label, size, "%08x%08x", fs->fs_id[0], fs->fs_id[1]); - break; - } - g_free(fs); break; } + g_free(fs); } static void g_label_ufs_volume_taste(struct g_consumer *cp, char *label, size_t size) { g_label_ufs_taste_common(cp, label, size, G_LABEL_UFS_VOLUME); } static void g_label_ufs_id_taste(struct g_consumer *cp, char *label, size_t size) { g_label_ufs_taste_common(cp, label, size, G_LABEL_UFS_ID); } struct g_label_desc g_label_ufs_volume = { .ld_taste = g_label_ufs_volume_taste, .ld_dir = G_LABEL_UFS_VOLUME_DIR, .ld_enabled = 1 }; struct g_label_desc g_label_ufs_id = { .ld_taste = g_label_ufs_id_taste, .ld_dir = G_LABEL_UFS_ID_DIR, .ld_enabled = 1 }; G_LABEL_INIT(ufsid, g_label_ufs_id, "Create device nodes for UFS file system IDs"); G_LABEL_INIT(ufs, g_label_ufs_volume, "Create device nodes for UFS volume names"); Index: head/sys/ufs/ffs/ffs_extern.h =================================================================== --- head/sys/ufs/ffs/ffs_extern.h (revision 328425) +++ head/sys/ufs/ffs/ffs_extern.h (revision 328426) @@ -1,204 +1,207 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 * $FreeBSD$ */ #ifndef _UFS_FFS_EXTERN_H #define _UFS_FFS_EXTERN_H #ifndef _KERNEL #error "No user-serving parts inside" #else struct buf; struct cg; struct fid; struct fs; struct inode; struct malloc_type; struct mount; struct thread; struct sockaddr; struct statfs; struct ucred; struct vnode; struct vop_fsync_args; struct vop_reallocblks_args; struct workhead; int ffs_alloc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, int, int, struct ucred *, ufs2_daddr_t *); int ffs_balloc_ufs1(struct vnode *a_vp, off_t a_startoffset, int a_size, struct ucred *a_cred, int a_flags, struct buf **a_bpp); int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size, struct ucred *a_cred, int a_flags, struct buf **a_bpp); int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t, enum vtype, struct workhead *); ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); void ffs_bdflush(struct bufobj *, struct buf *); int ffs_copyonwrite(struct vnode *, struct buf *); int ffs_flushfiles(struct mount *, int, struct thread *); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t, int, struct workhead *); void ffs_fserr(struct fs *, ino_t, char *); int ffs_getcg(struct fs *, struct vnode *, u_int, struct buf **, struct cg **); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); void ffs_oldfscompat_write(struct fs *, struct ufsmount *); int ffs_own_mount(const struct mount *mp); int ffs_reallocblks(struct vop_reallocblks_args *); int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, ufs2_daddr_t, int, int, int, struct ucred *, struct buf **); int ffs_reload(struct mount *, struct thread *, int); +int ffs_sbget(void *, struct fs **, off_t, struct malloc_type *, + int (*)(void *, off_t, void **, int)); +int ffs_sbput(void *, struct fs *, off_t, int (*)(void *, off_t, void *, + int)); int ffs_sbupdate(struct ufsmount *, int, int); void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t, enum vtype, struct workhead *); void ffs_snapremove(struct vnode *vp); int ffs_snapshot(struct mount *mp, char *snapfile); void ffs_snapshot_mount(struct mount *mp); void ffs_snapshot_unmount(struct mount *mp); -void process_deferred_inactive(struct mount *mp); +void ffs_susp_initialize(void); +void ffs_susp_uninitialize(void); void ffs_sync_snap(struct mount *, int); int ffs_syncvnode(struct vnode *vp, int waitfor, int flags); int ffs_truncate(struct vnode *, off_t, int, struct ucred *); int ffs_update(struct vnode *, int); int ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **); - int ffs_vfree(struct vnode *, ino_t, int); vfs_vget_t ffs_vget; int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int); -void ffs_susp_initialize(void); -void ffs_susp_uninitialize(void); +void process_deferred_inactive(struct mount *mp); #define FFSV_FORCEINSMQ 0x0001 #define FFSR_FORCE 0x0001 #define FFSR_UNSUSPEND 0x0002 extern struct vop_vector ffs_vnodeops1; extern struct vop_vector ffs_fifoops1; extern struct vop_vector ffs_vnodeops2; extern struct vop_vector ffs_fifoops2; /* * Soft update function prototypes. */ int softdep_check_suspend(struct mount *, struct vnode *, int, int, int, int); void softdep_get_depcounts(struct mount *, int *, int *); void softdep_initialize(void); void softdep_uninitialize(void); int softdep_mount(struct vnode *, struct mount *, struct fs *, struct ucred *); void softdep_unmount(struct mount *); int softdep_move_dependencies(struct buf *, struct buf *); int softdep_flushworklist(struct mount *, int *, struct thread *); int softdep_flushfiles(struct mount *, int, struct thread *); void softdep_update_inodeblock(struct inode *, struct buf *, int); void softdep_load_inodeblock(struct inode *); void softdep_freefile(struct vnode *, ino_t, int); int softdep_request_cleanup(struct fs *, struct vnode *, struct ucred *, int); void softdep_setup_freeblocks(struct inode *, off_t, int); void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int); void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, int, int); void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, ufs2_daddr_t, long, long, struct buf *); void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t, ufs2_daddr_t, long, long, struct buf *); void softdep_setup_allocindir_meta(struct buf *, struct inode *, struct buf *, int, ufs2_daddr_t); void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t, struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *); void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int, struct workhead *); void softdep_setup_inofree(struct mount *, struct buf *, ino_t, struct workhead *); void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); void softdep_fsync_mountdev(struct vnode *); int softdep_sync_metadata(struct vnode *); int softdep_sync_buf(struct vnode *, struct buf *, int); int softdep_fsync(struct vnode *); int softdep_prealloc(struct vnode *, int); int softdep_journal_lookup(struct mount *, struct vnode **); void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int); void softdep_journal_fsync(struct inode *); void softdep_buf_append(struct buf *, struct workhead *); void softdep_inode_append(struct inode *, struct ucred *, struct workhead *); void softdep_freework(struct workhead *); /* * Things to request flushing in softdep_request_cleanup() */ #define FLUSH_INODES 1 #define FLUSH_INODES_WAIT 2 #define FLUSH_BLOCKS 3 #define FLUSH_BLOCKS_WAIT 4 /* * Flag to ffs_syncvnode() to request flushing of data only, * but skip the ffs_update() on the inode itself. Used to avoid * deadlock when flushing snapshot inodes while holding snaplk. */ #define NO_INO_UPDT 0x00000001 /* * Request data sync only from ffs_syncvnode(), not touching even more * metadata than NO_INO_UPDT. */ #define DATA_ONLY 0x00000002 int ffs_rdonly(struct inode *); TAILQ_HEAD(snaphead, inode); struct snapdata { LIST_ENTRY(snapdata) sn_link; struct snaphead sn_head; daddr_t sn_listsize; daddr_t *sn_blklist; struct lock sn_lock; }; #endif /* _KERNEL */ #endif /* !_UFS_FFS_EXTERN_H */ Index: head/sys/ufs/ffs/ffs_subr.c =================================================================== --- head/sys/ufs/ffs/ffs_subr.c (revision 328425) +++ head/sys/ufs/ffs/ffs_subr.c (revision 328426) @@ -1,353 +1,537 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 */ #include __FBSDID("$FreeBSD$"); #include #ifndef _KERNEL +#include +#include +#include +#include +#include #include #include -#else + +struct malloc_type; +#define UFS_MALLOC(size, type, flags) malloc(size) +#define UFS_FREE(ptr, type) free(ptr) +#define UFS_TIME time(NULL) + +#else /* _KERNEL */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#define UFS_MALLOC(size, type, flags) malloc(size, type, flags) +#define UFS_FREE(ptr, type) free(ptr, type) +#define UFS_TIME time_second + /* * Return buffer with the contents of block "offset" from the beginning of * directory "ip". If "res" is non-zero, fill it in with a pointer to the * remaining space in the directory. */ int ffs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp) { struct inode *ip; struct fs *fs; struct buf *bp; ufs_lbn_t lbn; int bsize, error; ip = VTOI(vp); fs = ITOFS(ip); lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); *bpp = NULL; error = bread(vp, lbn, bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } if (res) *res = (char *)bp->b_data + blkoff(fs, offset); *bpp = bp; return (0); } /* * Load up the contents of an inode and copy the appropriate pieces * to the incore copy. */ void ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino) { if (I_IS_UFS1(ip)) { *ip->i_din1 = *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); ip->i_mode = ip->i_din1->di_mode; ip->i_nlink = ip->i_din1->di_nlink; ip->i_size = ip->i_din1->di_size; ip->i_flags = ip->i_din1->di_flags; ip->i_gen = ip->i_din1->di_gen; ip->i_uid = ip->i_din1->di_uid; ip->i_gid = ip->i_din1->di_gid; } else { *ip->i_din2 = *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); ip->i_mode = ip->i_din2->di_mode; ip->i_nlink = ip->i_din2->di_nlink; ip->i_size = ip->i_din2->di_size; ip->i_flags = ip->i_din2->di_flags; ip->i_gen = ip->i_din2->di_gen; ip->i_uid = ip->i_din2->di_uid; ip->i_gid = ip->i_din2->di_gid; } } #endif /* KERNEL */ + +/* + * These are the low-level functions that actually read and write + * the superblock and its associated data. + */ +static off_t sblock_try[] = SBLOCKSEARCH; +static int readsuper(void *, struct fs **, off_t, + int (*)(void *, off_t, void **, int)); + +/* + * Read a superblock from the devfd device. + * + * If an alternate superblock is specified, it is read. Otherwise the + * set of locations given in the SBLOCKSEARCH list is searched for a + * superblock. Memory is allocated for the superblock by the readfunc and + * is returned. If filltype is non-NULL, additional memory is allocated + * of type filltype and filled in with the superblock summary information. + * + * If a superblock is found, zero is returned. Otherwise one of the + * following error values is returned: + * EIO: non-existent or truncated superblock. + * EIO: error reading summary information. + * ENOENT: no usable known superblock found. + * ENOSPC: failed to allocate space for the superblock. + * EINVAL: The previous newfs operation on this volume did not complete. + * The administrator must complete newfs before using this volume. + */ +int +ffs_sbget(void *devfd, struct fs **fsp, off_t altsuperblock, + struct malloc_type *filltype, + int (*readfunc)(void *devfd, off_t loc, void **bufp, int size)) +{ + struct fs *fs; + int i, ret, size, blks; + uint8_t *space; + int32_t *lp; + char *buf; + + if (altsuperblock != -1) { + if ((ret = readsuper(devfd, fsp, altsuperblock, readfunc)) != 0) + return (ret); + } else { + for (i = 0; sblock_try[i] != -1; i++) { + if ((ret = readsuper(devfd, fsp, sblock_try[i], + readfunc)) == 0) + break; + if (ret == ENOENT) + continue; + return (ret); + } + if (sblock_try[i] == -1) + return (ENOENT); + } + /* + * If not filling in summary information, NULL out fs_csp and return. + */ + fs = *fsp; + if (filltype == NULL) { + fs->fs_csp = NULL; + return (0); + } + /* + * Read in the superblock summary information. + */ + size = fs->fs_cssize; + blks = howmany(size, fs->fs_fsize); + if (fs->fs_contigsumsize > 0) + size += fs->fs_ncg * sizeof(int32_t); + size += fs->fs_ncg * sizeof(u_int8_t); + space = UFS_MALLOC(size, filltype, M_WAITOK); + fs->fs_csp = (struct csum *)space; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + ret = (*readfunc)(devfd, + dbtob(fsbtodb(fs, fs->fs_csaddr + i)), (void **)&buf, size); + if (ret) { + UFS_FREE(fs->fs_csp, filltype); + fs->fs_csp = NULL; + return (ret); + } + memcpy(space, buf, size); + UFS_FREE(buf, filltype); + space += size; + } + if (fs->fs_contigsumsize > 0) { + fs->fs_maxcluster = lp = (int32_t *)space; + for (i = 0; i < fs->fs_ncg; i++) + *lp++ = fs->fs_contigsumsize; + space = (uint8_t *)lp; + } + size = fs->fs_ncg * sizeof(u_int8_t); + fs->fs_contigdirs = (u_int8_t *)space; + bzero(fs->fs_contigdirs, size); + return (0); +} + +/* + * Try to read a superblock from the location specified by sblockloc. + * Return zero on success or an errno on failure. + */ +static int +readsuper(void *devfd, struct fs **fsp, off_t sblockloc, + int (*readfunc)(void *devfd, off_t loc, void **bufp, int size)) +{ + struct fs *fs; + int error; + + error = (*readfunc)(devfd, sblockloc, (void **)fsp, SBLOCKSIZE); + if (error != 0) + return (error); + fs = *fsp; + if (fs->fs_magic == FS_BAD_MAGIC) + return (EINVAL); + if (((fs->fs_magic == FS_UFS1_MAGIC && sblockloc <= SBLOCK_UFS1) || + (fs->fs_magic == FS_UFS2_MAGIC && + sblockloc == fs->fs_sblockloc)) && + fs->fs_ncg >= 1 && + fs->fs_bsize >= MINBSIZE && + fs->fs_bsize <= MAXBSIZE && + fs->fs_bsize >= roundup(sizeof(struct fs), DEV_BSIZE)) { + /* Have to set for old filesystems that predate this field */ + fs->fs_sblockactualloc = sblockloc; + return (0); + } + return (ENOENT); +} + +/* + * Write a superblock to the devfd device from the memory pointed to by fs. + * Write out the superblock summary information if it is present. + * + * If the write is successful, zero is returned. Otherwise one of the + * following error values is returned: + * EIO: failed to write superblock. + * EIO: failed to write superblock summary information. + */ +int +ffs_sbput(void *devfd, struct fs *fs, off_t loc, + int (*writefunc)(void *devfd, off_t loc, void *buf, int size)) +{ + int i, error, blks, size; + uint8_t *space; + + /* + * If there is summary information, write it first, so if there + * is an error, the superblock will not be marked as clean. + */ + if (fs->fs_csp != NULL) { + blks = howmany(fs->fs_cssize, fs->fs_fsize); + space = (uint8_t *)fs->fs_csp; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + if ((error = (*writefunc)(devfd, + dbtob(fsbtodb(fs, fs->fs_csaddr + i)), + space, size)) != 0) + return (error); + space += size; + } + } + fs->fs_fmod = 0; + fs->fs_time = UFS_TIME; + if ((error = (*writefunc)(devfd, loc, fs, fs->fs_sbsize)) != 0) + return (error); + return (0); +} /* * Update the frsum fields to reflect addition or deletion * of some frags. */ void ffs_fragacct(struct fs *fs, int fragmap, int32_t fraglist[], int cnt) { int inblk; int field, subfield; int siz, pos; inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; fragmap <<= 1; for (siz = 1; siz < fs->fs_frag; siz++) { if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) continue; field = around[siz]; subfield = inside[siz]; for (pos = siz; pos <= fs->fs_frag; pos++) { if ((fragmap & field) == subfield) { fraglist[siz] += cnt; pos += siz; field <<= siz; subfield <<= siz; } field <<= 1; subfield <<= 1; } } } /* * block operations * * check if a block is available */ int ffs_isblock(struct fs *fs, unsigned char *cp, ufs1_daddr_t h) { unsigned char mask; switch ((int)fs->fs_frag) { case 8: return (cp[h] == 0xff); case 4: mask = 0x0f << ((h & 0x1) << 2); return ((cp[h >> 1] & mask) == mask); case 2: mask = 0x03 << ((h & 0x3) << 1); return ((cp[h >> 2] & mask) == mask); case 1: mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: #ifdef _KERNEL panic("ffs_isblock"); #endif break; } return (0); } /* * check if a block is free */ int ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) { switch ((int)fs->fs_frag) { case 8: return (cp[h] == 0); case 4: return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); case 2: return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); case 1: return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); default: #ifdef _KERNEL panic("ffs_isfreeblock"); #endif break; } return (0); } /* * take a block out of the map */ void ffs_clrblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) { switch ((int)fs->fs_frag) { case 8: cp[h] = 0; return; case 4: cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: #ifdef _KERNEL panic("ffs_clrblock"); #endif break; } } /* * put a block into the map */ void ffs_setblock(struct fs *fs, unsigned char *cp, ufs1_daddr_t h) { switch ((int)fs->fs_frag) { case 8: cp[h] = 0xff; return; case 4: cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: #ifdef _KERNEL panic("ffs_setblock"); #endif break; } } /* * Update the cluster map because of an allocation or free. * * Cnt == 1 means free; cnt == -1 means allocating. */ void ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs1_daddr_t blkno, int cnt) { int32_t *sump; int32_t *lp; u_char *freemapp, *mapp; int i, start, end, forw, back, map, bit; if (fs->fs_contigsumsize <= 0) return; freemapp = cg_clustersfree(cgp); sump = cg_clustersum(cgp); /* * Allocate or clear the actual block. */ if (cnt > 0) setbit(freemapp, blkno); else clrbit(freemapp, blkno); /* * Find the size of the cluster going forward. */ start = blkno + 1; end = start + fs->fs_contigsumsize; if (end >= cgp->cg_nclusterblks) end = cgp->cg_nclusterblks; mapp = &freemapp[start / NBBY]; map = *mapp++; bit = 1 << (start % NBBY); for (i = start; i < end; i++) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } forw = i - start; /* * Find the size of the cluster going backward. */ start = blkno - 1; end = start - fs->fs_contigsumsize; if (end < 0) end = -1; mapp = &freemapp[start / NBBY]; map = *mapp--; bit = 1 << (start % NBBY); for (i = start; i > end; i--) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != 0) { bit >>= 1; } else { map = *mapp--; bit = 1 << (NBBY - 1); } } back = start - i; /* * Account for old cluster and the possibly new forward and * back clusters. */ i = back + forw + 1; if (i > fs->fs_contigsumsize) i = fs->fs_contigsumsize; sump[i] += cnt; if (back > 0) sump[back] -= cnt; if (forw > 0) sump[forw] -= cnt; /* * Update cluster summary information. */ lp = &sump[fs->fs_contigsumsize]; for (i = fs->fs_contigsumsize; i > 0; i--) if (*lp-- > 0) break; fs->fs_maxcluster[cgp->cg_cgx] = i; } Index: head/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c (revision 328425) +++ head/sys/ufs/ffs/ffs_vfsops.c (revision 328426) @@ -1,2319 +1,2300 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_ufs.h" #include "opt_ffs.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static uma_zone_t uma_inode, uma_ufs1, uma_ufs2; static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, ufs2_daddr_t); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static int ffs_sync_lazy(struct mount *mp); +static int ffs_use_bread(void *devfd, off_t loc, void **bufp, int size); +static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; static vfs_extattrctl_t ffs_extattrctl; static vfs_cmount_t ffs_cmount; static vfs_unmount_t ffs_unmount; static vfs_mount_t ffs_mount; static vfs_statfs_t ffs_statfs; static vfs_fhtovp_t ffs_fhtovp; static vfs_sync_t ffs_sync; static struct vfsops ufs_vfsops = { .vfs_extattrctl = ffs_extattrctl, .vfs_fhtovp = ffs_fhtovp, .vfs_init = ffs_init, .vfs_mount = ffs_mount, .vfs_cmount = ffs_cmount, .vfs_quotactl = ufs_quotactl, .vfs_root = ufs_root, .vfs_statfs = ffs_statfs, .vfs_sync = ffs_sync, .vfs_uninit = ffs_uninit, .vfs_unmount = ffs_unmount, .vfs_vget = ffs_vget, .vfs_susp_clean = process_deferred_inactive, }; VFS_SET(ufs_vfsops, ufs, 0); MODULE_VERSION(ufs, 1); static b_strategy_t ffs_geom_strategy; static b_write_t ffs_bufwrite; static struct buf_ops ffs_ops = { .bop_name = "FFS", .bop_write = ffs_bufwrite, .bop_strategy = ffs_geom_strategy, .bop_sync = bufsync, #ifdef NO_FFS_SNAPSHOT .bop_bdflush = bufbdflush, #else .bop_bdflush = ffs_bdflush, #endif }; /* * Note that userquota and groupquota options are not currently used * by UFS/FFS code and generally mount(8) does not pass those options * from userland, but they can be passed by loader(8) via * vfs.root.mountfrom.options. */ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "groupquota", "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir", "nosymfollow", "sync", "union", "userquota", NULL }; static int ffs_mount(struct mount *mp) { struct vnode *devvp; struct thread *td; struct ufsmount *ump = NULL; struct fs *fs; pid_t fsckpid = 0; int error, error1, flags; uint64_t mntorflags; accmode_t accmode; struct nameidata ndp; char *fspec; td = curthread; if (vfs_filteropt(mp->mnt_optnew, ffs_opts)) return (EINVAL); if (uma_inode == NULL) { uma_inode = uma_zcreate("FFS inode", sizeof(struct inode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs1 = uma_zcreate("FFS1 dinode", sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs2 = uma_zcreate("FFS2 dinode", sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } vfs_deleteopt(mp->mnt_optnew, "groupquota"); vfs_deleteopt(mp->mnt_optnew, "userquota"); fspec = vfs_getopts(mp->mnt_optnew, "from", &error); if (error) return (error); mntorflags = 0; if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0) mntorflags |= MNT_ACLS; if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) { mntorflags |= MNT_SNAPSHOT; /* * Once we have set the MNT_SNAPSHOT flag, do not * persist "snapshot" in the options list. */ vfs_deleteopt(mp->mnt_optnew, "snapshot"); vfs_deleteopt(mp->mnt_opt, "snapshot"); } if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 && vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) { /* * Once we have set the restricted PID, do not * persist "fsckpid" in the options list. */ vfs_deleteopt(mp->mnt_optnew, "fsckpid"); vfs_deleteopt(mp->mnt_opt, "fsckpid"); if (mp->mnt_flag & MNT_UPDATE) { if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { vfs_mount_error(mp, "Checker enable: Must be read-only"); return (EINVAL); } } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { vfs_mount_error(mp, "Checker enable: Must be read-only"); return (EINVAL); } /* Set to -1 if we are done */ if (fsckpid == 0) fsckpid = -1; } if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) { if (mntorflags & MNT_ACLS) { vfs_mount_error(mp, "\"acls\" and \"nfsv4acls\" options " "are mutually exclusive"); return (EINVAL); } mntorflags |= MNT_NFS4ACLS; } MNT_ILOCK(mp); mp->mnt_flag |= mntorflags; MNT_IUNLOCK(mp); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; if (fsckpid == -1 && ump->um_fsckpid > 0) { if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 || (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) return (error); g_topology_lock(); /* * Return to normal read-only mode. */ error = g_access(ump->um_cp, 0, -1, 0); g_topology_unlock(); ump->um_fsckpid = 0; } if (fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* * Flush any dirty data and suspend filesystem. */ if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); error = vfs_write_suspend_umnt(mp); if (error != 0) return (error); /* * Check for and optionally get rid of files open * for writing. */ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (MOUNTEDSOFTDEP(mp)) { error = softdep_flushfiles(mp, flags, td); } else { error = ffs_flushfiles(mp, flags, td); } if (error) { vfs_write_resume(mp, 0); return (error); } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s Update error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) fs->fs_clean = 1; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { fs->fs_ronly = 0; fs->fs_clean = 0; vfs_write_resume(mp, 0); return (error); } if (MOUNTEDSOFTDEP(mp)) softdep_unmount(mp); g_topology_lock(); /* * Drop our write and exclusive access. */ g_access(ump->um_cp, 0, -1, -1); g_topology_unlock(); fs->fs_ronly = 1; MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); /* * Allow the writers to note that filesystem * is ro now. */ vfs_write_resume(mp, 0); } if ((mp->mnt_flag & MNT_RELOAD) && (error = ffs_reload(mp, td, 0)) != 0) return (error); if (fs->fs_ronly && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* * If we are running a checker, do not allow upgrade. */ if (ump->um_fsckpid > 0) { vfs_mount_error(mp, "Active checker, cannot upgrade to write"); return (EINVAL); } /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { VOP_UNLOCK(devvp, 0); return (error); } VOP_UNLOCK(devvp, 0); fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if ((mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not properly " "dismounted\n", fs->fs_fsmnt); } else { vfs_mount_error(mp, "R/W mount of %s denied. %s.%s", fs->fs_fsmnt, "Filesystem is not clean - run fsck", (fs->fs_flags & FS_SUJ) == 0 ? "" : " Forced mount will invalidate" " journal contents"); return (EPERM); } } g_topology_lock(); /* * Request exclusive write access. */ error = g_access(ump->um_cp, 0, 1, 1); g_topology_unlock(); if (error) return (error); if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); fs->fs_ronly = 0; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); fs->fs_mtime = time_second; /* check to see if we need to start softdep */ if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ vn_finished_write(mp); return (error); } fs->fs_clean = 0; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { vn_finished_write(mp); return (error); } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); vn_finished_write(mp); } /* * Soft updates is incompatible with "async", * so if we are doing softupdates stop the user * from setting the async flag in an update. * Softdep_mount() clears it in an initial mount * or ro->rw remount. */ if (MOUNTEDSOFTDEP(mp)) { /* XXX: Reset too late ? */ MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_ASYNC; MNT_IUNLOCK(mp); } /* * Keep MNT_ACLS flag if it is stored in superblock. */ if ((fs->fs_flags & FS_ACLS) != 0) { /* XXX: Set too late ? */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_ACLS; MNT_IUNLOCK(mp); } if ((fs->fs_flags & FS_NFS4ACLS) != 0) { /* XXX: Set too late ? */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * If this is a request from fsck to clean up the filesystem, * then allow the specified pid to proceed. */ if (fsckpid > 0) { if (ump->um_fsckpid != 0) { vfs_mount_error(mp, "Active checker already running on %s", fs->fs_fsmnt); return (EINVAL); } KASSERT(MOUNTEDSOFTDEP(mp) == 0, ("soft updates enabled on read-only file system")); g_topology_lock(); /* * Request write access. */ error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) { vfs_mount_error(mp, "Checker activation failed on %s", fs->fs_fsmnt); return (error); } ump->um_fsckpid = fsckpid; if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_mtime = time_second; fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * If this is a snapshot request, take the snapshot. */ if (mp->mnt_flag & MNT_SNAPSHOT) return (ffs_snapshot(mp, fspec)); /* * Must not call namei() while owning busy ref. */ vfs_unbusy(mp); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); error = namei(&ndp); if ((mp->mnt_flag & MNT_UPDATE) != 0) { /* * Unmount does not start if MNT_UPDATE is set. Mount * update busies mp before setting MNT_UPDATE. We * must be able to retain our busy ref succesfully, * without sleep. */ error1 = vfs_busy(mp, MBF_NOWAIT); MPASS(error1 == 0); } if (error != 0) return (error); NDFREE(&ndp, NDF_ONLY_PNBUF); devvp = ndp.ni_vp; if (!vn_isdisk(devvp, &error)) { vput(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ accmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accmode |= VWRITE; error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } if (mp->mnt_flag & MNT_UPDATE) { /* * Update only * * If it's not the same vnode, or at least the same device * then it's not correct. */ if (devvp->v_rdev != ump->um_devvp->v_rdev) error = EINVAL; /* needs translation */ vput(devvp); if (error) return (error); } else { /* * New mount * * We need the name for the mount point (also used for * "last mounted on") copied in. If an error occurs, * the mount point is discarded by the upper level code. * Note that vfs_mount_alloc() populates f_mntonname for us. */ if ((error = ffs_mountfs(devvp, mp, td)) != 0) { vrele(devvp); return (error); } if (fsckpid > 0) { KASSERT(MOUNTEDSOFTDEP(mp) == 0, ("soft updates enabled on read-only file system")); ump = VFSTOUFS(mp); fs = ump->um_fs; g_topology_lock(); /* * Request write access. */ error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) { printf("WARNING: %s: Checker activation " "failed\n", fs->fs_fsmnt); } else { ump->um_fsckpid = fsckpid; if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_mtime = time_second; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } } } vfs_mountedfrom(mp, fspec); return (0); } /* * Compatibility with old mount system call. */ static int ffs_cmount(struct mntarg *ma, void *data, uint64_t flags) { struct ufs_args args; struct export_args exp; int error; if (data == NULL) return (EINVAL); error = copyin(data, &args, sizeof args); if (error) return (error); vfs_oexport_conv(&args.export, &exp); ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN); ma = mount_arg(ma, "export", &exp, sizeof(exp)); error = kernel_mount(ma, flags); return (error); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). If the 'force' flag * is 0, the filesystem must be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary * writers, if requested. * 6) invalidate all cached file data. * 7) re-read inode data for all active vnodes. */ int ffs_reload(struct mount *mp, struct thread *td, int flags) { struct vnode *vp, *mvp, *devvp; struct inode *ip; void *space; struct buf *bp; struct fs *fs, *newfs; struct ufsmount *ump; ufs2_daddr_t sblockloc; int i, blks, error; u_long size; int32_t *lp; ump = VFSTOUFS(mp); MNT_ILOCK(mp); if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) { MNT_IUNLOCK(mp); return (EINVAL); } MNT_IUNLOCK(mp); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); if (vinvalbuf(devvp, 0, 0, 0) != 0) panic("ffs_reload: dirty1"); VOP_UNLOCK(devvp, 0); /* * Step 2: re-read superblock from disk. */ fs = VFSTOUFS(mp)->um_fs; if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize, NOCRED, &bp)) != 0) return (error); newfs = (struct fs *)bp->b_data; if ((newfs->fs_magic != FS_UFS1_MAGIC && newfs->fs_magic != FS_UFS2_MAGIC) || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ newfs->fs_csp = fs->fs_csp; newfs->fs_maxcluster = fs->fs_maxcluster; newfs->fs_contigdirs = fs->fs_contigdirs; newfs->fs_active = fs->fs_active; newfs->fs_ronly = fs->fs_ronly; sblockloc = fs->fs_sblockloc; bcopy(newfs, fs, (u_int)fs->fs_sbsize); brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc); UFS_LOCK(ump); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s: reload pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); /* * Step 3: re-read summary information from disk. */ size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); size += fs->fs_ncg * sizeof(u_int8_t); free(fs->fs_csp, M_UFSMNT); space = malloc(size, M_UFSMNT, M_WAITOK); fs->fs_csp = space; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, space, (u_int)size); space = (char *)space + size; brelse(bp); } /* * We no longer know anything about clusters per cylinder group. */ if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; space = lp; } size = fs->fs_ncg * sizeof(u_int8_t); fs->fs_contigdirs = (u_int8_t *)space; bzero(fs->fs_contigdirs, size); if ((flags & FFSR_UNSUSPEND) != 0) { MNT_ILOCK(mp); mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2); wakeup(&mp->mnt_flag); MNT_IUNLOCK(mp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Skip syncer vnode. */ if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } /* * Step 4: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } if (vinvalbuf(vp, 0, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 5: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { VOP_UNLOCK(vp, 0); vrele(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } ffs_load_inode(bp, ip, fs, ip->i_number); ip->i_effnlink = ip->i_nlink; brelse(bp); VOP_UNLOCK(vp, 0); vrele(vp); } return (0); } /* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; - -/* * Common code for mount and mountroot */ static int ffs_mountfs(devvp, mp, td) struct vnode *devvp; struct mount *mp; struct thread *td; { struct ufsmount *ump; - struct buf *bp; struct fs *fs; struct cdev *dev; - void *space; - ufs2_daddr_t sblockloc; - int error, i, blks, len, ronly; - u_long size; - int32_t *lp; + int error, i, len, ronly; struct ucred *cred; struct g_consumer *cp; struct mount *nmp; - bp = NULL; + fs = NULL; ump = NULL; cred = td ? td->td_ucred : NOCRED; ronly = (mp->mnt_flag & MNT_RDONLY) != 0; KASSERT(devvp->v_type == VCHR, ("reclaimed devvp")); dev = devvp->v_rdev; if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0, (uintptr_t)mp) == 0) { VOP_UNLOCK(devvp, 0); return (EBUSY); } g_topology_lock(); error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1); g_topology_unlock(); if (error != 0) { atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); VOP_UNLOCK(devvp, 0); return (error); } dev_ref(dev); devvp->v_bufobj.bo_ops = &ffs_ops; VOP_UNLOCK(devvp, 0); if (dev->si_iosize_max != 0) mp->mnt_iosize_max = dev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; - - fs = NULL; - sblockloc = 0; - /* - * Try reading the superblock in each of its possible locations. - */ - for (i = 0; sblock_try[i] != -1; i++) { - if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) { - error = EINVAL; - vfs_mount_error(mp, - "Invalid sectorsize %d for superblock size %d", - cp->provider->sectorsize, SBLOCKSIZE); - goto out; - } - if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE, - cred, &bp)) != 0) - goto out; - fs = (struct fs *)bp->b_data; - sblockloc = sblock_try[i]; - if ((fs->fs_magic == FS_UFS1_MAGIC || - (fs->fs_magic == FS_UFS2_MAGIC && - (fs->fs_sblockloc == sblockloc || - (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) && - fs->fs_bsize <= MAXBSIZE && - fs->fs_bsize >= sizeof(struct fs)) - break; - brelse(bp); - bp = NULL; - } - if (sblock_try[i] == -1) { - error = EINVAL; /* XXX needs translation */ + if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) { + error = EINVAL; + vfs_mount_error(mp, + "Invalid sectorsize %d for superblock size %d", + cp->provider->sectorsize, SBLOCKSIZE); goto out; } + /* fetch the superblock and summary information */ + if ((error = ffs_sbget(devvp, &fs, -1, M_UFSMNT, ffs_use_bread)) != 0) + goto out; fs->fs_fmod = 0; /* none of these types of check-hashes are maintained */ fs->fs_metackhash &= ~(CK_SUPERBLOCK | CK_INODE | CK_INDIR | CK_DIR); /* no support for directory indices or any other undefined flags */ fs->fs_flags &= ~FS_INDEXDIRS; fs->fs_flags &= FS_SUPPORTED; fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if (ronly || (mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { vfs_mount_error(mp, "R/W mount of %s denied. %s%s", fs->fs_fsmnt, "Filesystem is not clean - run fsck.", (fs->fs_flags & FS_SUJ) == 0 ? "" : " Forced mount will invalidate journal contents"); error = EPERM; goto out; } if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) && (mp->mnt_flag & MNT_FORCE)) { printf("WARNING: %s: lost blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s: mount pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if ((fs->fs_flags & FS_GJOURNAL) != 0) { #ifdef UFS_GJOURNAL /* * Get journal provider name. */ len = 1024; mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK); if (g_io_getattr("GJOURNAL::provider", cp, &len, mp->mnt_gjprovider) == 0) { mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len, M_UFSMNT, M_WAITOK); MNT_ILOCK(mp); mp->mnt_flag |= MNT_GJOURNAL; MNT_IUNLOCK(mp); } else { printf("WARNING: %s: GJOURNAL flag on fs " "but no gjournal provider below\n", mp->mnt_stat.f_mntonname); free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } #else printf("WARNING: %s: GJOURNAL flag on fs but no " "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname); #endif } else { mp->mnt_gjprovider = NULL; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); ump->um_cp = cp; ump->um_bo = &devvp->v_bufobj; - ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); + ump->um_fs = fs; if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_fstype = UFS1; ump->um_balloc = ffs_balloc_ufs1; } else { ump->um_fstype = UFS2; ump->um_balloc = ffs_balloc_ufs2; } ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; ump->um_ifree = ffs_ifree; ump->um_rdonly = ffs_rdonly; ump->um_snapgone = ffs_snapgone; mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); - bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); - if (fs->fs_sbsize < SBLOCKSIZE) - bp->b_flags |= B_INVAL | B_NOCACHE; - brelse(bp); - bp = NULL; - fs = ump->um_fs; - ffs_oldfscompat_read(fs, ump, sblockloc); + ffs_oldfscompat_read(fs, ump, fs->fs_sblockloc); fs->fs_ronly = ronly; - size = fs->fs_cssize; - blks = howmany(size, fs->fs_fsize); - if (fs->fs_contigsumsize > 0) - size += fs->fs_ncg * sizeof(int32_t); - size += fs->fs_ncg * sizeof(u_int8_t); - space = malloc(size, M_UFSMNT, M_WAITOK); - fs->fs_csp = space; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, - cred, &bp)) != 0) { - free(fs->fs_csp, M_UFSMNT); - goto out; - } - bcopy(bp->b_data, space, (u_int)size); - space = (char *)space + size; - brelse(bp); - bp = NULL; - } - if (fs->fs_contigsumsize > 0) { - fs->fs_maxcluster = lp = space; - for (i = 0; i < fs->fs_ncg; i++) - *lp++ = fs->fs_contigsumsize; - space = lp; - } - size = fs->fs_ncg * sizeof(u_int8_t); - fs->fs_contigdirs = (u_int8_t *)space; - bzero(fs->fs_contigdirs, size); fs->fs_active = NULL; mp->mnt_data = ump; mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; nmp = NULL; if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) { if (nmp) vfs_rel(nmp); vfs_getnewfsid(mp); } mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; MNT_IUNLOCK(mp); if ((fs->fs_flags & FS_MULTILABEL) != 0) { #ifdef MAC MNT_ILOCK(mp); mp->mnt_flag |= MNT_MULTILABEL; MNT_IUNLOCK(mp); #else printf("WARNING: %s: multilabel flag on fs but " "no MAC support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_ACLS) != 0) { #ifdef UFS_ACL MNT_ILOCK(mp); if (mp->mnt_flag & MNT_NFS4ACLS) printf("WARNING: %s: ACLs flag on fs conflicts with " "\"nfsv4acls\" mount option; option ignored\n", mp->mnt_stat.f_mntonname); mp->mnt_flag &= ~MNT_NFS4ACLS; mp->mnt_flag |= MNT_ACLS; MNT_IUNLOCK(mp); #else printf("WARNING: %s: ACLs flag on fs but no ACLs support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_NFS4ACLS) != 0) { #ifdef UFS_ACL MNT_ILOCK(mp); if (mp->mnt_flag & MNT_ACLS) printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts " "with \"acls\" mount option; option ignored\n", mp->mnt_stat.f_mntonname); mp->mnt_flag &= ~MNT_ACLS; mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); #else printf("WARNING: %s: NFSv4 ACLs flag on fs but no " "ACLs support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_TRIM) != 0) { len = sizeof(int); if (g_io_getattr("GEOM::candelete", cp, &len, &ump->um_candelete) == 0) { if (!ump->um_candelete) printf("WARNING: %s: TRIM flag on fs but disk " "does not support TRIM\n", mp->mnt_stat.f_mntonname); } else { printf("WARNING: %s: TRIM flag on fs but disk does " "not confirm that it supports TRIM\n", mp->mnt_stat.f_mntonname); ump->um_candelete = 0; } if (ump->um_candelete) { ump->um_trim_tq = taskqueue_create("trim", M_WAITOK, taskqueue_thread_enqueue, &ump->um_trim_tq); taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS, "%s trim", mp->mnt_stat.f_mntonname); } } ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; #ifdef UFS_EXTATTR ufs_extattr_uepm_init(&ump->um_extattr); #endif /* * Set FS local "last mounted on" information (NULL pad) */ bzero(fs->fs_fsmnt, MAXMNTLEN); strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN); mp->mnt_stat.f_iosize = fs->fs_bsize; if (mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } if (ronly == 0) { fs->fs_mtime = time_second; if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { - free(fs->fs_csp, M_UFSMNT); ffs_flushfiles(mp, FORCECLOSE, td); goto out; } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * Initialize filesystem state information in mount struct. */ MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE; MNT_IUNLOCK(mp); #ifdef UFS_EXTATTR #ifdef UFS_EXTATTR_AUTOSTART /* * * Auto-starting does the following: * - check for /.attribute in the fs, and extattr_start if so * - for each file in .attribute, enable that file with * an attribute of the same name. * Not clear how to report errors -- probably eat them. * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". */ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ return (0); out: - if (bp) - brelse(bp); + if (fs != NULL) { + free(fs->fs_csp, M_UFSMNT); + free(fs, M_UFSMNT); + } if (cp != NULL) { g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); } if (ump) { mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } - free(ump->um_fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = NULL; } atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); dev_rel(dev); return (error); } +/* + * A read function for use by filesystem-layer routines. + */ +static int +ffs_use_bread(void *devfd, off_t loc, void **bufp, int size) +{ + struct buf *bp; + int error; + + *bufp = malloc(size, M_UFSMNT, M_WAITOK); + if ((error = bread((struct vnode *)devfd, btodb(loc), size, NOCRED, + &bp)) != 0) { + free(*bufp, M_UFSMNT); + *bufp = NULL; + return (error); + } + bcopy(bp->b_data, *bufp, size); + bp->b_flags |= B_INVAL | B_NOCACHE; + brelse(bp); + return (0); +} + #include static int bigcgs = 0; SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); /* * Sanity checks for loading old filesystem superblocks. * See ffs_oldfscompat_write below for unwound actions. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ static void ffs_oldfscompat_read(fs, ump, sblockloc) struct fs *fs; struct ufsmount *ump; ufs2_daddr_t sblockloc; { off_t maxfilesize; /* * If not yet done, update fs_flags location and value of fs_sblockloc. */ if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { fs->fs_flags = fs->fs_old_flags; fs->fs_old_flags |= FS_FLAGS_UPDATED; fs->fs_sblockloc = sblockloc; } /* * If not yet done, update UFS1 superblock with new wider fields. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) { fs->fs_maxbsize = fs->fs_bsize; fs->fs_time = fs->fs_old_time; fs->fs_size = fs->fs_old_size; fs->fs_dsize = fs->fs_old_dsize; fs->fs_csaddr = fs->fs_old_csaddr; fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_old_inodefmt < FS_44INODEFMT) { fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1; fs->fs_qbmask = ~fs->fs_bmask; fs->fs_qfmask = ~fs->fs_fmask; } if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_savedmaxfilesize = fs->fs_maxfilesize; maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1; if (fs->fs_maxfilesize > maxfilesize) fs->fs_maxfilesize = maxfilesize; } /* Compatibility for old filesystems */ if (fs->fs_avgfilesize <= 0) fs->fs_avgfilesize = AVFILESIZ; if (fs->fs_avgfpdir <= 0) fs->fs_avgfpdir = AFPDIR; if (bigcgs) { fs->fs_save_cgsize = fs->fs_cgsize; fs->fs_cgsize = fs->fs_bsize; } } /* * Unwinding superblock updates for old filesystems. * See ffs_oldfscompat_read above for details. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ void ffs_oldfscompat_write(fs, ump) struct fs *fs; struct ufsmount *ump; { /* * Copy back UFS2 updated fields that UFS1 inspects. */ if (fs->fs_magic == FS_UFS1_MAGIC) { fs->fs_old_time = fs->fs_time; fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; fs->fs_maxfilesize = ump->um_savedmaxfilesize; } if (bigcgs) { fs->fs_cgsize = fs->fs_save_cgsize; fs->fs_save_cgsize = 0; } } /* * unmount system call */ static int ffs_unmount(mp, mntflags) struct mount *mp; int mntflags; { struct thread *td; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, flags, susp; #ifdef UFS_EXTATTR int e_restart; #endif flags = 0; td = curthread; fs = ump->um_fs; susp = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; susp = fs->fs_ronly == 0; } #ifdef UFS_EXTATTR if ((error = ufs_extattr_stop(mp, td))) { if (error != EOPNOTSUPP) printf("WARNING: unmount %s: ufs_extattr_stop " "returned errno %d\n", mp->mnt_stat.f_mntonname, error); e_restart = 0; } else { ufs_extattr_uepm_destroy(&ump->um_extattr); e_restart = 1; } #endif if (susp) { error = vfs_write_suspend_umnt(mp); if (error != 0) goto fail1; } if (MOUNTEDSOFTDEP(mp)) error = softdep_flushfiles(mp, flags, td); else error = ffs_flushfiles(mp, flags, td); if (error != 0 && error != ENXIO) goto fail; UFS_LOCK(ump); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: unmount %s: pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); if (MOUNTEDSOFTDEP(mp)) softdep_unmount(mp); if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT, 0); if (error && error != ENXIO) { fs->fs_clean = 0; goto fail; } } if (susp) vfs_write_resume(mp, VR_START_WRITE); if (ump->um_trim_tq != NULL) { while (ump->um_trim_inflight != 0) pause("ufsutr", hz); taskqueue_drain_all(ump->um_trim_tq); taskqueue_free(ump->um_trim_tq); } g_topology_lock(); if (ump->um_fsckpid > 0) { /* * Return to normal read-only mode. */ error = g_access(ump->um_cp, 0, -1, 0); ump->um_fsckpid = 0; } g_vfs_close(ump->um_cp); g_topology_unlock(); atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0); vrele(ump->um_devvp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = NULL; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); if (td->td_su == mp) { td->td_su = NULL; vfs_rel(mp); } return (error); fail: if (susp) vfs_write_resume(mp, VR_START_WRITE); fail1: #ifdef UFS_EXTATTR if (e_restart) { ufs_extattr_uepm_init(&ump->um_extattr); #ifdef UFS_EXTATTR_AUTOSTART (void) ufs_extattr_autostart(mp, td); #endif } #endif return (error); } /* * Flush out all the files in a filesystem. */ int ffs_flushfiles(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct ufsmount *ump; int qerror, error; ump = VFSTOUFS(mp); qerror = 0; #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, 0, SKIPSYSTEM|flags, td); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { error = quotaoff(td, mp, i); if (error != 0) { if ((flags & EARLYFLUSH) == 0) return (error); else qerror = error; } } /* * Here we fall through to vflush again to ensure that * we have gotten rid of all the system vnodes, unless * quotas must not be closed. */ } #endif ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles"); if (ump->um_devvp->v_vflag & VV_COPYONWRITE) { if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0) return (error); ffs_snapshot_unmount(mp); flags |= FORCECLOSE; /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } /* * Do not close system files if quotas were not closed, to be * able to sync the remaining dquots. The freeblks softupdate * workitems might hold a reference on a dquot, preventing * quotaoff() from completing. Next round of * softdep_flushworklist() iteration should process the * blockers, allowing the next run of quotaoff() to finally * flush held dquots. * * Otherwise, flush all the files. */ if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0) return (error); /* * Flush filesystem metadata. */ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td); VOP_UNLOCK(ump->um_devvp, 0); return (error); } /* * Get filesystem statistics. */ static int ffs_statfs(mp, sbp) struct mount *mp; struct statfs *sbp; { struct ufsmount *ump; struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) panic("ffs_statfs"); sbp->f_version = STATFS_VERSION; sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; UFS_LOCK(ump); sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_bavail = freespace(fs, fs->fs_minfree) + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; UFS_UNLOCK(ump); sbp->f_namemax = UFS_MAXNAMLEN; return (0); } static bool sync_doupdate(struct inode *ip) { return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) != 0); } /* * For a lazy sync, we only care about access times, quotas and the * superblock. Other filesystem changes are already converted to * cylinder group blocks or inode blocks updates and are written to * disk by syncer. */ static int ffs_sync_lazy(mp) struct mount *mp; { struct vnode *mvp, *vp; struct inode *ip; struct thread *td; int allerror, error; allerror = 0; td = curthread; if ((mp->mnt_flag & MNT_NOATIME) != 0) goto qupdate; MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); /* * The IN_ACCESS flag is converted to IN_MODIFIED by * ufs_close() and ufs_getattr() by the calls to * ufs_itimes_locked(), without subsequent UFS_UPDATE(). * Test also all the other timestamp flags too, to pick up * any other cases that could be missed. */ if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td)) != 0) continue; if (sync_doupdate(ip)) error = ffs_update(vp, 0); if (error != 0) allerror = error; vput(vp); } qupdate: #ifdef QUOTA qsync(mp); #endif if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 && (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0) allerror = error; return (allerror); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked busy using * vfs_busy(). */ static int ffs_sync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *mvp, *vp, *devvp; struct thread *td; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, count, lockreq, allerror = 0; int suspend; int suspended; int secondary_writes; int secondary_accwrites; int softdep_deps; int softdep_accdeps; struct bufobj *bo; suspend = 0; suspended = 0; td = curthread; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0) panic("%s: ffs_sync: modification on read-only filesystem", fs->fs_fsmnt); if (waitfor == MNT_LAZY) { if (!rebooting) return (ffs_sync_lazy(mp)); waitfor = MNT_NOWAIT; } /* * Write back each (modified) inode. */ lockreq = LK_EXCLUSIVE | LK_NOWAIT; if (waitfor == MNT_SUSPEND) { suspend = 1; waitfor = MNT_WAIT; } if (waitfor == MNT_WAIT) lockreq = LK_EXCLUSIVE; lockreq |= LK_INTERLOCK | LK_SLEEPFAIL; loop: /* Grab snapshot of secondary write counts */ MNT_ILOCK(mp); secondary_writes = mp->mnt_secondary_writes; secondary_accwrites = mp->mnt_secondary_accwrites; MNT_IUNLOCK(mp); /* Grab snapshot of softdep dependency counts */ softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps); MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Depend on the vnode interlock to keep things stable enough * for a quick test. Since there might be hundreds of * thousands of vnodes, we cannot afford even a subroutine * call unless there's a good chance that we have work to do. */ if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && vp->v_bufobj.bo_dirty.bv_cnt == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, lockreq, td)) != 0) { if (error == ENOENT || error == ENOLCK) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0) allerror = error; vput(vp); } /* * Force stale filesystem control information to be flushed. */ if (waitfor == MNT_WAIT || rebooting) { if ((error = softdep_flushworklist(ump->um_mountp, &count, td))) allerror = error; /* Flushed work items may create new vnodes to clean */ if (allerror == 0 && count) goto loop; } #ifdef QUOTA qsync(mp); #endif devvp = ump->um_devvp; bo = &devvp->v_bufobj; BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(devvp, waitfor, td); VOP_UNLOCK(devvp, 0); if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN)) error = ffs_sbupdate(ump, waitfor, 0); if (error != 0) allerror = error; if (allerror == 0 && waitfor == MNT_WAIT) goto loop; } else if (suspend != 0) { if (softdep_check_suspend(mp, devvp, softdep_deps, softdep_accdeps, secondary_writes, secondary_accwrites) != 0) { MNT_IUNLOCK(mp); goto loop; /* More work needed */ } mtx_assert(MNT_MTX(mp), MA_OWNED); mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED; MNT_IUNLOCK(mp); suspended = 1; } else BO_UNLOCK(bo); /* * Write back modified superblock. */ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor, suspended)) != 0) allerror = error; return (allerror); } int ffs_vget(mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { return (ffs_vgetf(mp, ino, flags, vpp, 0)); } int ffs_vgetf(mp, ino, flags, vpp, ffs_flags) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; int ffs_flags; { struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; int error; error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL); if (error || *vpp != NULL) return (error); /* * We must promote to an exclusive lock for vnode creation. This * can happen if lookup is passed LOCKSHARED. */ if ((flags & LK_TYPE_MASK) == LK_SHARED) { flags &= ~LK_TYPE_MASK; flags |= LK_EXCLUSIVE; } /* * We do not lock vnode creation as it is believed to be too * expensive for such rare case as simultaneous creation of vnode * for same ino by different processes. We just allow them to race * and check later to decide who wins. Let the race begin! */ ump = VFSTOUFS(mp); fs = ump->um_fs; ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO); /* Allocate a new vnode/inode. */ error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ? &ffs_vnodeops1 : &ffs_vnodeops2, &vp); if (error) { *vpp = NULL; uma_zfree(uma_inode, ip); return (error); } /* * FFS supports recursive locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); VN_LOCK_AREC(vp); vp->v_data = ip; vp->v_bufobj.bo_bsize = fs->fs_bsize; ip->i_vnode = vp; ip->i_ump = ump; ip->i_number = ino; ip->i_ea_refs = 0; ip->i_nextclustercg = -1; ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2; #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif if (ffs_flags & FFSV_FORCEINSMQ) vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) { uma_zfree(uma_inode, ip); *vpp = NULL; return (error); } vp->v_vflag &= ~VV_FORCEINSMQ; error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL); if (error || *vpp != NULL) return (error); /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } if (I_IS_UFS1(ip)) ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK); else ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK); ffs_load_inode(bp, ip, fs, ino); if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization. */ if (vp->v_type != VFIFO) { /* FFS supports shared locking for all files except fifos. */ VN_LOCK_ASHARE(vp); } /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { while (ip->i_gen == 0) ip->i_gen = arc4random(); if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { ip->i_flag |= IN_MODIFIED; DIP_SET(ip, i_gen, ip->i_gen); } } #ifdef MAC if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { /* * If this vnode is already allocated, and we're running * multi-label, attempt to perform a label association * from the extended attributes on the inode. */ error = mac_vnode_associate_extattr(mp, vp); if (error) { /* ufs_inactive will release ip->i_devvp ref. */ vput(vp); *vpp = NULL; return (error); } } #endif *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - for UFS2 check that the inode number is initialized * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ffs_fhtovp(mp, fhp, flags, vpp) struct mount *mp; struct fid *fhp; int flags; struct vnode **vpp; { struct ufid *ufhp; struct ufsmount *ump; struct fs *fs; struct cg *cgp; struct buf *bp; ino_t ino; u_int cg; int error; ufhp = (struct ufid *)fhp; ino = ufhp->ufid_ino; ump = VFSTOUFS(mp); fs = ump->um_fs; if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); /* * Need to check if inode is initialized because UFS2 does lazy * initialization and nfs_fhtovp can offer arbitrary inode numbers. */ if (fs->fs_magic != FS_UFS2_MAGIC) return (ufs_fhtovp(mp, ufhp, flags, vpp)); cg = ino_to_cg(fs, ino); if ((error = ffs_getcg(fs, ump->um_devvp, cg, &bp, &cgp)) != 0) return (error); if (ino >= cg * fs->fs_ipg + cgp->cg_initediblk) { brelse(bp); return (ESTALE); } brelse(bp); return (ufs_fhtovp(mp, ufhp, flags, vpp)); } /* * Initialize the filesystem. */ static int ffs_init(vfsp) struct vfsconf *vfsp; { ffs_susp_initialize(); softdep_initialize(); return (ufs_init(vfsp)); } /* * Undo the work of ffs_init(). */ static int ffs_uninit(vfsp) struct vfsconf *vfsp; { int ret; ret = ufs_uninit(vfsp); softdep_uninitialize(); ffs_susp_uninitialize(); return (ret); } /* + * Structure used to pass information from ffs_sbupdate to its + * helper routine ffs_use_bwrite. + */ +struct devfd { + struct ufsmount *ump; + struct buf *sbbp; + int waitfor; + int suspended; + int error; +}; + +/* * Write a superblock and associated information back to disk. */ int ffs_sbupdate(ump, waitfor, suspended) struct ufsmount *ump; int waitfor; int suspended; { - struct fs *fs = ump->um_fs; + struct fs *fs; struct buf *sbbp; - struct buf *bp; - int blks; - void *space; - int i, size, error, allerror = 0; + struct devfd devfd; + fs = ump->um_fs; if (fs->fs_ronly == 1 && (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0) panic("ffs_sbupdate: write read-only filesystem"); /* * We use the superblock's buf to serialize calls to ffs_sbupdate(). */ sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); /* - * First write back the summary information. + * Initialize info needed for write function. */ - blks = howmany(fs->fs_cssize, fs->fs_fsize); - space = fs->fs_csp; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - bp = getblk(ump->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), - size, 0, 0, 0); - bcopy(space, bp->b_data, (u_int)size); - space = (char *)space + size; - if (suspended) + devfd.ump = ump; + devfd.sbbp = sbbp; + devfd.waitfor = waitfor; + devfd.suspended = suspended; + devfd.error = 0; + return (ffs_sbput(&devfd, fs, fs->fs_sblockloc, ffs_use_bwrite)); +} + +/* + * Write function for use by filesystem-layer routines. + */ +static int +ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size) +{ + struct devfd *devfdp; + struct ufsmount *ump; + struct buf *bp; + struct fs *fs; + int error; + + devfdp = devfd; + ump = devfdp->ump; + fs = ump->um_fs; + /* + * Writing the superblock summary information. + */ + if (loc != fs->fs_sblockloc) { + bp = getblk(ump->um_devvp, btodb(loc), size, 0, 0, 0); + bcopy(buf, bp->b_data, (u_int)size); + if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; - if (waitfor != MNT_WAIT) + if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) - allerror = error; + devfdp->error = error; + return (0); } /* - * Now write back the superblock itself. If any errors occurred - * up to this point, then fail so that the superblock avoids - * being written out as clean. + * Writing the superblock itself. We need to do special checks for it. */ - if (allerror) { - brelse(sbbp); - return (allerror); + bp = devfdp->sbbp; + if (devfdp->error != 0) { + brelse(bp); + return (devfdp->error); } - bp = sbbp; if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1); fs->fs_sblockloc = SBLOCK_UFS1; } if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2); fs->fs_sblockloc = SBLOCK_UFS2; } - fs->fs_fmod = 0; - fs->fs_time = time_second; if (MOUNTEDSOFTDEP(ump->um_mountp)) softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); ffs_oldfscompat_write((struct fs *)bp->b_data, ump); - if (suspended) + if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; - if (waitfor != MNT_WAIT) + if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) - allerror = error; - return (allerror); + devfdp->error = error; + return (devfdp->error); } static int ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname) { #ifdef UFS_EXTATTR return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #else return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #endif } static void ffs_ifree(struct ufsmount *ump, struct inode *ip) { if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) uma_zfree(uma_ufs1, ip->i_din1); else if (ip->i_din2 != NULL) uma_zfree(uma_ufs2, ip->i_din2); uma_zfree(uma_inode, ip); } static int dobkgrdwrite = 1; SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, "Do background writes (honoring the BV_BKGRDWRITE flag)?"); /* * Complete a background write started from bwrite. */ static void ffs_backgroundwritedone(struct buf *bp) { struct bufobj *bufobj; struct buf *origbp; /* * Find the original buffer that we are writing. */ bufobj = bp->b_bufobj; BO_LOCK(bufobj); if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL) panic("backgroundwritedone: lost buffer"); /* * We should mark the cylinder group buffer origbp as * dirty, to not loose the failed write. */ if ((bp->b_ioflags & BIO_ERROR) != 0) origbp->b_vflags |= BV_BKGRDERR; BO_UNLOCK(bufobj); /* * Process dependencies then return any unfinished ones. */ if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0) buf_complete(bp); #ifdef SOFTUPDATES if (!LIST_EMPTY(&bp->b_dep)) softdep_move_dependencies(bp, origbp); #endif /* * This buffer is marked B_NOCACHE so when it is released * by biodone it will be tossed. */ bp->b_flags |= B_NOCACHE; bp->b_flags &= ~B_CACHE; pbrelvp(bp); /* * Prevent brelse() from trying to keep and re-dirtying bp on * errors. It causes b_bufobj dereference in * bdirty()/reassignbuf(), and b_bufobj was cleared in * pbrelvp() above. */ if ((bp->b_ioflags & BIO_ERROR) != 0) bp->b_flags |= B_INVAL; bufdone(bp); BO_LOCK(bufobj); /* * Clear the BV_BKGRDINPROG flag in the original buffer * and awaken it if it is waiting for the write to complete. * If BV_BKGRDINPROG is not set in the original buffer it must * have been released and re-instantiated - which is not legal. */ KASSERT((origbp->b_vflags & BV_BKGRDINPROG), ("backgroundwritedone: lost buffer2")); origbp->b_vflags &= ~BV_BKGRDINPROG; if (origbp->b_vflags & BV_BKGRDWAIT) { origbp->b_vflags &= ~BV_BKGRDWAIT; wakeup(&origbp->b_xflags); } BO_UNLOCK(bufobj); } /* * Write, release buffer on completion. (Done by iodone * if async). Do not bother writing anything if the buffer * is invalid. * * Note that we set B_CACHE here, indicating that buffer is * fully valid and thus cacheable. This is true even of NFS * now so we set it generally. This could be set either here * or in biodone() since the I/O is synchronous. We put it * here. */ static int ffs_bufwrite(struct buf *bp) { struct buf *newbp; CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } if (!BUF_ISLOCKED(bp)) panic("bufwrite: buffer is not busy???"); /* * If a background write is already in progress, delay * writing this block if it is asynchronous. Otherwise * wait for the background write to complete. */ BO_LOCK(bp->b_bufobj); if (bp->b_vflags & BV_BKGRDINPROG) { if (bp->b_flags & B_ASYNC) { BO_UNLOCK(bp->b_bufobj); bdwrite(bp); return (0); } bp->b_vflags |= BV_BKGRDWAIT; msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO, "bwrbg", 0); if (bp->b_vflags & BV_BKGRDINPROG) panic("bufwrite: still writing"); } bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); /* * If this buffer is marked for background writing and we * do not have to wait for it, make a copy and write the * copy so as to leave this buffer ready for further use. * * This optimization eats a lot of memory. If we have a page * or buffer shortfall we can't do it. */ if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC) && !vm_page_count_severe() && !buf_dirty_count_severe()) { KASSERT(bp->b_iodone == NULL, ("bufwrite: needs chained iodone (%p)", bp->b_iodone)); /* get a new block */ newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD); if (newbp == NULL) goto normal_write; KASSERT(buf_mapped(bp), ("Unmapped cg")); memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); BO_LOCK(bp->b_bufobj); bp->b_vflags |= BV_BKGRDINPROG; BO_UNLOCK(bp->b_bufobj); newbp->b_xflags |= (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER; newbp->b_lblkno = bp->b_lblkno; newbp->b_blkno = bp->b_blkno; newbp->b_offset = bp->b_offset; newbp->b_iodone = ffs_backgroundwritedone; newbp->b_flags |= B_ASYNC; newbp->b_flags &= ~B_INVAL; pbgetvp(bp->b_vp, newbp); #ifdef SOFTUPDATES /* * Move over the dependencies. If there are rollbacks, * leave the parent buffer dirtied as it will need to * be written again. */ if (LIST_EMPTY(&bp->b_dep) || softdep_move_dependencies(bp, newbp) == 0) bundirty(bp); #else bundirty(bp); #endif /* * Initiate write on the copy, release the original. The * BKGRDINPROG flag prevents it from going away until * the background write completes. */ bqrelse(bp); bp = newbp; } else /* Mark the buffer clean */ bundirty(bp); /* Let the normal bufwrite do the rest for us */ normal_write: return (bufwrite(bp)); } static void ffs_geom_strategy(struct bufobj *bo, struct buf *bp) { struct vnode *vp; struct buf *tbp; int error, nocopy; vp = bo2vnode(bo); if (bp->b_iocmd == BIO_WRITE) { if ((bp->b_flags & B_VALIDSUSPWRT) == 0 && bp->b_vp != NULL && bp->b_vp->v_mount != NULL && (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) panic("ffs_geom_strategy: bad I/O"); nocopy = bp->b_flags & B_NOCOPY; bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY); if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 && vp->v_rdev->si_snapdata != NULL) { if ((bp->b_flags & B_CLUSTER) != 0) { runningbufwakeup(bp); TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, b_cluster.cluster_entry) { error = ffs_copyonwrite(vp, tbp); if (error != 0 && error != EOPNOTSUPP) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } } bp->b_runningbufspace = bp->b_bufsize; atomic_add_long(&runningbufspace, bp->b_runningbufspace); } else { error = ffs_copyonwrite(vp, bp); if (error != 0 && error != EOPNOTSUPP) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } } } #ifdef SOFTUPDATES if ((bp->b_flags & B_CLUSTER) != 0) { TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, b_cluster.cluster_entry) { if (!LIST_EMPTY(&tbp->b_dep)) buf_start(tbp); } } else { if (!LIST_EMPTY(&bp->b_dep)) buf_start(bp); } #endif /* * Check for metadata that needs check-hashes and update them. */ switch (bp->b_xflags & BX_FSPRIV) { case BX_CYLGRP: ((struct cg *)bp->b_data)->cg_ckhash = 0; ((struct cg *)bp->b_data)->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount); break; case BX_SUPERBLOCK: case BX_INODE: case BX_INDIR: case BX_DIR: printf("Check-hash write is unimplemented!!!\n"); break; case 0: break; default: printf("multiple buffer types 0x%b\n", (u_int)(bp->b_xflags & BX_FSPRIV), PRINT_UFS_BUF_XFLAGS); break; } } g_vfs_strategy(bo, bp); } int ffs_own_mount(const struct mount *mp) { if (mp->mnt_op == &ufs_vfsops) return (1); return (0); } #ifdef DDB #ifdef SOFTUPDATES /* defined in ffs_softdep.c */ extern void db_print_ffs(struct ufsmount *ump); DB_SHOW_COMMAND(ffs, db_show_ffs) { struct mount *mp; struct ufsmount *ump; if (have_addr) { ump = VFSTOUFS((struct mount *)addr); db_print_ffs(ump); return; } TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name)) db_print_ffs(VFSTOUFS(mp)); } } #endif /* SOFTUPDATES */ #endif /* DDB */ Index: head/sys/ufs/ffs/fs.h =================================================================== --- head/sys/ufs/ffs/fs.h (revision 328425) +++ head/sys/ufs/ffs/fs.h (revision 328426) @@ -1,820 +1,821 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fs.h 8.13 (Berkeley) 3/21/95 * $FreeBSD$ */ #ifndef _UFS_FFS_FS_H_ #define _UFS_FFS_FS_H_ #include #include /* * Each disk drive contains some number of filesystems. * A filesystem consists of a number of cylinder groups. * Each cylinder group has inodes and data. * * A filesystem is described by its super-block, which in turn * describes the cylinder groups. The super-block is critical * data and is replicated in each cylinder group to protect against * catastrophic loss. This is done at `newfs' time and the critical * super-block data does not change, so the copies need not be * referenced further unless disaster strikes. * * For filesystem fs, the offsets of the various blocks of interest * are given in the super block as: * [fs->fs_sblkno] Super-block * [fs->fs_cblkno] Cylinder group block * [fs->fs_iblkno] Inode blocks * [fs->fs_dblkno] Data blocks * The beginning of cylinder group cg in fs, is given by * the ``cgbase(fs, cg)'' macro. * * Depending on the architecture and the media, the superblock may * reside in any one of four places. For tiny media where every block * counts, it is placed at the very front of the partition. Historically, * UFS1 placed it 8K from the front to leave room for the disk label and * a small bootstrap. For UFS2 it got moved to 64K from the front to leave * room for the disk label and a bigger bootstrap, and for really piggy * systems we check at 256K from the front if the first three fail. In * all cases the size of the superblock will be SBLOCKSIZE. All values are * given in byte-offset form, so they do not imply a sector size. The * SBLOCKSEARCH specifies the order in which the locations should be searched. */ #define SBLOCK_FLOPPY 0 #define SBLOCK_UFS1 8192 #define SBLOCK_UFS2 65536 #define SBLOCK_PIGGY 262144 #define SBLOCKSIZE 8192 #define SBLOCKSEARCH \ { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 } /* * Max number of fragments per block. This value is NOT tweakable. */ #define MAXFRAG 8 /* * Addresses stored in inodes are capable of addressing fragments * of `blocks'. File system blocks of at most size MAXBSIZE can * be optionally broken into 2, 4, or 8 pieces, each of which is * addressable; these pieces may be DEV_BSIZE, or some multiple of * a DEV_BSIZE unit. * * Large files consist of exclusively large data blocks. To avoid * undue wasted disk space, the last data block of a small file may be * allocated as only as many fragments of a large block as are * necessary. The filesystem format retains only a single pointer * to such a fragment, which is a piece of a single large block that * has been divided. The size of such a fragment is determinable from * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. * * The filesystem records space availability at the fragment level; * to determine block availability, aligned fragments are examined. */ /* * MINBSIZE is the smallest allowable block size. * In order to insure that it is possible to create files of size * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. * MINBSIZE must be big enough to hold a cylinder group block, * thus changes to (struct cg) must keep its size within MINBSIZE. * Note that super blocks are always of size SBLOCKSIZE, * and that both SBLOCKSIZE and MAXBSIZE must be >= MINBSIZE. */ #define MINBSIZE 4096 /* * The path name on which the filesystem is mounted is maintained * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in * the super block for this name. */ #define MAXMNTLEN 468 /* * The volume name for this filesystem is maintained in fs_volname. * MAXVOLLEN defines the length of the buffer allocated. */ #define MAXVOLLEN 32 /* * There is a 128-byte region in the superblock reserved for in-core * pointers to summary information. Originally this included an array * of pointers to blocks of struct csum; now there are just a few * pointers and the remaining space is padded with fs_ocsp[]. * * NOCSPTRS determines the size of this padding. One pointer (fs_csp) * is taken away to point to a contiguous array of struct csum for * all cylinder groups; a second (fs_maxcluster) points to an array * of cluster sizes that is computed as cylinder groups are inspected, * and the third points to an array that tracks the creation of new * directories. A fourth pointer, fs_active, is used when creating * snapshots; it points to a bitmap of cylinder groups for which the * free-block bitmap has changed since the snapshot operation began. */ #define NOCSPTRS ((128 / sizeof(void *)) - 4) /* * A summary of contiguous blocks of various sizes is maintained * in each cylinder group. Normally this is set by the initial * value of fs_maxcontig. To conserve space, a maximum summary size * is set by FS_MAXCONTIG. */ #define FS_MAXCONTIG 16 /* * MINFREE gives the minimum acceptable percentage of filesystem * blocks which may be free. If the freelist drops below this level * only the superuser may continue to allocate blocks. This may * be set to 0 if no reserve of free blocks is deemed necessary, * however throughput drops by fifty percent if the filesystem * is run at between 95% and 100% full; thus the minimum default * value of fs_minfree is 5%. However, to get good clustering * performance, 10% is a better choice. hence we use 10% as our * default value. With 10% free space, fragmentation is not a * problem, so we choose to optimize for time. */ #define MINFREE 8 #define DEFAULTOPT FS_OPTTIME /* * Grigoriy Orlov has done some extensive work to fine * tune the layout preferences for directories within a filesystem. * His algorithm can be tuned by adjusting the following parameters * which tell the system the average file size and the average number * of files per directory. These defaults are well selected for typical * filesystems, but may need to be tuned for odd cases like filesystems * being used for squid caches or news spools. */ #define AVFILESIZ 16384 /* expected average file size */ #define AFPDIR 64 /* expected number of files per directory */ /* * The maximum number of snapshot nodes that can be associated * with each filesystem. This limit affects only the number of * snapshot files that can be recorded within the superblock so * that they can be found when the filesystem is mounted. However, * maintaining too many will slow the filesystem performance, so * having this limit is a good idea. */ #define FSMAXSNAP 20 /* * Used to identify special blocks in snapshots: * * BLK_NOCOPY - A block that was unallocated at the time the snapshot * was taken, hence does not need to be copied when written. * BLK_SNAP - A block held by another snapshot that is not needed by this * snapshot. When the other snapshot is freed, the BLK_SNAP entries * are converted to BLK_NOCOPY. These are needed to allow fsck to * identify blocks that are in use by other snapshots (which are * expunged from this snapshot). */ #define BLK_NOCOPY ((ufs2_daddr_t)(1)) #define BLK_SNAP ((ufs2_daddr_t)(2)) /* * Sysctl values for the fast filesystem. */ #define FFS_ADJ_REFCNT 1 /* adjust inode reference count */ #define FFS_ADJ_BLKCNT 2 /* adjust inode used block count */ #define FFS_BLK_FREE 3 /* free range of blocks in map */ #define FFS_DIR_FREE 4 /* free specified dir inodes in map */ #define FFS_FILE_FREE 5 /* free specified file inodes in map */ #define FFS_SET_FLAGS 6 /* set filesystem flags */ #define FFS_ADJ_NDIR 7 /* adjust number of directories */ #define FFS_ADJ_NBFREE 8 /* adjust number of free blocks */ #define FFS_ADJ_NIFREE 9 /* adjust number of free inodes */ #define FFS_ADJ_NFFREE 10 /* adjust number of free frags */ #define FFS_ADJ_NUMCLUSTERS 11 /* adjust number of free clusters */ #define FFS_SET_CWD 12 /* set current directory */ #define FFS_SET_DOTDOT 13 /* set inode number for ".." */ #define FFS_UNLINK 14 /* remove a name in the filesystem */ #define FFS_SET_INODE 15 /* update an on-disk inode */ #define FFS_SET_BUFOUTPUT 16 /* set buffered writing on descriptor */ #define FFS_MAXID 16 /* number of valid ffs ids */ /* * Command structure passed in to the filesystem to adjust filesystem values. */ #define FFS_CMD_VERSION 0x19790518 /* version ID */ struct fsck_cmd { int32_t version; /* version of command structure */ int32_t handle; /* reference to filesystem to be changed */ int64_t value; /* inode or block number to be affected */ int64_t size; /* amount or range to be adjusted */ int64_t spare; /* reserved for future use */ }; /* * A recovery structure placed at the end of the boot block area by newfs * that can be used by fsck to search for alternate superblocks. */ struct fsrecovery { int32_t fsr_magic; /* magic number */ int32_t fsr_fsbtodb; /* fsbtodb and dbtofsb shift constant */ int32_t fsr_sblkno; /* offset of super-block in filesys */ int32_t fsr_fpg; /* blocks per group * fs_frag */ u_int32_t fsr_ncg; /* number of cylinder groups */ }; /* * Per cylinder group information; summarized in blocks allocated * from first cylinder group data blocks. These blocks have to be * read in from fs_csaddr (size fs_cssize) in addition to the * super block. */ struct csum { int32_t cs_ndir; /* number of directories */ int32_t cs_nbfree; /* number of free blocks */ int32_t cs_nifree; /* number of free inodes */ int32_t cs_nffree; /* number of free frags */ }; struct csum_total { int64_t cs_ndir; /* number of directories */ int64_t cs_nbfree; /* number of free blocks */ int64_t cs_nifree; /* number of free inodes */ int64_t cs_nffree; /* number of free frags */ int64_t cs_numclusters; /* number of free clusters */ int64_t cs_spare[3]; /* future expansion */ }; /* * Super block for an FFS filesystem. */ struct fs { int32_t fs_firstfield; /* historic filesystem linked list, */ int32_t fs_unused_1; /* used for incore super blocks */ int32_t fs_sblkno; /* offset of super-block in filesys */ int32_t fs_cblkno; /* offset of cyl-block in filesys */ int32_t fs_iblkno; /* offset of inode-blocks in filesys */ int32_t fs_dblkno; /* offset of first data after cg */ int32_t fs_old_cgoffset; /* cylinder group offset in cylinder */ int32_t fs_old_cgmask; /* used to calc mod fs_ntrak */ int32_t fs_old_time; /* last time written */ int32_t fs_old_size; /* number of blocks in fs */ int32_t fs_old_dsize; /* number of data blocks in fs */ u_int32_t fs_ncg; /* number of cylinder groups */ int32_t fs_bsize; /* size of basic blocks in fs */ int32_t fs_fsize; /* size of frag blocks in fs */ int32_t fs_frag; /* number of frags in a block in fs */ /* these are configuration parameters */ int32_t fs_minfree; /* minimum percentage of free blocks */ int32_t fs_old_rotdelay; /* num of ms for optimal next block */ int32_t fs_old_rps; /* disk revolutions per second */ /* these fields can be computed from the others */ int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */ int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */ int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */ int32_t fs_fshift; /* ``numfrags'' calc number of frags */ /* these are configuration parameters */ int32_t fs_maxcontig; /* max number of contiguous blks */ int32_t fs_maxbpg; /* max number of blks per cyl group */ /* these fields can be computed from the others */ int32_t fs_fragshift; /* block to frag shift */ int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ int32_t fs_sbsize; /* actual size of super block */ int32_t fs_spare1[2]; /* old fs_csmask */ /* old fs_csshift */ int32_t fs_nindir; /* value of NINDIR */ u_int32_t fs_inopb; /* value of INOPB */ int32_t fs_old_nspf; /* value of NSPF */ /* yet another configuration parameter */ int32_t fs_optim; /* optimization preference, see below */ int32_t fs_old_npsect; /* # sectors/track including spares */ int32_t fs_old_interleave; /* hardware sector interleave */ int32_t fs_old_trackskew; /* sector 0 skew, per track */ int32_t fs_id[2]; /* unique filesystem id */ /* sizes determined by number of cylinder groups and their sizes */ int32_t fs_old_csaddr; /* blk addr of cyl grp summary area */ int32_t fs_cssize; /* size of cyl grp summary area */ int32_t fs_cgsize; /* cylinder group size */ int32_t fs_spare2; /* old fs_ntrak */ int32_t fs_old_nsect; /* sectors per track */ int32_t fs_old_spc; /* sectors per cylinder */ int32_t fs_old_ncyl; /* cylinders in filesystem */ int32_t fs_old_cpg; /* cylinders per group */ u_int32_t fs_ipg; /* inodes per group */ int32_t fs_fpg; /* blocks per group * fs_frag */ /* this data must be re-computed after crashes */ struct csum fs_old_cstotal; /* cylinder summary information */ /* these fields are cleared at mount time */ int8_t fs_fmod; /* super block modified flag */ int8_t fs_clean; /* filesystem is clean flag */ int8_t fs_ronly; /* mounted read-only flag */ int8_t fs_old_flags; /* old FS_ flags */ u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ u_char fs_volname[MAXVOLLEN]; /* volume name */ u_int64_t fs_swuid; /* system-wide uid */ int32_t fs_pad; /* due to alignment of fs_swuid */ /* these fields retain the current block allocation info */ int32_t fs_cgrotor; /* last cg searched */ void *fs_ocsp[NOCSPTRS]; /* padding; was list of fs_cs buffers */ u_int8_t *fs_contigdirs; /* (u) # of contig. allocated dirs */ struct csum *fs_csp; /* (u) cg summary info buffer */ int32_t *fs_maxcluster; /* (u) max cluster in each cyl group */ u_int *fs_active; /* (u) used by snapshots to track fs */ int32_t fs_old_cpc; /* cyl per cycle in postbl */ int32_t fs_maxbsize; /* maximum blocking factor permitted */ int64_t fs_unrefs; /* number of unreferenced inodes */ int64_t fs_providersize; /* size of underlying GEOM provider */ int64_t fs_metaspace; /* size of area reserved for metadata */ - int64_t fs_sparecon64[14]; /* old rotation block list head */ + int64_t fs_sparecon64[13]; /* old rotation block list head */ + int64_t fs_sblockactualloc; /* byte offset of this superblock */ int64_t fs_sblockloc; /* byte offset of standard superblock */ struct csum_total fs_cstotal; /* (u) cylinder summary information */ ufs_time_t fs_time; /* last time written */ int64_t fs_size; /* number of blocks in fs */ int64_t fs_dsize; /* number of data blocks in fs */ ufs2_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ int64_t fs_pendingblocks; /* (u) blocks being freed */ u_int32_t fs_pendinginodes; /* (u) inodes being freed */ uint32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */ u_int32_t fs_avgfilesize; /* expected average file size */ u_int32_t fs_avgfpdir; /* expected # of files per directory */ int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */ ufs_time_t fs_mtime; /* Last mount or fsck time. */ int32_t fs_sujfree; /* SUJ free list */ int32_t fs_sparecon32[22]; /* reserved for future constants */ u_int32_t fs_metackhash; /* metadata check-hash, see CK_ below */ int32_t fs_flags; /* see FS_ flags below */ int32_t fs_contigsumsize; /* size of cluster summary array */ int32_t fs_maxsymlinklen; /* max length of an internal symlink */ int32_t fs_old_inodefmt; /* format of on-disk inodes */ u_int64_t fs_maxfilesize; /* maximum representable file size */ int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */ int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */ int32_t fs_state; /* validate fs_clean field */ int32_t fs_old_postblformat; /* format of positional layout tables */ int32_t fs_old_nrpos; /* number of rotational positions */ int32_t fs_spare5[2]; /* old fs_postbloff */ /* old fs_rotbloff */ int32_t fs_magic; /* magic number */ }; /* Sanity checking. */ #ifdef CTASSERT CTASSERT(sizeof(struct fs) == 1376); #endif /* * Filesystem identification */ #define FS_UFS1_MAGIC 0x011954 /* UFS1 fast filesystem magic number */ #define FS_UFS2_MAGIC 0x19540119 /* UFS2 fast filesystem magic number */ #define FS_BAD_MAGIC 0x19960408 /* UFS incomplete newfs magic number */ #define FS_42INODEFMT -1 /* 4.2BSD inode format */ #define FS_44INODEFMT 2 /* 4.4BSD inode format */ /* * Preference for optimization. */ #define FS_OPTTIME 0 /* minimize allocation time */ #define FS_OPTSPACE 1 /* minimize disk fragmentation */ /* * Filesystem flags. * * The FS_UNCLEAN flag is set by the kernel when the filesystem was * mounted with fs_clean set to zero. The FS_DOSOFTDEP flag indicates * that the filesystem should be managed by the soft updates code. * Note that the FS_NEEDSFSCK flag is set and cleared only by the * fsck utility. It is set when background fsck finds an unexpected * inconsistency which requires a traditional foreground fsck to be * run. Such inconsistencies should only be found after an uncorrectable * disk error. A foreground fsck will clear the FS_NEEDSFSCK flag when * it has successfully cleaned up the filesystem. The kernel uses this * flag to enforce that inconsistent filesystems be mounted read-only. * The FS_INDEXDIRS flag when set indicates that the kernel maintains * on-disk auxiliary indexes (such as B-trees) for speeding directory * accesses. Kernels that do not support auxiliary indices clear the * flag to indicate that the indices need to be rebuilt (by fsck) before * they can be used. When a filesystem is mounted, any flags not * included in FS_SUPPORTED are cleared. This lets newer features * know that the filesystem has been run on an older version of the * filesystem and thus that data structures associated with those * features are out-of-date and need to be rebuilt. * * FS_ACLS indicates that POSIX.1e ACLs are administratively enabled * for the file system, so they should be loaded from extended attributes, * observed for access control purposes, and be administered by object * owners. FS_NFS4ACLS indicates that NFSv4 ACLs are administratively * enabled. This flag is mutually exclusive with FS_ACLS. FS_MULTILABEL * indicates that the TrustedBSD MAC Framework should attempt to back MAC * labels into extended attributes on the file system rather than maintain * a single mount label for all objects. */ #define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */ #define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */ #define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */ #define FS_SUJ 0x0008 /* Filesystem using softupdate journal */ #define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */ #define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */ #define FS_GJOURNAL 0x0040 /* gjournaled file system */ #define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */ #define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */ #define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */ #define FS_TRIM 0x0400 /* issue BIO_DELETE for deleted blocks */ #define FS_SUPPORTED 0xFFFF /* supported flags, others cleared at mount */ /* * The fs_metackhash field indicates the types of metadata check-hash * that are maintained for a filesystem. Not all filesystems check-hash * all metadata. */ #define CK_SUPERBLOCK 0x0001 /* the superblock */ #define CK_CYLGRP 0x0002 /* the cylinder groups */ #define CK_INODE 0x0004 /* inodes */ #define CK_INDIR 0x0008 /* indirect blocks */ #define CK_DIR 0x0010 /* directory contents */ /* * The BX_FSPRIV buffer b_xflags are used to track types of data in buffers. */ #define BX_SUPERBLOCK 0x00010000 /* superblock */ #define BX_CYLGRP 0x00020000 /* cylinder groups */ #define BX_INODE 0x00040000 /* inodes */ #define BX_INDIR 0x00080000 /* indirect blocks */ #define BX_DIR 0x00100000 /* directory contents */ #define PRINT_UFS_BUF_XFLAGS "\20\25dir\24indir\23inode\22cylgrp\21superblock" /* * Macros to access bits in the fs_active array. */ #define ACTIVECGNUM(fs, cg) ((fs)->fs_active[(cg) / (NBBY * sizeof(int))]) #define ACTIVECGOFF(cg) (1 << ((cg) % (NBBY * sizeof(int)))) #define ACTIVESET(fs, cg) do { \ if ((fs)->fs_active) \ ACTIVECGNUM((fs), (cg)) |= ACTIVECGOFF((cg)); \ } while (0) #define ACTIVECLEAR(fs, cg) do { \ if ((fs)->fs_active) \ ACTIVECGNUM((fs), (cg)) &= ~ACTIVECGOFF((cg)); \ } while (0) /* * The size of a cylinder group is calculated by CGSIZE. The maximum size * is limited by the fact that cylinder groups are at most one block. * Its size is derived from the size of the maps maintained in the * cylinder group and the (struct cg) size. */ #define CGSIZE(fs) \ /* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \ /* old btotoff */ (fs)->fs_old_cpg * sizeof(int32_t) + \ /* old boff */ (fs)->fs_old_cpg * sizeof(u_int16_t) + \ /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ /* block map */ howmany((fs)->fs_fpg, NBBY) +\ /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ /* cluster sum */ (fs)->fs_contigsumsize * sizeof(int32_t) + \ /* cluster map */ howmany(fragstoblks(fs, (fs)->fs_fpg), NBBY))) /* * The minimal number of cylinder groups that should be created. */ #define MINCYLGRPS 4 /* * Convert cylinder group to base address of its global summary info. */ #define fs_cs(fs, indx) fs_csp[indx] /* * Cylinder group block for a filesystem. */ #define CG_MAGIC 0x090255 struct cg { int32_t cg_firstfield; /* historic cyl groups linked list */ int32_t cg_magic; /* magic number */ int32_t cg_old_time; /* time last written */ u_int32_t cg_cgx; /* we are the cgx'th cylinder group */ int16_t cg_old_ncyl; /* number of cyl's this cg */ int16_t cg_old_niblk; /* number of inode blocks this cg */ u_int32_t cg_ndblk; /* number of data blocks this cg */ struct csum cg_cs; /* cylinder summary information */ u_int32_t cg_rotor; /* position of last used block */ u_int32_t cg_frotor; /* position of last used frag */ u_int32_t cg_irotor; /* position of last used inode */ u_int32_t cg_frsum[MAXFRAG]; /* counts of available frags */ int32_t cg_old_btotoff; /* (int32) block totals per cylinder */ int32_t cg_old_boff; /* (u_int16) free block positions */ u_int32_t cg_iusedoff; /* (u_int8) used inode map */ u_int32_t cg_freeoff; /* (u_int8) free block map */ u_int32_t cg_nextfreeoff; /* (u_int8) next available space */ u_int32_t cg_clustersumoff; /* (u_int32) counts of avail clusters */ u_int32_t cg_clusteroff; /* (u_int8) free cluster map */ u_int32_t cg_nclusterblks; /* number of clusters this cg */ u_int32_t cg_niblk; /* number of inode blocks this cg */ u_int32_t cg_initediblk; /* last initialized inode */ u_int32_t cg_unrefs; /* number of unreferenced inodes */ int32_t cg_sparecon32[1]; /* reserved for future use */ u_int32_t cg_ckhash; /* check-hash of this cg */ ufs_time_t cg_time; /* time last written */ int64_t cg_sparecon64[3]; /* reserved for future use */ u_int8_t cg_space[1]; /* space for cylinder group maps */ /* actually longer */ }; /* * Macros for access to cylinder group array structures */ #define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC) #define cg_inosused(cgp) \ ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff)) #define cg_blksfree(cgp) \ ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff)) #define cg_clustersfree(cgp) \ ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff)) #define cg_clustersum(cgp) \ ((int32_t *)((uintptr_t)(cgp) + (cgp)->cg_clustersumoff)) /* * Turn filesystem block numbers into disk block addresses. * This maps filesystem blocks to device size blocks. */ #define fsbtodb(fs, b) ((daddr_t)(b) << (fs)->fs_fsbtodb) #define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) /* * Cylinder group macros to locate things in cylinder groups. * They calc filesystem addresses of cylinder group data structures. */ #define cgbase(fs, c) (((ufs2_daddr_t)(fs)->fs_fpg) * (c)) #define cgdata(fs, c) (cgdmin(fs, c) + (fs)->fs_metaspace) /* data zone */ #define cgmeta(fs, c) (cgdmin(fs, c)) /* meta data */ #define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ #define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ #define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ #define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ #define cgstart(fs, c) \ ((fs)->fs_magic == FS_UFS2_MAGIC ? cgbase(fs, c) : \ (cgbase(fs, c) + (fs)->fs_old_cgoffset * ((c) & ~((fs)->fs_old_cgmask)))) /* * Macros for handling inode numbers: * inode number to filesystem block offset. * inode number to cylinder group number. * inode number to filesystem block address. */ #define ino_to_cg(fs, x) (((ino_t)(x)) / (fs)->fs_ipg) #define ino_to_fsba(fs, x) \ ((ufs2_daddr_t)(cgimin(fs, ino_to_cg(fs, (ino_t)(x))) + \ (blkstofrags((fs), ((((ino_t)(x)) % (fs)->fs_ipg) / INOPB(fs)))))) #define ino_to_fsbo(fs, x) (((ino_t)(x)) % INOPB(fs)) /* * Give cylinder group number for a filesystem block. * Give cylinder group block number for a filesystem block. */ #define dtog(fs, d) ((d) / (fs)->fs_fpg) #define dtogd(fs, d) ((d) % (fs)->fs_fpg) /* * Extract the bits for a block from a map. * Compute the cylinder and rotational position of a cyl block addr. */ #define blkmap(fs, map, loc) \ (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) /* * The following macros optimize certain frequently calculated * quantities by using shifts and masks in place of divisions * modulos and multiplications. */ #define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ ((loc) & (fs)->fs_qbmask) #define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ ((loc) & (fs)->fs_qfmask) #define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \ (((off_t)(frag)) << (fs)->fs_fshift) #define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \ (((off_t)(blk)) << (fs)->fs_bshift) /* Use this only when `blk' is known to be small, e.g., < UFS_NDADDR. */ #define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ ((blk) << (fs)->fs_bshift) #define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ ((loc) >> (fs)->fs_bshift) #define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ ((loc) >> (fs)->fs_fshift) #define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) #define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) #define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ ((frags) >> (fs)->fs_fragshift) #define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ ((blks) << (fs)->fs_fragshift) #define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ ((fsb) & ((fs)->fs_frag - 1)) #define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ ((fsb) &~ ((fs)->fs_frag - 1)) /* * Determine the number of available frags given a * percentage to hold in reserve. */ #define freespace(fs, percentreserved) \ (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ (fs)->fs_cstotal.cs_nffree - \ (((off_t)((fs)->fs_dsize)) * (percentreserved) / 100)) /* * Determining the size of a file block in the filesystem. */ #define blksize(fs, ip, lbn) \ (((lbn) >= UFS_NDADDR || (ip)->i_size >= \ (uint64_t)smalllblktosize(fs, (lbn) + 1)) \ ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) #define sblksize(fs, size, lbn) \ (((lbn) >= UFS_NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (size))))) /* * Number of indirects in a filesystem block. */ #define NINDIR(fs) ((fs)->fs_nindir) /* * Indirect lbns are aligned on UFS_NDADDR addresses where single indirects * are the negated address of the lowest lbn reachable, double indirects * are this lbn - 1 and triple indirects are this lbn - 2. This yields * an unusual bit order to determine level. */ static inline int lbn_level(ufs_lbn_t lbn) { if (lbn >= 0) return 0; switch (lbn & 0x3) { case 0: return (0); case 1: break; case 2: return (2); case 3: return (1); default: break; } return (-1); } static inline ufs_lbn_t lbn_offset(struct fs *fs, int level) { ufs_lbn_t res; for (res = 1; level > 0; level--) res *= NINDIR(fs); return (res); } /* * Number of inodes in a secondary storage block/fragment. */ #define INOPB(fs) ((fs)->fs_inopb) #define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) /* * Softdep journal record format. */ #define JOP_ADDREF 1 /* Add a reference to an inode. */ #define JOP_REMREF 2 /* Remove a reference from an inode. */ #define JOP_NEWBLK 3 /* Allocate a block. */ #define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */ #define JOP_MVREF 5 /* Move a reference from one off to another. */ #define JOP_TRUNC 6 /* Partial truncation record. */ #define JOP_SYNC 7 /* fsync() complete record. */ #define JREC_SIZE 32 /* Record and segment header size. */ #define SUJ_MIN (4 * 1024 * 1024) /* Minimum journal size */ #define SUJ_MAX (32 * 1024 * 1024) /* Maximum journal size */ #define SUJ_FILE ".sujournal" /* Journal file name */ /* * Size of the segment record header. There is at most one for each disk * block in the journal. The segment header is followed by an array of * records. fsck depends on the first element in each record being 'op' * and the second being 'ino'. Segments may span multiple disk blocks but * the header is present on each. */ struct jsegrec { uint64_t jsr_seq; /* Our sequence number */ uint64_t jsr_oldest; /* Oldest valid sequence number */ uint16_t jsr_cnt; /* Count of valid records */ uint16_t jsr_blocks; /* Count of device bsize blocks. */ uint32_t jsr_crc; /* 32bit crc of the valid space */ ufs_time_t jsr_time; /* timestamp for mount instance */ }; /* * Reference record. Records a single link count modification. */ struct jrefrec { uint32_t jr_op; uint32_t jr_ino; uint32_t jr_parent; uint16_t jr_nlink; uint16_t jr_mode; int64_t jr_diroff; uint64_t jr_unused; }; /* * Move record. Records a reference moving within a directory block. The * nlink is unchanged but we must search both locations. */ struct jmvrec { uint32_t jm_op; uint32_t jm_ino; uint32_t jm_parent; uint16_t jm_unused; int64_t jm_oldoff; int64_t jm_newoff; }; /* * Block record. A set of frags or tree of blocks starting at an indirect are * freed or a set of frags are allocated. */ struct jblkrec { uint32_t jb_op; uint32_t jb_ino; ufs2_daddr_t jb_blkno; ufs_lbn_t jb_lbn; uint16_t jb_frags; uint16_t jb_oldfrags; uint32_t jb_unused; }; /* * Truncation record. Records a partial truncation so that it may be * completed at check time. Also used for sync records. */ struct jtrncrec { uint32_t jt_op; uint32_t jt_ino; int64_t jt_size; uint32_t jt_extsize; uint32_t jt_pad[3]; }; union jrec { struct jsegrec rec_jsegrec; struct jrefrec rec_jrefrec; struct jmvrec rec_jmvrec; struct jblkrec rec_jblkrec; struct jtrncrec rec_jtrncrec; }; #ifdef CTASSERT CTASSERT(sizeof(struct jsegrec) == JREC_SIZE); CTASSERT(sizeof(struct jrefrec) == JREC_SIZE); CTASSERT(sizeof(struct jmvrec) == JREC_SIZE); CTASSERT(sizeof(struct jblkrec) == JREC_SIZE); CTASSERT(sizeof(struct jtrncrec) == JREC_SIZE); CTASSERT(sizeof(union jrec) == JREC_SIZE); #endif extern int inside[], around[]; extern u_char *fragtbl[]; /* * IOCTLs used for filesystem write suspension. */ #define UFSSUSPEND _IOW('U', 1, fsid_t) #define UFSRESUME _IO('U', 2) #endif Index: head/usr.sbin/fstyp/Makefile =================================================================== --- head/usr.sbin/fstyp/Makefile (revision 328425) +++ head/usr.sbin/fstyp/Makefile (revision 328426) @@ -1,46 +1,46 @@ # $FreeBSD$ .include PROG= fstyp SRCS= cd9660.c exfat.c ext2fs.c fstyp.c geli.c msdosfs.c ntfs.c ufs.c .if ${MK_ZFS} != "no" SRCS += zfs.c .endif MAN= fstyp.8 WARNS?= 2 .include .if ${TARGET_ENDIANNESS} == 1234 HAS_TESTS= SUBDIR.${MK_TESTS}+= tests .endif .if ${MK_ZFS} != "no" IGNORE_PRAGMA= YES CFLAGS+= -DNEED_SOLARIS_BOOLEAN -DHAVE_ZFS CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/sys CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/head .endif CFLAGS+=-I${SRCTOP}/sys -LIBADD= geom md +LIBADD= geom md ufs .if ${MK_ZFS} != "no" LIBADD+=nvpair zfs .endif .include Index: head/usr.sbin/fstyp/ufs.c =================================================================== --- head/usr.sbin/fstyp/ufs.c (revision 328425) +++ head/usr.sbin/fstyp/ufs.c (revision 328426) @@ -1,109 +1,64 @@ /*- * Copyright (c) 2002, 2003 Gordon Tetlow * Copyright (c) 2006 Pawel Jakub Dawidek * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include +#include +#include + +#include +#include #include #include #include #include -#include -#include - #include "fstyp.h" -static const int superblocks[] = SBLOCKSEARCH; - int fstyp_ufs(FILE *fp, char *label, size_t labelsize) { - int sb, superblock; struct fs *fs; - /* - * Walk through the standard places that superblocks hide and look - * for UFS magic. If we find magic, then check that the size in the - * superblock corresponds to the size of the underlying provider. - * Finally, look for a volume label and create an appropriate - * provider based on that. - */ - for (sb = 0; (superblock = superblocks[sb]) != -1; sb++) { - fs = (struct fs *)read_buf(fp, superblock, SBLOCKSIZE); - if (fs == NULL) - continue; - /* - * Check for magic. We also need to check if file system size is equal - * to providers size, because sysinstall(8) used to bogusly put first - * partition at offset 0 instead of 16, and glabel/ufs would find file - * system on slice instead of partition. - */ -#ifdef notyet - if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_fsize > 0 && - ((pp->mediasize / fs->fs_fsize == fs->fs_old_size) || - (pp->mediasize / fs->fs_fsize == fs->fs_providersize))) { - /* Valid UFS1. */ - } else if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_fsize > 0 && - ((pp->mediasize / fs->fs_fsize == fs->fs_size) || - (pp->mediasize / fs->fs_fsize == fs->fs_providersize))) { - /* Valid UFS2. */ - } else { - g_free(fs); - continue; - } -#else - if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_fsize > 0) { - /* Valid UFS1. */ - } else if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_fsize > 0) { - /* Valid UFS2. */ - } else { - free(fs); - continue; - } -#endif - - if (fs->fs_sblockloc != superblock || fs->fs_ncg < 1 || - fs->fs_bsize < MINBSIZE || - (size_t)fs->fs_bsize < sizeof(struct fs)) { - free(fs); - continue; - } - + switch (sbget(fileno(fp), &fs, -1)) { + case 0: strlcpy(label, fs->fs_volname, labelsize); - - free(fs); return (0); + case ENOENT: + /* Cannot find file system superblock */ + return (1); + default: + /* Unable to read file system superblock */ + return (1); } - - return (1); } Index: head/usr.sbin/quot/Makefile =================================================================== --- head/usr.sbin/quot/Makefile (revision 328425) +++ head/usr.sbin/quot/Makefile (revision 328426) @@ -1,8 +1,9 @@ # $FreeBSD$ PROG= quot MAN= quot.8 +LIBADD= ufs WARNS?= 2 .include Index: head/usr.sbin/quot/quot.c =================================================================== --- head/usr.sbin/quot/quot.c (revision 328425) +++ head/usr.sbin/quot/quot.c (revision 328426) @@ -1,646 +1,631 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1991, 1994 Wolfgang Solfrank. * Copyright (C) 1991, 1994 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include /* some flags of what to do: */ static char estimate; static char count; static char unused; static void (*func)(int, struct fs *, char *); static long blocksize; static char *header; static int headerlen; static union dinode *get_inode(int, struct fs *, ino_t); static int virtualblocks(struct fs *, union dinode *); static int isfree(struct fs *, union dinode *); static void inituser(void); static void usrrehash(void); static struct user *user(uid_t); static int cmpusers(const void *, const void *); static void uses(uid_t, daddr_t, time_t); static void initfsizes(void); static void dofsizes(int, struct fs *, char *); static void douser(int, struct fs *, char *); static void donames(int, struct fs *, char *); static void usage(void); static void quot(char *, char *); /* * Original BSD quot doesn't round to number of frags/blocks, * doesn't account for indirection blocks and gets it totally * wrong if the size is a multiple of the blocksize. * The new code always counts the number of 512 byte blocks * instead of the number of kilobytes and converts them to * kByte when done (on request). * * Due to the size of modern disks, we must cast intermediate * values to 64 bits to prevent potential overflows. */ #ifdef COMPAT #define SIZE(n) (n) #else #define SIZE(n) ((int)(((quad_t)(n) * 512 + blocksize - 1)/blocksize)) #endif #define INOCNT(fs) ((fs)->fs_ipg) #define INOSZ(fs) \ (((fs)->fs_magic == FS_UFS1_MAGIC ? sizeof(struct ufs1_dinode) : \ sizeof(struct ufs2_dinode)) * INOCNT(fs)) union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(fs, dp, field) \ (((fs)->fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) static union dinode * get_inode(int fd, struct fs *super, ino_t ino) { static caddr_t ipbuf; static struct cg *cgp; static ino_t last; static int cg; struct ufs2_dinode *di2; if (fd < 0) { /* flush cache */ if (ipbuf) { free(ipbuf); ipbuf = 0; if (super != NULL && super->fs_magic == FS_UFS2_MAGIC) { free(cgp); cgp = 0; } } return 0; } if (!ipbuf || ino < last || ino >= last + INOCNT(super)) { if (super->fs_magic == FS_UFS2_MAGIC && (!cgp || cg != ino_to_cg(super, ino))) { cg = ino_to_cg(super, ino); if (!cgp && !(cgp = malloc(super->fs_cgsize))) errx(1, "allocate cg"); if (lseek(fd, (off_t)cgtod(super, cg) << super->fs_fshift, 0) < 0) err(1, "lseek cg"); if (read(fd, cgp, super->fs_cgsize) != super->fs_cgsize) err(1, "read cg"); if (!cg_chkmagic(cgp)) errx(1, "cg has bad magic"); } if (!ipbuf && !(ipbuf = malloc(INOSZ(super)))) errx(1, "allocate inodes"); last = rounddown(ino, INOCNT(super)); if (lseek(fd, (off_t)ino_to_fsba(super, last) << super->fs_fshift, 0) < (off_t)0 || read(fd, ipbuf, INOSZ(super)) != (ssize_t)INOSZ(super)) err(1, "read inodes"); } if (super->fs_magic == FS_UFS1_MAGIC) return ((union dinode *) &((struct ufs1_dinode *)ipbuf)[ino % INOCNT(super)]); di2 = &((struct ufs2_dinode *)ipbuf)[ino % INOCNT(super)]; /* If the inode is unused, it might be unallocated too, so zero it. */ if (isclr(cg_inosused(cgp), ino % super->fs_ipg)) bzero(di2, sizeof (*di2)); return ((union dinode *)di2); } #ifdef COMPAT #define actualblocks(fs, dp) (DIP(fs, dp, di_blocks) / 2) #else #define actualblocks(fs, dp) DIP(fs, dp, di_blocks) #endif static int virtualblocks(struct fs *super, union dinode *dp) { off_t nblk, sz; sz = DIP(super, dp, di_size); #ifdef COMPAT if (lblkno(super,sz) >= UFS_NDADDR) { nblk = blkroundup(super,sz); if (sz == nblk) nblk += super->fs_bsize; } return sz / 1024; #else /* COMPAT */ if (lblkno(super,sz) >= UFS_NDADDR) { nblk = blkroundup(super,sz); sz = lblkno(super,nblk); sz = (sz - UFS_NDADDR + NINDIR(super) - 1) / NINDIR(super); while (sz > 0) { nblk += sz * super->fs_bsize; /* sz - 1 rounded up */ sz = (sz - 1 + NINDIR(super) - 1) / NINDIR(super); } } else nblk = fragroundup(super,sz); return nblk / 512; #endif /* COMPAT */ } static int isfree(struct fs *super, union dinode *dp) { #ifdef COMPAT return (DIP(super, dp, di_mode) & IFMT) == 0; #else /* COMPAT */ switch (DIP(super, dp, di_mode) & IFMT) { case IFIFO: case IFLNK: /* should check FASTSYMLINK? */ case IFDIR: case IFREG: return 0; case IFCHR: case IFBLK: case IFSOCK: case IFWHT: case 0: return 1; default: errx(1, "unknown IFMT 0%o", DIP(super, dp, di_mode) & IFMT); } #endif } static struct user { uid_t uid; char *name; daddr_t space; long count; daddr_t spc30; daddr_t spc60; daddr_t spc90; } *users; static int nusers; static void inituser(void) { int i; struct user *usr; if (!nusers) { nusers = 8; if (!(users = (struct user *)calloc(nusers,sizeof(struct user)))) errx(1, "allocate users"); } else { for (usr = users, i = nusers; --i >= 0; usr++) { usr->space = usr->spc30 = usr->spc60 = usr->spc90 = 0; usr->count = 0; } } } static void usrrehash(void) { int i; struct user *usr, *usrn; struct user *svusr; svusr = users; nusers <<= 1; if (!(users = (struct user *)calloc(nusers,sizeof(struct user)))) errx(1, "allocate users"); for (usr = svusr, i = nusers >> 1; --i >= 0; usr++) { for (usrn = users + (usr->uid&(nusers - 1)); usrn->name; usrn--) { if (usrn <= users) usrn = users + nusers; } *usrn = *usr; } } static struct user * user(uid_t uid) { struct user *usr; int i; struct passwd *pwd; while (1) { for (usr = users + (uid&(nusers - 1)), i = nusers; --i >= 0; usr--) { if (!usr->name) { usr->uid = uid; if (!(pwd = getpwuid(uid))) { if ((usr->name = (char *)malloc(7))) sprintf(usr->name,"#%d",uid); } else { if ((usr->name = (char *) malloc(strlen(pwd->pw_name) + 1))) strcpy(usr->name,pwd->pw_name); } if (!usr->name) errx(1, "allocate users"); return usr; } else if (usr->uid == uid) return usr; if (usr <= users) usr = users + nusers; } usrrehash(); } } static int cmpusers(const void *v1, const void *v2) { const struct user *u1, *u2; u1 = (const struct user *)v1; u2 = (const struct user *)v2; return u2->space - u1->space; } #define sortusers(users) (qsort((users),nusers,sizeof(struct user), \ cmpusers)) static void uses(uid_t uid, daddr_t blks, time_t act) { static time_t today; struct user *usr; if (!today) time(&today); usr = user(uid); usr->count++; usr->space += blks; if (today - act > 90L * 24L * 60L * 60L) usr->spc90 += blks; if (today - act > 60L * 24L * 60L * 60L) usr->spc60 += blks; if (today - act > 30L * 24L * 60L * 60L) usr->spc30 += blks; } #ifdef COMPAT #define FSZCNT 500 #else #define FSZCNT 512 #endif struct fsizes { struct fsizes *fsz_next; daddr_t fsz_first, fsz_last; ino_t fsz_count[FSZCNT]; daddr_t fsz_sz[FSZCNT]; } *fsizes; static void initfsizes(void) { struct fsizes *fp; int i; for (fp = fsizes; fp; fp = fp->fsz_next) { for (i = FSZCNT; --i >= 0;) { fp->fsz_count[i] = 0; fp->fsz_sz[i] = 0; } } } static void dofsizes(int fd, struct fs *super, char *name) { ino_t inode, maxino; union dinode *dp; daddr_t sz, ksz; struct fsizes *fp, **fsp; int i; maxino = super->fs_ncg * super->fs_ipg - 1; #ifdef COMPAT if (!(fsizes = (struct fsizes *)malloc(sizeof(struct fsizes)))) errx(1, "allocate fsize structure"); #endif /* COMPAT */ for (inode = 0; inode < maxino; inode++) { errno = 0; if ((dp = get_inode(fd,super,inode)) #ifdef COMPAT && ((DIP(super, dp, di_mode) & IFMT) == IFREG || (DIP(super, dp, di_mode) & IFMT) == IFDIR) #else /* COMPAT */ && !isfree(super, dp) #endif /* COMPAT */ ) { sz = estimate ? virtualblocks(super, dp) : actualblocks(super, dp); #ifdef COMPAT if (sz >= FSZCNT) { fsizes->fsz_count[FSZCNT-1]++; fsizes->fsz_sz[FSZCNT-1] += sz; } else { fsizes->fsz_count[sz]++; fsizes->fsz_sz[sz] += sz; } #else /* COMPAT */ ksz = SIZE(sz); for (fsp = &fsizes; (fp = *fsp); fsp = &fp->fsz_next) { if (ksz < fp->fsz_last) break; } if (!fp || ksz < fp->fsz_first) { if (!(fp = (struct fsizes *) malloc(sizeof(struct fsizes)))) errx(1, "allocate fsize structure"); fp->fsz_next = *fsp; *fsp = fp; fp->fsz_first = rounddown(ksz, FSZCNT); fp->fsz_last = fp->fsz_first + FSZCNT; for (i = FSZCNT; --i >= 0;) { fp->fsz_count[i] = 0; fp->fsz_sz[i] = 0; } } fp->fsz_count[ksz % FSZCNT]++; fp->fsz_sz[ksz % FSZCNT] += sz; #endif /* COMPAT */ } else if (errno) { err(1, "%s", name); } } sz = 0; for (fp = fsizes; fp; fp = fp->fsz_next) { for (i = 0; i < FSZCNT; i++) { if (fp->fsz_count[i]) printf("%jd\t%jd\t%d\n", (intmax_t)(fp->fsz_first + i), (intmax_t)fp->fsz_count[i], SIZE(sz += fp->fsz_sz[i])); } } } static void douser(int fd, struct fs *super, char *name) { ino_t inode, maxino; struct user *usr, *usrs; union dinode *dp; int n; maxino = super->fs_ncg * super->fs_ipg - 1; for (inode = 0; inode < maxino; inode++) { errno = 0; if ((dp = get_inode(fd,super,inode)) && !isfree(super, dp)) uses(DIP(super, dp, di_uid), estimate ? virtualblocks(super, dp) : actualblocks(super, dp), DIP(super, dp, di_atime)); else if (errno) { err(1, "%s", name); } } if (!(usrs = (struct user *)malloc(nusers * sizeof(struct user)))) errx(1, "allocate users"); bcopy(users,usrs,nusers * sizeof(struct user)); sortusers(usrs); for (usr = usrs, n = nusers; --n >= 0 && usr->count; usr++) { printf("%5d",SIZE(usr->space)); if (count) printf("\t%5ld",usr->count); printf("\t%-8s",usr->name); if (unused) printf("\t%5d\t%5d\t%5d", SIZE(usr->spc30), SIZE(usr->spc60), SIZE(usr->spc90)); printf("\n"); } free(usrs); } static void donames(int fd, struct fs *super, char *name) { int c; ino_t maxino; uintmax_t inode; union dinode *dp; maxino = super->fs_ncg * super->fs_ipg - 1; /* first skip the name of the filesystem */ while ((c = getchar()) != EOF && (c < '0' || c > '9')) while ((c = getchar()) != EOF && c != '\n'); ungetc(c,stdin); while (scanf("%ju", &inode) == 1) { if (inode > maxino) { warnx("illegal inode %ju", inode); return; } errno = 0; if ((dp = get_inode(fd,super,inode)) && !isfree(super, dp)) { printf("%s\t",user(DIP(super, dp, di_uid))->name); /* now skip whitespace */ while ((c = getchar()) == ' ' || c == '\t'); /* and print out the remainder of the input line */ while (c != EOF && c != '\n') { putchar(c); c = getchar(); } putchar('\n'); } else { if (errno) { err(1, "%s", name); } /* skip this line */ while ((c = getchar()) != EOF && c != '\n'); } if (c == EOF) break; } } static void usage(void) { #ifdef COMPAT fprintf(stderr,"usage: quot [-nfcvha] [filesystem ...]\n"); #else /* COMPAT */ fprintf(stderr,"usage: quot [-acfhknv] [filesystem ...]\n"); #endif /* COMPAT */ exit(1); } -/* - * Possible superblock locations ordered from most to least likely. - */ -static int sblock_try[] = SBLOCKSEARCH; -static char superblock[SBLOCKSIZE]; - void quot(char *name, char *mp) { - int i, fd; + int fd; struct fs *fs; get_inode(-1, NULL, 0); /* flush cache */ inituser(); initfsizes(); if ((fd = open(name,0)) < 0) { warn("%s", name); close(fd); return; } - for (i = 0; sblock_try[i] != -1; i++) { - if (lseek(fd, sblock_try[i], 0) != sblock_try[i]) { - close(fd); - return; - } - if (read(fd, superblock, SBLOCKSIZE) != SBLOCKSIZE) { - close(fd); - return; - } - fs = (struct fs *)superblock; - if ((fs->fs_magic == FS_UFS1_MAGIC || - (fs->fs_magic == FS_UFS2_MAGIC && - fs->fs_sblockloc == sblock_try[i])) && - fs->fs_bsize <= MAXBSIZE && - fs->fs_bsize >= sizeof(struct fs)) - break; - } - if (sblock_try[i] == -1) { - warnx("%s: not a BSD filesystem",name); + switch (sbget(fd, &fs, -1)) { + case 0: + break; + case ENOENT: + warn("Cannot find file system superblock"); + close(fd); + return; + default: + warn("Unable to read file system superblock"); close(fd); return; } printf("%s:",name); if (mp) printf(" (%s)",mp); putchar('\n'); (*func)(fd, fs, name); close(fd); } int main(int argc, char *argv[]) { char all = 0; struct statfs *mp; struct fstab *fs; int cnt; func = douser; #ifndef COMPAT header = getbsize(&headerlen,&blocksize); #endif while (--argc > 0 && **++argv == '-') { while (*++*argv) { switch (**argv) { case 'n': func = donames; break; case 'c': func = dofsizes; break; case 'a': all = 1; break; case 'f': count = 1; break; case 'h': estimate = 1; break; #ifndef COMPAT case 'k': blocksize = 1024; break; #endif /* COMPAT */ case 'v': unused = 1; break; default: usage(); } } } if (all) { cnt = getmntinfo(&mp,MNT_NOWAIT); for (; --cnt >= 0; mp++) { if (!strncmp(mp->f_fstypename, "ufs", MFSNAMELEN)) quot(mp->f_mntfromname, mp->f_mntonname); } } while (--argc >= 0) { if ((fs = getfsfile(*argv)) != NULL) quot(fs->fs_spec, 0); else quot(*argv,0); argv++; } return 0; }