diff --git a/bin/cp/Makefile b/bin/cp/Makefile index 0c4c7b5eff53..a73ee5447769 100644 --- a/bin/cp/Makefile +++ b/bin/cp/Makefile @@ -1,14 +1,14 @@ # @(#)Makefile 8.1 (Berkeley) 5/31/93 # $FreeBSD$ .include PACKAGE=runtime PROG= cp SRCS= cp.c utils.c -CFLAGS+= -DVM_AND_BUFFER_CACHE_SYNCHRONIZED -D_ACL_PRIVATE +CFLAGS+= -D_ACL_PRIVATE HAS_TESTS= SUBDIR.${MK_TESTS}= tests .include diff --git a/bin/cp/cp.c b/bin/cp/cp.c index dbae4b535843..e38cd97f4369 100644 --- a/bin/cp/cp.c +++ b/bin/cp/cp.c @@ -1,593 +1,592 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * David Hitz of Auspex Systems Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char const copyright[] = "@(#) Copyright (c) 1988, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)cp.c 8.2 (Berkeley) 4/1/94"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Cp copies source files to target files. * * The global PATH_T structure "to" always contains the path to the * current target file. Since fts(3) does not change directories, * this path can be either absolute or dot-relative. * * The basic algorithm is to initialize "to" and use fts(3) to traverse * the file hierarchy rooted in the argument list. A trivial case is the * case of 'cp file1 file2'. The more interesting case is the case of * 'cp file1 file2 ... fileN dir' where the hierarchy is traversed and the * path (relative to the root of the traversal) is appended to dir (stored * in "to") to form the final target path. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" #define STRIP_TRAILING_SLASH(p) { \ while ((p).p_end > (p).p_path + 1 && (p).p_end[-1] == '/') \ *--(p).p_end = 0; \ } static char emptystring[] = ""; PATH_T to = { to.p_path, emptystring, "" }; int fflag, iflag, lflag, nflag, pflag, sflag, vflag; static int Hflag, Lflag, Rflag, rflag; volatile sig_atomic_t info; enum op { FILE_TO_FILE, FILE_TO_DIR, DIR_TO_DNE }; static int copy(char *[], enum op, int, struct stat *); static void siginfo(int __unused); int main(int argc, char *argv[]) { struct stat to_stat, tmp_stat; enum op type; int Pflag, ch, fts_options, r, have_trailing_slash; char *target; fts_options = FTS_NOCHDIR | FTS_PHYSICAL; Pflag = 0; while ((ch = getopt(argc, argv, "HLPRafilnprsvx")) != -1) switch (ch) { case 'H': Hflag = 1; Lflag = Pflag = 0; break; case 'L': Lflag = 1; Hflag = Pflag = 0; break; case 'P': Pflag = 1; Hflag = Lflag = 0; break; case 'R': Rflag = 1; break; case 'a': pflag = 1; Rflag = 1; Pflag = 1; Hflag = Lflag = 0; break; case 'f': fflag = 1; iflag = nflag = 0; break; case 'i': iflag = 1; fflag = nflag = 0; break; case 'l': lflag = 1; break; case 'n': nflag = 1; fflag = iflag = 0; break; case 'p': pflag = 1; break; case 'r': rflag = Lflag = 1; Hflag = Pflag = 0; break; case 's': sflag = 1; break; case 'v': vflag = 1; break; case 'x': fts_options |= FTS_XDEV; break; default: usage(); break; } argc -= optind; argv += optind; if (argc < 2) usage(); if (Rflag && rflag) errx(1, "the -R and -r options may not be specified together"); if (lflag && sflag) errx(1, "the -l and -s options may not be specified together"); if (rflag) Rflag = 1; if (Rflag) { if (Hflag) fts_options |= FTS_COMFOLLOW; if (Lflag) { fts_options &= ~FTS_PHYSICAL; fts_options |= FTS_LOGICAL; } } else if (!Pflag) { fts_options &= ~FTS_PHYSICAL; fts_options |= FTS_LOGICAL | FTS_COMFOLLOW; } (void)signal(SIGINFO, siginfo); /* Save the target base in "to". */ target = argv[--argc]; if (strlcpy(to.p_path, target, sizeof(to.p_path)) >= sizeof(to.p_path)) errx(1, "%s: name too long", target); to.p_end = to.p_path + strlen(to.p_path); if (to.p_path == to.p_end) { *to.p_end++ = '.'; *to.p_end = 0; } have_trailing_slash = (to.p_end[-1] == '/'); if (have_trailing_slash) STRIP_TRAILING_SLASH(to); to.target_end = to.p_end; /* Set end of argument list for fts(3). */ argv[argc] = NULL; /* * Cp has two distinct cases: * * cp [-R] source target * cp [-R] source1 ... sourceN directory * * In both cases, source can be either a file or a directory. * * In (1), the target becomes a copy of the source. That is, if the * source is a file, the target will be a file, and likewise for * directories. * * In (2), the real target is not directory, but "directory/source". */ r = stat(to.p_path, &to_stat); if (r == -1 && errno != ENOENT) err(1, "%s", to.p_path); if (r == -1 || !S_ISDIR(to_stat.st_mode)) { /* * Case (1). Target is not a directory. */ if (argc > 1) errx(1, "%s is not a directory", to.p_path); /* * Need to detect the case: * cp -R dir foo * Where dir is a directory and foo does not exist, where * we want pathname concatenations turned on but not for * the initial mkdir(). */ if (r == -1) { if (Rflag && (Lflag || Hflag)) stat(*argv, &tmp_stat); else lstat(*argv, &tmp_stat); if (S_ISDIR(tmp_stat.st_mode) && Rflag) type = DIR_TO_DNE; else type = FILE_TO_FILE; } else type = FILE_TO_FILE; if (have_trailing_slash && type == FILE_TO_FILE) { if (r == -1) { errx(1, "directory %s does not exist", to.p_path); } else errx(1, "%s is not a directory", to.p_path); } } else /* * Case (2). Target is a directory. */ type = FILE_TO_DIR; /* * For DIR_TO_DNE, we could provide copy() with the to_stat we've * already allocated on the stack here that isn't being used for * anything. Not doing so, though, simplifies later logic a little bit * as we need to skip checking root_stat on the first iteration and * ensure that we set it with the first mkdir(). */ exit (copy(argv, type, fts_options, (type == DIR_TO_DNE ? NULL : &to_stat))); } /* Does the right thing based on -R + -H/-L/-P */ static int copy_stat(const char *path, struct stat *sb) { /* * For -R -H/-P, we need to lstat() instead; copy() cares about the link * itself rather than the target if we're not following links during the * traversal. */ if (!Rflag || Lflag) return (stat(path, sb)); return (lstat(path, sb)); } static int copy(char *argv[], enum op type, int fts_options, struct stat *root_stat) { char rootname[NAME_MAX]; struct stat created_root_stat, to_stat; FTS *ftsp; FTSENT *curr; int base = 0, dne, badcp, rval; size_t nlen; char *p, *recurse_path, *target_mid; mode_t mask, mode; /* * Keep an inverted copy of the umask, for use in correcting * permissions on created directories when not using -p. */ mask = ~umask(0777); umask(~mask); recurse_path = NULL; if ((ftsp = fts_open(argv, fts_options, NULL)) == NULL) err(1, "fts_open"); - for (badcp = rval = 0; errno = 0, (curr = fts_read(ftsp)) != NULL; - badcp = 0) { + for (badcp = rval = 0; (curr = fts_read(ftsp)) != NULL; badcp = 0) { switch (curr->fts_info) { case FTS_NS: case FTS_DNR: case FTS_ERR: warnx("%s: %s", curr->fts_path, strerror(curr->fts_errno)); badcp = rval = 1; continue; case FTS_DC: /* Warn, continue. */ warnx("%s: directory causes a cycle", curr->fts_path); badcp = rval = 1; continue; default: ; } /* * Stash the root basename off for detecting recursion later. * * This will be essential if the root is a symlink and we're * rolling with -L or -H. The later bits will need this bit in * particular. */ if (curr->fts_level == FTS_ROOTLEVEL) { strlcpy(rootname, curr->fts_name, sizeof(rootname)); } /* * If we are in case (2) or (3) above, we need to append the * source name to the target name. */ if (type != FILE_TO_FILE) { /* * Need to remember the roots of traversals to create * correct pathnames. If there's a directory being * copied to a non-existent directory, e.g. * cp -R a/dir noexist * the resulting path name should be noexist/foo, not * noexist/dir/foo (where foo is a file in dir), which * is the case where the target exists. * * Also, check for "..". This is for correct path * concatenation for paths ending in "..", e.g. * cp -R .. /tmp * Paths ending in ".." are changed to ".". This is * tricky, but seems the easiest way to fix the problem. * * XXX * Since the first level MUST be FTS_ROOTLEVEL, base * is always initialized. */ if (curr->fts_level == FTS_ROOTLEVEL) { if (type != DIR_TO_DNE) { p = strrchr(curr->fts_path, '/'); base = (p == NULL) ? 0 : (int)(p - curr->fts_path + 1); if (!strcmp(&curr->fts_path[base], "..")) base += 1; } else base = curr->fts_pathlen; } p = &curr->fts_path[base]; nlen = curr->fts_pathlen - base; target_mid = to.target_end; if (*p != '/' && target_mid[-1] != '/') *target_mid++ = '/'; *target_mid = 0; if (target_mid - to.p_path + nlen >= PATH_MAX) { warnx("%s%s: name too long (not copied)", to.p_path, p); badcp = rval = 1; continue; } (void)strncat(target_mid, p, nlen); to.p_end = target_mid + nlen; *to.p_end = 0; STRIP_TRAILING_SLASH(to); /* * We're on the verge of recursing on ourselves. Either * we need to stop right here (we knowingly just created * it), or we will in an immediate descendant. Record * the path of the immediate descendant to make our * lives a little less complicated looking. */ if (curr->fts_info == FTS_D && root_stat != NULL && root_stat->st_dev == curr->fts_statp->st_dev && root_stat->st_ino == curr->fts_statp->st_ino) { assert(recurse_path == NULL); if (root_stat == &created_root_stat) { /* * This directory didn't exist when we * started, we created it as part of * traversal. Stop right here before we * do something silly. */ fts_set(ftsp, curr, FTS_SKIP); continue; } if (asprintf(&recurse_path, "%s/%s", to.p_path, rootname) == -1) err(1, "asprintf"); } if (recurse_path != NULL && strcmp(to.p_path, recurse_path) == 0) { fts_set(ftsp, curr, FTS_SKIP); continue; } } if (curr->fts_info == FTS_DP) { /* * We are nearly finished with this directory. If we * didn't actually copy it, or otherwise don't need to * change its attributes, then we are done. */ if (!curr->fts_number) continue; /* * If -p is in effect, set all the attributes. * Otherwise, set the correct permissions, limited * by the umask. Optimise by avoiding a chmod() * if possible (which is usually the case if we * made the directory). Note that mkdir() does not * honour setuid, setgid and sticky bits, but we * normally want to preserve them on directories. */ if (pflag) { if (setfile(curr->fts_statp, -1)) rval = 1; if (preserve_dir_acls(curr->fts_statp, curr->fts_accpath, to.p_path) != 0) rval = 1; } else { mode = curr->fts_statp->st_mode; if ((mode & (S_ISUID | S_ISGID | S_ISTXT)) || ((mode | S_IRWXU) & mask) != (mode & mask)) if (chmod(to.p_path, mode & mask) != 0) { warn("chmod: %s", to.p_path); rval = 1; } } continue; } /* Not an error but need to remember it happened. */ if (copy_stat(to.p_path, &to_stat) == -1) dne = 1; else { if (to_stat.st_dev == curr->fts_statp->st_dev && to_stat.st_ino == curr->fts_statp->st_ino) { warnx("%s and %s are identical (not copied).", to.p_path, curr->fts_path); badcp = rval = 1; if (S_ISDIR(curr->fts_statp->st_mode)) (void)fts_set(ftsp, curr, FTS_SKIP); continue; } if (!S_ISDIR(curr->fts_statp->st_mode) && S_ISDIR(to_stat.st_mode)) { warnx("cannot overwrite directory %s with " "non-directory %s", to.p_path, curr->fts_path); badcp = rval = 1; continue; } dne = 0; } switch (curr->fts_statp->st_mode & S_IFMT) { case S_IFLNK: /* Catch special case of a non-dangling symlink. */ if ((fts_options & FTS_LOGICAL) || ((fts_options & FTS_COMFOLLOW) && curr->fts_level == 0)) { if (copy_file(curr, dne)) badcp = rval = 1; } else { if (copy_link(curr, !dne)) badcp = rval = 1; } break; case S_IFDIR: if (!Rflag) { warnx("%s is a directory (not copied).", curr->fts_path); (void)fts_set(ftsp, curr, FTS_SKIP); badcp = rval = 1; break; } /* * If the directory doesn't exist, create the new * one with the from file mode plus owner RWX bits, * modified by the umask. Trade-off between being * able to write the directory (if from directory is * 555) and not causing a permissions race. If the * umask blocks owner writes, we fail. */ if (dne) { if (mkdir(to.p_path, curr->fts_statp->st_mode | S_IRWXU) < 0) err(1, "%s", to.p_path); /* * First DNE with a NULL root_stat is the root * path, so set root_stat. We can't really * tell in all cases if the target path is * within the src path, so we just stat() the * first directory we created and use that. */ if (root_stat == NULL && stat(to.p_path, &created_root_stat) == -1) { err(1, "stat"); } else if (root_stat == NULL) { root_stat = &created_root_stat; } } else if (!S_ISDIR(to_stat.st_mode)) { errno = ENOTDIR; err(1, "%s", to.p_path); } /* * Arrange to correct directory attributes later * (in the post-order phase) if this is a new * directory, or if the -p flag is in effect. */ curr->fts_number = pflag || dne; break; case S_IFBLK: case S_IFCHR: if (Rflag && !sflag) { if (copy_special(curr->fts_statp, !dne)) badcp = rval = 1; } else { if (copy_file(curr, dne)) badcp = rval = 1; } break; case S_IFSOCK: warnx("%s is a socket (not copied).", curr->fts_path); break; case S_IFIFO: if (Rflag && !sflag) { if (copy_fifo(curr->fts_statp, !dne)) badcp = rval = 1; } else { if (copy_file(curr, dne)) badcp = rval = 1; } break; default: if (copy_file(curr, dne)) badcp = rval = 1; break; } if (vflag && !badcp) (void)printf("%s -> %s\n", curr->fts_path, to.p_path); } if (errno) err(1, "fts_read"); fts_close(ftsp); free(recurse_path); return (rval); } static void siginfo(int sig __unused) { info = 1; } diff --git a/bin/cp/tests/Makefile b/bin/cp/tests/Makefile index faad22df713a..1e480ad706d1 100644 --- a/bin/cp/tests/Makefile +++ b/bin/cp/tests/Makefile @@ -1,7 +1,9 @@ # $FreeBSD$ PACKAGE= tests ATF_TESTS_SH= cp_test +PROGS+= sparse +BINDIR= ${TESTSDIR} .include diff --git a/bin/cp/tests/cp_test.sh b/bin/cp/tests/cp_test.sh index 7362168d7303..559a5ecc3c00 100755 --- a/bin/cp/tests/cp_test.sh +++ b/bin/cp/tests/cp_test.sh @@ -1,225 +1,315 @@ # # SPDX-License-Identifier: BSD-2-Clause-FreeBSD # # Copyright (c) 2020 Kyle Evans # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ check_size() { file=$1 sz=$2 atf_check -o inline:"$sz\n" stat -f '%z' $file } atf_test_case basic basic_body() { echo "foo" > bar atf_check cp bar baz check_size baz 4 } atf_test_case basic_symlink basic_symlink_body() { echo "foo" > bar ln -s bar baz atf_check cp baz foo atf_check test '!' -L foo atf_check -e inline:"cp: baz and baz are identical (not copied).\n" \ -s exit:1 cp baz baz atf_check -e inline:"cp: bar and baz are identical (not copied).\n" \ -s exit:1 cp baz bar } atf_test_case chrdev chrdev_body() { echo "foo" > bar check_size bar 4 atf_check cp /dev/null trunc check_size trunc 0 atf_check cp bar trunc check_size trunc 4 atf_check cp /dev/null trunc check_size trunc 0 } atf_test_case matching_srctgt matching_srctgt_body() { # PR235438: `cp -R foo foo` would previously infinitely recurse and # eventually error out. mkdir foo echo "qux" > foo/bar cp foo/bar foo/zoo atf_check cp -R foo foo atf_check -o inline:"qux\n" cat foo/foo/bar atf_check -o inline:"qux\n" cat foo/foo/zoo atf_check -e not-empty -s not-exit:0 stat foo/foo/foo } atf_test_case matching_srctgt_contained matching_srctgt_contained_body() { # Let's do the same thing, except we'll try to recursively copy foo into # one of its subdirectories. mkdir foo ln -s foo coo echo "qux" > foo/bar mkdir foo/moo touch foo/moo/roo cp foo/bar foo/zoo atf_check cp -R foo foo/moo atf_check cp -RH coo foo/moo atf_check -o inline:"qux\n" cat foo/moo/foo/bar atf_check -o inline:"qux\n" cat foo/moo/coo/bar atf_check -o inline:"qux\n" cat foo/moo/foo/zoo atf_check -o inline:"qux\n" cat foo/moo/coo/zoo # We should have copied the contents of foo/moo before foo, coo started # getting copied in. atf_check -o not-empty stat foo/moo/foo/moo/roo atf_check -o not-empty stat foo/moo/coo/moo/roo atf_check -e not-empty -s not-exit:0 stat foo/moo/foo/moo/foo atf_check -e not-empty -s not-exit:0 stat foo/moo/coo/moo/coo } atf_test_case matching_srctgt_link matching_srctgt_link_body() { mkdir foo echo "qux" > foo/bar cp foo/bar foo/zoo atf_check ln -s foo roo atf_check cp -RH roo foo atf_check -o inline:"qux\n" cat foo/roo/bar atf_check -o inline:"qux\n" cat foo/roo/zoo } atf_test_case matching_srctgt_nonexistent matching_srctgt_nonexistent_body() { # We'll copy foo to a nonexistent subdirectory; ideally, we would # skip just the directory and end up with a layout like; # # foo/ # bar # dne/ # bar # zoo # zoo # mkdir foo echo "qux" > foo/bar cp foo/bar foo/zoo atf_check cp -R foo foo/dne atf_check -o inline:"qux\n" cat foo/dne/bar atf_check -o inline:"qux\n" cat foo/dne/zoo atf_check -e not-empty -s not-exit:0 stat foo/dne/foo } recursive_link_setup() { extra_cpflag=$1 mkdir -p foo/bar ln -s bar foo/baz mkdir foo-mirror eval "cp -R $extra_cpflag foo foo-mirror" } atf_test_case recursive_link_dflt recursive_link_dflt_body() { recursive_link_setup # -P is the default, so this should work and preserve the link. atf_check cp -R foo foo-mirror atf_check test -L foo-mirror/foo/baz } atf_test_case recursive_link_Hflag recursive_link_Hflag_body() { recursive_link_setup # -H will not follow either, so this should also work and preserve the # link. atf_check cp -RH foo foo-mirror atf_check test -L foo-mirror/foo/baz } atf_test_case recursive_link_Lflag recursive_link_Lflag_body() { recursive_link_setup -L # -L will work, but foo/baz ends up expanded to a directory. atf_check test -d foo-mirror/foo/baz -a \ '(' ! -L foo-mirror/foo/baz ')' atf_check cp -RL foo foo-mirror atf_check test -d foo-mirror/foo/baz -a \ '(' ! -L foo-mirror/foo/baz ')' } +file_is_sparse() +{ + atf_check ${0%/*}/sparse "$1" +} + +files_are_equal() +{ + atf_check test "$(stat -f "%d %i" "$1")" != "$(stat -f "%d %i" "$2")" + atf_check cmp "$1" "$2" +} + +atf_test_case sparse_leading_hole +sparse_leading_hole_body() +{ + # A 16-megabyte hole followed by one megabyte of data + truncate -s 16M foo + seq -f%015g 65536 >>foo + file_is_sparse foo + + atf_check cp foo bar + files_are_equal foo bar + file_is_sparse bar +} + +atf_test_case sparse_multiple_holes +sparse_multiple_holes_body() +{ + # Three one-megabyte blocks of data preceded, separated, and + # followed by 16-megabyte holes + truncate -s 16M foo + seq -f%015g 65536 >>foo + truncate -s 33M foo + seq -f%015g 65536 >>foo + truncate -s 50M foo + seq -f%015g 65536 >>foo + truncate -s 67M foo + file_is_sparse foo + + atf_check cp foo bar + files_are_equal foo bar + file_is_sparse bar +} + +atf_test_case sparse_only_hole +sparse_only_hole_body() +{ + # A 16-megabyte hole + truncate -s 16M foo + file_is_sparse foo + + atf_check cp foo bar + files_are_equal foo bar + file_is_sparse bar +} + +atf_test_case sparse_to_dev +sparse_to_dev_body() +{ + # Three one-megabyte blocks of data preceded, separated, and + # followed by 16-megabyte holes + truncate -s 16M foo + seq -f%015g 65536 >>foo + truncate -s 33M foo + seq -f%015g 65536 >>foo + truncate -s 50M foo + seq -f%015g 65536 >>foo + truncate -s 67M foo + file_is_sparse foo + + atf_check -o file:foo cp foo /dev/stdout +} + +atf_test_case sparse_trailing_hole +sparse_trailing_hole_body() +{ + # One megabyte of data followed by a 16-megabyte hole + seq -f%015g 65536 >foo + truncate -s 17M foo + file_is_sparse foo + + atf_check cp foo bar + files_are_equal foo bar + file_is_sparse bar +} + atf_test_case standalone_Pflag standalone_Pflag_body() { echo "foo" > bar ln -s bar foo atf_check cp -P foo baz atf_check -o inline:'Symbolic Link\n' stat -f %SHT baz } atf_init_test_cases() { atf_add_test_case basic atf_add_test_case basic_symlink atf_add_test_case chrdev atf_add_test_case matching_srctgt atf_add_test_case matching_srctgt_contained atf_add_test_case matching_srctgt_link atf_add_test_case matching_srctgt_nonexistent atf_add_test_case recursive_link_dflt atf_add_test_case recursive_link_Hflag atf_add_test_case recursive_link_Lflag + atf_add_test_case sparse_leading_hole + atf_add_test_case sparse_multiple_holes + atf_add_test_case sparse_only_hole + atf_add_test_case sparse_to_dev + atf_add_test_case sparse_trailing_hole atf_add_test_case standalone_Pflag } diff --git a/bin/cp/tests/sparse.c b/bin/cp/tests/sparse.c new file mode 100644 index 000000000000..78957581a56c --- /dev/null +++ b/bin/cp/tests/sparse.c @@ -0,0 +1,73 @@ +/*- + * Copyright (c) 2023 Klara, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include + +static bool verbose; + +/* + * Returns true if the file named by its argument is sparse, i.e. if + * seeking to SEEK_HOLE returns a different value than seeking to + * SEEK_END. + */ +static bool +sparse(const char *filename) +{ + off_t hole, end; + int fd; + + if ((fd = open(filename, O_RDONLY)) < 0 || + (hole = lseek(fd, 0, SEEK_HOLE)) < 0 || + (end = lseek(fd, 0, SEEK_END)) < 0) + err(1, "%s", filename); + close(fd); + if (end > hole) { + if (verbose) + printf("%s: hole at %zu\n", filename, (size_t)hole); + return (true); + } + return (false); +} + +static void +usage(void) +{ + + fprintf(stderr, "usage: sparse [-v] file [...]\n"); + exit(EX_USAGE); +} + +int +main(int argc, char *argv[]) +{ + int opt, rv; + + while ((opt = getopt(argc, argv, "v")) != -1) { + switch (opt) { + case 'v': + verbose = true; + break; + default: + usage(); + break; + } + } + argc -= optind; + argv += optind; + if (argc == 0) + usage(); + rv = EXIT_SUCCESS; + while (argc-- > 0) + if (!sparse(*argv++)) + rv = EXIT_FAILURE; + exit(rv); +} diff --git a/bin/cp/utils.c b/bin/cp/utils.c index 07de0495ba9e..8c1c350ff6f1 100644 --- a/bin/cp/utils.c +++ b/bin/cp/utils.c @@ -1,579 +1,522 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)utils.c 8.3 (Berkeley) 4/1/94"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); -#include -#include #include +#include #include -#ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED -#include -#endif #include #include #include #include #include #include #include #include #include #include "extern.h" #define cp_pct(x, y) ((y == 0) ? 0 : (int)(100.0 * (x) / (y))) /* * Memory strategy threshold, in pages: if physmem is larger then this, use a * large buffer. */ #define PHYSPAGES_THRESHOLD (32*1024) /* Maximum buffer size in bytes - do not allow it to grow larger than this. */ #define BUFSIZE_MAX (2*1024*1024) /* * Small (default) buffer size in bytes. It's inefficient for this to be * smaller than MAXPHYS. */ #define BUFSIZE_SMALL (MAXPHYS) static ssize_t -copy_fallback(int from_fd, int to_fd, char *buf, size_t bufsize) +copy_fallback(int from_fd, int to_fd) { + static char *buf = NULL; + static size_t bufsize; ssize_t rcount, wresid, wcount = 0; char *bufp; + if (buf == NULL) { + if (sysconf(_SC_PHYS_PAGES) > PHYSPAGES_THRESHOLD) + bufsize = MIN(BUFSIZE_MAX, MAXPHYS * 8); + else + bufsize = BUFSIZE_SMALL; + buf = malloc(bufsize); + if (buf == NULL) + err(1, "Not enough memory"); + } rcount = read(from_fd, buf, bufsize); if (rcount <= 0) return (rcount); for (bufp = buf, wresid = rcount; ; bufp += wcount, wresid -= wcount) { wcount = write(to_fd, bufp, wresid); if (wcount <= 0) break; if (wcount >= (ssize_t)wresid) break; } return (wcount < 0 ? wcount : rcount); } int copy_file(const FTSENT *entp, int dne) { - static char *buf = NULL; - static size_t bufsize; struct stat *fs; ssize_t wcount; off_t wtotal; int ch, checkch, from_fd, rval, to_fd; -#ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED - size_t wresid; - char *bufp, *p; -#endif int use_copy_file_range = 1; from_fd = to_fd = -1; if (!lflag && !sflag && (from_fd = open(entp->fts_path, O_RDONLY, 0)) == -1) { warn("%s", entp->fts_path); return (1); } fs = entp->fts_statp; /* * If the file exists and we're interactive, verify with the user. * If the file DNE, set the mode to be the from file, minus setuid * bits, modified by the umask; arguably wrong, but it makes copying * executables work right and it's been that way forever. (The * other choice is 666 or'ed with the execute bits on the from file * modified by the umask.) */ if (!dne) { #define YESNO "(y/n [n]) " if (nflag) { if (vflag) printf("%s not overwritten\n", to.p_path); rval = 1; goto done; } else if (iflag) { (void)fprintf(stderr, "overwrite %s? %s", to.p_path, YESNO); checkch = ch = getchar(); while (ch != '\n' && ch != EOF) ch = getchar(); if (checkch != 'y' && checkch != 'Y') { (void)fprintf(stderr, "not overwritten\n"); rval = 1; goto done; } } if (fflag) { /* * Remove existing destination file name create a new * file. */ (void)unlink(to.p_path); if (!lflag && !sflag) { to_fd = open(to.p_path, O_WRONLY | O_TRUNC | O_CREAT, fs->st_mode & ~(S_ISUID | S_ISGID)); } } else if (!lflag && !sflag) { /* Overwrite existing destination file name. */ to_fd = open(to.p_path, O_WRONLY | O_TRUNC, 0); } } else if (!lflag && !sflag) { to_fd = open(to.p_path, O_WRONLY | O_TRUNC | O_CREAT, fs->st_mode & ~(S_ISUID | S_ISGID)); } if (!lflag && !sflag && to_fd == -1) { warn("%s", to.p_path); rval = 1; goto done; } rval = 0; if (!lflag && !sflag) { - /* - * Mmap and write if less than 8M (the limit is so we don't - * totally trash memory on big files. This is really a minor - * hack, but it wins some CPU back. - * Some filesystems, such as smbnetfs, don't support mmap, - * so this is a best-effort attempt. - */ -#ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED - if (S_ISREG(fs->st_mode) && fs->st_size > 0 && - fs->st_size <= 8 * 1024 * 1024 && - (p = mmap(NULL, (size_t)fs->st_size, PROT_READ, - MAP_SHARED, from_fd, (off_t)0)) != MAP_FAILED) { - wtotal = 0; - for (bufp = p, wresid = fs->st_size; ; - bufp += wcount, wresid -= (size_t)wcount) { - wcount = write(to_fd, bufp, wresid); - if (wcount <= 0) - break; - wtotal += wcount; - if (info) { - info = 0; - (void)fprintf(stderr, - "%s -> %s %3d%%\n", - entp->fts_path, to.p_path, - cp_pct(wtotal, fs->st_size)); + wtotal = 0; + do { + if (use_copy_file_range) { + wcount = copy_file_range(from_fd, NULL, + to_fd, NULL, SSIZE_MAX, 0); + if (wcount < 0 && errno == EINVAL) { + /* Prob a non-seekable FD */ + use_copy_file_range = 0; } - if (wcount >= (ssize_t)wresid) - break; } - if (wcount != (ssize_t)wresid) { - warn("%s", to.p_path); - rval = 1; - } - /* Some systems don't unmap on close(2). */ - if (munmap(p, fs->st_size) < 0) { - warn("%s", entp->fts_path); - rval = 1; - } - } else -#endif - { - if (buf == NULL) { - /* - * Note that buf and bufsize are static. If - * malloc() fails, it will fail at the start - * and not copy only some files. - */ - if (sysconf(_SC_PHYS_PAGES) > - PHYSPAGES_THRESHOLD) - bufsize = MIN(BUFSIZE_MAX, MAXPHYS * 8); - else - bufsize = BUFSIZE_SMALL; - buf = malloc(bufsize); - if (buf == NULL) - err(1, "Not enough memory"); + if (!use_copy_file_range) { + wcount = copy_fallback(from_fd, to_fd); } - wtotal = 0; - do { - if (use_copy_file_range) { - wcount = copy_file_range(from_fd, NULL, - to_fd, NULL, SSIZE_MAX, 0); - if (wcount < 0 && errno == EINVAL) { - /* Prob a non-seekable FD */ - use_copy_file_range = 0; - } - } - if (!use_copy_file_range) { - wcount = copy_fallback(from_fd, to_fd, - buf, bufsize); - } - wtotal += wcount; - if (info) { - info = 0; - (void)fprintf(stderr, - "%s -> %s %3d%%\n", - entp->fts_path, to.p_path, - cp_pct(wtotal, fs->st_size)); - } - } while (wcount > 0); - if (wcount < 0) { - warn("%s", entp->fts_path); - rval = 1; + wtotal += wcount; + if (info) { + info = 0; + (void)fprintf(stderr, + "%s -> %s %3d%%\n", + entp->fts_path, to.p_path, + cp_pct(wtotal, fs->st_size)); } + } while (wcount > 0); + if (wcount < 0) { + warn("%s", entp->fts_path); + rval = 1; } } else if (lflag) { if (link(entp->fts_path, to.p_path)) { warn("%s", to.p_path); rval = 1; } } else if (sflag) { if (symlink(entp->fts_path, to.p_path)) { warn("%s", to.p_path); rval = 1; } } /* * Don't remove the target even after an error. The target might * not be a regular file, or its attributes might be important, * or its contents might be irreplaceable. It would only be safe * to remove it if we created it and its length is 0. */ if (!lflag && !sflag) { if (pflag && setfile(fs, to_fd)) rval = 1; if (pflag && preserve_fd_acls(from_fd, to_fd) != 0) rval = 1; if (close(to_fd)) { warn("%s", to.p_path); rval = 1; } } done: if (from_fd != -1) (void)close(from_fd); return (rval); } int copy_link(const FTSENT *p, int exists) { - int len; + ssize_t len; char llink[PATH_MAX]; if (exists && nflag) { if (vflag) printf("%s not overwritten\n", to.p_path); return (1); } if ((len = readlink(p->fts_path, llink, sizeof(llink) - 1)) == -1) { warn("readlink: %s", p->fts_path); return (1); } llink[len] = '\0'; if (exists && unlink(to.p_path)) { warn("unlink: %s", to.p_path); return (1); } if (symlink(llink, to.p_path)) { warn("symlink: %s", llink); return (1); } return (pflag ? setfile(p->fts_statp, -1) : 0); } int copy_fifo(struct stat *from_stat, int exists) { if (exists && nflag) { if (vflag) printf("%s not overwritten\n", to.p_path); return (1); } if (exists && unlink(to.p_path)) { warn("unlink: %s", to.p_path); return (1); } if (mkfifo(to.p_path, from_stat->st_mode)) { warn("mkfifo: %s", to.p_path); return (1); } return (pflag ? setfile(from_stat, -1) : 0); } int copy_special(struct stat *from_stat, int exists) { if (exists && nflag) { if (vflag) printf("%s not overwritten\n", to.p_path); return (1); } if (exists && unlink(to.p_path)) { warn("unlink: %s", to.p_path); return (1); } if (mknod(to.p_path, from_stat->st_mode, from_stat->st_rdev)) { warn("mknod: %s", to.p_path); return (1); } return (pflag ? setfile(from_stat, -1) : 0); } int setfile(struct stat *fs, int fd) { static struct timespec tspec[2]; struct stat ts; int rval, gotstat, islink, fdval; rval = 0; fdval = fd != -1; islink = !fdval && S_ISLNK(fs->st_mode); fs->st_mode &= S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO; tspec[0] = fs->st_atim; tspec[1] = fs->st_mtim; if (fdval ? futimens(fd, tspec) : utimensat(AT_FDCWD, to.p_path, tspec, islink ? AT_SYMLINK_NOFOLLOW : 0)) { warn("utimensat: %s", to.p_path); rval = 1; } if (fdval ? fstat(fd, &ts) : (islink ? lstat(to.p_path, &ts) : stat(to.p_path, &ts))) gotstat = 0; else { gotstat = 1; ts.st_mode &= S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO; } /* * Changing the ownership probably won't succeed, unless we're root * or POSIX_CHOWN_RESTRICTED is not set. Set uid/gid before setting * the mode; current BSD behavior is to remove all setuid bits on * chown. If chown fails, lose setuid/setgid bits. */ if (!gotstat || fs->st_uid != ts.st_uid || fs->st_gid != ts.st_gid) if (fdval ? fchown(fd, fs->st_uid, fs->st_gid) : (islink ? lchown(to.p_path, fs->st_uid, fs->st_gid) : chown(to.p_path, fs->st_uid, fs->st_gid))) { if (errno != EPERM) { warn("chown: %s", to.p_path); rval = 1; } fs->st_mode &= ~(S_ISUID | S_ISGID); } if (!gotstat || fs->st_mode != ts.st_mode) if (fdval ? fchmod(fd, fs->st_mode) : (islink ? lchmod(to.p_path, fs->st_mode) : chmod(to.p_path, fs->st_mode))) { warn("chmod: %s", to.p_path); rval = 1; } if (!gotstat || fs->st_flags != ts.st_flags) if (fdval ? fchflags(fd, fs->st_flags) : (islink ? lchflags(to.p_path, fs->st_flags) : chflags(to.p_path, fs->st_flags))) { warn("chflags: %s", to.p_path); rval = 1; } return (rval); } int preserve_fd_acls(int source_fd, int dest_fd) { acl_t acl; acl_type_t acl_type; int acl_supported = 0, ret, trivial; ret = fpathconf(source_fd, _PC_ACL_NFS4); if (ret > 0 ) { acl_supported = 1; acl_type = ACL_TYPE_NFS4; } else if (ret < 0 && errno != EINVAL) { warn("fpathconf(..., _PC_ACL_NFS4) failed for %s", to.p_path); return (1); } if (acl_supported == 0) { ret = fpathconf(source_fd, _PC_ACL_EXTENDED); if (ret > 0 ) { acl_supported = 1; acl_type = ACL_TYPE_ACCESS; } else if (ret < 0 && errno != EINVAL) { warn("fpathconf(..., _PC_ACL_EXTENDED) failed for %s", to.p_path); return (1); } } if (acl_supported == 0) return (0); acl = acl_get_fd_np(source_fd, acl_type); if (acl == NULL) { warn("failed to get acl entries while setting %s", to.p_path); return (1); } if (acl_is_trivial_np(acl, &trivial)) { warn("acl_is_trivial() failed for %s", to.p_path); acl_free(acl); return (1); } if (trivial) { acl_free(acl); return (0); } if (acl_set_fd_np(dest_fd, acl, acl_type) < 0) { warn("failed to set acl entries for %s", to.p_path); acl_free(acl); return (1); } acl_free(acl); return (0); } int preserve_dir_acls(struct stat *fs, char *source_dir, char *dest_dir) { acl_t (*aclgetf)(const char *, acl_type_t); int (*aclsetf)(const char *, acl_type_t, acl_t); struct acl *aclp; acl_t acl; acl_type_t acl_type; int acl_supported = 0, ret, trivial; ret = pathconf(source_dir, _PC_ACL_NFS4); if (ret > 0) { acl_supported = 1; acl_type = ACL_TYPE_NFS4; } else if (ret < 0 && errno != EINVAL) { warn("fpathconf(..., _PC_ACL_NFS4) failed for %s", source_dir); return (1); } if (acl_supported == 0) { ret = pathconf(source_dir, _PC_ACL_EXTENDED); if (ret > 0) { acl_supported = 1; acl_type = ACL_TYPE_ACCESS; } else if (ret < 0 && errno != EINVAL) { warn("fpathconf(..., _PC_ACL_EXTENDED) failed for %s", source_dir); return (1); } } if (acl_supported == 0) return (0); /* * If the file is a link we will not follow it. */ if (S_ISLNK(fs->st_mode)) { aclgetf = acl_get_link_np; aclsetf = acl_set_link_np; } else { aclgetf = acl_get_file; aclsetf = acl_set_file; } if (acl_type == ACL_TYPE_ACCESS) { /* * Even if there is no ACL_TYPE_DEFAULT entry here, a zero * size ACL will be returned. So it is not safe to simply * check the pointer to see if the default ACL is present. */ acl = aclgetf(source_dir, ACL_TYPE_DEFAULT); if (acl == NULL) { warn("failed to get default acl entries on %s", source_dir); return (1); } aclp = &acl->ats_acl; if (aclp->acl_cnt != 0 && aclsetf(dest_dir, ACL_TYPE_DEFAULT, acl) < 0) { warn("failed to set default acl entries on %s", dest_dir); acl_free(acl); return (1); } acl_free(acl); } acl = aclgetf(source_dir, acl_type); if (acl == NULL) { warn("failed to get acl entries on %s", source_dir); return (1); } if (acl_is_trivial_np(acl, &trivial)) { warn("acl_is_trivial() failed on %s", source_dir); acl_free(acl); return (1); } if (trivial) { acl_free(acl); return (0); } if (aclsetf(dest_dir, acl_type, acl) < 0) { warn("failed to set acl entries on %s", dest_dir); acl_free(acl); return (1); } acl_free(acl); return (0); } void usage(void) { (void)fprintf(stderr, "%s\n%s\n", "usage: cp [-R [-H | -L | -P]] [-f | -i | -n] [-alpsvx] " "source_file target_file", " cp [-R [-H | -L | -P]] [-f | -i | -n] [-alpsvx] " "source_file ... " "target_directory"); exit(EX_USAGE); }