cp: add -S to preserve holes when copying sparse files

With -S, when the source is a regular file and the destination is
seekable, cp walks the source with lseek(2) SEEK_DATA / SEEK_HOLE and
copies only the data regions (copy_holes).  Otherwise it copies the
file front to back without seeking (copy_linear).

Notes on this revision:
- copy_fallback() decrements len after each read so that it stops after
  the requested number of bytes instead of running on to end of file.
- The mmap path is used again when no hole information was gathered.
- The file_is_sparse test helper compares allocated bytes to file size.
- seq(1) is given its operand in the multiple-hole tests.

diff --git a/bin/cp/cp.1 b/bin/cp/cp.1
--- a/bin/cp/cp.1
+++ b/bin/cp/cp.1
@@ -32,7 +32,7 @@
 .\"	@(#)cp.1	8.3 (Berkeley) 4/18/94
 .\" $FreeBSD$
 .\"
-.Dd February 23, 2022
+.Dd January 25, 2023
 .Dt CP 1
 .Os
 .Sh NAME
@@ -45,7 +45,7 @@
 .Op Fl H | Fl L | Fl P
 .Oc
 .Op Fl f | i | n
-.Op Fl alpsvx
+.Op Fl alpSsvx
 .Ar source_file target_file
 .Nm
 .Oo
@@ -53,15 +53,15 @@
 .Op Fl H | Fl L | Fl P
 .Oc
 .Op Fl f | i | n
-.Op Fl alpsvx
+.Op Fl alpSsvx
 .Ar source_file ... target_directory
 .Nm
 .Op Fl f | i | n
-.Op Fl alPpsvx
+.Op Fl alPpSsvx
 .Ar source_file target_file
 .Nm
 .Op Fl f | i | n
-.Op Fl alPpsvx
+.Op Fl alPpSsvx
 .Ar source_file ... target_directory
 .Sh DESCRIPTION
 In the first synopsis form, the
@@ -187,6 +187,8 @@
 and either the user ID or group ID cannot be preserved, neither
 the set-user-ID nor set-group-ID bits are preserved in the copy's
 permissions.
+.It Fl S
+If the source file is sparse, attempt to preserve its holes.
 .It Fl s
 Create symbolic links to regular files in a hierarchy instead of copying.
 .It Fl v
diff --git a/bin/cp/cp.c b/bin/cp/cp.c
--- a/bin/cp/cp.c
+++ b/bin/cp/cp.c
@@ -86,7 +86,7 @@
 
 PATH_T to = { to.p_path, emptystring, "" };
 
-int fflag, iflag, lflag, nflag, pflag, sflag, vflag;
+int fflag, iflag, lflag, nflag, pflag, Sflag, sflag, vflag;
 static int Hflag, Lflag, Rflag, rflag;
 volatile sig_atomic_t info;
 
@@ -105,7 +105,7 @@
 
 	fts_options = FTS_NOCHDIR | FTS_PHYSICAL;
 	Pflag = 0;
-	while ((ch = getopt(argc, argv, "HLPRafilnprsvx")) != -1)
+	while ((ch = getopt(argc, argv, "HLPRafilnprSsvx")) != -1)
 		switch (ch) {
 		case 'H':
 			Hflag = 1;
@@ -150,6 +150,9 @@
 			rflag = Lflag = 1;
 			Hflag = Pflag = 0;
 			break;
+		case 'S':
+			Sflag = 1;
+			break;
 		case 's':
 			sflag = 1;
 			break;
diff --git a/bin/cp/extern.h b/bin/cp/extern.h
--- a/bin/cp/extern.h
+++ b/bin/cp/extern.h
@@ -39,7 +39,7 @@
 } PATH_T;
 
 extern PATH_T to;
-extern int fflag, iflag, lflag, nflag, pflag, sflag, vflag;
+extern int fflag, iflag, lflag, nflag, pflag, Sflag, sflag, vflag;
 extern volatile sig_atomic_t info;
 
 __BEGIN_DECLS
diff --git a/bin/cp/tests/cp_test.sh b/bin/cp/tests/cp_test.sh
--- a/bin/cp/tests/cp_test.sh
+++ b/bin/cp/tests/cp_test.sh
@@ -209,6 +209,110 @@
 	atf_check -o inline:'Symbolic Link\n' stat -f %SHT baz
 }
 
+file_is_sparse()
+{
+	atf_check test "$(($(stat -f %b "$1") * 512))" -lt "$(stat -f %z "$1")"
+}
+
+files_are_equal()
+{
+	atf_check test "$(stat -f "%d %i" "$1")" != "$(stat -f "%d %i" "$2")"
+	atf_check cmp "$1" "$2"
+}
+
+atf_test_case sparse_empty
+sparse_empty_body()
+{
+	# Empty file
+	touch foo
+	atf_check cp -S foo bar
+
+	files_are_equal foo bar
+}
+
+atf_test_case sparse_no_holes
+sparse_no_holes_body()
+{
+	# One megabyte of data
+	seq -f%015g 65536 >foo
+
+	atf_check cp -S foo bar
+	files_are_equal foo bar
+}
+
+atf_test_case sparse_only_hole
+sparse_only_hole_body()
+{
+	# A one-megabyte hole
+	truncate -s 1M foo
+
+	atf_check cp -S foo bar
+	files_are_equal foo bar
+	file_is_sparse bar
+}
+
+atf_test_case sparse_leading_hole
+sparse_leading_hole_body()
+{
+	# A one-megabyte hole followed by one megabyte of data
+	truncate -s 1M foo
+	seq -f%015g 65536 >>foo
+	file_is_sparse foo
+
+	atf_check cp -S foo bar
+	files_are_equal foo bar
+	file_is_sparse bar
+}
+
+atf_test_case sparse_trailing_hole
+sparse_trailing_hole_body()
+{
+	# One megabyte of data followed by a one-megabyte hole
+	seq -f%015g 65536 >foo
+	truncate -s 2M foo
+	file_is_sparse foo
+
+	atf_check cp -S foo bar
+	files_are_equal foo bar
+	file_is_sparse bar
+}
+
+atf_test_case sparse_multiple_holes
+sparse_multiple_holes_body()
+{
+	# Three one-megabyte blocks of data preceded, separated, and
+	# followed by one-megabyte holes
+	truncate -s 1M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 3M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 5M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 7M foo
+	file_is_sparse foo
+
+	atf_check cp -S foo bar
+	files_are_equal foo bar
+	file_is_sparse bar
+}
+
+atf_test_case sparse_to_dev
+sparse_to_dev_body()
+{
+	# Three one-megabyte blocks of data preceded, separated, and
+	# followed by one-megabyte holes
+	truncate -s 1M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 3M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 5M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 7M foo
+	file_is_sparse foo
+
+	atf_check -o file:foo cp -S foo /dev/stdout
+}
+
 atf_init_test_cases()
 {
 	atf_add_test_case basic
@@ -222,4 +326,11 @@
 	atf_add_test_case recursive_link_Hflag
 	atf_add_test_case recursive_link_Lflag
 	atf_add_test_case standalone_Pflag
+	atf_add_test_case sparse_empty
+	atf_add_test_case sparse_no_holes
+	atf_add_test_case sparse_only_hole
+	atf_add_test_case sparse_leading_hole
+	atf_add_test_case sparse_trailing_hole
+	atf_add_test_case sparse_multiple_holes
+	atf_add_test_case sparse_to_dev
 }
diff --git a/bin/cp/utils.c b/bin/cp/utils.c
--- a/bin/cp/utils.c
+++ b/bin/cp/utils.c
@@ -75,38 +75,165 @@
 #define BUFSIZE_SMALL (MAXPHYS)
 
+/*
+ * Copy up to len bytes from one descriptor to another using read(2)
+ * and write(2).  Returns the number of bytes written, or -1 on error.
+ * The buffer is allocated on first use and kept for later calls.
+ */
 static ssize_t
-copy_fallback(int from_fd, int to_fd, char *buf, size_t bufsize)
+copy_fallback(int from_fd, int to_fd, size_t len)
 {
-	ssize_t rcount, wresid, wcount = 0;
+	static char *buf;
+	static size_t bufsize;
+	ssize_t rcount, wcount, wtotal;
+	size_t rsize;
 	char *bufp;
 
-	rcount = read(from_fd, buf, bufsize);
-	if (rcount <= 0)
-		return (rcount);
-	for (bufp = buf, wresid = rcount; ; bufp += wcount, wresid -= wcount) {
-		wcount = write(to_fd, bufp, wresid);
-		if (wcount <= 0)
+	if (buf == NULL) {
+		if (sysconf(_SC_PHYS_PAGES) > PHYSPAGES_THRESHOLD)
+			bufsize = MIN(BUFSIZE_MAX, MAXPHYS * 8);
+		else
+			bufsize = BUFSIZE_SMALL;
+		buf = malloc(bufsize);
+		if (buf == NULL)
+			err(1, "Not enough memory");
+	}
+	wtotal = 0;
+	while (len > 0) {
+		rsize = len < bufsize ? len : bufsize;
+		rcount = read(from_fd, buf, rsize);
+		if (rcount < 0)
+			return (-1);
+		if (rcount == 0)
 			break;
-		if (wcount >= (ssize_t)wresid)
+		/* Never copy more than the caller asked for. */
+		len -= (size_t)rcount;
+		for (bufp = buf; rcount > 0; bufp += wcount, rcount -= wcount) {
+			wcount = write(to_fd, bufp, rcount);
+			if (wcount < 0)
+				return (-1);
+			if (wcount == 0)
+				break;
+			wtotal += wcount;
+		}
+	}
+	return (wtotal);
+}
+
+/*
+ * Copy from one descriptor to another without ever seeking.
+ */
+static int
+copy_linear(const FTSENT *entp, int from_fd, int to_fd)
+{
+	struct stat *fs = entp->fts_statp;
+	off_t wtotal = 0;
+	ssize_t wcount;
+	int rval = 0, use_copy_file_range = 1;
+
+	do {
+		if (use_copy_file_range) {
+			wcount = copy_file_range(from_fd, NULL,
+			    to_fd, NULL, SSIZE_MAX, 0);
+			if (wcount < 0 && errno == EINVAL) {
+				/* Prob a non-seekable FD */
+				use_copy_file_range = 0;
+			}
+		}
+		if (!use_copy_file_range) {
+			wcount = copy_fallback(from_fd, to_fd,
+			    SSIZE_MAX);
+		}
+		if (wcount < 0)
 			break;
+		wtotal += wcount;
+		if (info) {
+			info = 0;
+			(void)fprintf(stderr,
+			    "%s -> %s %3d%%\n",
+			    entp->fts_path, to.p_path,
+			    cp_pct(wtotal, fs->st_size));
+		}
+	} while (wcount > 0);
+	if (wcount < 0) {
+		warn("%s", entp->fts_path);
+		rval = 1;
 	}
-	return (wcount < 0 ? wcount : rcount);
+	return (rval);
+}
+
+/*
+ * Copy one file to another, attempting to reproduce any holes found in
+ * the original.  No attempt is made to convert zeroes to holes.
+ */
+static int
+copy_holes(const FTSENT *entp, int from_fd, int to_fd)
+{
+	struct stat *fs = entp->fts_statp;
+	off_t from_off = 0, to_off = 0, wtotal = 0;
+	off_t data, hole, end;
+	ssize_t wcount;
+	int use_copy_file_range = 1;
+
+	do {
+		if ((data = lseek(from_fd, from_off, SEEK_DATA)) < 0) {
+			if (errno != ENXIO)
+				goto fail;
+			/* hole at end of file, truncate destination */
+			if ((end = lseek(from_fd, 0, SEEK_END)) < 0 ||
+			    ftruncate(to_fd, end) != 0)
+				goto fail;
+			return (0);
+		}
+		wtotal += data - from_off;
+		from_off = data;
+		if ((hole = lseek(from_fd, data, SEEK_HOLE)) < 0)
+			goto fail;
+		to_off = data;
+		if (use_copy_file_range) {
+			wcount = copy_file_range(from_fd, &from_off,
+			    to_fd, &to_off, hole - data, 0);
+			if (wcount < 0 && errno == EINVAL)
+				use_copy_file_range = 0;
+		}
+		if (!use_copy_file_range) {
+			if (lseek(from_fd, from_off, SEEK_SET) < 0 ||
+			    lseek(to_fd, to_off, SEEK_SET) < 0)
+				goto fail;
+			wcount = copy_fallback(from_fd, to_fd, hole - data);
+			if (wcount > 0) {
+				from_off += wcount;
+				to_off += wcount;
+			}
+		}
+		if (wcount < 0)
+			goto fail;
+		wtotal += wcount;
+		if (info) {
+			info = 0;
+			(void)fprintf(stderr,
+			    "%s -> %s %3d%%\n",
+			    entp->fts_path, to.p_path,
+			    cp_pct(wtotal, fs->st_size));
+		}
+	} while (wcount > 0);
+	return (0);
+fail:
+	warn("%s", entp->fts_path);
+	return (1);
 }
 
 int
 copy_file(const FTSENT *entp, int dne)
 {
-	static char *buf = NULL;
-	static size_t bufsize;
 	struct stat *fs;
 	ssize_t wcount;
 	off_t wtotal;
+	off_t hole;
 	int ch, checkch, from_fd, rval, to_fd;
 #ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED
 	size_t wresid;
 	char *bufp, *p;
 #endif
-	int use_copy_file_range = 1;
 
 	from_fd = to_fd = -1;
 	if (!lflag && !sflag &&
@@ -171,6 +298,15 @@
 			goto done;
 	}
 
+	/*
+	 * If asked to handle sparse files, the source is a regular file,
+	 * and the destination is seekable, find the first hole in the
+	 * source.
+	 */
+	hole = -1;
+	if (Sflag && S_ISREG(fs->st_mode) && lseek(to_fd, 0, SEEK_SET) == 0)
+		hole = lseek(from_fd, 0, SEEK_HOLE);
+
 	rval = 0;
 
 	if (!lflag && !sflag) {
@@ -180,10 +316,12 @@
 		 * hack, but it wins some CPU back.
 		 * Some filesystems, such as smbnetfs, don't support mmap,
 		 * so this is a best-effort attempt.
+		 * Don't mmap if the file is known to be sparse.
 		 */
 #ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED
 		if (S_ISREG(fs->st_mode) && fs->st_size > 0 &&
 		    fs->st_size <= 8 * 1024 * 1024 &&
+		    (hole < 0 || hole == fs->st_size) &&
 		    (p = mmap(NULL, (size_t)fs->st_size, PROT_READ,
 		    MAP_SHARED, from_fd, (off_t)0)) != MAP_FAILED) {
 			wtotal = 0;
@@ -215,48 +353,10 @@
 		} else
 #endif
 		{
-			if (buf == NULL) {
-				/*
-				 * Note that buf and bufsize are static. If
-				 * malloc() fails, it will fail at the start
-				 * and not copy only some files.
-				 */
-				if (sysconf(_SC_PHYS_PAGES) >
-				    PHYSPAGES_THRESHOLD)
-					bufsize = MIN(BUFSIZE_MAX, MAXPHYS * 8);
-				else
-					bufsize = BUFSIZE_SMALL;
-				buf = malloc(bufsize);
-				if (buf == NULL)
-					err(1, "Not enough memory");
-			}
-			wtotal = 0;
-			do {
-				if (use_copy_file_range) {
-					wcount = copy_file_range(from_fd, NULL,
-					    to_fd, NULL, SSIZE_MAX, 0);
-					if (wcount < 0 && errno == EINVAL) {
-						/* Prob a non-seekable FD */
-						use_copy_file_range = 0;
-					}
-				}
-				if (!use_copy_file_range) {
-					wcount = copy_fallback(from_fd, to_fd,
-					    buf, bufsize);
-				}
-				wtotal += wcount;
-				if (info) {
-					info = 0;
-					(void)fprintf(stderr,
-					    "%s -> %s %3d%%\n",
-					    entp->fts_path, to.p_path,
-					    cp_pct(wtotal, fs->st_size));
-				}
-			} while (wcount > 0);
-			if (wcount < 0) {
-				warn("%s", entp->fts_path);
-				rval = 1;
-			}
+			if (hole < 0)
+				rval = copy_linear(entp, from_fd, to_fd);
+			else
+				rval = copy_holes(entp, from_fd, to_fd);
 		}
 	} else if (lflag) {
 		if (link(entp->fts_path, to.p_path)) {
@@ -570,9 +670,9 @@
 {
 
 	(void)fprintf(stderr, "%s\n%s\n",
-	    "usage: cp [-R [-H | -L | -P]] [-f | -i | -n] [-alpsvx] "
+	    "usage: cp [-R [-H | -L | -P]] [-f | -i | -n] [-alpSsvx] "
 	    "source_file target_file",
-	    "       cp [-R [-H | -L | -P]] [-f | -i | -n] [-alpsvx] "
+	    "       cp [-R [-H | -L | -P]] [-f | -i | -n] [-alpSsvx] "
 	    "source_file ... "
 	    "target_directory");
 	exit(EX_USAGE);