Index: head/bin/pax/ar_io.c =================================================================== --- head/bin/pax/ar_io.c (revision 169925) +++ head/bin/pax/ar_io.c (revision 169926) @@ -1,1292 +1,1292 @@ /*- * Copyright (c) 1992 Keith Muller. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Keith Muller of the University of California, San Diego. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint #if 0 static char sccsid[] = "@(#)ar_io.c 8.2 (Berkeley) 4/18/94"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pax.h" #include "options.h" #include "extern.h" /* * Routines which deal directly with the archive I/O device/file. */ #define DMOD 0666 /* default mode of created archives */ #define EXT_MODE O_RDONLY /* open mode for list/extract */ #define AR_MODE (O_WRONLY | O_CREAT | O_TRUNC) /* mode for archive */ #define APP_MODE O_RDWR /* mode for append */ static char none[] = ""; /* pseudo name for no file */ static char stdo[] = ""; /* pseudo name for stdout */ static char stdn[] = ""; /* pseudo name for stdin */ static int arfd = -1; /* archive file descriptor */ static int artyp = ISREG; /* archive type: file/FIFO/tape */ static int arvol = 1; /* archive volume number */ static int lstrval = -1; /* return value from last i/o */ static int io_ok; /* i/o worked on volume after resync */ static int did_io; /* did i/o ever occur on volume? */ static int done; /* set via tty termination */ static struct stat arsb; /* stat of archive device at open */ static int invld_rec; /* tape has out of spec record size */ static int wr_trail = 1; /* trailer was rewritten in append */ static int can_unlnk = 0; /* do we unlink null archives? */ const char *arcname; /* printable name of archive */ const char *gzip_program; /* name of gzip program */ static pid_t zpid = -1; /* pid of child process */ static int get_phys(void); extern sigset_t s_mask; static void ar_start_gzip(int, const char *, int); /* * ar_open() * Opens the next archive volume. Determines the type of the device and * sets up block sizes as required by the archive device and the format. * Note: we may be called with name == NULL on the first open only. 
* Return: * -1 on failure, 0 otherwise */ int ar_open(const char *name) { struct mtget mb; if (arfd != -1) (void)close(arfd); arfd = -1; can_unlnk = did_io = io_ok = invld_rec = 0; artyp = ISREG; flcnt = 0; /* * open based on overall operation mode */ switch (act) { case LIST: case EXTRACT: if (name == NULL) { arfd = STDIN_FILENO; arcname = stdn; } else if ((arfd = open(name, EXT_MODE, DMOD)) < 0) syswarn(0, errno, "Failed open to read on %s", name); if (arfd != -1 && gzip_program != NULL) ar_start_gzip(arfd, gzip_program, 0); break; case ARCHIVE: if (name == NULL) { arfd = STDOUT_FILENO; arcname = stdo; } else if ((arfd = open(name, AR_MODE, DMOD)) < 0) syswarn(0, errno, "Failed open to write on %s", name); else can_unlnk = 1; if (arfd != -1 && gzip_program != NULL) ar_start_gzip(arfd, gzip_program, 1); break; case APPND: if (name == NULL) { arfd = STDOUT_FILENO; arcname = stdo; } else if ((arfd = open(name, APP_MODE, DMOD)) < 0) syswarn(0, errno, "Failed open to read/write on %s", name); break; case COPY: /* * arfd not used in COPY mode */ arcname = none; lstrval = 1; return(0); } if (arfd < 0) return(-1); if (chdname != NULL) if (chdir(chdname) != 0) { syswarn(1, errno, "Failed chdir to %s", chdname); return(-1); } /* * set up is based on device type */ if (fstat(arfd, &arsb) < 0) { syswarn(0, errno, "Failed stat on %s", arcname); (void)close(arfd); arfd = -1; can_unlnk = 0; return(-1); } if (S_ISDIR(arsb.st_mode)) { paxwarn(0, "Cannot write an archive on top of a directory %s", arcname); (void)close(arfd); arfd = -1; can_unlnk = 0; return(-1); } if (S_ISCHR(arsb.st_mode)) artyp = ioctl(arfd, MTIOCGET, &mb) ? 
ISCHR : ISTAPE; else if (S_ISBLK(arsb.st_mode)) artyp = ISBLK; else if ((lseek(arfd, (off_t)0L, SEEK_CUR) == -1) && (errno == ESPIPE)) artyp = ISPIPE; else artyp = ISREG; /* * make sure we beyond any doubt that we only can unlink regular files * we created */ if (artyp != ISREG) can_unlnk = 0; /* * if we are writing, we are done */ if (act == ARCHIVE) { blksz = rdblksz = wrblksz; lstrval = 1; return(0); } /* * set default blksz on read. APPNDs writes rdblksz on the last volume * On all new archive volumes, we shift to wrblksz (if the user * specified one, otherwize we will continue to use rdblksz). We * must to set blocksize based on what kind of device the archive is * stored. */ switch(artyp) { case ISTAPE: /* * Tape drives come in at least two flavors. Those that support * variable sized records and those that have fixed sized * records. They must be treated differently. For tape drives * that support variable sized records, we must make large * reads to make sure we get the entire record, otherwise we * will just get the first part of the record (up to size we * asked). Tapes with fixed sized records may or may not return * multiple records in a single read. We really do not care * what the physical record size is UNLESS we are going to * append. (We will need the physical block size to rewrite * the trailer). Only when we are appending do we go to the * effort to figure out the true PHYSICAL record size. */ blksz = rdblksz = MAXBLK; break; case ISPIPE: case ISBLK: case ISCHR: /* * Blocksize is not a major issue with these devices (but must * be kept a multiple of 512). If the user specified a write * block size, we use that to read. Under append, we must * always keep blksz == rdblksz. Otherwise we go ahead and use * the device optimal blocksize as (and if) returned by stat * and if it is within pax specs. 
*/ if ((act == APPND) && wrblksz) { blksz = rdblksz = wrblksz; break; } if ((arsb.st_blksize > 0) && (arsb.st_blksize < MAXBLK) && ((arsb.st_blksize % BLKMULT) == 0)) rdblksz = arsb.st_blksize; else rdblksz = DEVBLK; /* * For performance go for large reads when we can without harm */ if ((act == APPND) || (artyp == ISCHR)) blksz = rdblksz; else blksz = MAXBLK; break; case ISREG: /* * if the user specified wrblksz works, use it. Under appends * we must always keep blksz == rdblksz */ if ((act == APPND) && wrblksz && ((arsb.st_size%wrblksz)==0)){ blksz = rdblksz = wrblksz; break; } /* * See if we can find the blocking factor from the file size */ for (rdblksz = MAXBLK; rdblksz > 0; rdblksz -= BLKMULT) if ((arsb.st_size % rdblksz) == 0) break; /* * When we cannot find a match, we may have a flawed archive. */ if (rdblksz <= 0) rdblksz = FILEBLK; /* * for performance go for large reads when we can */ if (act == APPND) blksz = rdblksz; else blksz = MAXBLK; break; default: /* * should never happen, worse case, slow... */ blksz = rdblksz = BLKMULT; break; } lstrval = 1; return(0); } /* * ar_close() * closes archive device, increments volume number, and prints i/o summary */ void ar_close(void) { int status; if (arfd < 0) { did_io = io_ok = flcnt = 0; return; } /* * Close archive file. This may take a LONG while on tapes (we may be * forced to wait for the rewind to complete) so tell the user what is * going on (this avoids the user hitting control-c thinking pax is * broken). 
*/ if (vflag && (artyp == ISTAPE)) { if (vfpart) (void)putc('\n', listf); (void)fprintf(listf, "%s: Waiting for tape drive close to complete...", argv0); (void)fflush(listf); } /* * if nothing was written to the archive (and we created it), we remove * it */ if (can_unlnk && (fstat(arfd, &arsb) == 0) && (S_ISREG(arsb.st_mode)) && (arsb.st_size == 0)) { (void)unlink(arcname); can_unlnk = 0; } /* * for a quick extract/list, pax frequently exits before the child * process is done */ if ((act == LIST || act == EXTRACT) && nflag && zpid > 0) kill(zpid, SIGINT); (void)close(arfd); /* Do not exit before child to ensure data integrity */ if (zpid > 0) waitpid(zpid, &status, 0); if (vflag && (artyp == ISTAPE)) { (void)fputs("done.\n", listf); vfpart = 0; (void)fflush(listf); } arfd = -1; if (!io_ok && !did_io) { flcnt = 0; return; } did_io = io_ok = 0; /* * The volume number is only increased when the last device has data * and we have already determined the archive format. */ if (frmt != NULL) ++arvol; if (!vflag) { flcnt = 0; return; } /* * Print out a summary of I/O for this archive volume. */ if (vfpart) { (void)putc('\n', listf); vfpart = 0; } /* * If we have not determined the format yet, we just say how many bytes * we have skipped over looking for a header to id. there is no way we * could have written anything yet. */ if (frmt == NULL) { # ifdef NET2_STAT (void)fprintf(listf, "%s: unknown format, %lu bytes skipped.\n", argv0, rdcnt); # else (void)fprintf(listf, "%s: unknown format, %ju bytes skipped.\n", argv0, (uintmax_t)rdcnt); # endif (void)fflush(listf); flcnt = 0; return; } if (strcmp(NM_CPIO, argv0) == 0) (void)fprintf(listf, "%llu blocks\n", (unsigned long long)((rdcnt ? 
rdcnt : wrcnt) / 5120)); else if (strcmp(NM_TAR, argv0) != 0) (void)fprintf(listf, # ifdef NET2_STAT "%s: %s vol %d, %lu files, %lu bytes read, %lu bytes written.\n", argv0, frmt->name, arvol-1, flcnt, rdcnt, wrcnt); # else "%s: %s vol %d, %ju files, %ju bytes read, %ju bytes written.\n", argv0, frmt->name, arvol-1, (uintmax_t)flcnt, (uintmax_t)rdcnt, (uintmax_t)wrcnt); # endif (void)fflush(listf); flcnt = 0; } /* * ar_drain() * drain any archive format independent padding from an archive read * from a socket or a pipe. This is to prevent the process on the * other side of the pipe from getting a SIGPIPE (pax will stop * reading an archive once a format dependent trailer is detected). */ void ar_drain(void) { int res; char drbuf[MAXBLK]; /* * we only drain from a pipe/socket. Other devices can be closed * without reading up to end of file. We sure hope that pipe is closed * on the other side so we will get an EOF. */ if ((artyp != ISPIPE) || (lstrval <= 0)) return; /* * keep reading until pipe is drained */ while ((res = read(arfd, drbuf, sizeof(drbuf))) > 0) ; lstrval = res; } /* * ar_set_wr() * Set up device right before switching from read to write in an append. * device dependent code (if required) to do this should be added here. * For all archive devices we are already positioned at the place we want * to start writing when this routine is called. * Return: * 0 if all ready to write, -1 otherwise */ int ar_set_wr(void) { off_t cpos; /* * we must make sure the trailer is rewritten on append, ar_next() * will stop us if the archive containing the trailer was not written */ wr_trail = 0; /* * Add any device dependent code as required here */ if (artyp != ISREG) return(0); /* * Ok we have an archive in a regular file. If we were rewriting a * file, we must get rid of all the stuff after the current offset * (it was not written by pax). 
*/ if (((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) || (ftruncate(arfd, cpos) < 0)) { syswarn(1, errno, "Unable to truncate archive file"); return(-1); } return(0); } /* * ar_app_ok() * check if the last volume in the archive allows appends. We cannot check * this until we are ready to write since there is no spec that says all * volumes in a single archive have to be of the same type... * Return: * 0 if we can append, -1 otherwise. */ int ar_app_ok(void) { if (artyp == ISPIPE) { paxwarn(1, "Cannot append to an archive obtained from a pipe."); return(-1); } if (!invld_rec) return(0); paxwarn(1,"Cannot append, device record size %d does not support %s spec", rdblksz, argv0); return(-1); } /* * ar_read() * read up to a specified number of bytes from the archive into the * supplied buffer. When dealing with tapes we may not always be able to * read what we want. * Return: * Number of bytes in buffer. 0 for end of file, -1 for a read error. */ int ar_read(char *buf, int cnt) { int res = 0; /* * if last i/o was in error, no more reads until reset or new volume */ if (lstrval <= 0) return(lstrval); /* * how we read must be based on device type */ switch (artyp) { case ISTAPE: if ((res = read(arfd, buf, cnt)) > 0) { /* * CAUTION: tape systems may not always return the same * sized records so we leave blksz == MAXBLK. The * physical record size that a tape drive supports is * very hard to determine in a uniform and portable * manner. */ io_ok = 1; if (res != rdblksz) { /* * Record size changed. If this is happens on * any record after the first, we probably have * a tape drive which has a fixed record size * we are getting multiple records in a single * read). Watch out for record blocking that * violates pax spec (must be a multiple of * BLKMULT). */ rdblksz = res; if (rdblksz % BLKMULT) invld_rec = 1; } return(res); } break; case ISREG: case ISBLK: case ISCHR: case ISPIPE: default: /* * Files are so easy to deal with. These other things cannot * be trusted at all. 
So when we are dealing with character * devices and pipes we just take what they have ready for us * and return. Trying to do anything else with them runs the * risk of failure. */ if ((res = read(arfd, buf, cnt)) > 0) { io_ok = 1; return(res); } break; } /* * We are in trouble at this point, something is broken... */ lstrval = res; if (res < 0) syswarn(1, errno, "Failed read on archive volume %d", arvol); else paxwarn(0, "End of archive volume %d reached", arvol); return(res); } /* * ar_write() * Write a specified number of bytes in supplied buffer to the archive * device so it appears as a single "block". Deals with errors and tries * to recover when faced with short writes. * Return: * Number of bytes written. 0 indicates end of volume reached and with no * flaws (as best that can be detected). A -1 indicates an unrecoverable * error in the archive occured. */ int ar_write(char *buf, int bsz) { int res; off_t cpos; /* * do not allow pax to create a "bad" archive. Once a write fails on * an archive volume prevent further writes to it. */ if (lstrval <= 0) return(lstrval); if ((res = write(arfd, buf, bsz)) == bsz) { wr_trail = 1; io_ok = 1; return(bsz); } /* * write broke, see what we can do with it. We try to send any partial * writes that may violate pax spec to the next archive volume. 
*/ if (res < 0) lstrval = res; else lstrval = 0; switch (artyp) { case ISREG: if ((res > 0) && (res % BLKMULT)) { /* * try to fix up partial writes which are not BLKMULT * in size by forcing the runt record to next archive * volume */ if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) break; cpos -= (off_t)res; if (ftruncate(arfd, cpos) < 0) break; res = lstrval = 0; break; } if (res >= 0) break; /* * if file is out of space, handle it like a return of 0 */ if ((errno == ENOSPC) || (errno == EFBIG) || (errno == EDQUOT)) res = lstrval = 0; break; case ISTAPE: case ISCHR: case ISBLK: if (res >= 0) break; if (errno == EACCES) { paxwarn(0, "Write failed, archive is write protected."); res = lstrval = 0; return(0); } /* * see if we reached the end of media, if so force a change to * the next volume */ if ((errno == ENOSPC) || (errno == EIO) || (errno == ENXIO)) res = lstrval = 0; break; case ISPIPE: default: /* * we cannot fix errors to these devices */ break; } /* * Better tell the user the bad news... * if this is a block aligned archive format, we may have a bad archive * if the format wants the header to start at a BLKMULT boundary. While * we can deal with the mis-aligned data, it violates spec and other * archive readers will likely fail. if the format is not block * aligned, the user may be lucky (and the archive is ok). */ if (res >= 0) { if (res > 0) wr_trail = 1; io_ok = 1; } /* * If we were trying to rewrite the trailer and it didn't work, we * must quit right away. */ if (!wr_trail && (res <= 0)) { paxwarn(1,"Unable to append, trailer re-write failed. Quitting."); return(res); } if (res == 0) paxwarn(0, "End of archive volume %d reached", arvol); else if (res < 0) syswarn(1, errno, "Failed write to archive volume: %d", arvol); else if (!frmt->blkalgn || ((res % frmt->blkalgn) == 0)) paxwarn(0,"WARNING: partial archive write. Archive MAY BE FLAWED"); else paxwarn(1,"WARNING: partial archive write. 
Archive IS FLAWED"); return(res); } /* * ar_rdsync() * Try to move past a bad spot on a flawed archive as needed to continue * I/O. Clears error flags to allow I/O to continue. * Return: * 0 when ok to try i/o again, -1 otherwise. */ int ar_rdsync(void) { long fsbz; off_t cpos; off_t mpos; struct mtop mb; /* * Fail resync attempts at user request (done) or this is going to be * an update/append to an existing archive. If last i/o hit media end, * we need to go to the next volume not try a resync. */ if ((done > 0) || (lstrval == 0)) return(-1); if ((act == APPND) || (act == ARCHIVE)) { paxwarn(1, "Cannot allow updates to an archive with flaws."); return(-1); } if (io_ok) did_io = 1; switch(artyp) { case ISTAPE: /* * if the last i/o was a successful data transfer, we assume * the fault is just a bad record on the tape that we are now * past. If we did not get any data since the last resync try * to move the tape forward one PHYSICAL record past any * damaged tape section. Some tape drives are stubborn and need * to be pushed. */ if (io_ok) { io_ok = 0; lstrval = 1; break; } mb.mt_op = MTFSR; mb.mt_count = 1; if (ioctl(arfd, MTIOCTOP, &mb) < 0) break; lstrval = 1; break; case ISREG: case ISCHR: case ISBLK: /* * try to step over the bad part of the device. */ io_ok = 0; if (((fsbz = arsb.st_blksize) <= 0) || (artyp != ISREG)) fsbz = BLKMULT; if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) break; mpos = fsbz - (cpos % (off_t)fsbz); if (lseek(arfd, mpos, SEEK_CUR) < 0) break; lstrval = 1; break; case ISPIPE: default: /* * cannot recover on these archive device types */ io_ok = 0; break; } if (lstrval <= 0) { paxwarn(1, "Unable to recover from an archive read failure."); return(-1); } paxwarn(0, "Attempting to recover from an archive read failure."); return(0); } /* * ar_fow() * Move the I/O position within the archive foward the specified number of * bytes as supported by the device. 
If we cannot move the requested * number of bytes, return the actual number of bytes moved in skipped. * Return: * 0 if moved the requested distance, -1 on complete failure, 1 on * partial move (the amount moved is in skipped) */ int ar_fow(off_t sksz, off_t *skipped) { off_t cpos; off_t mpos; *skipped = 0; if (sksz <= 0) return(0); /* * we cannot move foward at EOF or error */ if (lstrval <= 0) return(lstrval); /* * Safer to read forward on devices where it is hard to find the end of * the media without reading to it. With tapes we cannot be sure of the * number of physical blocks to skip (we do not know physical block * size at this point), so we must only read foward on tapes! */ if (artyp != ISREG) return(0); /* * figure out where we are in the archive */ if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) >= 0) { /* * we can be asked to move farther than there are bytes in this * volume, if so, just go to file end and let normal buf_fill() * deal with the end of file (it will go to next volume by * itself) */ if ((mpos = cpos + sksz) > arsb.st_size) { *skipped = arsb.st_size - cpos; mpos = arsb.st_size; } else *skipped = sksz; if (lseek(arfd, mpos, SEEK_SET) >= 0) return(0); } syswarn(1, errno, "Forward positioning operation on archive failed"); lstrval = -1; return(-1); } /* * ar_rev() * move the i/o position within the archive backwards the specified byte * count as supported by the device. With tapes drives we RESET rdblksz to * the PHYSICAL blocksize. * NOTE: We should only be called to move backwards so we can rewrite the * last records (the trailer) of an archive (APPEND). 
* Return: * 0 if moved the requested distance, -1 on complete failure */ int ar_rev(off_t sksz) { off_t cpos; struct mtop mb; int phyblk; /* * make sure we do not have try to reverse on a flawed archive */ if (lstrval < 0) return(lstrval); switch(artyp) { case ISPIPE: if (sksz <= 0) break; /* * cannot go backwards on these critters */ paxwarn(1, "Reverse positioning on pipes is not supported."); lstrval = -1; return(-1); case ISREG: case ISBLK: case ISCHR: default: if (sksz <= 0) break; /* * For things other than files, backwards movement has a very * high probability of failure as we really do not know the * true attributes of the device we are talking to (the device * may not even have the ability to lseek() in any direction). * First we figure out where we are in the archive. */ if ((cpos = lseek(arfd, (off_t)0L, SEEK_CUR)) < 0) { syswarn(1, errno, "Unable to obtain current archive byte offset"); lstrval = -1; return(-1); } /* * we may try to go backwards past the start when the archive * is only a single record. If this hapens and we are on a * multi volume archive, we need to go to the end of the * previous volume and continue our movement backwards from * there. */ if ((cpos -= sksz) < (off_t)0L) { if (arvol > 1) { /* * this should never happen */ paxwarn(1,"Reverse position on previous volume."); lstrval = -1; return(-1); } cpos = (off_t)0L; } if (lseek(arfd, cpos, SEEK_SET) < 0) { syswarn(1, errno, "Unable to seek archive backwards"); lstrval = -1; return(-1); } break; case ISTAPE: /* * Calculate and move the proper number of PHYSICAL tape * blocks. If the sksz is not an even multiple of the physical * tape size, we cannot do the move (this should never happen). * (We also cannot handler trailers spread over two vols). * get_phys() also makes sure we are in front of the filemark. */ if ((phyblk = get_phys()) <= 0) { lstrval = -1; return(-1); } /* * make sure future tape reads only go by physical tape block * size (set rdblksz to the real size). 
*/ rdblksz = phyblk; /* * if no movement is required, just return (we must be after * get_phys() so the physical blocksize is properly set) */ if (sksz <= 0) break; /* * ok we have to move. Make sure the tape drive can do it. */ if (sksz % phyblk) { paxwarn(1, "Tape drive unable to backspace requested amount"); lstrval = -1; return(-1); } /* * move backwards the requested number of bytes */ mb.mt_op = MTBSR; mb.mt_count = sksz/phyblk; if (ioctl(arfd, MTIOCTOP, &mb) < 0) { syswarn(1,errno, "Unable to backspace tape %d blocks.", mb.mt_count); lstrval = -1; return(-1); } break; } lstrval = 1; return(0); } /* * get_phys() * Determine the physical block size on a tape drive. We need the physical * block size so we know how many bytes we skip over when we move with * mtio commands. We also make sure we are BEFORE THE TAPE FILEMARK when * return. * This is one really SLOW routine... * Return: * physical block size if ok (ok > 0), -1 otherwise */ static int get_phys(void) { int padsz = 0; int res; int phyblk; struct mtop mb; char scbuf[MAXBLK]; /* * move to the file mark, and then back up one record and read it. * this should tell us the physical record size the tape is using. */ if (lstrval == 1) { /* * we know we are at file mark when we get back a 0 from * read() */ while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) padsz += res; if (res < 0) { syswarn(1, errno, "Unable to locate tape filemark."); return(-1); } } /* * move backwards over the file mark so we are at the end of the * last record. */ mb.mt_op = MTBSF; mb.mt_count = 1; if (ioctl(arfd, MTIOCTOP, &mb) < 0) { syswarn(1, errno, "Unable to backspace over tape filemark."); return(-1); } /* * move backwards so we are in front of the last record and read it to * get physical tape blocksize. 
*/ mb.mt_op = MTBSR; mb.mt_count = 1; if (ioctl(arfd, MTIOCTOP, &mb) < 0) { syswarn(1, errno, "Unable to backspace over last tape block."); return(-1); } if ((phyblk = read(arfd, scbuf, sizeof(scbuf))) <= 0) { syswarn(1, errno, "Cannot determine archive tape blocksize."); return(-1); } /* * read foward to the file mark, then back up in front of the filemark * (this is a bit paranoid, but should be safe to do). */ while ((res = read(arfd, scbuf, sizeof(scbuf))) > 0) ; if (res < 0) { syswarn(1, errno, "Unable to locate tape filemark."); return(-1); } mb.mt_op = MTBSF; mb.mt_count = 1; if (ioctl(arfd, MTIOCTOP, &mb) < 0) { syswarn(1, errno, "Unable to backspace over tape filemark."); return(-1); } /* * set lstrval so we know that the filemark has not been seen */ lstrval = 1; /* * return if there was no padding */ if (padsz == 0) return(phyblk); /* * make sure we can move backwards over the padding. (this should * never fail). */ if (padsz % phyblk) { paxwarn(1, "Tape drive unable to backspace requested amount"); return(-1); } /* * move backwards over the padding so the head is where it was when * we were first called (if required). */ mb.mt_op = MTBSR; mb.mt_count = padsz/phyblk; if (ioctl(arfd, MTIOCTOP, &mb) < 0) { syswarn(1,errno,"Unable to backspace tape over %d pad blocks", mb.mt_count); return(-1); } return(phyblk); } /* * ar_next() * prompts the user for the next volume in this archive. For some devices * we may allow the media to be changed. Otherwise a new archive is * prompted for. By pax spec, if there is no controlling tty or an eof is * read on tty input, we must quit pax. * Return: * 0 when ready to continue, -1 when all done */ int ar_next(void) { char buf[PAXPATHLEN+2]; static int freeit = 0; sigset_t o_mask; /* * WE MUST CLOSE THE DEVICE. A lot of devices must see last close, (so * things like writing EOF etc will be done) (Watch out ar_close() can * also be called via a signal handler, so we must prevent a race. 
*/ if (sigprocmask(SIG_BLOCK, &s_mask, &o_mask) < 0) syswarn(0, errno, "Unable to set signal mask"); ar_close(); if (sigprocmask(SIG_SETMASK, &o_mask, NULL) < 0) syswarn(0, errno, "Unable to restore signal mask"); if (done || !wr_trail || strcmp(NM_TAR, argv0) == 0) return(-1); tty_prnt("\nATTENTION! %s archive volume change required.\n", argv0); /* * if i/o is on stdin or stdout, we cannot reopen it (we do not know * the name), the user will be forced to type it in. */ if (strcmp(arcname, stdo) && strcmp(arcname, stdn) && (artyp != ISREG) && (artyp != ISPIPE)) { if (artyp == ISTAPE) { tty_prnt("%s ready for archive tape volume: %d\n", arcname, arvol); tty_prnt("Load the NEXT TAPE on the tape drive"); } else { tty_prnt("%s ready for archive volume: %d\n", arcname, arvol); tty_prnt("Load the NEXT STORAGE MEDIA (if required)"); } if ((act == ARCHIVE) || (act == APPND)) tty_prnt(" and make sure it is WRITE ENABLED.\n"); else tty_prnt("\n"); for(;;) { tty_prnt("Type \"y\" to continue, \".\" to quit %s,", argv0); tty_prnt(" or \"s\" to switch to new device.\nIf you"); tty_prnt(" cannot change storage media, type \"s\"\n"); tty_prnt("Is the device ready and online? 
> "); if ((tty_read(buf,sizeof(buf))<0) || !strcmp(buf,".")){ done = 1; lstrval = -1; tty_prnt("Quitting %s!\n", argv0); vfpart = 0; return(-1); } if ((buf[0] == '\0') || (buf[1] != '\0')) { tty_prnt("%s unknown command, try again\n",buf); continue; } switch (buf[0]) { case 'y': case 'Y': /* * we are to continue with the same device */ if (ar_open(arcname) >= 0) return(0); tty_prnt("Cannot re-open %s, try again\n", arcname); continue; case 's': case 'S': /* * user wants to open a different device */ tty_prnt("Switching to a different archive\n"); break; default: tty_prnt("%s unknown command, try again\n",buf); continue; } break; } } else tty_prnt("Ready for archive volume: %d\n", arvol); /* * have to go to a different archive */ for (;;) { tty_prnt("Input archive name or \".\" to quit %s.\n", argv0); tty_prnt("Archive name > "); if ((tty_read(buf, sizeof(buf)) < 0) || !strcmp(buf, ".")) { done = 1; lstrval = -1; tty_prnt("Quitting %s!\n", argv0); vfpart = 0; return(-1); } if (buf[0] == '\0') { tty_prnt("Empty file name, try again\n"); continue; } if (!strcmp(buf, "..")) { tty_prnt("Illegal file name: .. try again\n"); continue; } if (strlen(buf) > PAXPATHLEN) { tty_prnt("File name too long, try again\n"); continue; } /* * try to open new archive */ if (ar_open(buf) >= 0) { if (freeit) { - (void)free((char *)(uintptr_t)arcname); + free((char *)(uintptr_t)arcname); freeit = 0; } if ((arcname = strdup(buf)) == NULL) { done = 1; lstrval = -1; paxwarn(0, "Cannot save archive name."); return(-1); } freeit = 1; break; } tty_prnt("Cannot open %s, try again\n", buf); continue; } return(0); } /* * ar_start_gzip() * starts the gzip compression/decompression process as a child, using magic * to keep the fd the same in the calling function (parent). 
*/ void ar_start_gzip(int fd, const char *gzip_prog, int wr) { int fds[2]; const char *gzip_flags; if (pipe(fds) < 0) err(1, "could not pipe"); zpid = fork(); if (zpid < 0) err(1, "could not fork"); /* parent */ if (zpid) { if (wr) dup2(fds[1], fd); else dup2(fds[0], fd); close(fds[0]); close(fds[1]); } else { if (wr) { dup2(fds[0], STDIN_FILENO); dup2(fd, STDOUT_FILENO); gzip_flags = "-c"; } else { dup2(fds[1], STDOUT_FILENO); dup2(fd, STDIN_FILENO); gzip_flags = "-dc"; } close(fds[0]); close(fds[1]); if (execlp(gzip_prog, gzip_prog, gzip_flags, (char *)NULL) < 0) err(1, "could not exec"); /* NOTREACHED */ } } Index: head/bin/pax/file_subs.c =================================================================== --- head/bin/pax/file_subs.c (revision 169925) +++ head/bin/pax/file_subs.c (revision 169926) @@ -1,973 +1,973 @@ /*- * Copyright (c) 1992 Keith Muller. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Keith Muller of the University of California, San Diego. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifndef lint
#if 0
static char sccsid[] = "@(#)file_subs.c 8.1 (Berkeley) 5/31/93";
#endif
#endif /* not lint */
#include
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "pax.h"
#include "options.h"
#include "extern.h"

static int mk_link(char *,struct stat *,char *, int);

/*
 * routines that deal with file operations such as: creating, removing;
 * and setting access modes, uid/gid and times of files
 */

#define FILEBITS (S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO)
#define SETBITS (S_ISUID | S_ISGID)
#define ABITS (FILEBITS | SETBITS)

/*
 * file_creat()
 *	Create and open a file. The file is opened write-only with the
 *	permission bits (FILEBITS) taken from arcn->sb.st_mode.
 * Return:
 *	file descriptor or -1 for failure
 */
int
file_creat(ARCHD *arcn)
{
	int fd = -1;
	mode_t file_mode;
	int oerrno;

	/*
	 * assume file doesn't exist, so just try to create it, most times this
	 * works. We have to take special handling when the file does exist. To
	 * detect this, we use O_EXCL. For example when trying to create a
	 * file and a character device or fifo exists with the same name, we
	 * can accidentally open the device by mistake (or block waiting to
	 * open). If we find that the open has failed, then spend the effort to
	 * figure out why. This strategy was found to have better average
	 * performance in common use than checking the file (and the path)
	 * first with lstat.
	 */
	file_mode = arcn->sb.st_mode & FILEBITS;
	if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, file_mode)) >= 0)
		return(fd);

	/*
	 * the file seems to exist. First we try to get rid of it (found to be
	 * the second most common failure when traced). If this fails, only
	 * then we go to the expense to check and create the path to the file
	 */
	if (unlnk_exist(arcn->name, arcn->type) != 0)
		return(-1);

	for (;;) {
		/*
		 * try to open it again, if this fails, check all the nodes in
		 * the path and give it a final try. if chk_path() finds that
		 * it cannot fix anything, we will skip the last attempt
		 */
		if ((fd = open(arcn->name, O_WRONLY | O_CREAT | O_TRUNC, file_mode)) >= 0)
			break;
		/* keep the open() errno; chk_path() may overwrite it */
		oerrno = errno;
		if (nodirs || chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid) < 0) {
			syswarn(1, oerrno, "Unable to create %s", arcn->name);
			return(-1);
		}
	}
	return(fd);
}

/*
 * file_close()
 *	Close file descriptor to a file just created by pax. Sets modes,
 *	ownership and times as required. A negative fd is ignored.
 * Return:
 *	nothing; a failed close() is reported with syswarn() and processing
 *	continues
 */
void
file_close(ARCHD *arcn, int fd)
{
	int res = 0;

	if (fd < 0)
		return;
	if (close(fd) < 0)
		syswarn(0, errno, "Unable to close file descriptor on %s", arcn->name);

	/*
	 * set owner/groups first as this may strip off mode bits we want
	 * then set file permission modes. Then set file access and
	 * modification times.
	 */
	if (pids)
		res = set_ids(arcn->name, arcn->sb.st_uid, arcn->sb.st_gid);

	/*
	 * IMPORTANT SECURITY NOTE:
	 * if not preserving mode or we cannot set uid/gid, then PROHIBIT
	 * set uid/gid bits
	 */
	if (!pmode || res)
		arcn->sb.st_mode &= ~(SETBITS);
	if (pmode)
		set_pmode(arcn->name, arcn->sb.st_mode);
	if (patime || pmtime)
		set_ftime(arcn->name, arcn->sb.st_mtime, arcn->sb.st_atime, 0);
}

/*
 * lnk_creat()
 *	Create a hard link to arcn->ln_name from arcn->name.
arcn->ln_name * must exist; * Return: * 0 if ok, -1 otherwise */ int lnk_creat(ARCHD *arcn) { struct stat sb; /* * we may be running as root, so we have to be sure that link target * is not a directory, so we lstat and check */ if (lstat(arcn->ln_name, &sb) < 0) { syswarn(1,errno,"Unable to link to %s from %s", arcn->ln_name, arcn->name); return(-1); } if (S_ISDIR(sb.st_mode)) { paxwarn(1, "A hard link to the directory %s is not allowed", arcn->ln_name); return(-1); } return(mk_link(arcn->ln_name, &sb, arcn->name, 0)); } /* * cross_lnk() * Create a hard link to arcn->org_name from arcn->name. Only used in copy * with the -l flag. No warning or error if this does not succeed (we will * then just create the file) * Return: * 1 if copy() should try to create this file node * 0 if cross_lnk() ok, -1 for fatal flaw (like linking to self). */ int cross_lnk(ARCHD *arcn) { /* * try to make a link to original file (-l flag in copy mode). make sure * we do not try to link to directories in case we are running as root * (and it might succeed). */ if (arcn->type == PAX_DIR) return(1); return(mk_link(arcn->org_name, &(arcn->sb), arcn->name, 1)); } /* * chk_same() * In copy mode if we are not trying to make hard links between the src * and destinations, make sure we are not going to overwrite ourselves by * accident. This slows things down a little, but we have to protect all * those people who make typing errors. * Return: * 1 the target does not exist, go ahead and copy * 0 skip it file exists (-k) or may be the same as source file */ int chk_same(ARCHD *arcn) { struct stat sb; /* * if file does not exist, return. 
if file exists and -k, skip it * quietly */ if (lstat(arcn->name, &sb) < 0) return(1); if (kflag) return(0); /* * better make sure the user does not have src == dest by mistake */ if ((arcn->sb.st_dev == sb.st_dev) && (arcn->sb.st_ino == sb.st_ino)) { paxwarn(1, "Unable to copy %s, file would overwrite itself", arcn->name); return(0); } return(1); } /* * mk_link() * try to make a hard link between two files. if ign set, we do not * complain. * Return: * 0 if successful (or we are done with this file but no error, such as * finding the from file exists and the user has set -k). * 1 when ign was set to indicates we could not make the link but we * should try to copy/extract the file as that might work (and is an * allowed option). -1 an error occurred. */ static int mk_link(char *to, struct stat *to_sb, char *from, int ign) { struct stat sb; int oerrno; /* * if from file exists, it has to be unlinked to make the link. If the * file exists and -k is set, skip it quietly */ if (lstat(from, &sb) == 0) { if (kflag) return(0); /* * make sure it is not the same file, protect the user */ if ((to_sb->st_dev==sb.st_dev)&&(to_sb->st_ino == sb.st_ino)) { paxwarn(1, "Unable to link file %s to itself", to); - return(-1);; + return(-1); } /* * try to get rid of the file, based on the type */ if (S_ISDIR(sb.st_mode)) { if (rmdir(from) < 0) { syswarn(1, errno, "Unable to remove %s", from); return(-1); } } else if (unlink(from) < 0) { if (!ign) { syswarn(1, errno, "Unable to remove %s", from); return(-1); } return(1); } } /* * from file is gone (or did not exist), try to make the hard link. 
* if it fails, check the path and try it again (if chk_path() says to * try again) */ for (;;) { if (link(to, from) == 0) break; oerrno = errno; if (!nodirs && chk_path(from, to_sb->st_uid, to_sb->st_gid) == 0) continue; if (!ign) { syswarn(1, oerrno, "Could not link to %s from %s", to, from); return(-1); } return(1); } /* * all right the link was made */ return(0); } /* * node_creat() * create an entry in the file system (other than a file or hard link). * If successful, sets uid/gid modes and times as required. * Return: * 0 if ok, -1 otherwise */ int node_creat(ARCHD *arcn) { int res; int ign = 0; int oerrno; int pass = 0; mode_t file_mode; struct stat sb; /* * create node based on type, if that fails try to unlink the node and * try again. finally check the path and try again. As noted in the * file and link creation routines, this method seems to exhibit the * best performance in general use workloads. */ file_mode = arcn->sb.st_mode & FILEBITS; for (;;) { switch(arcn->type) { case PAX_DIR: res = mkdir(arcn->name, file_mode); if (ign) res = 0; break; case PAX_CHR: file_mode |= S_IFCHR; res = mknod(arcn->name, file_mode, arcn->sb.st_rdev); break; case PAX_BLK: file_mode |= S_IFBLK; res = mknod(arcn->name, file_mode, arcn->sb.st_rdev); break; case PAX_FIF: res = mkfifo(arcn->name, file_mode); break; case PAX_SCK: /* * Skip sockets, operation has no meaning under BSD */ paxwarn(0, "%s skipped. Sockets cannot be copied or extracted", arcn->name); return(-1); case PAX_SLK: res = symlink(arcn->ln_name, arcn->name); break; case PAX_CTG: case PAX_HLK: case PAX_HRG: case PAX_REG: default: /* * we should never get here */ paxwarn(0, "%s has an unknown file type, skipping", arcn->name); return(-1); } /* * if we were able to create the node break out of the loop, * otherwise try to unlink the node and try again. if that * fails check the full path and try a final time. 
*/ if (res == 0) break; /* * we failed to make the node */ oerrno = errno; if ((ign = unlnk_exist(arcn->name, arcn->type)) < 0) return(-1); if (++pass <= 1) continue; if (nodirs || chk_path(arcn->name,arcn->sb.st_uid,arcn->sb.st_gid) < 0) { syswarn(1, oerrno, "Could not create: %s", arcn->name); return(-1); } } /* * we were able to create the node. set uid/gid, modes and times */ if (pids) res = ((arcn->type == PAX_SLK) ? set_lids(arcn->name, arcn->sb.st_uid, arcn->sb.st_gid) : set_ids(arcn->name, arcn->sb.st_uid, arcn->sb.st_gid)); else res = 0; /* * symlinks are done now. */ if (arcn->type == PAX_SLK) return(0); /* * IMPORTANT SECURITY NOTE: * if not preserving mode or we cannot set uid/gid, then PROHIBIT any * set uid/gid bits */ if (!pmode || res) arcn->sb.st_mode &= ~(SETBITS); if (pmode) set_pmode(arcn->name, arcn->sb.st_mode); if (arcn->type == PAX_DIR && strcmp(NM_CPIO, argv0) != 0) { /* * Dirs must be processed again at end of extract to set times * and modes to agree with those stored in the archive. However * to allow extract to continue, we may have to also set owner * rights. This allows nodes in the archive that are children * of this directory to be extracted without failure. Both time * and modes will be fixed after the entire archive is read and * before pax exits. */ if (access(arcn->name, R_OK | W_OK | X_OK) < 0) { if (lstat(arcn->name, &sb) < 0) { syswarn(0, errno,"Could not access %s (stat)", arcn->name); set_pmode(arcn->name,file_mode | S_IRWXU); } else { /* * We have to add rights to the dir, so we make * sure to restore the mode. The mode must be * restored AS CREATED and not as stored if * pmode is not set. */ set_pmode(arcn->name, ((sb.st_mode & FILEBITS) | S_IRWXU)); if (!pmode) arcn->sb.st_mode = sb.st_mode; } /* * we have to force the mode to what was set here, * since we changed it from the default as created. 
*/ add_dir(arcn->name, arcn->nlen, &(arcn->sb), 1); } else if (pmode || patime || pmtime) add_dir(arcn->name, arcn->nlen, &(arcn->sb), 0); } if (patime || pmtime) set_ftime(arcn->name, arcn->sb.st_mtime, arcn->sb.st_atime, 0); return(0); } /* * unlnk_exist() * Remove node from file system with the specified name. We pass the type * of the node that is going to replace it. When we try to create a * directory and find that it already exists, we allow processing to * continue as proper modes etc will always be set for it later on. * Return: * 0 is ok to proceed, no file with the specified name exists * -1 we were unable to remove the node, or we should not remove it (-k) * 1 we found a directory and we were going to create a directory. */ int unlnk_exist(char *name, int type) { struct stat sb; /* * the file does not exist, or -k we are done */ if (lstat(name, &sb) < 0) return(0); if (kflag) return(-1); if (S_ISDIR(sb.st_mode)) { /* * try to remove a directory, if it fails and we were going to * create a directory anyway, tell the caller (return a 1) */ if (rmdir(name) < 0) { if (type == PAX_DIR) return(1); syswarn(1,errno,"Unable to remove directory %s", name); return(-1); } return(0); } /* * try to get rid of all non-directory type nodes */ if (unlink(name) < 0) { syswarn(1, errno, "Could not unlink %s", name); return(-1); } return(0); } /* * chk_path() * We were trying to create some kind of node in the file system and it * failed. chk_path() makes sure the path up to the node exists and is * writeable. When we have to create a directory that is missing along the * path somewhere, the directory we create will be set to the same * uid/gid as the file has (when uid and gid are being preserved). * NOTE: this routine is a real performance loss. It is only used as a * last resort when trying to create entries in the file system. * Return: * -1 when it could find nothing it is allowed to fix. 
* 0 otherwise */ int chk_path( char *name, uid_t st_uid, gid_t st_gid) { char *spt = name; struct stat sb; int retval = -1; /* * watch out for paths with nodes stored directly in / (e.g. /bozo) */ if (*spt == '/') ++spt; for(;;) { /* * work foward from the first / and check each part of the path */ spt = strchr(spt, '/'); if (spt == NULL) break; *spt = '\0'; /* * if it exists we assume it is a directory, it is not within * the spec (at least it seems to read that way) to alter the * file system for nodes NOT EXPLICITLY stored on the archive. * If that assumption is changed, you would test the node here * and figure out how to get rid of it (probably like some * recursive unlink()) or fix up the directory permissions if * required (do an access()). */ if (lstat(name, &sb) == 0) { *(spt++) = '/'; continue; } /* * the path fails at this point, see if we can create the * needed directory and continue on */ if (mkdir(name, S_IRWXU | S_IRWXG | S_IRWXO) < 0) { *spt = '/'; retval = -1; break; } /* * we were able to create the directory. We will tell the * caller that we found something to fix, and it is ok to try * and create the node again. */ retval = 0; if (pids) (void)set_ids(name, st_uid, st_gid); /* * make sure the user doen't have some strange umask that * causes this newly created directory to be unusable. We fix * the modes and restore them back to the creation default at * the end of pax */ if ((access(name, R_OK | W_OK | X_OK) < 0) && (lstat(name, &sb) == 0)) { set_pmode(name, ((sb.st_mode & FILEBITS) | S_IRWXU)); add_dir(name, spt - name, &sb, 1); } *(spt++) = '/'; continue; } return(retval); } /* * set_ftime() * Set the access time and modification time for a named file. If frc is * non-zero we force these times to be set even if the user did not * request access and/or modification time preservation (this is also * used by -t to reset access times). * When ign is zero, only those times the user has asked for are set, the * other ones are left alone. 
We do not assume the un-documented feature * of many utimes() implementations that consider a 0 time value as a do * not set request. */ void set_ftime(char *fnm, time_t mtime, time_t atime, int frc) { static struct timeval tv[2] = {{0L, 0L}, {0L, 0L}}; struct stat sb; tv[0].tv_sec = atime; tv[1].tv_sec = mtime; if (!frc && (!patime || !pmtime)) { /* * if we are not forcing, only set those times the user wants * set. We get the current values of the times if we need them. */ if (lstat(fnm, &sb) == 0) { if (!patime) tv[0].tv_sec = sb.st_atime; if (!pmtime) tv[1].tv_sec = sb.st_mtime; } else syswarn(0,errno,"Unable to obtain file stats %s", fnm); } /* * set the times */ if (utimes(fnm, tv) < 0) syswarn(1, errno, "Access/modification time set failed on: %s", fnm); return; } /* * set_ids() * set the uid and gid of a file system node * Return: * 0 when set, -1 on failure */ int set_ids(char *fnm, uid_t uid, gid_t gid) { if (chown(fnm, uid, gid) < 0) { /* * ignore EPERM unless in verbose mode or being run by root. * if running as pax, POSIX requires a warning. */ if (strcmp(NM_PAX, argv0) == 0 || errno != EPERM || vflag || geteuid() == 0) syswarn(1, errno, "Unable to set file uid/gid of %s", fnm); return(-1); } return(0); } /* * set_lids() * set the uid and gid of a file system node * Return: * 0 when set, -1 on failure */ int set_lids(char *fnm, uid_t uid, gid_t gid) { if (lchown(fnm, uid, gid) < 0) { /* * ignore EPERM unless in verbose mode or being run by root. * if running as pax, POSIX requires a warning. */ if (strcmp(NM_PAX, argv0) == 0 || errno != EPERM || vflag || geteuid() == 0) syswarn(1, errno, "Unable to set file uid/gid of %s", fnm); return(-1); } return(0); } /* * set_pmode() * Set file access mode */ void set_pmode(char *fnm, mode_t mode) { mode &= ABITS; if (chmod(fnm, mode) < 0) syswarn(1, errno, "Could not set permissions on %s", fnm); return; } /* * file_write() * Write/copy a file (during copy or archive extract). 
 This routine knows * how to copy files with lseek holes in it. (Which are read as file * blocks containing all 0's but do not have any file blocks associated * with the data). Typical examples of these are files created by dbm * variants (.pag files). While the file size of these files are huge, the * actual storage is quite small (the files are sparse). The problem is * the holes read as all zeros so are probably stored on the archive that * way (there is no way to determine if the file block is really a hole, * we only know that a file block of all zero's can be a hole). * At this writing, no major archive format knows how to archive files * with holes. However, on extraction (or during copy, -rw) we have to * deal with these files. Without detecting the holes, the files can * consume a lot of file space if just written to disk. This replacement * for write when passed the basic allocation size of a file system block, * uses lseek whenever it detects the input data is all 0 within that * file block. In more detail, the strategy is as follows: * While the input is all zero keep doing an lseek. Keep track of when we * pass over file block boundaries. Only write when we hit a non zero * input. once we have written a file block, we continue to write it to * the end (we stop looking at the input). When we reach the start of the * next file block, start checking for zero blocks again. Working on file * block boundaries significantly reduces the overhead when copying files * that are NOT very sparse. This overhead (when compared to a write) is * almost below the measurement resolution on many systems. Without it, * files with holes cannot be safely copied. It does have a side effect as * it can put holes into files that did not have them before, but that is * not a problem since the file contents are unchanged (in fact it saves * file space). (Except on paging files for diskless clients. But since we * cannot determine one of those files from here, we ignore them).
If this
 *	ever ends up on a system where CTG files are supported and the holes
 *	are not desired, just do a conditional test in those routines that
 *	call file_write() and have it call write() instead. BEFORE CLOSING THE
 *	FILE, make sure to call file_flush() when the last write finishes with
 *	an empty block. A lot of file systems will not create an lseek hole at
 *	the end. In this case we drop a single 0 at the end to force the
 *	trailing 0's in the file.
 *	---Parameters---
 *	fd: descriptor of the file being written
 *	rem: how many bytes left in this file system block
 *	isempt: have we written to the file block yet (is it empty)
 *	sz: basic file block allocation size
 *	cnt: number of bytes on this write
 *	str: buffer to write
 *	name: file name, used only in warning messages
 *	*rem and *isempt are updated in place so the state carries over to the
 *	next call on the same file.
 * Return:
 *	number of bytes consumed from str (written, or skipped with lseek),
 *	-1 on write (or lseek) error.
 */
int
file_write(int fd, char *str, int cnt, int *rem, int *isempt, int sz,
	char *name)
{
	char *pt;
	char *end;
	int wcnt;
	char *st = str;

	/*
	 * while we have data to process
	 */
	while (cnt) {
		if (!*rem) {
			/*
			 * We are now at the start of file system block again
			 * (or what we think one is...). start looking for
			 * empty blocks again
			 */
			*isempt = 1;
			*rem = sz;
		}

		/*
		 * only examine up to the end of the current file block or
		 * remaining characters to write, whatever is smaller
		 */
		wcnt = MIN(cnt, *rem);
		cnt -= wcnt;
		*rem -= wcnt;
		if (*isempt) {
			/*
			 * have not written to this block yet, so we keep
			 * looking for zero's
			 */
			pt = st;
			end = st + wcnt;

			/*
			 * look for a zero filled buffer
			 */
			while ((pt < end) && (*pt == '\0'))
				++pt;

			if (pt == end) {
				/*
				 * skip, buf is empty so far
				 */
				if (lseek(fd, (off_t)wcnt, SEEK_CUR) < 0) {
					syswarn(1,errno,"File seek on %s",
					    name);
					return(-1);
				}
				/* the skipped bytes count as consumed */
				st = pt;
				continue;
			}
			/*
			 * drat, the buf is not zero filled
			 */
			*isempt = 0;
		}

		/*
		 * have non-zero data in this file system block, have to write
		 */
		if (write(fd, st, wcnt) != wcnt) {
			syswarn(1, errno, "Failed write to file %s", name);
			return(-1);
		}
		st += wcnt;
	}
	return(st - str);
}

/*
 * file_flush()
 *	when the last file block in a file is zero, many file systems will not
 *	let us create a hole at the end. To get the last block with zeros, we
 *	write the last BYTE with a zero (back up one byte and write a zero).
 *	Does nothing when isempt is zero. A failed seek or write is reported
 *	with syswarn(); nothing is returned.
 */
void
file_flush(int fd, char *fname, int isempt)
{
	static char blnk[] = "\0";

	/*
	 * silly test, but make sure we are only called when the last block is
	 * filled with all zeros.
	 */
	if (!isempt)
		return;

	/*
	 * move back one byte and write a zero
	 */
	if (lseek(fd, (off_t)-1, SEEK_CUR) < 0) {
		syswarn(1, errno, "Failed seek on file %s", fname);
		return;
	}

	if (write(fd, blnk, 1) < 0)
		syswarn(1, errno, "Failed write to file %s", fname);
	return;
}

/*
 * rdfile_close()
 *	close a file we have been reading (to copy or archive). If we have to
 *	reset access time (tflag) do so (the times are stored in arcn).
*/ void rdfile_close(ARCHD *arcn, int *fd) { /* * make sure the file is open */ if (*fd < 0) return; (void)close(*fd); *fd = -1; if (!tflag) return; /* * user wants last access time reset */ set_ftime(arcn->org_name, arcn->sb.st_mtime, arcn->sb.st_atime, 1); return; } /* * set_crc() * read a file to calculate its crc. This is a real drag. Archive formats * that have this, end up reading the file twice (we have to write the * header WITH the crc before writing the file contents. Oh well... * Return: * 0 if was able to calculate the crc, -1 otherwise */ int set_crc(ARCHD *arcn, int fd) { int i; int res; off_t cpcnt = 0L; u_long size; unsigned long crc = 0L; char tbuf[FILEBLK]; struct stat sb; if (fd < 0) { /* * hmm, no fd, should never happen. well no crc then. */ arcn->crc = 0L; return(0); } if ((size = (u_long)arcn->sb.st_blksize) > (u_long)sizeof(tbuf)) size = (u_long)sizeof(tbuf); /* * read all the bytes we think that there are in the file. If the user * is trying to archive an active file, forget this file. */ for(;;) { if ((res = read(fd, tbuf, size)) <= 0) break; cpcnt += res; for (i = 0; i < res; ++i) crc += (tbuf[i] & 0xff); } /* * safety check. we want to avoid archiving files that are active as * they can create inconsistant archive copies. */ if (cpcnt != arcn->sb.st_size) paxwarn(1, "File changed size %s", arcn->org_name); else if (fstat(fd, &sb) < 0) syswarn(1, errno, "Failed stat on %s", arcn->org_name); else if (arcn->sb.st_mtime != sb.st_mtime) paxwarn(1, "File %s was modified during read", arcn->org_name); else if (lseek(fd, (off_t)0L, SEEK_SET) < 0) syswarn(1, errno, "File rewind failed on: %s", arcn->org_name); else { arcn->crc = crc; return(0); } return(-1); } Index: head/bin/pax/pat_rep.c =================================================================== --- head/bin/pax/pat_rep.c (revision 169925) +++ head/bin/pax/pat_rep.c (revision 169926) @@ -1,1130 +1,1130 @@ /*- * Copyright (c) 1992 Keith Muller. 
* Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Keith Muller of the University of California, San Diego. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint #if 0 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #ifdef NET2_REGEX #include #else #include #endif #include "pax.h" #include "pat_rep.h" #include "extern.h" /* * routines to handle pattern matching, name modification (regular expression * substitution and interactive renames), and destination name modification for * copy (-rw). Both file name and link names are adjusted as required in these * routines. */ #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ static PATTERN *pathead = NULL; /* file pattern match list head */ static PATTERN *pattail = NULL; /* file pattern match list tail */ static REPLACE *rephead = NULL; /* replacement string list head */ static REPLACE *reptail = NULL; /* replacement string list tail */ static int rep_name(char *, int *, int); static int tty_rename(ARCHD *); static int fix_path(char *, int *, char *, int); static int fn_match(char *, char *, char **); static char * range_match(char *, int); #ifdef NET2_REGEX static int resub(regexp *, char *, char *, char *); #else static int resub(regex_t *, regmatch_t *, char *, char *, char *); #endif /* * rep_add() * parses the -s replacement string; compiles the regular expression * and stores the compiled value and it's replacement string together in * replacement string list. Input to this function is of the form: * /old/new/pg * The first char in the string specifies the delimiter used by this * replacement string. "Old" is a regular expression in "ed" format which * is compiled by regcomp() and is applied to filenames. "new" is the * substitution string; p and g are options flags for printing and global * replacement (over the single filename) * Return: * 0 if a proper replacement string and regular expression was added to * the list of replacement patterns; -1 otherwise. 
*/ int rep_add(char *str) { char *pt1; char *pt2; REPLACE *rep; # ifndef NET2_REGEX int res; char rebuf[BUFSIZ]; # endif /* * throw out the bad parameters */ if ((str == NULL) || (*str == '\0')) { paxwarn(1, "Empty replacement string"); return(-1); } /* * first character in the string specifies what the delimiter is for * this expression */ if ((pt1 = strchr(str+1, *str)) == NULL) { paxwarn(1, "Invalid replacement string %s", str); return(-1); } /* * allocate space for the node that handles this replacement pattern * and split out the regular expression and try to compile it */ if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { paxwarn(1, "Unable to allocate memory for replacement string"); return(-1); } *pt1 = '\0'; # ifdef NET2_REGEX if ((rep->rcmp = regcomp(str+1)) == NULL) { # else if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); paxwarn(1, "%s while compiling regular expression %s", rebuf, str); # endif - (void)free((char *)rep); + free((char *)rep); return(-1); } /* * put the delimiter back in case we need an error message and * locate the delimiter at the end of the replacement string * we then point the node at the new substitution string */ *pt1++ = *str; if ((pt2 = strchr(pt1, *str)) == NULL) { # ifdef NET2_REGEX - (void)free((char *)rep->rcmp); + free((char *)rep->rcmp); # else regfree(&(rep->rcmp)); # endif - (void)free((char *)rep); + free((char *)rep); paxwarn(1, "Invalid replacement string %s", str); return(-1); } *pt2 = '\0'; rep->nstr = pt1; pt1 = pt2++; rep->flgs = 0; /* * set the options if any */ while (*pt2 != '\0') { switch(*pt2) { case 'g': case 'G': rep->flgs |= GLOB; break; case 'p': case 'P': rep->flgs |= PRNT; break; default: # ifdef NET2_REGEX - (void)free((char *)rep->rcmp); + free((char *)rep->rcmp); # else regfree(&(rep->rcmp)); # endif - (void)free((char *)rep); + free((char *)rep); *pt1 = *str; paxwarn(1, "Invalid replacement string option %s", str); return(-1); } ++pt2; } 
/* * all done, link it in at the end */ rep->fow = NULL; if (rephead == NULL) { reptail = rephead = rep; return(0); } reptail->fow = rep; reptail = rep; return(0); } /* * pat_add() * add a pattern match to the pattern match list. Pattern matches are used * to select which archive members are extracted. (They appear as * arguments to pax in the list and read modes). If no patterns are * supplied to pax, all members in the archive will be selected (and the * pattern match list is empty). * Return: * 0 if the pattern was added to the list, -1 otherwise */ int pat_add(char *str, char *chdnam) { PATTERN *pt; /* * throw out the junk */ if ((str == NULL) || (*str == '\0')) { paxwarn(1, "Empty pattern string"); return(-1); } /* * allocate space for the pattern and store the pattern. the pattern is * part of argv so do not bother to copy it, just point at it. Add the * node to the end of the pattern list */ if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { paxwarn(1, "Unable to allocate memory for pattern string"); return(-1); } pt->pstr = str; pt->pend = NULL; pt->plen = strlen(str); pt->fow = NULL; pt->flgs = 0; pt->chdname = chdnam; if (pathead == NULL) { pattail = pathead = pt; return(0); } pattail->fow = pt; pattail = pt; return(0); } /* * pat_chk() * complain if any the user supplied pattern did not result in a match to * a selected archive member. */ void pat_chk(void) { PATTERN *pt; int wban = 0; /* * walk down the list checking the flags to make sure MTCH was set, * if not complain */ for (pt = pathead; pt != NULL; pt = pt->fow) { if (pt->flgs & MTCH) continue; if (!wban) { paxwarn(1, "WARNING! These patterns were not matched:"); ++wban; } (void)fprintf(stderr, "%s\n", pt->pstr); } } /* * pat_sel() * the archive member which matches a pattern was selected. Mark the * pattern as having selected an archive member. arcn->pat points at the * pattern that was matched. 
arcn->pat is set in pat_match() * * NOTE: When the -c option is used, we are called when there was no match * by pat_match() (that means we did match before the inverted sense of * the logic). Now this seems really strange at first, but with -c we * need to keep track of those patterns that cause an archive member to NOT * be selected (it found an archive member with a specified pattern) * Return: * 0 if the pattern pointed at by arcn->pat was tagged as creating a * match, -1 otherwise. */ int pat_sel(ARCHD *arcn) { PATTERN *pt; PATTERN **ppt; int len; /* * if no patterns just return */ if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) return(0); /* * when we are NOT limited to a single match per pattern mark the * pattern and return */ if (!nflag) { pt->flgs |= MTCH; return(0); } /* * we reach this point only when we allow a single selected match per * pattern, if the pattern matches a directory and we do not have -d * (dflag) we are done with this pattern. We may also be handed a file * in the subtree of a directory. in that case when we are operating * with -d, this pattern was already selected and we are done */ if (pt->flgs & DIR_MTCH) return(0); if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { /* * ok we matched a directory and we are allowing * subtree matches but because of the -n only its children will * match. This is tagged as a DIR_MTCH type. * WATCH IT, the code assumes that pt->pend points * into arcn->name and arcn->name has not been modified. * If not we will have a big mess. Yup this is another kludge */ /* * if this was a prefix match, remove trailing part of path * so we can copy it. 
Future matches will be exact prefix match */ if (pt->pend != NULL) *pt->pend = '\0'; if ((pt->pstr = strdup(arcn->name)) == NULL) { paxwarn(1, "Pattern select out of memory"); if (pt->pend != NULL) *pt->pend = '/'; pt->pend = NULL; return(-1); } /* * put the trailing / back in the source string */ if (pt->pend != NULL) { *pt->pend = '/'; pt->pend = NULL; } pt->plen = strlen(pt->pstr); /* * strip off any trailing /, this should really never happen */ len = pt->plen - 1; if (*(pt->pstr + len) == '/') { *(pt->pstr + len) = '\0'; pt->plen = len; } pt->flgs = DIR_MTCH | MTCH; arcn->pat = pt; return(0); } /* * we are then done with this pattern, so we delete it from the list * because it can never be used for another match. * Seems kind of strange to do for a -c, but the pax spec is really * vague on the interaction of -c -n and -d. We assume that when -c * and the pattern rejects a member (i.e. it matched it) it is done. * In effect we place the order of the flags as having -c last. */ pt = pathead; ppt = &pathead; while ((pt != NULL) && (pt != arcn->pat)) { ppt = &(pt->fow); pt = pt->fow; } if (pt == NULL) { /* * should never happen.... */ paxwarn(1, "Pattern list inconsistant"); return(-1); } *ppt = pt->fow; - (void)free((char *)pt); + free((char *)pt); arcn->pat = NULL; return(0); } /* * pat_match() * see if this archive member matches any supplied pattern, if a match * is found, arcn->pat is set to point at the potential pattern. Later if * this archive member is "selected" we process and mark the pattern as * one which matched a selected archive member (see pat_sel()) * Return: * 0 if this archive member should be processed, 1 if it should be * skipped and -1 if we are done with all patterns (and pax should quit * looking for more members) */ int pat_match(ARCHD *arcn) { PATTERN *pt; arcn->pat = NULL; /* * if there are no more patterns and we have -n (and not -c) we are * done. 
otherwise with no patterns to match, matches all */ if (pathead == NULL) { if (nflag && !cflag) return(-1); return(0); } /* * have to search down the list one at a time looking for a match. */ pt = pathead; while (pt != NULL) { /* * check for a file name match unless we have DIR_MTCH set in * this pattern then we want a prefix match */ if (pt->flgs & DIR_MTCH) { /* * this pattern was matched before to a directory * as we must have -n set for this (but not -d). We can * only match CHILDREN of that directory so we must use * an exact prefix match (no wildcards). */ if ((arcn->name[pt->plen] == '/') && (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) break; } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) break; pt = pt->fow; } /* * return the result, remember that cflag (-c) inverts the sense of a * match */ if (pt == NULL) return(cflag ? 0 : 1); /* * We had a match, now when we invert the sense (-c) we reject this * member. However we have to tag the pattern a being successful, (in a * match, not in selecting an archive member) so we call pat_sel() here. */ arcn->pat = pt; if (!cflag) return(0); if (pat_sel(arcn) < 0) return(-1); arcn->pat = NULL; return(1); } /* * fn_match() * Return: * 0 if this archive member should be processed, 1 if it should be * skipped and -1 if we are done with all patterns (and pax should quit * looking for more members) * Note: *pend may be changed to show where the prefix ends. */ static int fn_match(char *pattern, char *string, char **pend) { char c; char test; *pend = NULL; for (;;) { switch (c = *pattern++) { case '\0': /* * Ok we found an exact match */ if (*string == '\0') return(0); /* * Check if it is a prefix match */ if ((dflag == 1) || (*string != '/')) return(-1); /* * It is a prefix match, remember where the trailing * / is located */ *pend = string; return(0); case '?': if ((test = *string++) == '\0') return (-1); break; case '*': c = *pattern; /* * Collapse multiple *'s. 
*/ while (c == '*') c = *++pattern; /* * Optimized hack for pattern with a * at the end */ if (c == '\0') return (0); /* * General case, use recursion. */ while ((test = *string) != '\0') { if (!fn_match(pattern, string, pend)) return (0); ++string; } return (-1); case '[': /* * range match */ if (((test = *string++) == '\0') || ((pattern = range_match(pattern, test)) == NULL)) return (-1); break; case '\\': default: if (c != *string++) return (-1); break; } } /* NOTREACHED */ } static char * range_match(char *pattern, int test) { char c; char c2; int negate; int ok = 0; if ((negate = (*pattern == '!')) != 0) ++pattern; while ((c = *pattern++) != ']') { /* * Illegal pattern */ if (c == '\0') return (NULL); if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && (c2 != ']')) { if ((c <= test) && (test <= c2)) ok = 1; pattern += 2; } else if (c == test) ok = 1; } return (ok == negate ? NULL : pattern); } /* * mod_name() * modify a selected file name. first attempt to apply replacement string * expressions, then apply interactive file rename. We apply replacement * string expressions to both filenames and file links (if we didn't the * links would point to the wrong place, and we could never be able to * move an archive that has a file link in it). When we rename files * interactively, we store that mapping (old name to user input name) so * if we spot any file links to the old file name in the future, we will * know exactly how to fix the file link. * Return: * 0 continue to process file, 1 skip this file, -1 pax is finished */ int mod_name(ARCHD *arcn) { int res = 0; /* * Strip off leading '/' if appropriate. * Currently, this option is only set for the tar format. 
*/ if (rmleadslash && arcn->name[0] == '/') { if (arcn->name[1] == '\0') { arcn->name[0] = '.'; } else { (void)memmove(arcn->name, &arcn->name[1], strlen(arcn->name)); arcn->nlen--; } if (rmleadslash < 2) { rmleadslash = 2; paxwarn(0, "Removing leading / from absolute path names in the archive"); } } if (rmleadslash && arcn->ln_name[0] == '/' && (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { if (arcn->ln_name[1] == '\0') { arcn->ln_name[0] = '.'; } else { (void)memmove(arcn->ln_name, &arcn->ln_name[1], strlen(arcn->ln_name)); arcn->ln_nlen--; } if (rmleadslash < 2) { rmleadslash = 2; paxwarn(0, "Removing leading / from absolute path names in the archive"); } } /* * IMPORTANT: We have a problem. what do we do with symlinks? * Modifying a hard link name makes sense, as we know the file it * points at should have been seen already in the archive (and if it * wasn't seen because of a read error or a bad archive, we lose * anyway). But there are no such requirements for symlinks. On one * hand the symlink that refers to a file in the archive will have to * be modified to so it will still work at its new location in the * file system. On the other hand a symlink that points elsewhere (and * should continue to do so) should not be modified. There is clearly * no perfect solution here. So we handle them like hardlinks. Clearly * a replacement made by the interactive rename mapping is very likely * to be correct since it applies to a single file and is an exact * match. The regular expression replacements are a little harder to * justify though. We claim that the symlink name is only likely * to be replaced when it points within the file tree being moved and * in that case it should be modified. what we really need to do is to * call an oracle here. :) */ if (rephead != NULL) { /* * we have replacement strings, modify the name and the link * name if any. 
*/ if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0) return(res); if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) && ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0)) return(res); } if (iflag) { /* * perform interactive file rename, then map the link if any */ if ((res = tty_rename(arcn)) != 0) return(res); if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); } return(res); } /* * tty_rename() * Prompt the user for a replacement file name. A "." keeps the old name, * a empty line skips the file, and an EOF on reading the tty, will cause * pax to stop processing and exit. Otherwise the file name input, replaces * the old one. * Return: * 0 process this file, 1 skip this file, -1 we need to exit pax */ static int tty_rename(ARCHD *arcn) { char tmpname[PAXPATHLEN+2]; int res; /* * prompt user for the replacement name for a file, keep trying until * we get some reasonable input. Archives may have more than one file * on them with the same name (from updates etc). We print verbose info * on the file so the user knows what is up. */ tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); for (;;) { ls_tty(arcn); tty_prnt("Input new name, or a \".\" to keep the old name, "); tty_prnt("or a \"return\" to skip this file.\n"); tty_prnt("Input > "); if (tty_read(tmpname, sizeof(tmpname)) < 0) return(-1); if (strcmp(tmpname, "..") == 0) { tty_prnt("Try again, illegal file name: ..\n"); continue; } if (strlen(tmpname) > PAXPATHLEN) { tty_prnt("Try again, file name too long\n"); continue; } break; } /* * empty file name, skips this file. a "." leaves it alone */ if (tmpname[0] == '\0') { tty_prnt("Skipping file.\n"); return(1); } if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { tty_prnt("Processing continues, name unchanged.\n"); return(0); } /* * ok the name changed. 
We may run into links that point at this * file later. we have to remember where the user sent the file * in order to repair any links. */ tty_prnt("Processing continues, name changed to: %s\n", tmpname); res = add_name(arcn->name, arcn->nlen, tmpname); arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1); arcn->name[arcn->nlen] = '\0'; if (res < 0) return(-1); return(0); } /* * set_dest() * fix up the file name and the link name (if any) so this file will land * in the destination directory (used during copy() -rw). * Return: * 0 if ok, -1 if failure (name too long) */ int set_dest(ARCHD *arcn, char *dest_dir, int dir_len) { if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) return(-1); /* * It is really hard to deal with symlinks here, we cannot be sure * if the name they point was moved (or will be moved). It is best to * leave them alone. */ if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) return(0); if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) return(-1); return(0); } /* * fix_path * concatenate dir_name and or_name and store the result in or_name (if * it fits). This is one ugly function. * Return: * 0 if ok, -1 if the final name is too long */ static int fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) { char *src; char *dest; char *start; int len; /* * we shift the or_name to the right enough to tack in the dir_name * at the front. We make sure we have enough space for it all before * we start. since dest always ends in a slash, we skip of or_name * if it also starts with one. 
*/ start = or_name; src = start + *or_len; dest = src + dir_len; if (*start == '/') { ++start; --dest; } if ((len = dest - or_name) > PAXPATHLEN) { paxwarn(1, "File name %s/%s, too long", dir_name, start); return(-1); } *or_len = len; /* * enough space, shift */ while (src >= start) *dest-- = *src--; src = dir_name + dir_len - 1; /* * splice in the destination directory name */ while (src >= dir_name) *dest-- = *src--; *(or_name + len) = '\0'; return(0); } /* * rep_name() * walk down the list of replacement strings applying each one in order. * when we find one with a successful substitution, we modify the name * as specified. if required, we print the results. if the resulting name * is empty, we will skip this archive member. We use the regexp(3) * routines (regexp() ought to win a prize as having the most cryptic * library function manual page). * --Parameters-- * name is the file name we are going to apply the regular expressions to * (and may be modified) * nlen is the length of this name (and is modified to hold the length of * the final string). * prnt is a flag that says whether to print the final result. * Return: * 0 if substitution was successful, 1 if we are to skip the file (the name * ended up empty) */ static int rep_name(char *name, int *nlen, int prnt) { REPLACE *pt; char *inpt; char *outpt; char *endpt; char *rpt; int found = 0; int res; # ifndef NET2_REGEX regmatch_t pm[MAXSUBEXP]; # endif char nname[PAXPATHLEN+1]; /* final result of all replacements */ char buf1[PAXPATHLEN+1]; /* where we work on the name */ /* * copy the name into buf1, where we will work on it. We need to keep * the orig string around so we can print out the result of the final * replacement. We build up the final result in nname. inpt points at * the string we apply the regular expression to. 
prnt is used to * suppress printing when we handle replacements on the link field * (the user already saw that substitution go by) */ pt = rephead; (void)strcpy(buf1, name); inpt = buf1; outpt = nname; endpt = outpt + PAXPATHLEN; /* * try each replacement string in order */ while (pt != NULL) { do { /* * check for a successful substitution, if not go to * the next pattern, or cleanup if we were global */ # ifdef NET2_REGEX if (regexec(pt->rcmp, inpt) == 0) # else if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) # endif break; /* * ok we found one. We have three parts, the prefix * which did not match, the section that did and the * tail (that also did not match). Copy the prefix to * the final output buffer (watching to make sure we * do not create a string too long). */ found = 1; # ifdef NET2_REGEX rpt = pt->rcmp->startp[0]; # else rpt = inpt + pm[0].rm_so; # endif while ((inpt < rpt) && (outpt < endpt)) *outpt++ = *inpt++; if (outpt == endpt) break; /* * for the second part (which matched the regular * expression) apply the substitution using the * replacement string and place it the prefix in the * final output. If we have problems, skip it. */ # ifdef NET2_REGEX if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) { # else if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt)) < 0) { # endif if (prnt) paxwarn(1, "Replacement name error %s", name); return(1); } outpt += res; /* * we set up to look again starting at the first * character in the tail (of the input string right * after the last character matched by the regular * expression (inpt always points at the first char in * the string to process). If we are not doing a global * substitution, we will use inpt to copy the tail to * the final result. 
Make sure we do not overrun the * output buffer */ # ifdef NET2_REGEX inpt = pt->rcmp->endp[0]; # else inpt += pm[0].rm_eo - pm[0].rm_so; # endif if ((outpt == endpt) || (*inpt == '\0')) break; /* * if the user wants global we keep trying to * substitute until it fails, then we are done. */ } while (pt->flgs & GLOB); if (found) break; /* * a successful substitution did NOT occur, try the next one */ pt = pt->fow; } if (found) { /* * we had a substitution, copy the last tail piece (if there is * room) to the final result */ while ((outpt < endpt) && (*inpt != '\0')) *outpt++ = *inpt++; *outpt = '\0'; if ((outpt == endpt) && (*inpt != '\0')) { if (prnt) paxwarn(1,"Replacement name too long %s >> %s", name, nname); return(1); } /* * inform the user of the result if wanted */ if (prnt && (pt->flgs & PRNT)) { if (*nname == '\0') (void)fprintf(stderr,"%s >> \n", name); else (void)fprintf(stderr,"%s >> %s\n", name, nname); } /* * if empty inform the caller this file is to be skipped * otherwise copy the new name over the orig name and return */ if (*nname == '\0') return(1); *nlen = l_strncpy(name, nname, PAXPATHLEN + 1); name[PAXPATHLEN] = '\0'; } return(0); } #ifdef NET2_REGEX /* * resub() * apply the replacement to the matched expression. expand out the old * style ed(1) subexpression expansion. * Return: * -1 if error, or the number of characters added to the destination. */ static int resub(regexp *prog, char *src, char *dest, char *destend) { char *spt; char *dpt; char c; int no; int len; spt = src; dpt = dest; while ((dpt < destend) && ((c = *spt++) != '\0')) { if (c == '&') no = 0; else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) no = *spt++ - '0'; else { if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) c = *spt++; *dpt++ = c; continue; } if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) || ((len = prog->endp[no] - prog->startp[no]) <= 0)) continue; /* * copy the subexpression to the destination. 
* fail if we run out of space or the match string is damaged */ if (len > (destend - dpt)) len = destend - dpt; if (l_strncpy(dpt, prog->startp[no], len) != len) return(-1); dpt += len; } return(dpt - dest); } #else /* * resub() * apply the replacement to the matched expression. expand out the old * style ed(1) subexpression expansion. * Return: * -1 if error, or the number of characters added to the destination. */ static int resub(regex_t *rp, regmatch_t *pm, char *src, char *dest, char *destend) { char *spt; char *dpt; char c; regmatch_t *pmpt; int len; int subexcnt; spt = src; dpt = dest; subexcnt = rp->re_nsub; while ((dpt < destend) && ((c = *spt++) != '\0')) { /* * see if we just have an ordinary replacement character * or we refer to a subexpression. */ if (c == '&') { pmpt = pm; } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) { /* * make sure there is a subexpression as specified */ if ((len = *spt++ - '0') > subexcnt) return(-1); pmpt = pm + len; } else { /* * Ordinary character, just copy it */ if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) c = *spt++; *dpt++ = c; continue; } /* * continue if the subexpression is bogus */ if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) continue; /* * copy the subexpression to the destination. * fail if we run out of space or the match string is damaged */ if (len > (destend - dpt)) len = destend - dpt; if (l_strncpy(dpt, src + pmpt->rm_so, len) != len) return(-1); dpt += len; } return(dpt - dest); } #endif Index: head/bin/pax/sel_subs.c =================================================================== --- head/bin/pax/sel_subs.c (revision 169925) +++ head/bin/pax/sel_subs.c (revision 169926) @@ -1,606 +1,606 @@ /*- * Copyright (c) 1992 Keith Muller. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Keith Muller of the University of California, San Diego. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint #if 0 static char sccsid[] = "@(#)sel_subs.c 8.1 (Berkeley) 5/31/93"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "pax.h" #include "sel_subs.h" #include "extern.h" static int str_sec(char *, time_t *); static int usr_match(ARCHD *); static int grp_match(ARCHD *); static int trng_match(ARCHD *); static TIME_RNG *trhead = NULL; /* time range list head */ static TIME_RNG *trtail = NULL; /* time range list tail */ static USRT **usrtb = NULL; /* user selection table */ static GRPT **grptb = NULL; /* group selection table */ /* * Routines for selection of archive members */ /* * sel_chk() * check if this file matches a specified uid, gid or time range * Return: * 0 if this archive member should be processed, 1 if it should be skipped */ int sel_chk(ARCHD *arcn) { if (((usrtb != NULL) && usr_match(arcn)) || ((grptb != NULL) && grp_match(arcn)) || ((trhead != NULL) && trng_match(arcn))) return(1); return(0); } /* * User/group selection routines * * Routines to handle user selection of files based on the file uid/gid. To * add an entry, the user supplies either then name or the uid/gid starting with * a # on the command line. A \# will escape the #. 
*/ /* * usr_add() * add a user match to the user match hash table * Return: * 0 if added ok, -1 otherwise; */ int usr_add(char *str) { u_int indx; USRT *pt; struct passwd *pw; uid_t uid; /* * create the table if it doesn't exist */ if ((str == NULL) || (*str == '\0')) return(-1); if ((usrtb == NULL) && ((usrtb = (USRT **)calloc(USR_TB_SZ, sizeof(USRT *))) == NULL)) { paxwarn(1, "Unable to allocate memory for user selection table"); return(-1); } /* * figure out user spec */ if (str[0] != '#') { /* * it is a user name, \# escapes # as first char in user name */ if ((str[0] == '\\') && (str[1] == '#')) ++str; if ((pw = getpwnam(str)) == NULL) { paxwarn(1, "Unable to find uid for user: %s", str); return(-1); } uid = (uid_t)pw->pw_uid; } else # ifdef NET2_STAT uid = (uid_t)atoi(str+1); # else uid = (uid_t)strtoul(str+1, NULL, 10); # endif endpwent(); /* * hash it and go down the hash chain (if any) looking for it */ indx = ((unsigned)uid) % USR_TB_SZ; if ((pt = usrtb[indx]) != NULL) { while (pt != NULL) { if (pt->uid == uid) return(0); pt = pt->fow; } } /* * uid is not yet in the table, add it to the front of the chain */ if ((pt = (USRT *)malloc(sizeof(USRT))) != NULL) { pt->uid = uid; pt->fow = usrtb[indx]; usrtb[indx] = pt; return(0); } paxwarn(1, "User selection table out of memory"); return(-1); } /* * usr_match() * check if this files uid matches a selected uid. 
* Return: * 0 if this archive member should be processed, 1 if it should be skipped */ static int usr_match(ARCHD *arcn) { USRT *pt; /* * hash and look for it in the table */ pt = usrtb[((unsigned)arcn->sb.st_uid) % USR_TB_SZ]; while (pt != NULL) { if (pt->uid == arcn->sb.st_uid) return(0); pt = pt->fow; } /* * not found */ return(1); } /* * grp_add() * add a group match to the group match hash table * Return: * 0 if added ok, -1 otherwise; */ int grp_add(char *str) { u_int indx; GRPT *pt; struct group *gr; gid_t gid; /* * create the table if it doesn't exist */ if ((str == NULL) || (*str == '\0')) return(-1); if ((grptb == NULL) && ((grptb = (GRPT **)calloc(GRP_TB_SZ, sizeof(GRPT *))) == NULL)) { paxwarn(1, "Unable to allocate memory fo group selection table"); return(-1); } /* * figure out user spec */ if (str[0] != '#') { /* * it is a group name, \# escapes # as first char in group name */ if ((str[0] == '\\') && (str[1] == '#')) ++str; if ((gr = getgrnam(str)) == NULL) { paxwarn(1,"Cannot determine gid for group name: %s", str); return(-1); } gid = gr->gr_gid; } else # ifdef NET2_STAT gid = (gid_t)atoi(str+1); # else gid = (gid_t)strtoul(str+1, NULL, 10); # endif endgrent(); /* * hash it and go down the hash chain (if any) looking for it */ indx = ((unsigned)gid) % GRP_TB_SZ; if ((pt = grptb[indx]) != NULL) { while (pt != NULL) { if (pt->gid == gid) return(0); pt = pt->fow; } } /* * gid not in the table, add it to the front of the chain */ if ((pt = (GRPT *)malloc(sizeof(GRPT))) != NULL) { pt->gid = gid; pt->fow = grptb[indx]; grptb[indx] = pt; return(0); } paxwarn(1, "Group selection table out of memory"); return(-1); } /* * grp_match() * check if this files gid matches a selected gid. 
* Return: * 0 if this archive member should be processed, 1 if it should be skipped */ static int grp_match(ARCHD *arcn) { GRPT *pt; /* * hash and look for it in the table */ pt = grptb[((unsigned)arcn->sb.st_gid) % GRP_TB_SZ]; while (pt != NULL) { if (pt->gid == arcn->sb.st_gid) return(0); pt = pt->fow; } /* * not found */ return(1); } /* * Time range selection routines * * Routines to handle user selection of files based on the modification and/or * inode change time falling within a specified time range (the non-standard * -T flag). The user may specify any number of different file time ranges. * Time ranges are checked one at a time until a match is found (if at all). * If the file has a mtime (and/or ctime) which lies within one of the time * ranges, the file is selected. Time ranges may have a lower and/or an upper * value. These ranges are inclusive. When no time ranges are supplied to pax * with the -T option, all members in the archive will be selected by the time * range routines. When only a lower range is supplied, only files with a * mtime (and/or ctime) equal to or younger are selected. When only an upper * range is supplied, only files with a mtime (and/or ctime) equal to or older * are selected. When the lower time range is equal to the upper time range, * only files with a mtime (or ctime) of exactly that time are selected. */ /* * trng_add() * add a time range match to the time range list. * This is a non-standard pax option. Lower and upper ranges are in the * format: [yy[mm[dd[hh]]]]mm[.ss] and are comma separated. * Time ranges are based on current time, so 1234 would specify a time of * 12:34 today. * Return: * 0 if the time range was added to the list, -1 otherwise */ int trng_add(char *str) { TIME_RNG *pt; char *up_pt = NULL; char *stpt; char *flgpt; int dot = 0; /* * throw out the badly formed time ranges */ if ((str == NULL) || (*str == '\0')) { paxwarn(1, "Empty time range string"); return(-1); } /* * locate optional flags suffix /{cm}. 
*/ if ((flgpt = strrchr(str, '/')) != NULL) *flgpt++ = '\0'; for (stpt = str; *stpt != '\0'; ++stpt) { if ((*stpt >= '0') && (*stpt <= '9')) continue; if ((*stpt == ',') && (up_pt == NULL)) { *stpt = '\0'; up_pt = stpt + 1; dot = 0; continue; } /* * allow only one dot per range (secs) */ if ((*stpt == '.') && (!dot)) { ++dot; continue; } paxwarn(1, "Improperly specified time range: %s", str); goto out; } /* * allocate space for the time range and store the limits */ if ((pt = (TIME_RNG *)malloc(sizeof(TIME_RNG))) == NULL) { paxwarn(1, "Unable to allocate memory for time range"); return(-1); } /* * by default we only will check file mtime, but usee can specify * mtime, ctime (inode change time) or both. */ if ((flgpt == NULL) || (*flgpt == '\0')) pt->flgs = CMPMTME; else { pt->flgs = 0; while (*flgpt != '\0') { switch(*flgpt) { case 'M': case 'm': pt->flgs |= CMPMTME; break; case 'C': case 'c': pt->flgs |= CMPCTME; break; default: paxwarn(1, "Bad option %c with time range %s", *flgpt, str); goto out; } ++flgpt; } } /* * start off with the current time */ pt->low_time = pt->high_time = time(NULL); if (*str != '\0') { /* * add lower limit */ if (str_sec(str, &(pt->low_time)) < 0) { paxwarn(1, "Illegal lower time range %s", str); - (void)free((char *)pt); + free((char *)pt); goto out; } pt->flgs |= HASLOW; } if ((up_pt != NULL) && (*up_pt != '\0')) { /* * add upper limit */ if (str_sec(up_pt, &(pt->high_time)) < 0) { paxwarn(1, "Illegal upper time range %s", up_pt); - (void)free((char *)pt); + free((char *)pt); goto out; } pt->flgs |= HASHIGH; /* * check that the upper and lower do not overlap */ if (pt->flgs & HASLOW) { if (pt->low_time > pt->high_time) { paxwarn(1, "Upper %s and lower %s time overlap", up_pt, str); - (void)free((char *)pt); + free((char *)pt); return(-1); } } } pt->fow = NULL; if (trhead == NULL) { trtail = trhead = pt; return(0); } trtail->fow = pt; trtail = pt; return(0); out: paxwarn(1, "Time range format is: [yy[mm[dd[hh]]]]mm[.ss][/[c][m]]"); 
return(-1); } /* * trng_match() * check if this files mtime/ctime falls within any supplied time range. * Return: * 0 if this archive member should be processed, 1 if it should be skipped */ static int trng_match(ARCHD *arcn) { TIME_RNG *pt; /* * have to search down the list one at a time looking for a match. * remember time range limits are inclusive. */ pt = trhead; while (pt != NULL) { switch(pt->flgs & CMPBOTH) { case CMPBOTH: /* * user wants both mtime and ctime checked for this * time range */ if (((pt->flgs & HASLOW) && (arcn->sb.st_mtime < pt->low_time) && (arcn->sb.st_ctime < pt->low_time)) || ((pt->flgs & HASHIGH) && (arcn->sb.st_mtime > pt->high_time) && (arcn->sb.st_ctime > pt->high_time))) { pt = pt->fow; continue; } break; case CMPCTME: /* * user wants only ctime checked for this time range */ if (((pt->flgs & HASLOW) && (arcn->sb.st_ctime < pt->low_time)) || ((pt->flgs & HASHIGH) && (arcn->sb.st_ctime > pt->high_time))) { pt = pt->fow; continue; } break; case CMPMTME: default: /* * user wants only mtime checked for this time range */ if (((pt->flgs & HASLOW) && (arcn->sb.st_mtime < pt->low_time)) || ((pt->flgs & HASHIGH) && (arcn->sb.st_mtime > pt->high_time))) { pt = pt->fow; continue; } break; } break; } if (pt == NULL) return(1); return(0); } /* * str_sec() * Convert a time string in the format of [yy[mm[dd[hh]]]]mm[.ss] to gmt * seconds. Tval already has current time loaded into it at entry. 
* Return: * 0 if converted ok, -1 otherwise */ static int str_sec(char *str, time_t *tval) { struct tm *lt; char *dot = NULL; lt = localtime(tval); if ((dot = strchr(str, '.')) != NULL) { /* * seconds (.ss) */ *dot++ = '\0'; if (strlen(dot) != 2) return(-1); if ((lt->tm_sec = ATOI2(dot)) > 61) return(-1); } else lt->tm_sec = 0; switch (strlen(str)) { case 10: /* * year (yy) * watch out for year 2000 */ if ((lt->tm_year = ATOI2(str)) < 69) lt->tm_year += 100; str += 2; /* FALLTHROUGH */ case 8: /* * month (mm) * watch out months are from 0 - 11 internally */ if ((lt->tm_mon = ATOI2(str)) > 12) return(-1); --lt->tm_mon; str += 2; /* FALLTHROUGH */ case 6: /* * day (dd) */ if ((lt->tm_mday = ATOI2(str)) > 31) return(-1); str += 2; /* FALLTHROUGH */ case 4: /* * hour (hh) */ if ((lt->tm_hour = ATOI2(str)) > 23) return(-1); str += 2; /* FALLTHROUGH */ case 2: /* * minute (mm) */ if ((lt->tm_min = ATOI2(str)) > 59) return(-1); break; default: return(-1); } /* * convert broken-down time to GMT clock time seconds */ if ((*tval = mktime(lt)) == -1) return(-1); return(0); } Index: head/bin/pax/tables.c =================================================================== --- head/bin/pax/tables.c (revision 169925) +++ head/bin/pax/tables.c (revision 169926) @@ -1,1286 +1,1286 @@ /*- * Copyright (c) 1992 Keith Muller. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Keith Muller of the University of California, San Diego. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)tables.c 8.1 (Berkeley) 5/31/93"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "pax.h" #include "tables.h" #include "extern.h" /* * Routines for controlling the contents of all the different databases pax * keeps. Tables are dynamically created only when they are needed. The * goal was speed and the ability to work with HUGE archives. The databases * were kept simple, but do have complex rules for when the contents change. * As of this writing, the POSIX library functions were more complex than * needed for this application (pax databases have very short lifetimes and * do not survive after pax is finished). Pax is required to handle very * large archives. 
These database routines carefully combine memory usage and * temporary file storage in ways which will not significantly impact runtime * performance while allowing the largest possible archives to be handled. * Trying to force the fit to the POSIX databases routines was not considered * time well spent. */ static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */ static FTM **ftab = NULL; /* file time table for updating arch */ static NAMT **ntab = NULL; /* interactive rename storage table */ static DEVT **dtab = NULL; /* device/inode mapping tables */ static ATDIR **atab = NULL; /* file tree directory time reset table */ static int dirfd = -1; /* storage for setting created dir time/mode */ static u_long dircnt; /* entries in dir time/mode storage */ static int ffd = -1; /* tmp file for file time table name storage */ static DEVT *chk_dev(dev_t, int); /* * hard link table routines * * The hard link table tries to detect hard links to files using the device and * inode values. We do this when writing an archive, so we can tell the format * write routine that this file is a hard link to another file. The format * write routine then can store this file in whatever way it wants (as a hard * link if the format supports that like tar, or ignore this info like cpio). * (Actually a field in the format driver table tells us if the format wants * hard link info. if not, we do not waste time looking for them). We also use * the same table when reading an archive. In that situation, this table is * used by the format read routine to detect hard links from stored dev and * inode numbers (like cpio). This will allow pax to create a link when one * can be detected by the archive format. */ /* * lnk_start * Creates the hard link table. 
* Return:
 *	0 if created, -1 if failure
 */

int
lnk_start(void)
{
	/* the table is created once and reused afterwards */
	if (ltab != NULL)
		return(0);
	if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) {
		paxwarn(1, "Cannot allocate memory for hard link table");
		return(-1);
	}
	return(0);
}

/*
 * chk_lnk()
 *	Looks up entry in hard link hash table. If found, it copies the name
 *	of the file it is linked to (we already saw that file) into ln_name.
 *	lnkcnt is decremented and if goes to 1 the node is deleted from the
 *	database. (We have seen all the links to this file). If not found,
 *	we add the file to the database if it has the potential for having
 *	hard links to other files we may process (it has a link count > 1)
 * Return:
 *	if found returns 1; if not found returns 0; -1 on error
 */

int
chk_lnk(ARCHD *arcn)
{
	HRDLNK *pt;
	HRDLNK **ppt;
	u_int indx;

	if (ltab == NULL)
		return(-1);
	/*
	 * ignore those nodes that cannot have hard links
	 */
	if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1))
		return(0);

	/*
	 * hash inode number and look for this file
	 */
	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
	if ((pt = ltab[indx]) != NULL) {
		/*
		 * its hash chain is not empty, walk down looking for it.
		 * ppt tracks the link that points at pt so the node can be
		 * unlinked without a second pass.
		 */
		ppt = &(ltab[indx]);
		while (pt != NULL) {
			if ((pt->ino == arcn->sb.st_ino) &&
			    (pt->dev == arcn->sb.st_dev))
				break;
			ppt = &(pt->fow);
			pt = pt->fow;
		}

		if (pt != NULL) {
			/*
			 * found a link. set the node type and copy in the
			 * name of the file it is to link to. we need to
			 * handle hardlinks to regular files differently than
			 * other links.
			 */
			arcn->ln_nlen = l_strncpy(arcn->ln_name, pt->name,
				sizeof(arcn->ln_name) - 1);
			arcn->ln_name[arcn->ln_nlen] = '\0';
			if (arcn->type == PAX_REG)
				arcn->type = PAX_HRG;
			else
				arcn->type = PAX_HLK;

			/*
			 * if we have found all the links to this file, remove
			 * it from the database. Each object is released
			 * exactly once.
			 */
			if (--pt->nlink <= 1) {
				*ppt = pt->fow;
				free(pt->name);
				free(pt);
			}
			return(1);
		}
	}

	/*
	 * we never saw this file before. It has links so we add it to the
	 * front of this hash chain
	 */
	if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) {
		if ((pt->name = strdup(arcn->name)) != NULL) {
			pt->dev = arcn->sb.st_dev;
			pt->ino = arcn->sb.st_ino;
			pt->nlink = arcn->sb.st_nlink;
			pt->fow = ltab[indx];
			ltab[indx] = pt;
			return(0);
		}
		/* name copy failed, give the half built node back */
		free(pt);
	}

	paxwarn(1, "Hard link table out of memory");
	return(-1);
}

/*
 * purg_lnk
 *	remove reference for a file that we may have added to the data base as
 *	a potential source for hard links. We ended up not using the file, so
 *	we do not want to accidentally point another file at it later on.
 */

void
purg_lnk(ARCHD *arcn)
{
	HRDLNK *pt;
	HRDLNK **ppt;
	u_int indx;

	if (ltab == NULL)
		return;
	/*
	 * do not bother to look if it could not be in the database
	 */
	if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) ||
	    (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG))
		return;

	/*
	 * find the hash chain for this inode value, if empty return
	 */
	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
	if ((pt = ltab[indx]) == NULL)
		return;

	/*
	 * walk down the list looking for the inode/dev pair, unlink and
	 * free if found
	 */
	ppt = &(ltab[indx]);
	while (pt != NULL) {
		if ((pt->ino == arcn->sb.st_ino) &&
		    (pt->dev == arcn->sb.st_dev))
			break;
		ppt = &(pt->fow);
		pt = pt->fow;
	}
	if (pt == NULL)
		return;

	/*
	 * remove and free it (name first, then the node that owns it)
	 */
	*ppt = pt->fow;
	free(pt->name);
	free(pt);
}

/*
 * lnk_end()
 *	Pull apart an existing link table so we can reuse it. We do this between
 *	read and write phases of append with update. (The format may have
 *	used the link table, and we need to start with a fresh table for the
 *	write phase).
*/

void
lnk_end(void)
{
	int i;
	HRDLNK *pt;
	HRDLNK *ppt;

	if (ltab == NULL)
		return;

	/*
	 * empty every bucket; the bucket array itself is kept so the table
	 * can be refilled later
	 */
	for (i = 0; i < L_TAB_SZ; ++i) {
		if (ltab[i] == NULL)
			continue;
		pt = ltab[i];
		ltab[i] = NULL;

		/*
		 * free up each entry on this chain. The successor is saved
		 * before the node is released, and each object is freed
		 * exactly once.
		 */
		while (pt != NULL) {
			ppt = pt;
			pt = ppt->fow;
			free(ppt->name);
			free(ppt);
		}
	}
	return;
}

/*
 * modification time table routines
 *
 * The modification time table keeps track of last modification times for all
 * files stored in an archive during a write phase when -u is set. We only
 * add a file to the archive if it is newer than a file with the same name
 * already stored on the archive (if there is no other file with the same
 * name on the archive it is added). This applies to writes and appends.
 * An append with an -u must read the archive and store the modification time
 * for every file on that archive before starting the write phase. It is clear
 * that this is one HUGE database. To save memory space, the actual file names
 * are stored in a scratch file and indexed by an in memory hash table. The
 * hash table is indexed by hashing the file path. The nodes in the table store
 * the length of the filename and the lseek offset within the scratch file
 * where the actual name is stored. Since there are never any deletions to this
 * table, fragmentation of the scratch file is never an issue. Lookups seem to
 * not exhibit any locality at all (files in the database are rarely
 * looked up more than once...). So caching is just a waste of memory. The
 * only limitation is the amount of scratch file space available to store the
 * path names.
 */

/*
 * ftime_start()
 *	create the file time hash table and open for read/write the scratch
 *	file. (after created it is unlinked, so when we exit we leave
 *	no witnesses).
* Return:
 *	0 if the table and file was created ok, -1 otherwise
 */

int
ftime_start(void)
{

	if (ftab != NULL)
		return(0);
	if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) {
		paxwarn(1, "Cannot allocate memory for file time table");
		return(-1);
	}

	/*
	 * get random name and create temporary scratch file, unlink name
	 * so it will get removed on exit
	 */
	memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
	if ((ffd = mkstemp(tempfile)) < 0) {
		syswarn(1, errno, "Unable to create temporary file: %s",
		    tempfile);
		return(-1);
	}
	(void)unlink(tempfile);

	return(0);
}

/*
 * chk_ftime()
 *	looks up entry in file time hash table. If not found, the file is
 *	added to the hash table and the file named stored in the scratch file.
 *	If a file with the same name is found, the file times are compared and
 *	the most recent file time is retained. If the new file was younger (or
 *	was not in the database) the new file is selected for storage.
 * Return:
 *	0 if file should be added to the archive, 1 if it should be skipped,
 *	-1 on error
 */

int
chk_ftime(ARCHD *arcn)
{
	FTM *pt;
	int namelen;
	u_int indx;
	char ckname[PAXPATHLEN+1];

	/*
	 * no info, go ahead and add to archive
	 */
	if (ftab == NULL)
		return(0);

	/*
	 * hash the pathname and look up in table
	 */
	namelen = arcn->nlen;
	indx = st_hash(arcn->name, namelen, F_TAB_SZ);
	if ((pt = ftab[indx]) != NULL) {
		/*
		 * the hash chain is not empty, walk down looking for match
		 * only read up the path names if the lengths match, speeds
		 * up the search a lot
		 */
		while (pt != NULL) {
			if (pt->namelen == namelen) {
				/*
				 * potential match, have to read the name
				 * from the scratch file.
				 */
				if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) {
					syswarn(1, errno,
					    "Failed ftime table seek");
					return(-1);
				}
				if (read(ffd, ckname, namelen) != namelen) {
					syswarn(1, errno,
					    "Failed ftime table read");
					return(-1);
				}

				/*
				 * if the names match, we are done
				 */
				if (!strncmp(ckname, arcn->name, namelen))
					break;
			}

			/*
			 * try the next entry on the chain
			 */
			pt = pt->fow;
		}

		if (pt != NULL) {
			/*
			 * found the file, compare the times, save the newer
			 */
			if (arcn->sb.st_mtime > pt->mtime) {
				/*
				 * file is newer
				 */
				pt->mtime = arcn->sb.st_mtime;
				return(0);
			}
			/*
			 * file is older
			 */
			return(1);
		}
	}

	/*
	 * not in table, add it
	 */
	if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) {
		/*
		 * add the name at the end of the scratch file, saving the
		 * offset. add the file to the head of the hash chain
		 */
		if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) {
			if (write(ffd, arcn->name, namelen) == namelen) {
				pt->mtime = arcn->sb.st_mtime;
				pt->namelen = namelen;
				pt->fow = ftab[indx];
				ftab[indx] = pt;
				return(0);
			}
			syswarn(1, errno, "Failed write to file time table");
		} else
			syswarn(1, errno, "Failed seek on file time table");
	} else
		paxwarn(1, "File time table ran out of memory");

	/*
	 * every path that reaches here failed; release the node once
	 * (free(NULL) is a no-op, so the allocation failure case is fine)
	 */
	free(pt);
	return(-1);
}

/*
 * Interactive rename table routines
 *
 * The interactive rename table keeps track of the new names that the user
 * assigns to files from tty input. Since this map is unique for each file
 * we must store it in case there is a reference to the file later in archive
 * (a link). Otherwise we will be unable to find the file we know was
 * extracted. The remapping of these files is stored in a memory based hash
 * table (it is assumed since input must come from /dev/tty, it is unlikely to
 * be a very large table).
*/ /* * name_start() * create the interactive rename table * Return: * 0 if successful, -1 otherwise */ int name_start(void) { if (ntab != NULL) return(0); if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) { paxwarn(1, "Cannot allocate memory for interactive rename table"); return(-1); } return(0); } /* * add_name() * add the new name to old name mapping just created by the user. * If an old name mapping is found (there may be duplicate names on an * archive) only the most recent is kept. * Return: * 0 if added, -1 otherwise */ int add_name(char *oname, int onamelen, char *nname) { NAMT *pt; u_int indx; if (ntab == NULL) { /* * should never happen */ paxwarn(0, "No interactive rename table, links may fail\n"); return(0); } /* * look to see if we have already mapped this file, if so we * will update it */ indx = st_hash(oname, onamelen, N_TAB_SZ); if ((pt = ntab[indx]) != NULL) { /* * look down the has chain for the file */ while ((pt != NULL) && (strcmp(oname, pt->oname) != 0)) pt = pt->fow; if (pt != NULL) { /* * found an old mapping, replace it with the new one * the user just input (if it is different) */ if (strcmp(nname, pt->nname) == 0) return(0); - (void)free((char *)pt->nname); + free((char *)pt->nname); if ((pt->nname = strdup(nname)) == NULL) { paxwarn(1, "Cannot update rename table"); return(-1); } return(0); } } /* * this is a new mapping, add it to the table */ if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) { if ((pt->oname = strdup(oname)) != NULL) { if ((pt->nname = strdup(nname)) != NULL) { pt->fow = ntab[indx]; ntab[indx] = pt; return(0); } - (void)free((char *)pt->oname); + free((char *)pt->oname); } - (void)free((char *)pt); + free((char *)pt); } paxwarn(1, "Interactive rename table out of memory"); return(-1); } /* * sub_name() * look up a link name to see if it points at a file that has been * remapped by the user. 
If found, the link is adjusted to contain the
 *	new name (oname is the link to name)
 */

void
sub_name(char *oname, int *onamelen, size_t onamesize)
{
	NAMT *ent;

	/* nothing has been renamed if the table was never created */
	if (ntab == NULL)
		return;

	/*
	 * scan the chain this name hashes to; an empty chain simply ends
	 * the loop before it starts
	 */
	for (ent = ntab[st_hash(oname, *onamelen, N_TAB_SZ)]; ent != NULL;
	    ent = ent->fow) {
		if (strcmp(oname, ent->oname) != 0)
			continue;

		/*
		 * this name was remapped: overwrite it in place with the
		 * replacement, bounded by the caller supplied buffer size,
		 * and report the new length
		 */
		*onamelen = l_strncpy(oname, ent->nname, onamesize - 1);
		oname[*onamelen] = '\0';
		return;
	}

	/*
	 * no match, oname is left untouched
	 */
}

/*
 * device/inode mapping table routines
 * (used with formats that store device and inodes fields)
 *
 * device/inode mapping tables remap the device field in an archive header. The
 * device/inode fields are used to determine when files are hard links to each
 * other. However these values have very little meaning outside of that. This
 * database is used to solve one of two different problems.
 *
 * 1) when files are appended to an archive, while the new files may have hard
 * links to each other, you cannot determine if they have hard links to any
 * file already stored on the archive from a prior run of pax. We must assume
 * that these inode/device pairs are unique only within a SINGLE run of pax
 * (which adds a set of files to an archive). So we have to make sure the
 * inode/dev pairs we add each time are always unique. We do this by observing
 * while the inode field is very dense, the use of the dev field is fairly
 * sparse. Within each run of pax, we remap any device number of a new archive
 * member that has a device number used in a prior run and already stored in a
 * file on the archive. During the read phase of the append, we store the
 * device numbers used and mark them to not be used by any file during the
 * write phase.
If during write we go to use one of those old device numbers, * we remap it to a new value. * * 2) Often the fields in the archive header used to store these values are * too small to store the entire value. The result is an inode or device value * which can be truncated. This really can foul up an archive. With truncation * we end up creating links between files that are really not links (after * truncation the inodes are the same value). We address that by detecting * truncation and forcing a remap of the device field to split truncated * inodes away from each other. Each truncation creates a pattern of bits that * are removed. We use this pattern of truncated bits to partition the inodes * on a single device to many different devices (each one represented by the * truncated bit pattern). All inodes on the same device that have the same * truncation pattern are mapped to the same new device. Two inodes that * truncate to the same value clearly will always have different truncation * bit patterns, so they will be split from away each other. When we spot * device truncation we remap the device number to a non truncated value. * (for more info see table.h for the data structures involved). */ /* * dev_start() * create the device mapping table * Return: * 0 if successful, -1 otherwise */ int dev_start(void) { if (dtab != NULL) return(0); if ((dtab = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) { paxwarn(1, "Cannot allocate memory for device mapping table"); return(-1); } return(0); } /* * add_dev() * add a device number to the table. this will force the device to be * remapped to a new value if it be used during a write phase. This * function is called during the read phase of an append to prohibit the * use of any device number already in the archive. * Return: * 0 if added ok, -1 otherwise */ int add_dev(ARCHD *arcn) { if (chk_dev(arcn->sb.st_dev, 1) == NULL) return(-1); return(0); } /* * chk_dev() * check for a device value in the device table. 
If not found and the add
 *	flag is set, it is added. This does NOT assign any mapping values, just
 *	adds the device number as one that need to be remapped. If this device
 *	is already mapped, just return with a pointer to that entry.
 * Return:
 *	pointer to the entry for this device in the device map table. Null
 *	if the add flag is not set and the device is not in the table (it is
 *	not been seen yet). If add is set and the device cannot be added, null
 *	is returned (indicates an error).
 */

static DEVT *
chk_dev(dev_t dev, int add)
{
	DEVT *ent;
	u_int slot;

	if (dtab == NULL)
		return(NULL);

	/*
	 * search the chain for this device; a hit is returned as is
	 */
	slot = ((unsigned)dev) % D_TAB_SZ;
	for (ent = dtab[slot]; ent != NULL; ent = ent->fow)
		if (ent->dev == dev)
			return(ent);

	/*
	 * unknown device. A pure query (add == 0) stops here, the caller
	 * only wanted to know whether the number is in use.
	 */
	if (add == 0)
		return(NULL);

	/*
	 * create the node and push it on the front of its chain. No remap
	 * values are assigned here, so the node starts with an empty list.
	 */
	ent = (DEVT *)malloc(sizeof(DEVT));
	if (ent == NULL) {
		paxwarn(1, "Device map table out of memory");
		return(NULL);
	}
	ent->dev = dev;
	ent->list = NULL;
	ent->fow = dtab[slot];
	dtab[slot] = ent;
	return(ent);
}

/*
 * map_dev()
 *	given an inode and device storage mask (the mask has a 1 for each bit
 *	the archive format is able to store in a header), we check for inode
 *	and device truncation and remap the device as required. Device mapping
 *	can also occur when during the read phase of append a device number was
 *	seen (and was marked as do not use during the write phase). WE ASSUME
 *	that unsigned longs are the same size or bigger than the fields used
 *	for ino_t and dev_t. If not the types will have to be changed.
 * Return:
 *	0 in all cases; a failure to remap is only reported with a warning.
*/

int
map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)
{
	DEVT *pt;
	DLIST *dpt;
	static dev_t lastdev = 0;	/* next device number to try */
	int trc_ino = 0;		/* set when the inode does not fit ino_mask */
	int trc_dev = 0;		/* set when the device does not fit dev_mask */
	ino_t trunc_bits = 0;		/* inode bits that ino_mask would drop */
	ino_t nino;			/* inode value after masking */

	/* no device table, nothing is remapped */
	if (dtab == NULL)
		return(0);
	/*
	 * check for device and inode truncation, and extract the truncated
	 * bit pattern.
	 */
	if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)
		++trc_dev;
	if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {
		++trc_ino;
		trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);
	}

	/*
	 * see if this device is already being mapped, look up the device
	 * then find the truncation bit pattern which applies
	 */
	if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) {
		/*
		 * this device is already marked to be remapped
		 */
		for (dpt = pt->list; dpt != NULL; dpt = dpt->fow)
			if (dpt->trunc_bits == trunc_bits)
				break;

		if (dpt != NULL) {
			/*
			 * we are being remapped for this device and pattern
			 * change the device number to be stored and return
			 */
			arcn->sb.st_dev = dpt->dev;
			arcn->sb.st_ino = nino;
			return(0);
		}
		/*
		 * device is known but has no mapping for this pattern yet;
		 * fall through to pick a fresh device number below
		 */
	} else {
		/*
		 * this device is not being remapped YET. if we do not have any
		 * form of truncation, we do not need a remap
		 */
		if (!trc_ino && !trc_dev)
			return(0);

		/*
		 * we have truncation, have to add this as a device to remap
		 */
		if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL)
			goto bad;

		/*
		 * if we just have a truncated inode, we have to make sure that
		 * all future inodes that do not truncate (they have the
		 * truncation pattern of all 0's) continue to map to the same
		 * device number. We probably have already written inodes with
		 * this device number to the archive with the truncation
		 * pattern of all 0's. So we add the mapping for all 0's to the
		 * same device number.
		 */
		if (!trc_dev && (trunc_bits != 0)) {
			if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)
				goto bad;
			dpt->trunc_bits = 0;
			dpt->dev = arcn->sb.st_dev;
			dpt->fow = pt->list;
			pt->list = dpt;
		}
	}

	/*
	 * look for a device number not being used. We must watch for wrap
	 * around on lastdev (so we do not get stuck looking forever!)
	 * NOTE(review): lastdev is static, so the search resumes where the
	 * previous call stopped. The "> 0" test presumably only catches a
	 * wrap to exactly 0 if dev_t is unsigned -- confirm.
	 */
	while (++lastdev > 0) {
		if (chk_dev(lastdev, 0) != NULL)
			continue;
		/*
		 * found an unused value. If we have reached truncation point
		 * for this format we are hosed, so we give up. Otherwise we
		 * mark it as being used.
		 */
		if (((lastdev & ((dev_t)dev_mask)) != lastdev) ||
		    (chk_dev(lastdev, 1) == NULL))
			goto bad;
		break;
	}

	if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL))
		goto bad;

	/*
	 * got a new device number, store it under this truncation pattern.
	 * change the device number this file is being stored with.
	 */
	dpt->trunc_bits = trunc_bits;
	dpt->dev = lastdev;
	dpt->fow = pt->list;
	pt->list = dpt;
	arcn->sb.st_dev = lastdev;
	arcn->sb.st_ino = nino;
	return(0);

    bad:
	/*
	 * the remap could not be set up. The member is left as it was, the
	 * user is warned, and 0 is still returned.
	 */
	paxwarn(1, "Unable to fix truncated inode/device field when storing %s",
	    arcn->name);
	paxwarn(0, "Archive may create improper hard links when extracted");
	return(0);
}

/*
 * directory access/mod time reset table routines (for directories READ by pax)
 *
 * The pax -t flag requires that access times of archive files to be the same
 * before being read by pax. For regular files, access time is restored after
 * the file has been copied. This database provides the same functionality for
 * directories read during file tree traversal. Restoring directory access time
 * is more complex than files since directories may be read several times until
 * all the descendants in their subtree are visited by fts. Directory access
 * and modification times are stored during the fts pre-order visit (done
 * before any descendants in the subtree is visited) and restored after the
 * fts post-order visit (after all the descendants have been visited). In the
 * case of premature exit from a subtree (like from the effects of -n), any
 * directory entries left in this database are reset during final cleanup
 * operations of pax. Entries are hashed by inode number for fast lookup.
*/ /* * atdir_start() * create the directory access time database for directories READ by pax. * Return: * 0 is created ok, -1 otherwise. */ int atdir_start(void) { if (atab != NULL) return(0); if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) { paxwarn(1,"Cannot allocate space for directory access time table"); return(-1); } return(0); } /* * atdir_end() * walk through the directory access time table and reset the access time * of any directory who still has an entry left in the database. These * entries are for directories READ by pax */ void atdir_end(void) { ATDIR *pt; int i; if (atab == NULL) return; /* * for each non-empty hash table entry reset all the directories * chained there. */ for (i = 0; i < A_TAB_SZ; ++i) { if ((pt = atab[i]) == NULL) continue; /* * remember to force the times, set_ftime() looks at pmtime * and patime, which only applies to things CREATED by pax, * not read by pax. Read time reset is controlled by -t. */ for (; pt != NULL; pt = pt->fow) set_ftime(pt->name, pt->mtime, pt->atime, 1); } } /* * add_atdir() * add a directory to the directory access time table. Table is hashed * and chained by inode number. This is for directories READ by pax */ void add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime) { ATDIR *pt; u_int indx; if (atab == NULL) return; /* * make sure this directory is not already in the table, if so just * return (the older entry always has the correct time). The only * way this will happen is when the same subtree can be traversed by * different args to pax and the -n option is aborting fts out of a * subtree before all the post-order visits have been made). */ indx = ((unsigned)ino) % A_TAB_SZ; if ((pt = atab[indx]) != NULL) { while (pt != NULL) { if ((pt->ino == ino) && (pt->dev == dev)) break; pt = pt->fow; } /* * oops, already there. Leave it alone. 
*/ if (pt != NULL) return; } /* * add it to the front of the hash chain */ if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) { if ((pt->name = strdup(fname)) != NULL) { pt->dev = dev; pt->ino = ino; pt->mtime = mtime; pt->atime = atime; pt->fow = atab[indx]; atab[indx] = pt; return; } - (void)free((char *)pt); + free((char *)pt); } paxwarn(1, "Directory access time reset table ran out of memory"); return; } /* * get_atdir() * look up a directory by inode and device number to obtain the access * and modification time you want to set to. If found, the modification * and access time parameters are set and the entry is removed from the * table (as it is no longer needed). These are for directories READ by * pax * Return: * 0 if found, -1 if not found. */ int get_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime) { ATDIR *pt; ATDIR **ppt; u_int indx; if (atab == NULL) return(-1); /* * hash by inode and search the chain for an inode and device match */ indx = ((unsigned)ino) % A_TAB_SZ; if ((pt = atab[indx]) == NULL) return(-1); ppt = &(atab[indx]); while (pt != NULL) { if ((pt->ino == ino) && (pt->dev == dev)) break; /* * no match, go to next one */ ppt = &(pt->fow); pt = pt->fow; } /* * return if we did not find it. */ if (pt == NULL) return(-1); /* * found it. return the times and remove the entry from the table. */ *ppt = pt->fow; *mtime = pt->mtime; *atime = pt->atime; - (void)free((char *)pt->name); - (void)free((char *)pt); + free((char *)pt->name); + free((char *)pt); return(0); } /* * directory access mode and time storage routines (for directories CREATED * by pax). * * Pax requires that extracted directories, by default, have their access/mod * times and permissions set to the values specified in the archive. During the * actions of extracting (and creating the destination subtree during -rw copy) * directories extracted may be modified after being created. 
Even worse is
 * that these directories may have been created with file permissions which
 * prohibits any descendants of these directories from being extracted. When
 * directories are created by pax, access rights may be added to permit the
 * creation of files in their subtree. Every time pax creates a directory, the
 * times and file permissions specified by the archive are stored. After all
 * files have been extracted (or copied), these directories have their times
 * and file modes reset to the stored values. The directory info is restored in
 * reverse order as entries were added to the data file from root to leaf. To
 * restore atime properly, we must go backwards. The data file consists of
 * records with two parts, the file name followed by a DIRDATA trailer. The
 * fixed sized trailer contains the size of the name plus the off_t location in
 * the file. To restore we work backwards through the file reading the trailer
 * then the file name.
 */

/*
 * dir_start()
 *	set up the directory time and file mode storage for directories CREATED
 *	by pax.
 * Return:
 *	0 if ok, -1 otherwise
 */

int
dir_start(void)
{

	/* the scratch file is already open */
	if (dirfd != -1)
		return(0);

	/*
	 * build the template name and create the scratch file
	 */
	memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
	dirfd = mkstemp(tempfile);
	if (dirfd < 0) {
		paxwarn(1, "Unable to create temporary file for directory times: %s",
		    tempfile);
		return(-1);
	}

	/*
	 * unlink the file so it goes away at termination by itself
	 */
	(void)unlink(tempfile);
	return(0);
}

/*
 * add_dir()
 *	add the mode and times for a newly CREATED directory
 *	name is name of the directory, psb the stat buffer with the data in it,
 *	frc_mode is a flag that says whether to force the setting of the mode
 *	(ignoring the user set values for preserving file mode). Frc_mode is
 *	for the case where we created a file and found that the resulting
 *	directory was not writeable and the user asked for file modes to NOT
 *	be preserved. (we have to preserve what was created by default, so we
 *	have to force the setting at the end.
this is stated explicitly in the * pax spec) */ void add_dir(char *name, int nlen, struct stat *psb, int frc_mode) { DIRDATA dblk; if (dirfd < 0) return; /* * get current position (where file name will start) so we can store it * in the trailer */ if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) { paxwarn(1,"Unable to store mode and times for directory: %s",name); return; } /* * write the file name followed by the trailer */ dblk.nlen = nlen + 1; dblk.mode = psb->st_mode & 0xffff; dblk.mtime = psb->st_mtime; dblk.atime = psb->st_atime; dblk.frc_mode = frc_mode; if ((write(dirfd, name, dblk.nlen) == dblk.nlen) && (write(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) { ++dircnt; return; } paxwarn(1,"Unable to store mode and times for created directory: %s",name); return; } /* * proc_dir() * process all file modes and times stored for directories CREATED * by pax */ void proc_dir(void) { char name[PAXPATHLEN+1]; DIRDATA dblk; u_long cnt; if (dirfd < 0) return; /* * read backwards through the file and process each directory */ for (cnt = 0; cnt < dircnt; ++cnt) { /* * read the trailer, then the file name, if this fails * just give up. */ if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0) break; if (read(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk)) break; if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) break; if (read(dirfd, name, dblk.nlen) != dblk.nlen) break; if (lseek(dirfd, dblk.npos, SEEK_SET) < 0) break; /* * frc_mode set, make sure we set the file modes even if * the user didn't ask for it (see file_subs.c for more info) */ if (pmode || dblk.frc_mode) set_pmode(name, dblk.mode); if (patime || pmtime) set_ftime(name, dblk.mtime, dblk.atime, 0); } (void)close(dirfd); dirfd = -1; if (cnt != dircnt) paxwarn(1,"Unable to set mode and times for created directories"); return; } /* * database independent routines */ /* * st_hash() * hashes filenames to a u_int for hashing into a table. 
Looks at the tail
 *	end of file, as this provides far better distribution than any other
 *	part of the name. For performance reasons we only care about the last
 *	MAXKEYLEN chars (should be at LEAST large enough to pick off the file
 *	name). Was tested on 500,000 name file tree traversal from the root
 *	and gave almost a perfectly uniform distribution of keys when used with
 *	prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)
 *	chars at a time and pads with 0 for last addition.
 * Return:
 *	the hash value of the string MOD (%) the table size.
 */

u_int
st_hash(char *name, int len, int tabsz)
{
	const char *cur = name;
	u_int sum = 0;
	u_int word;
	int whole;
	int tail;
	int n;

	/*
	 * only the last MAXKEYLEN characters take part in the hash (these
	 * are pathnames, the tail is what spreads out the keys)
	 */
	if (len > MAXKEYLEN) {
		cur = name + (len - MAXKEYLEN);
		len = MAXKEYLEN;
	}

	/*
	 * number of complete u_int sized pieces, and the size of the runt
	 * that is left over
	 */
	whole = len / sizeof(u_int);
	tail = len % sizeof(u_int);

	/*
	 * sum the complete pieces. Each one is copied into a u_int first
	 * because the string carries no alignment guarantee.
	 */
	for (n = 0; n < whole; ++n) {
		memcpy(&word, cur, sizeof(word));
		cur += sizeof(word);
		sum += word;
	}

	/*
	 * add in the runt, zero padded on the right
	 */
	if (tail) {
		word = 0;
		memcpy(&word, cur, tail);
		sum += word;
	}

	/*
	 * reduce to a table index
	 */
	return(sum % tabsz);
}