Index: usr.bin/split/split.1
===================================================================
--- usr.bin/split/split.1
+++ usr.bin/split/split.1
@@ -28,7 +28,7 @@
 .\"	@(#)split.1	8.3 (Berkeley) 4/16/94
 .\" $FreeBSD$
 .\"
-.Dd May 9, 2013
+.Dd August 23, 2022
 .Dt SPLIT 1
 .Os
 .Sh NAME
@@ -213,5 +213,3 @@
 .Nm
 command appeared in
 .At v3 .
-.Sh BUGS
-The maximum line length for matching patterns is 65536.
Index: usr.bin/split/split.c
===================================================================
--- usr.bin/split/split.c
+++ usr.bin/split/split.c
@@ -64,13 +64,22 @@
 #include <sysexits.h>
 
 #define DEFLINE	1000			/* Default num lines per file. */
+#define DEFSIZE	(16 * 1024)		/* Reasonable default for I/O buffer. */
 
 static off_t	 bytecnt;		/* Byte count to split on. */
 static off_t	 chunks = 0;		/* Chunks count to split into. */
 static long	 numlines;		/* Line count to split on. */
 static int	 file_open;		/* If a file open. */
 static int	 ifd = -1, ofd = -1;	/* Input/output file descriptors. */
-static char	 bfr[MAXBSIZE];		/* I/O buffer. */
+
+/*
+ * We'll start with a reasonably sized internal buffer and grow as needed while
+ * we're processing.
+ */
+static char	 ibuf[DEFSIZE];
+static char	*buf = &ibuf[0];	/* I/O buffer. */
+static size_t	 bufsize = DEFSIZE;	/* I/O buffer size. */
+
 static char	 fname[MAXPATHLEN];	/* File name prefix. */
 static regex_t	 rgx;
 static int	 pflag;
@@ -211,7 +220,7 @@
 	nfiles = 0;
 
 	for (bcnt = 0;;)
-		switch ((len = read(ifd, bfr, MAXBSIZE))) {
+		switch ((len = read(ifd, buf, bufsize))) {
 		case 0:
 			exit(0);
 		case -1:
@@ -226,10 +235,10 @@
 			}
 			if (bcnt + len >= bytecnt) {
 				dist = bytecnt - bcnt;
-				if (write(ofd, bfr, dist) != dist)
+				if (write(ofd, buf, dist) != dist)
 					err(EX_IOERR, "write");
 				len -= dist;
-				for (C = bfr + dist; len >= bytecnt;
+				for (C = buf + dist; len >= bytecnt;
 				    len -= bytecnt, C += bytecnt) {
 					if (!chunks || (nfiles < chunks)) {
 						newfile();
@@ -251,12 +260,54 @@
 				bcnt = len;
 			} else {
 				bcnt += len;
-				if (write(ofd, bfr, len) != len)
+				if (write(ofd, buf, len) != len)
 					err(EX_IOERR, "write");
 			}
 		}
 }
 
+/*
+ * grow --
+ *	Grow the input buffer to try and fit a larger line.  The first dlen
+ *	bytes of the buffer are preserved.  Returns 0 on success, or ENOMEM
+ *	with buf and bufsize left untouched.
+ */
+static int
+grow(size_t dlen)
+{
+	char *newbuf;
+	size_t newsize;
+
+	/*
+	 * A heuristic of sorts, at least -- we'll double the buffer size every
+	 * time up until MAXBSIZE, then we'll only grow in MAXBSIZE increments
+	 * to minimize waste.
+	 */
+	newsize = MIN(bufsize + MAXBSIZE, bufsize * 2);
+
+	/*
+	 * The first transition is special because we're pointing to .bss, so
+	 * we'll need to allocate+copy the first time.  Either way, buf is only
+	 * replaced once the allocation has succeeded so that a failed
+	 * realloc() can't leak the old buffer.
+	 */
+	if (buf == &ibuf[0]) {
+		newbuf = malloc(newsize);
+		if (newbuf == NULL)
+			return (ENOMEM);
+
+		memcpy(newbuf, buf, dlen);
+	} else {
+		newbuf = realloc(buf, newsize);
+		if (newbuf == NULL)
+			return (ENOMEM);
+	}
+
+	buf = newbuf;
+	bufsize = newsize;
+	return (0);
+}
+
 /*
  * split2 --
  *	Split the input by lines.
@@ -272,12 +323,34 @@
 		err(EX_NOINPUT, "fdopen");
 
 	/* Process input one line at a time */
-	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
-		const int len = strlen(bfr);
+	while (fgets(buf, bufsize, infp) != NULL) {
+		int len = strlen(buf);
+
+		/*
+		 * If line is too long to deal with, grow our buffer and grab
+		 * more data.  The len check keeps us from peeking at buf[-1]
+		 * when a line starts with a NUL byte.
+		 */
+		while (len > 0 && buf[len - 1] != '\n') {
+			char *newchunk;
 
-		/* If line is too long to deal with, just write it out */
-		if (bfr[len - 1] != '\n')
-			goto writeit;
+			if (grow(len) != 0)
+				err(EX_OSERR, "grow");
+
+			newchunk = &buf[len];
+			if (fgets(newchunk, bufsize - len, infp) == NULL) {
+				/*
+				 * EOF just means the last line had no
+				 * trailing newline; it still has to be
+				 * written out below.
+				 */
+				if (ferror(infp))
+					goto out;
+				break;
+			}
+
+			len += strlen(newchunk);
+		}
 
 		/* Check if we need to start a new file */
 		if (pflag) {
@@ -285,23 +358,23 @@
 
 			pmatch.rm_so = 0;
 			pmatch.rm_eo = len - 1;
-			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
+			if (regexec(&rgx, buf, 0, &pmatch, REG_STARTEND) == 0)
 				newfile();
 		} else if (lcnt++ == numlines) {
 			newfile();
 			lcnt = 1;
 		}
 
-writeit:
 		/* Open output file if needed */
 		if (!file_open)
 			newfile();
 
 		/* Write out line */
-		if (write(ofd, bfr, len) != len)
+		if (write(ofd, buf, len) != len)
 			err(EX_IOERR, "write");
 	}
 
+out:
 	/* EOF or error? */
 	if (ferror(infp))
 		err(EX_IOERR, "read");