Changeset View
Changeset View
Standalone View
Standalone View
usr.bin/split/split.c
Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include <unistd.h> | #include <unistd.h> | ||||
#include <regex.h> | #include <regex.h> | ||||
#include <sysexits.h> | #include <sysexits.h> | ||||
#define DEFLINE 1000 /* Default num lines per file. */ | #define DEFLINE 1000 /* Default num lines per file. */ | ||||
#define DEFSIZE (16 * 1024) /* Reasonable default for I/O buffer. */ | |||||
static off_t bytecnt; /* Byte count to split on. */ | static off_t bytecnt; /* Byte count to split on. */ | ||||
static off_t chunks = 0; /* Chunks count to split into. */ | static off_t chunks = 0; /* Chunks count to split into. */ | ||||
static long numlines; /* Line count to split on. */ | static long numlines; /* Line count to split on. */ | ||||
static int file_open; /* If a file open. */ | static int file_open; /* If a file open. */ | ||||
static int ifd = -1, ofd = -1; /* Input/output file descriptors. */ | static int ifd = -1, ofd = -1; /* Input/output file descriptors. */ | ||||
static char bfr[MAXBSIZE]; /* I/O buffer. */ | |||||
/* | |||||
* We'll start with a reasonably sized internal buffer and grow as needed while | |||||
* we're processing. | |||||
*/ | |||||
static char ibuf[DEFSIZE]; | |||||
static char *buf = &ibuf[0]; /* I/O buffer. */ | |||||
static size_t bufsize = DEFSIZE; /* I/O buffer size. */ | |||||
static char fname[MAXPATHLEN]; /* File name prefix. */ | static char fname[MAXPATHLEN]; /* File name prefix. */ | ||||
static regex_t rgx; | static regex_t rgx; | ||||
static int pflag; | static int pflag; | ||||
static bool dflag; | static bool dflag; | ||||
static long sufflen = 2; /* File name suffix length. */ | static long sufflen = 2; /* File name suffix length. */ | ||||
static void newfile(void); | static void newfile(void); | ||||
static void split1(void); | static void split1(void); | ||||
▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines | split1(void) | ||||
off_t bcnt; | off_t bcnt; | ||||
char *C; | char *C; | ||||
ssize_t dist, len; | ssize_t dist, len; | ||||
int nfiles; | int nfiles; | ||||
nfiles = 0; | nfiles = 0; | ||||
for (bcnt = 0;;) | for (bcnt = 0;;) | ||||
switch ((len = read(ifd, bfr, MAXBSIZE))) { | switch ((len = read(ifd, buf, bufsize))) { | ||||
case 0: | case 0: | ||||
exit(0); | exit(0); | ||||
case -1: | case -1: | ||||
err(EX_IOERR, "read"); | err(EX_IOERR, "read"); | ||||
/* NOTREACHED */ | /* NOTREACHED */ | ||||
default: | default: | ||||
if (!file_open) { | if (!file_open) { | ||||
if (!chunks || (nfiles < chunks)) { | if (!chunks || (nfiles < chunks)) { | ||||
newfile(); | newfile(); | ||||
nfiles++; | nfiles++; | ||||
} | } | ||||
} | } | ||||
if (bcnt + len >= bytecnt) { | if (bcnt + len >= bytecnt) { | ||||
dist = bytecnt - bcnt; | dist = bytecnt - bcnt; | ||||
if (write(ofd, bfr, dist) != dist) | if (write(ofd, buf, dist) != dist) | ||||
err(EX_IOERR, "write"); | err(EX_IOERR, "write"); | ||||
len -= dist; | len -= dist; | ||||
for (C = bfr + dist; len >= bytecnt; | for (C = buf + dist; len >= bytecnt; | ||||
len -= bytecnt, C += bytecnt) { | len -= bytecnt, C += bytecnt) { | ||||
if (!chunks || (nfiles < chunks)) { | if (!chunks || (nfiles < chunks)) { | ||||
newfile(); | newfile(); | ||||
nfiles++; | nfiles++; | ||||
} | } | ||||
if (write(ofd, | if (write(ofd, | ||||
C, bytecnt) != bytecnt) | C, bytecnt) != bytecnt) | ||||
err(EX_IOERR, "write"); | err(EX_IOERR, "write"); | ||||
} | } | ||||
if (len != 0) { | if (len != 0) { | ||||
if (!chunks || (nfiles < chunks)) { | if (!chunks || (nfiles < chunks)) { | ||||
newfile(); | newfile(); | ||||
nfiles++; | nfiles++; | ||||
} | } | ||||
if (write(ofd, C, len) != len) | if (write(ofd, C, len) != len) | ||||
err(EX_IOERR, "write"); | err(EX_IOERR, "write"); | ||||
} else | } else | ||||
file_open = 0; | file_open = 0; | ||||
bcnt = len; | bcnt = len; | ||||
} else { | } else { | ||||
bcnt += len; | bcnt += len; | ||||
if (write(ofd, bfr, len) != len) | if (write(ofd, buf, len) != len) | ||||
err(EX_IOERR, "write"); | err(EX_IOERR, "write"); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* grow -- | |||||
* Grow the input buffer to try and fit a larger line. | |||||
*/ | |||||
static int | |||||
grow(size_t dlen) | |||||
{ | |||||
size_t newsize; | |||||
/* | |||||
* A heuristic of sorts, at least -- we'll double the buffer size every | |||||
* time up until MAXBSIZE, then we'll only grow in MAXBSIZE increments | |||||
* to minimize waste. | |||||
*/ | |||||
newsize = MIN(bufsize + MAXBSIZE, bufsize * 2); | |||||
/* | |||||
* The first transition is special because we're pointing to .bss, so | |||||
* we'll need to allocate+copy the first time. | |||||
*/ | |||||
if (buf == &ibuf[0]) { | |||||
char *newbuf; | |||||
newbuf = malloc(newsize); | |||||
if (newbuf == NULL) | |||||
return (ENOMEM); | |||||
memcpy(newbuf, buf, dlen); | |||||
buf = newbuf; | |||||
} else { | |||||
buf = realloc(buf, newsize); | |||||
if (buf == NULL) | |||||
return (ENOMEM); | |||||
} | |||||
bufsize = newsize; | |||||
return (0); | |||||
} | |||||
/* | |||||
* split2 -- | * split2 -- | ||||
* Split the input by lines. | * Split the input by lines. | ||||
*/ | */ | ||||
static void | static void | ||||
split2(void) | split2(void) | ||||
{ | { | ||||
long lcnt = 0; | long lcnt = 0; | ||||
FILE *infp; | FILE *infp; | ||||
/* Stick a stream on top of input file descriptor */ | /* Stick a stream on top of input file descriptor */ | ||||
if ((infp = fdopen(ifd, "r")) == NULL) | if ((infp = fdopen(ifd, "r")) == NULL) | ||||
err(EX_NOINPUT, "fdopen"); | err(EX_NOINPUT, "fdopen"); | ||||
/* Process input one line at a time */ | /* Process input one line at a time */ | ||||
while (fgets(bfr, sizeof(bfr), infp) != NULL) { | while (fgets(buf, bufsize, infp) != NULL) { | ||||
const int len = strlen(bfr); | int len = strlen(buf); | ||||
/* If line is too long to deal with, just write it out */ | /* | ||||
if (bfr[len - 1] != '\n') | * If line is too long to deal with, grow our buffer and grab | ||||
goto writeit; | * more data. | ||||
*/ | |||||
while (buf[len - 1] != '\n') { | |||||
char *newchunk; | |||||
bapt: why not use getline(3) here? | |||||
if (grow(len) != 0) | |||||
err(EX_OSERR, "grow"); | |||||
newchunk = &buf[len]; | |||||
if (fgets(newchunk, bufsize - len, infp) == NULL) | |||||
goto out; | |||||
len += strlen(newchunk); | |||||
} | |||||
/* Check if we need to start a new file */ | /* Check if we need to start a new file */ | ||||
if (pflag) { | if (pflag) { | ||||
regmatch_t pmatch; | regmatch_t pmatch; | ||||
pmatch.rm_so = 0; | pmatch.rm_so = 0; | ||||
pmatch.rm_eo = len - 1; | pmatch.rm_eo = len - 1; | ||||
if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) | if (regexec(&rgx, buf, 0, &pmatch, REG_STARTEND) == 0) | ||||
newfile(); | newfile(); | ||||
} else if (lcnt++ == numlines) { | } else if (lcnt++ == numlines) { | ||||
newfile(); | newfile(); | ||||
lcnt = 1; | lcnt = 1; | ||||
} | } | ||||
writeit: | |||||
/* Open output file if needed */ | /* Open output file if needed */ | ||||
if (!file_open) | if (!file_open) | ||||
newfile(); | newfile(); | ||||
/* Write out line */ | /* Write out line */ | ||||
if (write(ofd, bfr, len) != len) | if (write(ofd, buf, len) != len) | ||||
err(EX_IOERR, "write"); | err(EX_IOERR, "write"); | ||||
} | } | ||||
out: | |||||
/* EOF or error? */ | /* EOF or error? */ | ||||
if (ferror(infp)) | if (ferror(infp)) | ||||
err(EX_IOERR, "read"); | err(EX_IOERR, "read"); | ||||
else | else | ||||
exit(0); | exit(0); | ||||
} | } | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines |
why not use getline(3) here?