Changeset View
Changeset View
Standalone View
Standalone View
contrib/less/regexp.c
Show First 20 Lines • Show All 201 Lines • ▼ Show 20 Lines | |||||
* thing really will compile successfully, and we never have to move the | * thing really will compile successfully, and we never have to move the | ||||
* code and thus invalidate pointers into it. (Note that it has to be in | * code and thus invalidate pointers into it. (Note that it has to be in | ||||
* one piece because free() must be able to free it all.) | * one piece because free() must be able to free it all.) | ||||
* | * | ||||
* Beware that the optimization-preparation code in here knows about some | * Beware that the optimization-preparation code in here knows about some | ||||
* of the structure of the compiled regexp. | * of the structure of the compiled regexp. | ||||
*/ | */ | ||||
regexp * | regexp * | ||||
regcomp(exp) | regcomp(char *exp) | ||||
char *exp; | |||||
{ | { | ||||
register regexp *r; | register regexp *r; | ||||
register char *scan; | register char *scan; | ||||
register char *longest; | register char *longest; | ||||
register int len; | register int len; | ||||
int flags; | int flags; | ||||
if (exp == NULL) | if (exp == NULL) | ||||
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines | |||||
* | * | ||||
* Caller must absorb opening parenthesis. | * Caller must absorb opening parenthesis. | ||||
* | * | ||||
* Combining parenthesis handling with the base level of regular expression | * Combining parenthesis handling with the base level of regular expression | ||||
* is a trifle forced, but the need to tie the tails of the branches to what | * is a trifle forced, but the need to tie the tails of the branches to what | ||||
* follows makes it hard to avoid. | * follows makes it hard to avoid. | ||||
*/ | */ | ||||
static char * | static char * | ||||
reg(paren, flagp) | reg(int paren, int *flagp) | ||||
int paren; /* Parenthesized? */ | |||||
int *flagp; | |||||
{ | { | ||||
register char *ret; | register char *ret; | ||||
register char *br; | register char *br; | ||||
register char *ender; | register char *ender; | ||||
register int parno = 0; | register int parno = 0; | ||||
int flags; | int flags; | ||||
*flagp = HASWIDTH; /* Tentatively. */ | *flagp = HASWIDTH; /* Tentatively. */ | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
- regbranch - one alternative of an | operator | - regbranch - one alternative of an | operator | ||||
* | * | ||||
* Implements the concatenation operator. | * Implements the concatenation operator. | ||||
*/ | */ | ||||
static char * | static char * | ||||
regbranch(flagp) | regbranch(int *flagp) | ||||
int *flagp; | |||||
{ | { | ||||
register char *ret; | register char *ret; | ||||
register char *chain; | register char *chain; | ||||
register char *latest; | register char *latest; | ||||
int flags; | int flags; | ||||
*flagp = WORST; /* Tentatively. */ | *flagp = WORST; /* Tentatively. */ | ||||
Show All 21 Lines | |||||
* | * | ||||
* Note that the branching code sequences used for ? and the general cases | * Note that the branching code sequences used for ? and the general cases | ||||
* of * and + are somewhat optimized: they use the same NOTHING node as | * of * and + are somewhat optimized: they use the same NOTHING node as | ||||
* both the endmarker for their branch list and the body of the last branch. | * both the endmarker for their branch list and the body of the last branch. | ||||
* It might seem that this node could be dispensed with entirely, but the | * It might seem that this node could be dispensed with entirely, but the | ||||
* endmarker role is not redundant. | * endmarker role is not redundant. | ||||
*/ | */ | ||||
static char * | static char * | ||||
regpiece(flagp) | regpiece(int *flagp) | ||||
int *flagp; | |||||
{ | { | ||||
register char *ret; | register char *ret; | ||||
register char op; | register char op; | ||||
register char *next; | register char *next; | ||||
int flags; | int flags; | ||||
ret = regatom(&flags); | ret = regatom(&flags); | ||||
if (ret == NULL) | if (ret == NULL) | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | |||||
- regatom - the lowest level | - regatom - the lowest level | ||||
* | * | ||||
* Optimization: gobbles an entire sequence of ordinary characters so that | * Optimization: gobbles an entire sequence of ordinary characters so that | ||||
* it can turn them into a single node, which is smaller to store and | * it can turn them into a single node, which is smaller to store and | ||||
* faster to run. Backslashed characters are exceptions, each becoming a | * faster to run. Backslashed characters are exceptions, each becoming a | ||||
* separate node; the code is simpler that way and it's not worth fixing. | * separate node; the code is simpler that way and it's not worth fixing. | ||||
*/ | */ | ||||
static char * | static char * | ||||
regatom(flagp) | regatom(int *flagp) | ||||
int *flagp; | |||||
{ | { | ||||
register char *ret; | register char *ret; | ||||
int flags; | int flags; | ||||
*flagp = WORST; /* Tentatively. */ | *flagp = WORST; /* Tentatively. */ | ||||
switch (*regparse++) { | switch (*regparse++) { | ||||
case '^': | case '^': | ||||
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines | regatom(int *flagp) | ||||
return(ret); | return(ret); | ||||
} | } | ||||
/* | /* | ||||
- regnode - emit a node | - regnode - emit a node | ||||
*/ | */ | ||||
static char * /* Location. */ | static char * /* Location. */ | ||||
regnode(op) | regnode(char op) | ||||
char op; | |||||
{ | { | ||||
register char *ret; | register char *ret; | ||||
register char *ptr; | register char *ptr; | ||||
ret = regcode; | ret = regcode; | ||||
if (ret == ®dummy) { | if (ret == ®dummy) { | ||||
regsize += 3; | regsize += 3; | ||||
return(ret); | return(ret); | ||||
} | } | ||||
ptr = ret; | ptr = ret; | ||||
*ptr++ = op; | *ptr++ = op; | ||||
*ptr++ = '\0'; /* Null "next" pointer. */ | *ptr++ = '\0'; /* Null "next" pointer. */ | ||||
*ptr++ = '\0'; | *ptr++ = '\0'; | ||||
regcode = ptr; | regcode = ptr; | ||||
return(ret); | return(ret); | ||||
} | } | ||||
/* | /* | ||||
- regc - emit (if appropriate) a byte of code | - regc - emit (if appropriate) a byte of code | ||||
*/ | */ | ||||
static void | static void | ||||
regc(b) | regc(char b) | ||||
char b; | |||||
{ | { | ||||
if (regcode != ®dummy) | if (regcode != ®dummy) | ||||
*regcode++ = b; | *regcode++ = b; | ||||
else | else | ||||
regsize++; | regsize++; | ||||
} | } | ||||
/* | /* | ||||
- reginsert - insert an operator in front of already-emitted operand | - reginsert - insert an operator in front of already-emitted operand | ||||
* | * | ||||
* Means relocating the operand. | * Means relocating the operand. | ||||
*/ | */ | ||||
static void | static void | ||||
reginsert(op, opnd) | reginsert(char op, char *opnd) | ||||
char op; | |||||
char *opnd; | |||||
{ | { | ||||
register char *src; | register char *src; | ||||
register char *dst; | register char *dst; | ||||
register char *place; | register char *place; | ||||
if (regcode == ®dummy) { | if (regcode == ®dummy) { | ||||
regsize += 3; | regsize += 3; | ||||
return; | return; | ||||
Show All 10 Lines | reginsert(char op, char *opnd) | ||||
*place++ = '\0'; | *place++ = '\0'; | ||||
*place++ = '\0'; | *place++ = '\0'; | ||||
} | } | ||||
/* | /* | ||||
- regtail - set the next-pointer at the end of a node chain | - regtail - set the next-pointer at the end of a node chain | ||||
*/ | */ | ||||
static void | static void | ||||
regtail(p, val) | regtail(char *p, char *val) | ||||
char *p; | |||||
char *val; | |||||
{ | { | ||||
register char *scan; | register char *scan; | ||||
register char *temp; | register char *temp; | ||||
register int offset; | register int offset; | ||||
if (p == ®dummy) | if (p == ®dummy) | ||||
return; | return; | ||||
Show All 13 Lines | regtail(char *p, char *val) | ||||
*(scan+1) = (offset>>8)&0377; | *(scan+1) = (offset>>8)&0377; | ||||
*(scan+2) = offset&0377; | *(scan+2) = offset&0377; | ||||
} | } | ||||
/* | /* | ||||
- regoptail - regtail on operand of first argument; nop if operandless | - regoptail - regtail on operand of first argument; nop if operandless | ||||
*/ | */ | ||||
static void | static void | ||||
regoptail(p, val) | regoptail(char *p, char *val) | ||||
char *p; | |||||
char *val; | |||||
{ | { | ||||
/* "Operandless" and "op != BRANCH" are synonymous in practice. */ | /* "Operandless" and "op != BRANCH" are synonymous in practice. */ | ||||
if (p == NULL || p == ®dummy || OP(p) != BRANCH) | if (p == NULL || p == ®dummy || OP(p) != BRANCH) | ||||
return; | return; | ||||
regtail(OPERAND(p), val); | regtail(OPERAND(p), val); | ||||
} | } | ||||
/* | /* | ||||
Show All 20 Lines | |||||
void regdump(); | void regdump(); | ||||
STATIC char *regprop(); | STATIC char *regprop(); | ||||
#endif | #endif | ||||
/* | /* | ||||
- regexec - match a regexp against a string | - regexec - match a regexp against a string | ||||
*/ | */ | ||||
int | int | ||||
regexec2(prog, string, notbol) | regexec2(regexp *prog, char *string, int notbol) | ||||
register regexp *prog; | |||||
register char *string; | |||||
int notbol; | |||||
{ | { | ||||
register char *s; | register char *s; | ||||
/* Be paranoid... */ | /* Be paranoid... */ | ||||
if (prog == NULL || string == NULL) { | if (prog == NULL || string == NULL) { | ||||
regerror("NULL parameter"); | regerror("NULL parameter"); | ||||
return(0); | return(0); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | do { | ||||
return(1); | return(1); | ||||
} while (*s++ != '\0'); | } while (*s++ != '\0'); | ||||
/* Failure. */ | /* Failure. */ | ||||
return(0); | return(0); | ||||
} | } | ||||
int | int | ||||
regexec(prog, string) | regexec(regexp *prog, char *string) | ||||
register regexp *prog; | |||||
register char *string; | |||||
{ | { | ||||
return regexec2(prog, string, 0); | return regexec2(prog, string, 0); | ||||
} | } | ||||
/* | /* | ||||
- regtry - try match at specific point | - regtry - try match at specific point | ||||
*/ | */ | ||||
static int /* 0 failure, 1 success */ | static int /* 0 failure, 1 success */ | ||||
regtry(prog, string) | regtry(regexp *prog, char *string) | ||||
regexp *prog; | |||||
char *string; | |||||
{ | { | ||||
register int i; | register int i; | ||||
register char **sp; | register char **sp; | ||||
register char **ep; | register char **ep; | ||||
reginput = string; | reginput = string; | ||||
regstartp = prog->startp; | regstartp = prog->startp; | ||||
regendp = prog->endp; | regendp = prog->endp; | ||||
Show All 18 Lines | |||||
* Conceptually the strategy is simple: check to see whether the current | * Conceptually the strategy is simple: check to see whether the current | ||||
* node matches, call self recursively to see whether the rest matches, | * node matches, call self recursively to see whether the rest matches, | ||||
* and then act accordingly. In practice we make some effort to avoid | * and then act accordingly. In practice we make some effort to avoid | ||||
* recursion, in particular by going through "ordinary" nodes (that don't | * recursion, in particular by going through "ordinary" nodes (that don't | ||||
* need to know whether the rest of the match failed) by a loop instead of | * need to know whether the rest of the match failed) by a loop instead of | ||||
* by recursion. | * by recursion. | ||||
*/ | */ | ||||
static int /* 0 failure, 1 success */ | static int /* 0 failure, 1 success */ | ||||
regmatch(prog) | regmatch(char *prog) | ||||
char *prog; | |||||
{ | { | ||||
register char *scan; /* Current node. */ | register char *scan; /* Current node. */ | ||||
emaste: comment indentation? | |||||
char *next; /* Next node. */ | char *next; /* Next node. */ | ||||
scan = prog; | scan = prog; | ||||
#ifdef DEBUG | #ifdef DEBUG | ||||
if (scan != NULL && regnarrate) | if (scan != NULL && regnarrate) | ||||
fprintf(stderr, "%s(\n", regprop(scan)); | fprintf(stderr, "%s(\n", regprop(scan)); | ||||
#endif | #endif | ||||
while (scan != NULL) { | while (scan != NULL) { | ||||
▲ Show 20 Lines • Show All 173 Lines • ▼ Show 20 Lines | #endif | ||||
regerror("corrupted pointers"); | regerror("corrupted pointers"); | ||||
return(0); | return(0); | ||||
} | } | ||||
/* | /* | ||||
- regrepeat - repeatedly match something simple, report how many | - regrepeat - repeatedly match something simple, report how many | ||||
*/ | */ | ||||
static int | static int | ||||
regrepeat(p) | regrepeat(char *p) | ||||
char *p; | |||||
{ | { | ||||
register int count = 0; | register int count = 0; | ||||
register char *scan; | register char *scan; | ||||
register char *opnd; | register char *opnd; | ||||
scan = reginput; | scan = reginput; | ||||
opnd = OPERAND(p); | opnd = OPERAND(p); | ||||
switch (OP(p)) { | switch (OP(p)) { | ||||
Show All 28 Lines | regrepeat(char *p) | ||||
return(count); | return(count); | ||||
} | } | ||||
/* | /* | ||||
- regnext - dig the "next" pointer out of a node | - regnext - dig the "next" pointer out of a node | ||||
*/ | */ | ||||
static char * | static char * | ||||
regnext(p) | regnext(register char *p) | ||||
Not Done Inline Actionsregister emaste: register
| |||||
register char *p; | |||||
{ | { | ||||
register int offset; | register int offset; | ||||
if (p == ®dummy) | if (p == ®dummy) | ||||
return(NULL); | return(NULL); | ||||
offset = NEXT(p); | offset = NEXT(p); | ||||
if (offset == 0) | if (offset == 0) | ||||
return(NULL); | return(NULL); | ||||
if (OP(p) == BACK) | if (OP(p) == BACK) | ||||
return(p-offset); | return(p-offset); | ||||
else | else | ||||
return(p+offset); | return(p+offset); | ||||
} | } | ||||
#ifdef DEBUG | #ifdef DEBUG | ||||
STATIC char *regprop(); | STATIC char *regprop(); | ||||
/* | /* | ||||
- regdump - dump a regexp onto stdout in vaguely comprehensible form | - regdump - dump a regexp onto stdout in vaguely comprehensible form | ||||
*/ | */ | ||||
void | void | ||||
regdump(r) | regdump(regexp *r) | ||||
regexp *r; | |||||
{ | { | ||||
register char *s; | register char *s; | ||||
register char op = EXACTLY; /* Arbitrary non-END op. */ | register char op = EXACTLY; /* Arbitrary non-END op. */ | ||||
register char *next; | register char *next; | ||||
s = r->program + 1; | s = r->program + 1; | ||||
while (op != END) { /* While that wasn't END last time... */ | while (op != END) { /* While that wasn't END last time... */ | ||||
Show All 25 Lines | if (r->regmust != NULL) | ||||
printf("must have \"%s\"", r->regmust); | printf("must have \"%s\"", r->regmust); | ||||
printf("\n"); | printf("\n"); | ||||
} | } | ||||
/* | /* | ||||
- regprop - printable representation of opcode | - regprop - printable representation of opcode | ||||
*/ | */ | ||||
static char * | static char * | ||||
regprop(op) | regprop(char *op) | ||||
char *op; | |||||
{ | { | ||||
register char *p; | register char *p; | ||||
static char buf[50]; | static char buf[50]; | ||||
(void) strcpy(buf, ":"); | (void) strcpy(buf, ":"); | ||||
switch (OP(op)) { | switch (OP(op)) { | ||||
case BOL: | case BOL: | ||||
▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines | |||||
*/ | */ | ||||
#ifdef STRCSPN | #ifdef STRCSPN | ||||
/* | /* | ||||
* strcspn - find length of initial segment of s1 consisting entirely | * strcspn - find length of initial segment of s1 consisting entirely | ||||
* of characters not from s2 | * of characters not from s2 | ||||
*/ | */ | ||||
static int | static int | ||||
strcspn(s1, s2) | strcspn(char *s1, char *s2) | ||||
char *s1; | |||||
char *s2; | |||||
{ | { | ||||
register char *scan1; | register char *scan1; | ||||
register char *scan2; | register char *scan2; | ||||
register int count; | register int count; | ||||
count = 0; | count = 0; | ||||
for (scan1 = s1; *scan1 != '\0'; scan1++) { | for (scan1 = s1; *scan1 != '\0'; scan1++) { | ||||
for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ | for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ | ||||
if (*scan1 == *scan2++) | if (*scan1 == *scan2++) | ||||
return(count); | return(count); | ||||
count++; | count++; | ||||
} | } | ||||
return(count); | return(count); | ||||
} | } | ||||
#endif | #endif |
comment indentation?