Changeset View
Standalone View
usr.bin/grep/regex/tre-fastmatch.c
Show All 40 Lines | |||||
#endif | #endif | ||||
#include "hashtable.h" | #include "hashtable.h" | ||||
#include "tre-fastmatch.h" | #include "tre-fastmatch.h" | ||||
static int fastcmp(const fastmatch_t *fg, const void *data, | static int fastcmp(const fastmatch_t *fg, const void *data, | ||||
tre_str_type_t type); | tre_str_type_t type); | ||||
static tre_char_t *tre_strndup(const tre_char_t *src, size_t len); | |||||
/* | /* | ||||
* Clean up if pattern compilation fails. | * Clean up if pattern compilation fails. | ||||
*/ | */ | ||||
#define FAIL_COMP(errcode) \ | #define FAIL_COMP(errcode) \ | ||||
{ \ | { \ | ||||
if (fg->pattern) \ | if (fg->pattern) \ | ||||
free(fg->pattern); \ | free(fg->pattern); \ | ||||
if (fg->wpattern) \ | if (fg->wpattern) \ | ||||
▲ Show 20 Lines • Show All 289 Lines • ▼ Show 20 Lines | |||||
#endif | #endif | ||||
/* | /* | ||||
* Fills in the good suffix table for SB/MB strings. | * Fills in the good suffix table for SB/MB strings. | ||||
*/ | */ | ||||
#define FILL_BMGS \ | #define FILL_BMGS \ | ||||
if (fg->len > 0 && !fg->hasdot) \ | if (fg->len > 0 && !fg->hasdot) \ | ||||
{ \ | { \ | ||||
fg->sbmGs = malloc(fg->len * sizeof(int)); \ | fg->sbmGs = malloc(fg->len * sizeof(*fg->sbmGs)); \ | ||||
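cem: This might be the compiler being a little too pedantic. Can the size of int and unsigned int really differ? I usually use sizeof(*fg->sbmGs) so the type is always correct relative to the pointer. | |||||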
Not Done Inline Actions — kevans: I'm fairly certain they can't differ. =)
That's fair -- that does look+work better. | |||||
if (!fg->sbmGs) \ | if (!fg->sbmGs) \ | ||||
return REG_ESPACE; \ | return REG_ESPACE; \ | ||||
if (fg->len == 1) \ | if (fg->len == 1) \ | ||||
fg->sbmGs[0] = 1; \ | fg->sbmGs[0] = 1; \ | ||||
else \ | else \ | ||||
_FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false); \ | _FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false); \ | ||||
DPRINT_BMGS(fg->len, "GS shift for pos %d is %d\n", fg->sbmGs); \ | DPRINT_BMGS(fg->len, "GS shift for pos %d is %d\n", fg->sbmGs); \ | ||||
} | } | ||||
/* | /* | ||||
* Fills in the good suffix table for wide strings. | * Fills in the good suffix table for wide strings. | ||||
*/ | */ | ||||
#define FILL_BMGS_WIDE \ | #define FILL_BMGS_WIDE \ | ||||
if (fg->wlen > 0 && !fg->hasdot) \ | if (fg->wlen > 0 && !fg->hasdot) \ | ||||
{ \ | { \ | ||||
fg->bmGs = malloc(fg->wlen * sizeof(int)); \ | fg->bmGs = malloc(fg->wlen * sizeof(*fg->bmGs)); \ | ||||
if (!fg->bmGs) \ | if (!fg->bmGs) \ | ||||
return REG_ESPACE; \ | return REG_ESPACE; \ | ||||
if (fg->wlen == 1) \ | if (fg->wlen == 1) \ | ||||
fg->bmGs[0] = 1; \ | fg->bmGs[0] = 1; \ | ||||
else \ | else \ | ||||
_FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true); \ | _FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true); \ | ||||
DPRINT_BMGS(fg->wlen, "GS shift (wide) for pos %d is %d\n", \ | DPRINT_BMGS(fg->wlen, "GS shift (wide) for pos %d is %d\n", \ | ||||
fg->bmGs); \ | fg->bmGs); \ | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | if (arr[g] == plen) \ | ||||
arr[g] = plen - 1 - i; \ | arr[g] = plen - 1 - i; \ | ||||
for (unsigned int i = 0; i <= plen - 2; i++) \ | for (unsigned int i = 0; i <= plen - 2; i++) \ | ||||
arr[plen - 1 - suff[i]] = plen - 1 - i; \ | arr[plen - 1 - suff[i]] = plen - 1 - i; \ | ||||
\ | \ | ||||
free(suff); \ | free(suff); \ | ||||
} | } | ||||
/* | /* | ||||
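* Copies the pattern src having length srclen into a newly allocated, | |||||
* NUL-terminated buffer dst and stores the size in dstlen. Returns | |||||
* REG_ESPACE from the enclosing function if the allocation fails. | |||||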
*/ | |||||
#define SAVE_PATTERN(src, srclen, dst, dstlen) \ | |||||
dstlen = srclen; \ | |||||
dst = malloc((dstlen + 1) * sizeof(tre_char_t)); \ | |||||
if (dst == NULL) \ | |||||
return REG_ESPACE; \ | |||||
if (dstlen > 0) \ | |||||
memcpy(dst, src, dstlen * sizeof(tre_char_t)); \ | |||||
dst[dstlen] = TRE_CHAR('\0'); | |||||
/* | |||||
* Initializes pattern compiling. | * Initializes pattern compiling. | ||||
*/ | */ | ||||
#define INIT_COMP \ | #define INIT_COMP \ | ||||
/* Initialize. */ \ | /* Initialize. */ \ | ||||
Not Done Inline Actions — cem: I'd rather `SAVE_PATTERN()` be implemented as a function `tre_strndup()`, in the style of strndup(3). Then the free() (and error return) will be handled in the callers. | |||||
Not Done Inline Actions — kevans: +1; I implemented this as `tre_strndup`. Originally I was trying to match the current usage with minimal changes, but I'm not sure if it's worth it -- naming SAVE_PATTERN_FREESRC was painful. | |||||
memset(fg, 0, sizeof(*fg)); \ | memset(fg, 0, sizeof(*fg)); \ | ||||
fg->icase = (cflags & REG_ICASE); \ | fg->icase = (cflags & REG_ICASE); \ | ||||
fg->word = (cflags & REG_WORD); \ | fg->word = (cflags & REG_WORD); \ | ||||
fg->newline = (cflags & REG_NEWLINE); \ | fg->newline = (cflags & REG_NEWLINE); \ | ||||
fg->nosub = (cflags & REG_NOSUB); \ | fg->nosub = (cflags & REG_NOSUB); \ | ||||
\ | \ | ||||
/* Cannot handle REG_ICASE with MB string */ \ | /* Cannot handle REG_ICASE with MB string */ \ | ||||
if (fg->icase && (TRE_MB_CUR_MAX > 1) && n > 0) \ | if (fg->icase && (TRE_MB_CUR_MAX > 1) && n > 0) \ | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | tre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n, | ||||
CHECK_MATCHALL(true); | CHECK_MATCHALL(true); | ||||
/* Cannot handle word boundaries with MB string */ | /* Cannot handle word boundaries with MB string */ | ||||
if (fg->word && (TRE_MB_CUR_MAX > 1)) | if (fg->word && (TRE_MB_CUR_MAX > 1)) | ||||
return REG_BADPAT; | return REG_BADPAT; | ||||
#ifdef TRE_WCHAR | #ifdef TRE_WCHAR | ||||
SAVE_PATTERN(pat, n, fg->wpattern, fg->wlen); | fg->wpattern = tre_strndup(pat, n); | ||||
if (fg->wpattern == NULL) | |||||
return REG_ESPACE; | |||||
fg->wlen = n; | |||||
STORE_MBS_PAT; | STORE_MBS_PAT; | ||||
#else | #else | ||||
SAVE_PATTERN(pat, n, fg->pattern, fg->len); | fg->pattern = tre_strndup(pat, n); | ||||
if (fg->pattern == NULL) | |||||
return REG_ESPACE; | |||||
fg->len = n; | |||||
#endif | #endif | ||||
DPRINT(("tre_compile_literal: pattern: %s, len %zu, icase: %c, word: %c, " | DPRINT(("tre_compile_literal: pattern: %s, len %zu, icase: %c, word: %c, " | ||||
"newline %c\n", fg->pattern, fg->len, fg->icase ? 'y' : 'n', | "newline %c\n", fg->pattern, fg->len, fg->icase ? 'y' : 'n', | ||||
fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n')); | fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n')); | ||||
FILL_QSBC; | FILL_QSBC; | ||||
FILL_BMGS; | FILL_BMGS; | ||||
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines | badpat: | ||||
fg->hasdot = wfirstdot > -1; | fg->hasdot = wfirstdot > -1; | ||||
/* | /* | ||||
* The pattern has been processed and copied to tmp as a literal string | * The pattern has been processed and copied to tmp as a literal string | ||||
* with escapes, anchors (^$) and the word boundary match character | * with escapes, anchors (^$) and the word boundary match character | ||||
* classes stripped out. | * classes stripped out. | ||||
*/ | */ | ||||
#ifdef TRE_WCHAR | #ifdef TRE_WCHAR | ||||
SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen); | fg->wpattern = tre_strndup(tmp, pos); | ||||
if (fg->wpattern == NULL) | |||||
{ | |||||
free(tmp); | |||||
return REG_ESPACE; | |||||
} | |||||
fg->wlen = pos; | |||||
fg->wescmap = _escmap; | fg->wescmap = _escmap; | ||||
STORE_MBS_PAT; | STORE_MBS_PAT; | ||||
/* | /* | ||||
* The position of dots and escaped dots is different in the MB string | * The position of dots and escaped dots is different in the MB string | ||||
* than in the wide string so traverse the converted string, as well, | * than in the wide string so traverse the converted string, as well, | ||||
* to store these positions. | * to store these positions. | ||||
*/ | */ | ||||
Show All 38 Lines | else if (_checkpat[i] == '.' && !escaped) | ||||
if (firstdot == -1) | if (firstdot == -1) | ||||
firstdot = i; | firstdot = i; | ||||
} | } | ||||
else | else | ||||
escaped = false; | escaped = false; | ||||
free(_checkpat); | free(_checkpat); | ||||
} | } | ||||
#else | #else | ||||
SAVE_PATTERN(tmp, pos, fg->pattern, fg->len); | fg->pattern = tre_strndup(tmp, pos); | ||||
if (fg->pattern == NULL) | |||||
{ | |||||
free(tmp); | |||||
return REG_ESPACE; | |||||
} | |||||
fg->len = pos; | |||||
fg->escmap = _escmap; | fg->escmap = _escmap; | ||||
#endif | #endif | ||||
free(tmp); | free(tmp); | ||||
DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, " | DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, " | ||||
"icase: %c, word: %c, newline %c\n", fg->pattern, fg->len, | "icase: %c, word: %c, newline %c\n", fg->pattern, fg->len, | ||||
fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n', | fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n', | ||||
▲ Show 20 Lines • Show All 280 Lines • ▼ Show 20 Lines | if (fg->icase ? (tolower((unsigned char)pat_byte[i]) == tolower((unsigned char)str_byte[i])) | ||||
: (pat_byte[i] == str_byte[i])) | : (pat_byte[i] == str_byte[i])) | ||||
continue; | continue; | ||||
} | } | ||||
DPRINT(("fastcmp: mismatch at position %d\n", i)); | DPRINT(("fastcmp: mismatch at position %d\n", i)); | ||||
ret = -(i + 1); | ret = -(i + 1); | ||||
break; | break; | ||||
} | } | ||||
return ret; | return ret; | ||||
} | |||||
/* | |||||
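* Returns a newly allocated, NUL-terminated copy of the first len | |||||
* characters of src, or NULL if the allocation fails. The caller is | |||||
* responsible for freeing the returned buffer. | |||||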
*/ | |||||
static tre_char_t * | |||||
tre_strndup(const tre_char_t *src, size_t len) | |||||
Not Done Inline Actions — kevans: It occurs to me now that I broke the broken style in this file here -- should I fix this, or is it OK since it's off on its own? | |||||
Not Done Inline Actions — cem: Probably best to fix it. :-) | |||||
{ | |||||
tre_char_t *dest; | |||||
dest = malloc((len + 1) * sizeof(*dest)); | |||||
if (dest == NULL) | |||||
return NULL; | |||||
if (len > 0) | |||||
memcpy(dest, src, len * sizeof(*src)); | |||||
Not Done Inline Actions — cem: To mirror `strndup` this would be `strncpy()`. I don't think it really matters here, though. | |||||
Not Done Inline Actions — kevans: D'oh, now that you mention it the name of this function's technically wrong. `tre_char_t` is actually a `wchar_t`, but we don't have a matching `wcsndup` implementation so I guess it's alright? I think it might be better to leave it as-is (`memcpy`) to hide that implementation detail, though. It is theoretically possible to compile the tre bits using multi-byte strings instead of wide char (`!TRE_WCHAR`), which would complicate this possibly a bit more than it needs to be? OTOH, it's not practically possible to compile it with `!TRE_WCHAR` without some changes to the glue. | |||||
Not Done Inline Actions — cem: Oh, right, strncpy would be wrong for wchar strings. Yeah, I'm ok just leaving it as memcpy for now. | |||||
Not Done Inline Actions — kevans: Alrighty. =) I'm still chomping at the bit to disable tre by default, so I'm trying not to do much in these parts. | |||||
Not Done Inline Actions — cem: That's totally fair :-) | |||||
dest[len] = TRE_CHAR('\0'); | |||||
return dest; | |||||
} | } |
This might be the compiler being a little too pedantic. Can the size of int and unsigned int really differ?
I usually use sizeof(*fg->sbmGs) so the type is always correct relative to the pointer.