Changeset View
Changeset View
Standalone View
Standalone View
usr.bin/sort/bwstring.c
Show First 20 Lines • Show All 107 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* Compare two wide-character strings | * Compare two wide-character strings | ||||
*/ | */ | ||||
static int | static int | ||||
wide_str_coll(const wchar_t *s1, const wchar_t *s2) | wide_str_coll(const wchar_t *s1, const wchar_t *s2) | ||||
{ | { | ||||
int ret = 0; | int ret; | ||||
errno = 0; | errno = 0; | ||||
ret = wcscoll(s1, s2); | ret = wcscoll(s1, s2); | ||||
if (errno == EILSEQ) { | if (errno == EILSEQ) { | ||||
errno = 0; | errno = 0; | ||||
ret = wcscmp(s1, s2); | ret = wcscmp(s1, s2); | ||||
if (errno != 0) { | if (errno != 0) { | ||||
for (size_t i = 0; ; ++i) { | for (size_t i = 0; ; ++i) { | ||||
Show All 14 Lines | |||||
/* counterparts of wcs functions */ | /* counterparts of wcs functions */ | ||||
void | void | ||||
bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) | bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) | ||||
{ | { | ||||
if (MB_CUR_MAX == 1) | if (MB_CUR_MAX == 1) | ||||
fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); | fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix); | ||||
else | else | ||||
fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); | fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix); | ||||
} | } | ||||
const void* bwsrawdata(const struct bwstring *bws) | const void* bwsrawdata(const struct bwstring *bws) | ||||
{ | { | ||||
return (&(bws->data)); | return (&(bws->wdata)); | ||||
} | } | ||||
size_t bwsrawlen(const struct bwstring *bws) | size_t bwsrawlen(const struct bwstring *bws) | ||||
{ | { | ||||
return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len)); | return ((MB_CUR_MAX == 1) ? bws->cdata.len : | ||||
SIZEOF_WCHAR_STRING(bws->wdata.len)); | |||||
} | } | ||||
size_t | size_t | ||||
bws_memsize(const struct bwstring *bws) | bws_memsize(const struct bwstring *bws) | ||||
{ | { | ||||
return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : | return ((MB_CUR_MAX == 1) ? | ||||
(SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring))); | (bws->cdata.len + 2 + sizeof(struct bwstring)) : | ||||
(SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring))); | |||||
} | } | ||||
void | void | ||||
bws_setlen(struct bwstring *bws, size_t newlen) | bws_setlen(struct bwstring *bws, size_t newlen) | ||||
{ | { | ||||
if (bws && newlen != bws->len && newlen <= bws->len) { | if (MB_CUR_MAX == 1 && bws && newlen != bws->cdata.len && | ||||
bws->len = newlen; | newlen <= bws->cdata.len) { | ||||
if (MB_CUR_MAX == 1) | bws->cdata.len = newlen; | ||||
bws->data.cstr[newlen] = '\0'; | bws->cdata.str[newlen] = '\0'; | ||||
else | } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) { | ||||
bws->data.wstr[newlen] = L'\0'; | bws->wdata.len = newlen; | ||||
bws->wdata.str[newlen] = L'\0'; | |||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Allocate a new binary string of specified size | * Allocate a new binary string of specified size | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
bwsalloc(size_t sz) | bwsalloc(size_t sz) | ||||
{ | { | ||||
struct bwstring *ret; | struct bwstring *ret; | ||||
if (MB_CUR_MAX == 1) | if (MB_CUR_MAX == 1) { | ||||
ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); | ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); | ||||
else | ret->cdata.len = sz; | ||||
ret = sort_malloc(sizeof(struct bwstring) + | ret->cdata.str[sz] = '\0'; | ||||
SIZEOF_WCHAR_STRING(sz + 1)); | } else { | ||||
ret->len = sz; | ret = sort_malloc( | ||||
sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1)); | |||||
ret->wdata.len = sz; | |||||
ret->wdata.str[sz] = L'\0'; | |||||
} | |||||
if (MB_CUR_MAX == 1) | |||||
ret->data.cstr[ret->len] = '\0'; | |||||
else | |||||
ret->data.wstr[ret->len] = L'\0'; | |||||
return (ret); | return (ret); | ||||
} | } | ||||
/* | /* | ||||
* Create a copy of binary string. | * Create a copy of binary string. | ||||
* New string size equals the length of the old string. | * New string size equals the length of the old string. | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
bwsdup(const struct bwstring *s) | bwsdup(const struct bwstring *s) | ||||
{ | { | ||||
if (s == NULL) | if (s == NULL) | ||||
return (NULL); | return (NULL); | ||||
else { | else { | ||||
struct bwstring *ret = bwsalloc(s->len); | struct bwstring *ret = bwsalloc(BWSLEN(s)); | ||||
if (MB_CUR_MAX == 1) | if (MB_CUR_MAX == 1) | ||||
memcpy(ret->data.cstr, s->data.cstr, (s->len)); | memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len)); | ||||
else | else | ||||
memcpy(ret->data.wstr, s->data.wstr, | memcpy(ret->wdata.str, s->wdata.str, | ||||
SIZEOF_WCHAR_STRING(s->len)); | SIZEOF_WCHAR_STRING(s->wdata.len)); | ||||
return (ret); | return (ret); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Create a new binary string from a wide character buffer. | * Create a new binary string from a wide character buffer. | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
bwssbdup(const wchar_t *str, size_t len) | bwssbdup(const wchar_t *str, size_t len) | ||||
{ | { | ||||
if (str == NULL) | if (str == NULL) | ||||
return ((len == 0) ? bwsalloc(0) : NULL); | return ((len == 0) ? bwsalloc(0) : NULL); | ||||
else { | else { | ||||
struct bwstring *ret; | struct bwstring *ret; | ||||
ret = bwsalloc(len); | ret = bwsalloc(len); | ||||
if (MB_CUR_MAX == 1) | if (MB_CUR_MAX == 1) | ||||
for (size_t i = 0; i < len; ++i) | for (size_t i = 0; i < len; ++i) | ||||
ret->data.cstr[i] = (unsigned char) str[i]; | ret->cdata.str[i] = (char)str[i]; | ||||
else | else | ||||
memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); | memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len)); | ||||
return (ret); | return (ret); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Create a new binary string from a raw binary buffer. | * Create a new binary string from a raw binary buffer. | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
bwscsbdup(const unsigned char *str, size_t len) | bwscsbdup(const unsigned char *str, size_t len) | ||||
{ | { | ||||
struct bwstring *ret; | struct bwstring *ret; | ||||
ret = bwsalloc(len); | ret = bwsalloc(len); | ||||
if (str) { | if (str) { | ||||
if (MB_CUR_MAX == 1) | if (MB_CUR_MAX == 1) | ||||
memcpy(ret->data.cstr, str, len); | memcpy(ret->cdata.str, str, len); | ||||
else { | else { | ||||
mbstate_t mbs; | mbstate_t mbs; | ||||
const char *s; | const char *s; | ||||
size_t charlen, chars, cptr; | size_t charlen, chars, cptr; | ||||
chars = 0; | chars = 0; | ||||
cptr = 0; | cptr = 0; | ||||
s = (const char *) str; | s = (const char *) str; | ||||
memset(&mbs, 0, sizeof(mbs)); | memset(&mbs, 0, sizeof(mbs)); | ||||
while (cptr < len) { | while (cptr < len) { | ||||
size_t n = MB_CUR_MAX; | size_t n = MB_CUR_MAX; | ||||
if (n > len - cptr) | if (n > len - cptr) | ||||
n = len - cptr; | n = len - cptr; | ||||
charlen = mbrlen(s + cptr, n, &mbs); | charlen = mbrlen(s + cptr, n, &mbs); | ||||
switch (charlen) { | switch (charlen) { | ||||
case 0: | case 0: | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
case (size_t) -1: | case (size_t) -1: | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
case (size_t) -2: | case (size_t) -2: | ||||
ret->data.wstr[chars++] = | ret->wdata.str[chars++] = | ||||
(unsigned char) s[cptr]; | (unsigned char) s[cptr]; | ||||
++cptr; | ++cptr; | ||||
break; | break; | ||||
default: | default: | ||||
n = mbrtowc(ret->data.wstr + (chars++), | n = mbrtowc(ret->wdata.str + (chars++), | ||||
s + cptr, charlen, &mbs); | s + cptr, charlen, &mbs); | ||||
if ((n == (size_t)-1) || (n == (size_t)-2)) | if ((n == (size_t)-1) || (n == (size_t)-2)) | ||||
/* NOTREACHED */ | /* NOTREACHED */ | ||||
err(2, "mbrtowc error"); | err(2, "mbrtowc error"); | ||||
cptr += charlen; | cptr += charlen; | ||||
} | } | ||||
} | } | ||||
ret->len = chars; | ret->wdata.len = chars; | ||||
ret->data.wstr[ret->len] = L'\0'; | ret->wdata.str[ret->wdata.len] = L'\0'; | ||||
} | } | ||||
} | } | ||||
return (ret); | return (ret); | ||||
} | } | ||||
/* | /* | ||||
* De-allocate object memory | * De-allocate object memory | ||||
*/ | */ | ||||
void | void | ||||
bwsfree(const struct bwstring *s) | bwsfree(const struct bwstring *s) | ||||
{ | { | ||||
if (s) | if (s) | ||||
sort_free(s); | sort_free(s); | ||||
} | } | ||||
/* | /* | ||||
* Copy content of src binary string to dst. | * Copy content of src binary string to dst. | ||||
* If the capacity of the dst string is not sufficient, | * If the capacity of the dst string is not sufficient, | ||||
* then the data is truncated. | * then the data is truncated. | ||||
*/ | */ | ||||
size_t | size_t | ||||
bwscpy(struct bwstring *dst, const struct bwstring *src) | bwscpy(struct bwstring *dst, const struct bwstring *src) | ||||
{ | { | ||||
size_t nums = src->len; | size_t nums = BWSLEN(src); | ||||
if (nums > dst->len) | if (nums > BWSLEN(dst)) | ||||
nums = dst->len; | nums = BWSLEN(dst); | ||||
dst->len = nums; | |||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
memcpy(dst->data.cstr, src->data.cstr, nums); | memcpy(dst->cdata.str, src->cdata.str, nums); | ||||
dst->data.cstr[dst->len] = '\0'; | dst->cdata.len = nums; | ||||
dst->cdata.str[dst->cdata.len] = '\0'; | |||||
} else { | } else { | ||||
memcpy(dst->data.wstr, src->data.wstr, | memcpy(dst->wdata.str, src->wdata.str, | ||||
SIZEOF_WCHAR_STRING(nums + 1)); | SIZEOF_WCHAR_STRING(nums)); | ||||
dst->data.wstr[dst->len] = L'\0'; | dst->wdata.len = nums; | ||||
dst->wdata.str[nums] = L'\0'; | |||||
} | } | ||||
return (nums); | return (nums); | ||||
} | } | ||||
/* | /* | ||||
* Copy content of src binary string to dst, | * Copy content of src binary string to dst, | ||||
* with specified number of symbols to be copied. | * with specified number of symbols to be copied. | ||||
* If the capacity of the dst string is not sufficient, | * If the capacity of the dst string is not sufficient, | ||||
* then the data is truncated. | * then the data is truncated. | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) | bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) | ||||
{ | { | ||||
size_t nums = src->len; | size_t nums = BWSLEN(src); | ||||
if (nums > dst->len) | if (nums > BWSLEN(dst)) | ||||
nums = dst->len; | nums = BWSLEN(dst); | ||||
if (nums > size) | if (nums > size) | ||||
nums = size; | nums = size; | ||||
dst->len = nums; | |||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
memcpy(dst->data.cstr, src->data.cstr, nums); | memcpy(dst->cdata.str, src->cdata.str, nums); | ||||
dst->data.cstr[dst->len] = '\0'; | dst->cdata.len = nums; | ||||
dst->cdata.str[nums] = '\0'; | |||||
} else { | } else { | ||||
memcpy(dst->data.wstr, src->data.wstr, | memcpy(dst->wdata.str, src->wdata.str, | ||||
SIZEOF_WCHAR_STRING(nums + 1)); | SIZEOF_WCHAR_STRING(nums)); | ||||
dst->data.wstr[dst->len] = L'\0'; | dst->wdata.len = nums; | ||||
dst->wdata.str[nums] = L'\0'; | |||||
} | } | ||||
return (dst); | return (dst); | ||||
} | } | ||||
/* | /* | ||||
* Copy content of src binary string to dst, | * Copy content of src binary string to dst, | ||||
* with specified number of symbols to be copied. | * with specified number of symbols to be copied. | ||||
* An offset value can be specified, from the start of src string. | * An offset value can be specified, from the start of src string. | ||||
* If the capacity of the dst string is not sufficient, | * If the capacity of the dst string is not sufficient, | ||||
* then the data is truncated. | * then the data is truncated. | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, | bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, | ||||
size_t size) | size_t size) | ||||
{ | { | ||||
if (offset >= src->len) { | if (offset >= BWSLEN(src)) { | ||||
dst->data.wstr[0] = 0; | bws_setlen(dst, 0); | ||||
dst->len = 0; | |||||
} else { | } else { | ||||
size_t nums = src->len - offset; | size_t nums = BWSLEN(src) - offset; | ||||
if (nums > dst->len) | if (nums > BWSLEN(dst)) | ||||
nums = dst->len; | nums = BWSLEN(dst); | ||||
if (nums > size) | if (nums > size) | ||||
nums = size; | nums = size; | ||||
dst->len = nums; | |||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
memcpy(dst->data.cstr, src->data.cstr + offset, | memcpy(dst->cdata.str, src->cdata.str + offset, nums); | ||||
(nums)); | dst->cdata.len = nums; | ||||
dst->data.cstr[dst->len] = '\0'; | dst->cdata.str[nums] = '\0'; | ||||
} else { | } else { | ||||
memcpy(dst->data.wstr, src->data.wstr + offset, | memcpy(dst->wdata.str, src->wdata.str + offset, | ||||
SIZEOF_WCHAR_STRING(nums)); | SIZEOF_WCHAR_STRING(nums)); | ||||
dst->data.wstr[dst->len] = L'\0'; | dst->wdata.len = nums; | ||||
dst->wdata.str[nums] = L'\0'; | |||||
} | } | ||||
} | } | ||||
return (dst); | return (dst); | ||||
} | } | ||||
/* | /* | ||||
* Write binary string to the file. | * Write binary string to the file. | ||||
* The output is ended either with '\n' (zero_ended == false) | * The output is ended either with '\n' (zero_ended == false) | ||||
* or '\0' (zero_ended == true). | * or '\0' (zero_ended == true). | ||||
*/ | */ | ||||
size_t | size_t | ||||
bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) | bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) | ||||
{ | { | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
size_t len = bws->len; | size_t len = bws->cdata.len; | ||||
if (!zero_ended) { | if (!zero_ended) { | ||||
bws->data.cstr[len] = '\n'; | bws->cdata.str[len] = '\n'; | ||||
if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) | if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) | ||||
err(2, NULL); | err(2, NULL); | ||||
bws->data.cstr[len] = '\0'; | bws->cdata.str[len] = '\0'; | ||||
} else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) | } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1) | ||||
err(2, NULL); | err(2, NULL); | ||||
return (len + 1); | return (len + 1); | ||||
} else { | } else { | ||||
wchar_t eols; | wchar_t eols; | ||||
size_t printed = 0; | size_t printed = 0; | ||||
eols = zero_ended ? btowc('\0') : btowc('\n'); | eols = zero_ended ? btowc('\0') : btowc('\n'); | ||||
while (printed < BWSLEN(bws)) { | while (printed < BWSLEN(bws)) { | ||||
const wchar_t *s = bws->data.wstr + printed; | const wchar_t *s = bws->wdata.str + printed; | ||||
if (*s == L'\0') { | if (*s == L'\0') { | ||||
int nums; | int nums; | ||||
nums = fwprintf(f, L"%lc", *s); | nums = fwprintf(f, L"%lc", *s); | ||||
if (nums != 1) | if (nums != 1) | ||||
err(2, NULL); | err(2, NULL); | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | if (ret == NULL) { | ||||
if (!feof(f)) | if (!feof(f)) | ||||
err(2, NULL); | err(2, NULL); | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
if (*len > 0) { | if (*len > 0) { | ||||
if (ret[*len - 1] == '\n') | if (ret[*len - 1] == '\n') | ||||
--(*len); | --(*len); | ||||
} | } | ||||
return (bwscsbdup((unsigned char*)ret, *len)); | return (bwscsbdup((unsigned char *)ret, *len)); | ||||
} else { | } else { | ||||
*len = 0; | *len = 0; | ||||
if (feof(f)) | if (feof(f)) | ||||
return (NULL); | return (NULL); | ||||
if (2 >= rb->fgetwln_z_buffer_size) { | if (2 >= rb->fgetwln_z_buffer_size) { | ||||
Show All 23 Lines | if (MB_CUR_MAX == 1) | ||||
SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); | SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size)); | ||||
} | } | ||||
rb->fgetwln_z_buffer[*len] = c; | rb->fgetwln_z_buffer[*len] = c; | ||||
rb->fgetwln_z_buffer[++(*len)] = 0; | rb->fgetwln_z_buffer[++(*len)] = 0; | ||||
} | } | ||||
else | else | ||||
while (!feof(f)) { | while (!feof(f)) { | ||||
wint_t c = 0; | wint_t c; | ||||
c = fgetwc(f); | c = fgetwc(f); | ||||
if (c == WEOF) { | if (c == WEOF) { | ||||
if (*len == 0) | if (*len == 0) | ||||
return (NULL); | return (NULL); | ||||
goto line_read_done; | goto line_read_done; | ||||
} | } | ||||
Show All 16 Lines | line_read_done: | ||||
} | } | ||||
} | } | ||||
int | int | ||||
bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, | bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, | ||||
size_t offset, size_t len) | size_t offset, size_t len) | ||||
{ | { | ||||
size_t cmp_len, len1, len2; | size_t cmp_len, len1, len2; | ||||
int res = 0; | int res; | ||||
len1 = bws1->len; | len1 = BWSLEN(bws1); | ||||
len2 = bws2->len; | len2 = BWSLEN(bws2); | ||||
if (len1 <= offset) { | if (len1 <= offset) { | ||||
return ((len2 <= offset) ? 0 : -1); | return ((len2 <= offset) ? 0 : -1); | ||||
} else { | } else { | ||||
if (len2 <= offset) | if (len2 <= offset) | ||||
return (+1); | return (+1); | ||||
else { | else { | ||||
len1 -= offset; | len1 -= offset; | ||||
len2 -= offset; | len2 -= offset; | ||||
cmp_len = len1; | cmp_len = len1; | ||||
if (len2 < cmp_len) | if (len2 < cmp_len) | ||||
cmp_len = len2; | cmp_len = len2; | ||||
if (len < cmp_len) | if (len < cmp_len) | ||||
cmp_len = len; | cmp_len = len; | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
const unsigned char *s1, *s2; | const char *s1, *s2; | ||||
s1 = bws1->data.cstr + offset; | s1 = bws1->cdata.str + offset; | ||||
s2 = bws2->data.cstr + offset; | s2 = bws2->cdata.str + offset; | ||||
res = memcmp(s1, s2, cmp_len); | res = memcmp(s1, s2, cmp_len); | ||||
} else { | } else { | ||||
const wchar_t *s1, *s2; | const wchar_t *s1, *s2; | ||||
s1 = bws1->data.wstr + offset; | s1 = bws1->wdata.str + offset; | ||||
s2 = bws2->data.wstr + offset; | s2 = bws2->wdata.str + offset; | ||||
res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); | res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (res == 0) { | if (res == 0) { | ||||
if (len1 < cmp_len && len1 < len2) | if (len1 < cmp_len && len1 < len2) | ||||
res = -1; | res = -1; | ||||
else if (len2 < cmp_len && len2 < len1) | else if (len2 < cmp_len && len2 < len1) | ||||
res = +1; | res = +1; | ||||
} | } | ||||
return (res); | return (res); | ||||
} | } | ||||
int | int | ||||
bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) | bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) | ||||
{ | { | ||||
size_t len1, len2, cmp_len; | size_t len1, len2, cmp_len; | ||||
int res; | int res; | ||||
len1 = bws1->len; | len1 = BWSLEN(bws1); | ||||
len2 = bws2->len; | len2 = BWSLEN(bws2); | ||||
len1 -= offset; | len1 -= offset; | ||||
len2 -= offset; | len2 -= offset; | ||||
cmp_len = len1; | cmp_len = len1; | ||||
if (len2 < cmp_len) | if (len2 < cmp_len) | ||||
cmp_len = len2; | cmp_len = len2; | ||||
Show All 9 Lines | bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) | ||||
return (res); | return (res); | ||||
} | } | ||||
int | int | ||||
bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) | bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) | ||||
{ | { | ||||
wchar_t c1, c2; | wchar_t c1, c2; | ||||
size_t i = 0; | size_t i; | ||||
for (i = 0; i < len; ++i) { | for (i = 0; i < len; ++i) { | ||||
c1 = bws_get_iter_value(iter1); | c1 = bws_get_iter_value(iter1); | ||||
c2 = bws_get_iter_value(iter2); | c2 = bws_get_iter_value(iter2); | ||||
if (c1 != c2) | if (c1 != c2) | ||||
return (c1 - c2); | return (c1 - c2); | ||||
iter1 = bws_iterator_inc(iter1, 1); | iter1 = bws_iterator_inc(iter1, 1); | ||||
iter2 = bws_iterator_inc(iter2, 1); | iter2 = bws_iterator_inc(iter2, 1); | ||||
} | } | ||||
return (0); | return (0); | ||||
} | } | ||||
int | int | ||||
bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) | bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) | ||||
{ | { | ||||
size_t len1, len2; | size_t len1, len2; | ||||
len1 = bws1->len; | len1 = BWSLEN(bws1); | ||||
len2 = bws2->len; | len2 = BWSLEN(bws2); | ||||
if (len1 <= offset) | if (len1 <= offset) | ||||
return ((len2 <= offset) ? 0 : -1); | return ((len2 <= offset) ? 0 : -1); | ||||
else { | else { | ||||
if (len2 <= offset) | if (len2 <= offset) | ||||
return (+1); | return (+1); | ||||
else { | else { | ||||
len1 -= offset; | len1 -= offset; | ||||
len2 -= offset; | len2 -= offset; | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
const unsigned char *s1, *s2; | const char *s1, *s2; | ||||
s1 = bws1->data.cstr + offset; | s1 = bws1->cdata.str + offset; | ||||
s2 = bws2->data.cstr + offset; | s2 = bws2->cdata.str + offset; | ||||
if (byte_sort) { | if (byte_sort) { | ||||
int res = 0; | int res; | ||||
if (len1 > len2) { | if (len1 > len2) { | ||||
res = memcmp(s1, s2, len2); | res = memcmp(s1, s2, len2); | ||||
if (!res) | if (!res) | ||||
res = +1; | res = +1; | ||||
} else if (len1 < len2) { | } else if (len1 < len2) { | ||||
res = memcmp(s1, s2, len1); | res = memcmp(s1, s2, len1); | ||||
if (!res) | if (!res) | ||||
res = -1; | res = -1; | ||||
} else | } else | ||||
res = memcmp(s1, s2, len1); | res = memcmp(s1, s2, len1); | ||||
return (res); | return (res); | ||||
} else { | } else { | ||||
int res = 0; | int res; | ||||
size_t i, maxlen; | size_t i, maxlen; | ||||
i = 0; | i = 0; | ||||
maxlen = len1; | maxlen = len1; | ||||
if (maxlen > len2) | if (maxlen > len2) | ||||
maxlen = len2; | maxlen = len2; | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | else { | ||||
else if (len1 > len2) | else if (len1 > len2) | ||||
return (+1); | return (+1); | ||||
return (0); | return (0); | ||||
} | } | ||||
} else { | } else { | ||||
const wchar_t *s1, *s2; | const wchar_t *s1, *s2; | ||||
size_t i, maxlen; | size_t i, maxlen; | ||||
int res = 0; | int res; | ||||
s1 = bws1->data.wstr + offset; | s1 = bws1->wdata.str + offset; | ||||
s2 = bws2->data.wstr + offset; | s2 = bws2->wdata.str + offset; | ||||
i = 0; | i = 0; | ||||
maxlen = len1; | maxlen = len1; | ||||
if (maxlen > len2) | if (maxlen > len2) | ||||
maxlen = len2; | maxlen = len2; | ||||
while (i < maxlen) { | while (i < maxlen) { | ||||
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* Correction of the system API | * Correction of the system API | ||||
*/ | */ | ||||
double | double | ||||
bwstod(struct bwstring *s0, bool *empty) | bwstod(struct bwstring *s0, bool *empty) | ||||
{ | { | ||||
double ret = 0; | double ret; | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
unsigned char *end, *s; | char *end, *s; | ||||
char *ep; | char *ep; | ||||
s = s0->data.cstr; | s = s0->cdata.str; | ||||
end = s + s0->len; | end = s + s0->cdata.len; | ||||
ep = NULL; | ep = NULL; | ||||
while (isblank(*s) && s < end) | while (isblank(*s) && s < end) | ||||
++s; | ++s; | ||||
if (!isprint(*s)) { | if (!isprint(*s)) { | ||||
*empty = true; | *empty = true; | ||||
return (0); | return (0); | ||||
} | } | ||||
ret = strtod((char*)s, &ep); | ret = strtod((char*)s, &ep); | ||||
if ((unsigned char*) ep == s) { | if (ep == s) { | ||||
*empty = true; | *empty = true; | ||||
return (0); | return (0); | ||||
} | } | ||||
} else { | } else { | ||||
wchar_t *end, *ep, *s; | wchar_t *end, *ep, *s; | ||||
s = s0->data.wstr; | s = s0->wdata.str; | ||||
end = s + s0->len; | end = s + s0->wdata.len; | ||||
ep = NULL; | ep = NULL; | ||||
while (iswblank(*s) && s < end) | while (iswblank(*s) && s < end) | ||||
++s; | ++s; | ||||
if (!iswprint(*s)) { | if (!iswprint(*s)) { | ||||
*empty = true; | *empty = true; | ||||
return (0); | return (0); | ||||
Show All 16 Lines | |||||
* while if there is no match, it just returns -1. | * while if there is no match, it just returns -1. | ||||
*/ | */ | ||||
int | int | ||||
bws_month_score(const struct bwstring *s0) | bws_month_score(const struct bwstring *s0) | ||||
{ | { | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
const unsigned char *end, *s; | const char *end, *s; | ||||
s = s0->data.cstr; | s = s0->cdata.str; | ||||
end = s + s0->len; | end = s + s0->cdata.len; | ||||
while (isblank(*s) && s < end) | while (isblank(*s) && s < end) | ||||
++s; | ++s; | ||||
for (int i = 11; i >= 0; --i) { | for (int i = 11; i >= 0; --i) { | ||||
if (cmonths[i] && | if (cmonths[i] && | ||||
(s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i])))) | (s == strstr(s, cmonths[i]))) | ||||
return (i); | return (i); | ||||
} | } | ||||
} else { | } else { | ||||
const wchar_t *end, *s; | const wchar_t *end, *s; | ||||
s = s0->data.wstr; | s = s0->wdata.str; | ||||
end = s + s0->len; | end = s + s0->wdata.len; | ||||
while (iswblank(*s) && s < end) | while (iswblank(*s) && s < end) | ||||
++s; | ++s; | ||||
for (int i = 11; i >= 0; --i) { | for (int i = 11; i >= 0; --i) { | ||||
if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) | if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) | ||||
return (i); | return (i); | ||||
} | } | ||||
} | } | ||||
return (-1); | return (-1); | ||||
} | } | ||||
/* | /* | ||||
* Rips out leading blanks (-b). | * Rips out leading blanks (-b). | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
ignore_leading_blanks(struct bwstring *str) | ignore_leading_blanks(struct bwstring *str) | ||||
{ | { | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
unsigned char *dst, *end, *src; | char *dst, *end, *src; | ||||
src = str->data.cstr; | src = str->cdata.str; | ||||
dst = src; | dst = src; | ||||
end = src + str->len; | end = src + str->cdata.len; | ||||
while (src < end && isblank(*src)) | while (src < end && isblank(*src)) | ||||
++src; | ++src; | ||||
if (src != dst) { | if (src != dst) { | ||||
size_t newlen; | size_t newlen; | ||||
newlen = BWSLEN(str) - (src - dst); | newlen = BWSLEN(str) - (src - dst); | ||||
while (src < end) { | while (src < end) { | ||||
*dst = *src; | *dst = *src; | ||||
++dst; | ++dst; | ||||
++src; | ++src; | ||||
} | } | ||||
bws_setlen(str, newlen); | bws_setlen(str, newlen); | ||||
} | } | ||||
} else { | } else { | ||||
wchar_t *dst, *end, *src; | wchar_t *dst, *end, *src; | ||||
src = str->data.wstr; | src = str->wdata.str; | ||||
dst = src; | dst = src; | ||||
end = src + str->len; | end = src + str->wdata.len; | ||||
while (src < end && iswblank(*src)) | while (src < end && iswblank(*src)) | ||||
++src; | ++src; | ||||
if (src != dst) { | if (src != dst) { | ||||
size_t newlen = BWSLEN(str) - (src - dst); | size_t newlen = BWSLEN(str) - (src - dst); | ||||
Show All 10 Lines | |||||
} | } | ||||
/* | /* | ||||
* Rips out nonprinting characters (-i). | * Rips out nonprinting characters (-i). | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
ignore_nonprinting(struct bwstring *str) | ignore_nonprinting(struct bwstring *str) | ||||
{ | { | ||||
size_t newlen = str->len; | size_t newlen = BWSLEN(str); | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
unsigned char *dst, *end, *src; | char *dst, *end, *src; | ||||
unsigned char c; | char c; | ||||
src = str->data.cstr; | src = str->cdata.str; | ||||
dst = src; | dst = src; | ||||
end = src + str->len; | end = src + str->cdata.len; | ||||
while (src < end) { | while (src < end) { | ||||
c = *src; | c = *src; | ||||
if (isprint(c)) { | if (isprint(c)) { | ||||
*dst = c; | *dst = c; | ||||
++dst; | ++dst; | ||||
++src; | ++src; | ||||
} else { | } else { | ||||
++src; | ++src; | ||||
--newlen; | --newlen; | ||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
wchar_t *dst, *end, *src; | wchar_t *dst, *end, *src; | ||||
wchar_t c; | wchar_t c; | ||||
src = str->data.wstr; | src = str->wdata.str; | ||||
dst = src; | dst = src; | ||||
end = src + str->len; | end = src + str->wdata.len; | ||||
while (src < end) { | while (src < end) { | ||||
c = *src; | c = *src; | ||||
if (iswprint(c)) { | if (iswprint(c)) { | ||||
*dst = c; | *dst = c; | ||||
++dst; | ++dst; | ||||
++src; | ++src; | ||||
} else { | } else { | ||||
Show All 9 Lines | |||||
/* | /* | ||||
* Rips out any characters that are neither alphanumeric characters | * Rips out any characters that are neither alphanumeric characters | ||||
* nor blanks (-d). | * nor blanks (-d). | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
dictionary_order(struct bwstring *str) | dictionary_order(struct bwstring *str) | ||||
{ | { | ||||
size_t newlen = str->len; | size_t newlen = BWSLEN(str); | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
unsigned char *dst, *end, *src; | char *dst, *end, *src; | ||||
unsigned char c; | char c; | ||||
src = str->data.cstr; | src = str->cdata.str; | ||||
dst = src; | dst = src; | ||||
end = src + str->len; | end = src + str->cdata.len; | ||||
while (src < end) { | while (src < end) { | ||||
c = *src; | c = *src; | ||||
if (isalnum(c) || isblank(c)) { | if (isalnum(c) || isblank(c)) { | ||||
*dst = c; | *dst = c; | ||||
++dst; | ++dst; | ||||
++src; | ++src; | ||||
} else { | } else { | ||||
++src; | ++src; | ||||
--newlen; | --newlen; | ||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
wchar_t *dst, *end, *src; | wchar_t *dst, *end, *src; | ||||
wchar_t c; | wchar_t c; | ||||
src = str->data.wstr; | src = str->wdata.str; | ||||
dst = src; | dst = src; | ||||
end = src + str->len; | end = src + str->wdata.len; | ||||
while (src < end) { | while (src < end) { | ||||
c = *src; | c = *src; | ||||
if (iswalnum(c) || iswblank(c)) { | if (iswalnum(c) || iswblank(c)) { | ||||
*dst = c; | *dst = c; | ||||
++dst; | ++dst; | ||||
++src; | ++src; | ||||
} else { | } else { | ||||
Show All 10 Lines | |||||
/* | /* | ||||
* Converts string to upper case (-f). | * Converts string to upper case (-f). | ||||
*/ | */ | ||||
struct bwstring * | struct bwstring * | ||||
ignore_case(struct bwstring *str) | ignore_case(struct bwstring *str) | ||||
{ | { | ||||
if (MB_CUR_MAX == 1) { | if (MB_CUR_MAX == 1) { | ||||
unsigned char *end, *s; | char *end, *s; | ||||
s = str->data.cstr; | s = str->cdata.str; | ||||
end = s + str->len; | end = s + str->cdata.len; | ||||
while (s < end) { | while (s < end) { | ||||
*s = toupper(*s); | *s = toupper(*s); | ||||
++s; | ++s; | ||||
} | } | ||||
} else { | } else { | ||||
wchar_t *end, *s; | wchar_t *end, *s; | ||||
s = str->data.wstr; | s = str->wdata.str; | ||||
end = s + str->len; | end = s + str->wdata.len; | ||||
while (s < end) { | while (s < end) { | ||||
*s = towupper(*s); | *s = towupper(*s); | ||||
++s; | ++s; | ||||
} | } | ||||
} | } | ||||
return (str); | return (str); | ||||
} | } | ||||
void | void | ||||
bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) | bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) | ||||
{ | { | ||||
if (MB_CUR_MAX == 1) | if (MB_CUR_MAX == 1) | ||||
warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); | warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str); | ||||
else | else | ||||
warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); | warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str); | ||||
} | } |