Changeset View
Changeset View
Standalone View
Standalone View
lib/libedit/chartype.c
/* $NetBSD: chartype.c,v 1.23 2016/02/28 23:02:24 christos Exp $ */ | /* $NetBSD: chartype.c,v 1.35 2019/07/23 10:18:52 christos Exp $ */ | ||||
/*- | /*- | ||||
* Copyright (c) 2009 The NetBSD Foundation, Inc. | * Copyright (c) 2009 The NetBSD Foundation, Inc. | ||||
* All rights reserved. | * All rights reserved. | ||||
* | * | ||||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | ||||
* modification, are permitted provided that the following conditions | * modification, are permitted provided that the following conditions | ||||
* are met: | * are met: | ||||
Show All 16 Lines | |||||
* POSSIBILITY OF SUCH DAMAGE. | * POSSIBILITY OF SUCH DAMAGE. | ||||
*/ | */ | ||||
/* | /* | ||||
* chartype.c: character classification and meta information | * chartype.c: character classification and meta information | ||||
*/ | */ | ||||
#include "config.h" | #include "config.h" | ||||
#if !defined(lint) && !defined(SCCSID) | #if !defined(lint) && !defined(SCCSID) | ||||
__RCSID("$NetBSD: chartype.c,v 1.23 2016/02/28 23:02:24 christos Exp $"); | __RCSID("$NetBSD: chartype.c,v 1.35 2019/07/23 10:18:52 christos Exp $"); | ||||
#endif /* not lint && not SCCSID */ | #endif /* not lint && not SCCSID */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <ctype.h> | #include <ctype.h> | ||||
#include <limits.h> | #include <limits.h> | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include "el.h" | #include "el.h" | ||||
#define CT_BUFSIZ ((size_t)1024) | #define CT_BUFSIZ ((size_t)1024) | ||||
#ifdef WIDECHAR | static int ct_conv_cbuff_resize(ct_buffer_t *, size_t); | ||||
protected int | static int ct_conv_wbuff_resize(ct_buffer_t *, size_t); | ||||
/*
 * ct_conv_cbuff_resize:
 *	Make sure conv->cbuff has room for at least csize elements.
 *	A request that is not larger than the current conv->csize is a no-op.
 *	Returns 0 on success and -1 if el_realloc() fails.  On failure the
 *	previous buffer is released with el_free() and conv->cbuff /
 *	conv->csize are reset to NULL / 0, so the old contents are gone.
 */
static int | |||||
ct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize) | ct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize) | ||||
{ | { | ||||
void *p; | void *p; | ||||
if (csize <= conv->csize) | if (csize <= conv->csize) | ||||
return 0; | return 0; | ||||
/* the new size is recorded before reallocating; it is cleared again below on failure */
conv->csize = csize; | conv->csize = csize; | ||||
p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff)); | p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff)); | ||||
if (p == NULL) { | if (p == NULL) { | ||||
conv->csize = 0; | conv->csize = 0; | ||||
el_free(conv->cbuff); | el_free(conv->cbuff); | ||||
conv->cbuff = NULL; | conv->cbuff = NULL; | ||||
return -1; | return -1; | ||||
} | } | ||||
conv->cbuff = p; | conv->cbuff = p; | ||||
return 0; | return 0; | ||||
} | } | ||||
/*
 * ct_conv_wbuff_resize:
 *	Make sure conv->wbuff has room for at least wsize elements.
 *	A request that is not larger than the current conv->wsize is a no-op.
 *	Returns 0 on success and -1 if el_realloc() fails.  On failure the
 *	previous buffer is released with el_free() and conv->wbuff /
 *	conv->wsize are reset to NULL / 0, so the old contents are gone.
 *	The only difference between the two columns is the linkage keyword
 *	(`protected' on the left, `static' on the right).
 */
protected int | static int | ||||
ct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize) | ct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize) | ||||
{ | { | ||||
void *p; | void *p; | ||||
if (wsize <= conv->wsize) | if (wsize <= conv->wsize) | ||||
return 0; | return 0; | ||||
/* the new size is recorded before reallocating; it is cleared again below on failure */
conv->wsize = wsize; | conv->wsize = wsize; | ||||
p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff)); | p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff)); | ||||
if (p == NULL) { | if (p == NULL) { | ||||
conv->wsize = 0; | conv->wsize = 0; | ||||
el_free(conv->wbuff); | el_free(conv->wbuff); | ||||
conv->wbuff = NULL; | conv->wbuff = NULL; | ||||
return -1; | return -1; | ||||
} | } | ||||
conv->wbuff = p; | conv->wbuff = p; | ||||
return 0; | return 0; | ||||
} | } | ||||
/*
 * ct_encode_string:
 *	Takes a wide string s (Char in the left column, wchar_t in the right)
 *	and a conversion buffer.  Output is written starting at conv->cbuff,
 *	terminated with '\0', and conv->cbuff is returned.
 *	Returns NULL when s is NULL.
 *	NOTE(review): twelve lines in the middle of this function are
 *	collapsed in this view ("Show All 12 Lines"), so the loop head, the
 *	buffer sizing and the call that sets `used' are not visible here.
 *	The visible tail shows abort() following an `used == -1' check, then
 *	s advancing by one element and dst advancing by `used'.
 */
public char * | char * | ||||
ct_encode_string(const Char *s, ct_buffer_t *conv) | ct_encode_string(const wchar_t *s, ct_buffer_t *conv) | ||||
{ | { | ||||
char *dst; | char *dst; | ||||
ssize_t used; | ssize_t used; | ||||
if (!s) | if (!s) | ||||
return NULL; | return NULL; | ||||
dst = conv->cbuff; | dst = conv->cbuff; | ||||
Show All 12 Lines | if (used == -1) /* failed to encode, need more buffer space */ | ||||
abort(); | abort(); | ||||
++s; | ++s; | ||||
dst += used; | dst += used; | ||||
} | } | ||||
*dst = '\0'; | *dst = '\0'; | ||||
return conv->cbuff; | return conv->cbuff; | ||||
} | } | ||||
/*
 * ct_decode_string:
 *	Convert the multibyte string s into conv->wbuff and return that
 *	buffer.  The left column calls ct_mbstowcs(), the right column calls
 *	mbstowcs() directly.
 *	The length is measured first with a NULL destination.  If
 *	conv->wsize is smaller than that length plus one, the buffer is grown
 *	to the incremented length plus CT_BUFSIZ elements.
 *	Returns NULL if s is NULL, if the measuring call returns (size_t)-1,
 *	or if the buffer cannot be resized.
 */
public Char * | wchar_t * | ||||
ct_decode_string(const char *s, ct_buffer_t *conv) | ct_decode_string(const char *s, ct_buffer_t *conv) | ||||
{ | { | ||||
size_t len; | size_t len; | ||||
if (!s) | if (!s) | ||||
return NULL; | return NULL; | ||||
/* first pass: NULL destination, only the resulting length is wanted */
len = ct_mbstowcs(NULL, s, (size_t)0); | len = mbstowcs(NULL, s, (size_t)0); | ||||
if (len == (size_t)-1) | if (len == (size_t)-1) | ||||
return NULL; | return NULL; | ||||
/* ++len: one element more than the measured length is required */
if (conv->wsize < ++len) | if (conv->wsize < ++len) | ||||
if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1) | if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1) | ||||
return NULL; | return NULL; | ||||
/* second pass: the return value of the real conversion is not examined */
ct_mbstowcs(conv->wbuff, s, conv->wsize); | mbstowcs(conv->wbuff, s, conv->wsize); | ||||
return conv->wbuff; | return conv->wbuff; | ||||
} | } | ||||
/*
 * ct_decode_argv:
 *	Convert the argc multibyte strings in argv to wide strings.
 *	The converted strings are stored back to back in conv->wbuff.  The
 *	returned array is allocated separately (argc + 1 pointers, the last
 *	one NULL) and its entries point into conv->wbuff.  NULL entries in
 *	argv stay NULL in the result.
 *	Returns NULL if conv->wbuff cannot be grown, or if mbstowcs() fails
 *	on any argument (the pointer array is freed first in that case).
 *	The left column allocates the array with el_malloc(), the right
 *	column with el_calloc().
 *	NOTE(review): in both columns the allocation result is not checked
 *	before wargv[i] is written.
 */
protected Char ** | libedit_private wchar_t ** | ||||
ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv) | ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv) | ||||
{ | { | ||||
size_t bufspace; | size_t bufspace; | ||||
int i; | int i; | ||||
Char *p; | wchar_t *p; | ||||
Char **wargv; | wchar_t **wargv; | ||||
ssize_t bytes; | ssize_t bytes; | ||||
/* Make sure we have enough space in the conversion buffer to store all | /* Make sure we have enough space in the conversion buffer to store all | ||||
* the argv strings. */ | * the argv strings. */ | ||||
/* bufspace = sum over non-NULL arguments of (byte length + 1) */
for (i = 0, bufspace = 0; i < argc; ++i) | for (i = 0, bufspace = 0; i < argc; ++i) | ||||
bufspace += argv[i] ? strlen(argv[i]) + 1 : 0; | bufspace += argv[i] ? strlen(argv[i]) + 1 : 0; | ||||
if (conv->wsize < ++bufspace) | if (conv->wsize < ++bufspace) | ||||
if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1) | if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1) | ||||
return NULL; | return NULL; | ||||
wargv = el_malloc((size_t)(argc + 1) * sizeof(*wargv)); | wargv = el_calloc((size_t)(argc + 1), sizeof(*wargv)); | ||||
/* p walks through conv->wbuff; each converted argument starts where the previous one ended */
for (i = 0, p = conv->wbuff; i < argc; ++i) { | for (i = 0, p = conv->wbuff; i < argc; ++i) { | ||||
if (!argv[i]) { /* don't pass null pointers to mbstowcs */ | if (!argv[i]) { /* don't pass null pointers to mbstowcs */ | ||||
wargv[i] = NULL; | wargv[i] = NULL; | ||||
continue; | continue; | ||||
} else { | } else { | ||||
wargv[i] = p; | wargv[i] = p; | ||||
bytes = (ssize_t)mbstowcs(p, argv[i], bufspace); | bytes = (ssize_t)mbstowcs(p, argv[i], bufspace); | ||||
} | } | ||||
if (bytes == -1) { | if (bytes == -1) { | ||||
el_free(wargv); | el_free(wargv); | ||||
return NULL; | return NULL; | ||||
} else | } else | ||||
bytes++; /* include '\0' in the count */ | bytes++; /* include '\0' in the count */ | ||||
bufspace -= (size_t)bytes; | bufspace -= (size_t)bytes; | ||||
p += bytes; | p += bytes; | ||||
} | } | ||||
/* i == argc here: terminate the pointer array */
wargv[i] = NULL; | wargv[i] = NULL; | ||||
return wargv; | return wargv; | ||||
} | } | ||||
/*
 * ct_enc_width:
 *	Return the number of bytes needed to encode c as a multibyte
 *	sequence, or 0 if the conversion reports (size_t)-1.
 *	The character is converted into a local MB_LEN_MAX scratch buffer
 *	from a zeroed mbstate_t; only the returned length is used.
 *	The two columns are interleaved line by line below: the left column
 *	(old) uses ps/len/cbuf and ct_wcrtomb(), the right column (new) uses
 *	mbs/size/buf, memset() and wcrtomb().
 */
protected size_t | libedit_private size_t | ||||
ct_enc_width(Char c) | ct_enc_width(wchar_t c) | ||||
{ | { | ||||
mbstate_t ps = (mbstate_t){{0}}; | mbstate_t mbs; | ||||
size_t len; | char buf[MB_LEN_MAX]; | ||||
char cbuf[MB_LEN_MAX]; | size_t size; | ||||
len = ct_wcrtomb(cbuf, c, &ps); | memset(&mbs, 0, sizeof(mbs)); | ||||
if (len == (size_t)-1) | |||||
return (0); | if ((size = wcrtomb(buf, c, &mbs)) == (size_t)-1) | ||||
return (len); | return 0; | ||||
return size; | |||||
} | } | ||||
/*
 * ct_encode_char:
 *	Store the multibyte encoding of c at dst; len is the space available
 *	there.
 *	Returns -1 when len is smaller than ct_enc_width(c).  Otherwise
 *	returns the result of the wctomb() call (ct_wctomb() in the left
 *	column), except that a negative result is replaced by 0 after the
 *	conversion state has been reset.
 */
protected ssize_t | libedit_private ssize_t | ||||
ct_encode_char(char *dst, size_t len, Char c) | ct_encode_char(char *dst, size_t len, wchar_t c) | ||||
{ | { | ||||
ssize_t l = 0; | ssize_t l = 0; | ||||
if (len < ct_enc_width(c)) | if (len < ct_enc_width(c)) | ||||
return -1; | return -1; | ||||
l = ct_wctomb(dst, c); | l = wctomb(dst, c); | ||||
if (l < 0) { | if (l < 0) { | ||||
/* conversion failed: reset the state and report zero bytes rather than an error */
ct_wctomb_reset; | wctomb(NULL, L'\0'); | ||||
l = 0; | l = 0; | ||||
} | } | ||||
return l; | return l; | ||||
} | } | ||||
/*
 * The two columns of this span do not line up function for function:
 *
 *   left column (old revision)
 *	ct_mbrtowc() in two forms separated by `#else'.  The first form
 *	calls mbrtowc() with a freshly zeroed mbstate_t.  The second form
 *	returns 0 for a NULL s, (size_t)-2 for n == 0, otherwise stores *s
 *	in *wc (when wc is not NULL) and returns whether *s is non-NUL.
 *	After `#endif' comes ct_visual_string(s), which builds its result
 *	in a function-static buffer.
 *
 *   right column (new revision)
 *	ct_visual_string(s, conv), which builds its result in conv->wbuff.
 *	No ct_mbrtowc() appears in this column.
 *
 * ct_visual_string:
 *	Expand s one character at a time with ct_visual_char() and return
 *	the terminated result.  Returns NULL if s is NULL or if a buffer
 *	(re)allocation fails.
 *	When ct_visual_char() returns -1, the buffer is grown by CT_BUFSIZ
 *	elements and the same character is tried again.  After the loop the
 *	buffer is grown once more if dst has reached its end, so that the
 *	terminator fits (by 1 element in the old revision, by CT_BUFSIZ in
 *	the new one).
 *	NOTE(review): in the old revision the `out:' path frees the static
 *	buff and zeroes buffsize but leaves the buff pointer itself
 *	unchanged, and the initial el_malloc() result is not checked.
 */
size_t | libedit_private const wchar_t * | ||||
ct_mbrtowc(wchar_t *wc, const char *s, size_t n) | ct_visual_string(const wchar_t *s, ct_buffer_t *conv) | ||||
{ | { | ||||
mbstate_t mbs; | wchar_t *dst; | ||||
/* This only works because UTF-8 is stateless */ | ssize_t used; | ||||
memset(&mbs, 0, sizeof(mbs)); | |||||
return mbrtowc(wc, s, n, &mbs); | |||||
} | |||||
#else | |||||
size_t | |||||
ct_mbrtowc(wchar_t *wc, const char *s, size_t n) | |||||
{ | |||||
if (s == NULL) | |||||
return 0; | |||||
if (n == 0) | |||||
return (size_t)-2; | |||||
if (wc != NULL) | |||||
*wc = *s; | |||||
return *s != '\0'; | |||||
} | |||||
#endif | |||||
protected const Char * | |||||
ct_visual_string(const Char *s) | |||||
{ | |||||
static Char *buff = NULL; | |||||
static size_t buffsize = 0; | |||||
void *p; | |||||
Char *dst; | |||||
ssize_t used = 0; | |||||
if (!s) | if (!s) | ||||
return NULL; | return NULL; | ||||
if (!buff) { | |||||
buffsize = CT_BUFSIZ; | if (ct_conv_wbuff_resize(conv, CT_BUFSIZ) == -1) | ||||
buff = el_malloc(buffsize * sizeof(*buff)); | return NULL; | ||||
} | |||||
dst = buff; | used = 0; | ||||
dst = conv->wbuff; | |||||
while (*s) { | while (*s) { | ||||
used = ct_visual_char(dst, buffsize - (size_t)(dst - buff), *s); | used = ct_visual_char(dst, | ||||
if (used == -1) { /* failed to encode, need more buffer space */ | conv->wsize - (size_t)(dst - conv->wbuff), *s); | ||||
used = dst - buff; | if (used != -1) { | ||||
buffsize += CT_BUFSIZ; | |||||
p = el_realloc(buff, buffsize * sizeof(*buff)); | |||||
if (p == NULL) | |||||
goto out; | |||||
buff = p; | |||||
dst = buff + used; | |||||
/* don't increment s here - we want to retry it! */ | |||||
} | |||||
else | |||||
++s; | ++s; | ||||
dst += used; | dst += used; | ||||
continue; | |||||
} | } | ||||
if (dst >= (buff + buffsize)) { /* sigh */ | |||||
buffsize += 1; | /* failed to encode, need more buffer space */ | ||||
p = el_realloc(buff, buffsize * sizeof(*buff)); | used = dst - conv->wbuff; | ||||
if (p == NULL) | if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1) | ||||
goto out; | return NULL; | ||||
buff = p; | dst = conv->wbuff + used; | ||||
dst = buff + buffsize - 1; | |||||
} | } | ||||
*dst = 0; | |||||
return buff; | if (dst >= (conv->wbuff + conv->wsize)) { /* sigh */ | ||||
out: | used = dst - conv->wbuff; | ||||
el_free(buff); | if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1) | ||||
buffsize = 0; | |||||
return NULL; | return NULL; | ||||
dst = conv->wbuff + used; | |||||
} | } | ||||
*dst = L'\0'; | |||||
return conv->wbuff; | |||||
} | |||||
/*
 * ct_visual_width:
 *	Return a width value for c based on its ct_chr_class():
 *	  CHTYPE_ASCIICTL   2
 *	  CHTYPE_TAB        1
 *	  CHTYPE_NL         0
 *	  CHTYPE_PRINT      wcwidth(c)
 *	  CHTYPE_NONPRINT   8 if c > 0xffff, otherwise 7
 *	  anything else     0
 *	The old revision (left column) additionally has a non-WIDECHAR
 *	variant returning 1 for CHTYPE_PRINT and 4 for CHTYPE_NONPRINT; the
 *	lines with a single cell between `#ifdef WIDECHAR' and `#endif'
 *	belong to that revision only.
 */
protected int | |||||
ct_visual_width(Char c) | libedit_private int | ||||
ct_visual_width(wchar_t c) | |||||
{ | { | ||||
int t = ct_chr_class(c); | int t = ct_chr_class(c); | ||||
switch (t) { | switch (t) { | ||||
case CHTYPE_ASCIICTL: | case CHTYPE_ASCIICTL: | ||||
return 2; /* ^@ ^? etc. */ | return 2; /* ^@ ^? etc. */ | ||||
case CHTYPE_TAB: | case CHTYPE_TAB: | ||||
return 1; /* Hmm, this really need to be handled outside! */ | return 1; /* Hmm, this really need to be handled outside! */ | ||||
case CHTYPE_NL: | case CHTYPE_NL: | ||||
return 0; /* Should this be 1 instead? */ | return 0; /* Should this be 1 instead? */ | ||||
#ifdef WIDECHAR | |||||
case CHTYPE_PRINT: | case CHTYPE_PRINT: | ||||
return wcwidth(c); | return wcwidth(c); | ||||
case CHTYPE_NONPRINT: | case CHTYPE_NONPRINT: | ||||
if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ | if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ | ||||
return 8; /* \U+12345 */ | return 8; /* \U+12345 */ | ||||
else | else | ||||
return 7; /* \U+1234 */ | return 7; /* \U+1234 */ | ||||
#else | |||||
case CHTYPE_PRINT: | |||||
return 1; | |||||
case CHTYPE_NONPRINT: | |||||
return 4; /* \123 */ | |||||
#endif | |||||
default: | default: | ||||
return 0; /* should not happen */ | return 0; /* should not happen */ | ||||
} | } | ||||
} | } | ||||
/*
 * ct_visual_char:
 *	Write a displayable form of c to dst, where len is the number of
 *	elements available.  No terminator is written.
 *	Returns the number of elements written, -1 if len is too small, or 0
 *	for a class that is not handled here.
 *	  TAB, NL, ASCII control   '^' followed by c | 0100 ('?' for DEL); 2
 *	  printable                c itself; 1
 *	  non-printable            "\U+" plus 4 hex digits, or 5 when
 *	                           c > 0xffff; 7 or 8
 *	NOTE(review): in the old revision's non-WIDECHAR branch (single-cell
 *	lines between `#else' and `#endif') a backslash and three octal
 *	digits are written and control then falls through to `return 0'.
 *	In the right column the FALLTHROUGH comment follows a `return'.
 */
protected ssize_t | libedit_private ssize_t | ||||
ct_visual_char(Char *dst, size_t len, Char c) | ct_visual_char(wchar_t *dst, size_t len, wchar_t c) | ||||
{ | { | ||||
int t = ct_chr_class(c); | int t = ct_chr_class(c); | ||||
switch (t) { | switch (t) { | ||||
case CHTYPE_TAB: | case CHTYPE_TAB: | ||||
case CHTYPE_NL: | case CHTYPE_NL: | ||||
case CHTYPE_ASCIICTL: | case CHTYPE_ASCIICTL: | ||||
if (len < 2) | if (len < 2) | ||||
return -1; /* insufficient space */ | return -1; /* insufficient space */ | ||||
*dst++ = '^'; | *dst++ = '^'; | ||||
if (c == '\177') | if (c == '\177') | ||||
*dst = '?'; /* DEL -> ^? */ | *dst = '?'; /* DEL -> ^? */ | ||||
else | else | ||||
*dst = c | 0100; /* uncontrolify it */ | *dst = c | 0100; /* uncontrolify it */ | ||||
return 2; | return 2; | ||||
case CHTYPE_PRINT: | case CHTYPE_PRINT: | ||||
if (len < 1) | if (len < 1) | ||||
return -1; /* insufficient space */ | return -1; /* insufficient space */ | ||||
*dst = c; | *dst = c; | ||||
return 1; | return 1; | ||||
case CHTYPE_NONPRINT: | case CHTYPE_NONPRINT: | ||||
/* we only use single-width glyphs for display, | /* we only use single-width glyphs for display, | ||||
* so this is right */ | * so this is right */ | ||||
if ((ssize_t)len < ct_visual_width(c)) | if ((ssize_t)len < ct_visual_width(c)) | ||||
return -1; /* insufficient space */ | return -1; /* insufficient space */ | ||||
#ifdef WIDECHAR | |||||
*dst++ = '\\'; | *dst++ = '\\'; | ||||
*dst++ = 'U'; | *dst++ = 'U'; | ||||
*dst++ = '+'; | *dst++ = '+'; | ||||
#define tohexdigit(v) "0123456789ABCDEF"[v] | #define tohexdigit(v) "0123456789ABCDEF"[v] | ||||
if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ | if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ | ||||
*dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf); | *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf); | ||||
*dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf); | *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf); | ||||
*dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf); | *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf); | ||||
*dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf); | *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf); | ||||
*dst = tohexdigit(((unsigned int) c ) & 0xf); | *dst = tohexdigit(((unsigned int) c ) & 0xf); | ||||
return c > 0xffff ? 8 : 7; | return c > 0xffff ? 8 : 7; | ||||
#else | |||||
*dst++ = '\\'; | |||||
#define tooctaldigit(v) (Char)((v) + '0') | |||||
*dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7); | |||||
*dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7); | |||||
*dst++ = tooctaldigit(((unsigned int) c ) & 0x7); | |||||
#endif | |||||
/*FALLTHROUGH*/ | /*FALLTHROUGH*/ | ||||
/* these two should be handled outside this function */ | /* these two should be handled outside this function */ | ||||
default: /* we should never hit the default */ | default: /* we should never hit the default */ | ||||
return 0; | return 0; | ||||
} | } | ||||
} | } | ||||
/*
 * ct_chr_class:
 *	Classify c as one of the CHTYPE_* values.  The tests are applied in
 *	this order, and the first match wins:
 *	  '\t'                               CHTYPE_TAB
 *	  '\n'                               CHTYPE_NL
 *	  control character below 0x100      CHTYPE_ASCIICTL
 *	  printable                          CHTYPE_PRINT
 *	  everything else                    CHTYPE_NONPRINT
 *	The left column (old) spells the third and fourth tests with the
 *	IsASCII()/Iscntrl()/Isprint() macros; the right column (new) uses
 *	`c < 0x100', iswcntrl() and iswprint() directly.
 */
protected int | libedit_private int | ||||
ct_chr_class(Char c) | ct_chr_class(wchar_t c) | ||||
{ | { | ||||
if (c == '\t') | if (c == '\t') | ||||
return CHTYPE_TAB; | return CHTYPE_TAB; | ||||
else if (c == '\n') | else if (c == '\n') | ||||
return CHTYPE_NL; | return CHTYPE_NL; | ||||
else if (IsASCII(c) && Iscntrl(c)) | else if (c < 0x100 && iswcntrl(c)) | ||||
return CHTYPE_ASCIICTL; | return CHTYPE_ASCIICTL; | ||||
else if (Isprint(c)) | else if (iswprint(c)) | ||||
return CHTYPE_PRINT; | return CHTYPE_PRINT; | ||||
else | else | ||||
return CHTYPE_NONPRINT; | return CHTYPE_NONPRINT; | ||||
} | } | ||||