Differential D10152 Diff 26895 head/contrib/less/charset.c

Changeset View

Standalone View

head/contrib/less/charset.c

Show First 20 Lines • Show All 124 Lines • ▼ Show 20 Lines
* repetition of the letter.		* repetition of the letter.
*		*
* Each letter is one of:		* Each letter is one of:
* . normal character		* . normal character
* b binary character		* b binary character
* c control character		* c control character
*/		*/
static void		static void
ichardef(s)		ichardef(char *s)
char *s;
{		{
register char *cp;		char *cp;
register int n;		int n;
register char v;		char v;

n = 0;		n = 0;
v = 0;		v = 0;
cp = chardef;		cp = chardef;
while (*s != '\0')		while (*s != '\0')
{		{
switch (*s++)		switch (*s++)
{		{
Show All 35 Lines	while (cp < chardef + sizeof(chardef))
*cp++ = v;		*cp++ = v;
}		}

/*		/*
* Define a charset, given a charset name.		* Define a charset, given a charset name.
* The valid charset names are listed in the "charsets" array.		* The valid charset names are listed in the "charsets" array.
*/		*/
static int		static int
icharset(name, no_error)		icharset(char *name, int no_error)
register char *name;
int no_error;
{		{
register struct charset *p;		struct charset *p;
register struct cs_alias *a;		struct cs_alias *a;

if (name == NULL || *name == '\0')		if (name == NULL || *name == '\0')
return (0);		return (0);

/* First see if the name is an alias. */		/* First see if the name is an alias. */
for (a = cs_aliases; a->name != NULL; a++)		for (a = cs_aliases; a->name != NULL; a++)
{		{
if (strcmp(name, a->name) == 0)		if (strcmp(name, a->name) == 0)
Show All 21 Lines	icharset(char *name, int no_error)
return (0);		return (0);
}		}

#if HAVE_LOCALE		#if HAVE_LOCALE
/*		/*
* Define a charset, given a locale name.		* Define a charset, given a locale name.
*/		*/
static void		static void
ilocale()		ilocale(void)
{		{
register int c;		int c;

for (c = 0; c < (int) sizeof(chardef); c++)		for (c = 0; c < (int) sizeof(chardef); c++)
{		{
if (isprint(c))		if (isprint(c))
chardef[c] = 0;		chardef[c] = 0;
else if (iscntrl(c))		else if (iscntrl(c))
chardef[c] = IS_CONTROL_CHAR;		chardef[c] = IS_CONTROL_CHAR;
else		else
chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR;		chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR;
}		}
}		}
#endif		#endif

/*		/*
* Define the printing format for control (or binary utf) chars.		* Define the printing format for control (or binary utf) chars.
*/		*/
static void		static void
setbinfmt(s, fmtvarptr, default_fmt)		setbinfmt(char *s, char **fmtvarptr, char *default_fmt)
char *s;
char **fmtvarptr;
char *default_fmt;
{		{
if (s && utf_mode)		if (s && utf_mode)
{		{
/* It would be too hard to account for width otherwise. */		/* It would be too hard to account for width otherwise. */
char *t = s;		char *t = s;
while (*t)		while (*t)
{		{
if (*t < ' ' || *t > '~')		if (*t < ' ' || *t > '~')
Show All 29 Lines	attr:
}		}
*fmtvarptr = s;		*fmtvarptr = s;
}		}

/*		/*
*		*
*/		*/
static void		static void
set_charset()		set_charset(void)
{		{
char *s;		char *s;

/*		/*
* See if environment variable LESSCHARSET is defined.		* See if environment variable LESSCHARSET is defined.
*/		*/
s = lgetenv("LESSCHARSET");		s = lgetenv("LESSCHARSET");
if (icharset(s, 0))		if (icharset(s, 0))
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines
#endif		#endif
#endif		#endif
}		}

/*		/*
* Initialize charset data structures.		* Initialize charset data structures.
*/		*/
public void		public void
init_charset()		init_charset(void)
{		{
char *s;		char *s;

#if HAVE_LOCALE		#if HAVE_LOCALE
setlocale(LC_ALL, "");		setlocale(LC_ALL, "");
#endif		#endif

set_charset();		set_charset();

s = lgetenv("LESSBINFMT");		s = lgetenv("LESSBINFMT");
setbinfmt(s, &binfmt, "*s<%02X>");		setbinfmt(s, &binfmt, "*s<%02X>");

s = lgetenv("LESSUTFBINFMT");		s = lgetenv("LESSUTFBINFMT");
setbinfmt(s, &utfbinfmt, "<U+%04lX>");		setbinfmt(s, &utfbinfmt, "<U+%04lX>");
}		}

/*		/*
* Is a given character a "binary" character?		* Is a given character a "binary" character?
*/		*/
public int		public int
binary_char(c)		binary_char(LWCHAR c)
LWCHAR c;
{		{
if (utf_mode)		if (utf_mode)
return (is_ubin_char(c));		return (is_ubin_char(c));
c &= 0377;		c &= 0377;
return (chardef[c] & IS_BINARY_CHAR);		return (chardef[c] & IS_BINARY_CHAR);
}		}

/*		/*
* Is a given character a "control" character?		* Is a given character a "control" character?
*/		*/
public int		public int
control_char(c)		control_char(LWCHAR c)
LWCHAR c;
{		{
c &= 0377;		c &= 0377;
return (chardef[c] & IS_CONTROL_CHAR);		return (chardef[c] & IS_CONTROL_CHAR);
}		}

/*		/*
* Return the printable form of a character.		* Return the printable form of a character.
* For example, in the "ascii" charset '\3' is printed as "^C".		* For example, in the "ascii" charset '\3' is printed as "^C".
*/		*/
public char *		public char *
prchar(c)		prchar(LWCHAR c)
LWCHAR c;
{		{
/* {{ This buffer can be overrun if LESSBINFMT is a long string. }} */		/* {{ This buffer can be overrun if LESSBINFMT is a long string. }} */
static char buf[32];		static char buf[32];

c &= 0377;		c &= 0377;
if ((c < 128 || !utf_mode) && !control_char(c))		if ((c < 128 || !utf_mode) && !control_char(c))
SNPRINTF1(buf, sizeof(buf), "%c", (int) c);		SNPRINTF1(buf, sizeof(buf), "%c", (int) c);
else if (c == ESC)		else if (c == ESC)
Show All 18 Lines	else
SNPRINTF1(buf, sizeof(buf), binfmt, c);		SNPRINTF1(buf, sizeof(buf), binfmt, c);
return (buf);		return (buf);
}		}

/*		/*
* Return the printable form of a UTF-8 character.		* Return the printable form of a UTF-8 character.
*/		*/
public char *		public char *
prutfchar(ch)		prutfchar(LWCHAR ch)
LWCHAR ch;
{		{
static char buf[32];		static char buf[32];

if (ch == ESC)		if (ch == ESC)
strcpy(buf, "ESC");		strcpy(buf, "ESC");
else if (ch < 128 && control_char(ch))		else if (ch < 128 && control_char(ch))
{		{
if (!control_char(ch ^ 0100))		if (!control_char(ch ^ 0100))
Show All 13 Lines	prutfchar(LWCHAR ch)
}		}
return (buf);		return (buf);
}		}

/*		/*
* Get the length of a UTF-8 character in bytes.		* Get the length of a UTF-8 character in bytes.
*/		*/
public int		public int
utf_len(ch)		utf_len(char ch)
char ch;
{		{
if ((ch & 0x80) == 0)		if ((ch & 0x80) == 0)
return 1;		return 1;
if ((ch & 0xE0) == 0xC0)		if ((ch & 0xE0) == 0xC0)
return 2;		return 2;
if ((ch & 0xF0) == 0xE0)		if ((ch & 0xF0) == 0xE0)
return 3;		return 3;
if ((ch & 0xF8) == 0xF0)		if ((ch & 0xF8) == 0xF0)
return 4;		return 4;
if ((ch & 0xFC) == 0xF8)		if ((ch & 0xFC) == 0xF8)
return 5;		return 5;
if ((ch & 0xFE) == 0xFC)		if ((ch & 0xFE) == 0xFC)
return 6;		return 6;
/* Invalid UTF-8 encoding. */		/* Invalid UTF-8 encoding. */
return 1;		return 1;
}		}

/*		/*
* Does the parameter point to the lead byte of a well-formed UTF-8 character?		* Does the parameter point to the lead byte of a well-formed UTF-8 character?
*/		*/
public int		public int
is_utf8_well_formed(s, slen)		is_utf8_well_formed(unsigned char *s, int slen)
unsigned char *s;
int slen;
{		{
int i;		int i;
int len;		int len;

if (IS_UTF8_INVALID(s[0]))		if (IS_UTF8_INVALID(s[0]))
return (0);		return (0);

len = utf_len((char) s[0]);		len = utf_len((char) s[0]);
Show All 18 Lines	if (!IS_UTF8_TRAIL(s[i]))
return (0);		return (0);
return (1);		return (1);
}		}

/*		/*
* Return number of invalid UTF-8 sequences found in a buffer.		* Return number of invalid UTF-8 sequences found in a buffer.
*/		*/
public int		public int
utf_bin_count(data, len)		utf_bin_count(unsigned char *data, int len)
unsigned char *data;
int len;
{		{
int bin_count = 0;		int bin_count = 0;
while (len > 0)		while (len > 0)
{		{
if (is_utf8_well_formed(data, len))		if (is_utf8_well_formed(data, len))
{		{
int clen = utf_len(*data);		int clen = utf_len(*data);
data += clen;		data += clen;
Show All 10 Lines	utf_bin_count(unsigned char *data, int len)
}		}
return (bin_count);		return (bin_count);
}		}

/*		/*
* Get the value of a UTF-8 character.		* Get the value of a UTF-8 character.
*/		*/
public LWCHAR		public LWCHAR
get_wchar(p)		get_wchar(constant char *p)
char *p;
{		{
switch (utf_len(p[0]))		switch (utf_len(p[0]))
{		{
case 1:		case 1:
default:		default:
/* 0xxxxxxx */		/* 0xxxxxxx */
return (LWCHAR)		return (LWCHAR)
(p[0] & 0xFF);		(p[0] & 0xFF);
Show All 34 Lines	return (LWCHAR) (
(p[5] & 0x3F));		(p[5] & 0x3F));
}		}
}		}

/*		/*
* Store a character into a UTF-8 string.		* Store a character into a UTF-8 string.
*/		*/
public void		public void
put_wchar(pp, ch)		put_wchar(char **pp, LWCHAR ch)
char **pp;
LWCHAR ch;
{		{
if (!utf_mode || ch < 0x80)		if (!utf_mode || ch < 0x80)
{		{
/* 0xxxxxxx */		/* 0xxxxxxx */
*(*pp)++ = (char) ch;		*(*pp)++ = (char) ch;
} else if (ch < 0x800)		} else if (ch < 0x800)
{		{
/* 110xxxxx 10xxxxxx */		/* 110xxxxx 10xxxxxx */
Show All 31 Lines	if (!utf_mode || ch < 0x80)
*(*pp)++ = (char) (0x80 | (ch & 0x3F));		*(*pp)++ = (char) (0x80 | (ch & 0x3F));
}		}
}		}

/*		/*
* Step forward or backward one character in a string.		* Step forward or backward one character in a string.
*/		*/
public LWCHAR		public LWCHAR
step_char(pp, dir, limit)		step_char(constant char **pp, signed int dir, constant char *limit)
char **pp;
signed int dir;
char *limit;
{		{
LWCHAR ch;		LWCHAR ch;
int len;		int len;
char *p = *pp;		constant char *p = *pp;

if (!utf_mode)		if (!utf_mode)
{		{
/* It's easy if chars are one byte. */		/* It's easy if chars are one byte. */
if (dir > 0)		if (dir > 0)
ch = (LWCHAR) ((p < limit) ? *p++ : 0);		ch = (LWCHAR) ((p < limit) ? *p++ : 0);
else		else
ch = (LWCHAR) ((p > limit) ? *--p : 0);		ch = (LWCHAR) ((p > limit) ? *--p : 0);
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines

/* comb_table is special pairs, not ranges. */		/* comb_table is special pairs, not ranges. */
static struct wchar_range comb_table[] = {		static struct wchar_range comb_table[] = {
{0x0644,0x0622}, {0x0644,0x0623}, {0x0644,0x0625}, {0x0644,0x0627},		{0x0644,0x0622}, {0x0644,0x0623}, {0x0644,0x0625}, {0x0644,0x0627},
};		};


static int		static int
is_in_table(ch, table)		is_in_table(LWCHAR ch, struct wchar_range_table *table)
LWCHAR ch;
struct wchar_range_table *table;
{		{
int hi;		int hi;
int lo;		int lo;

/* Binary search in the table. */		/* Binary search in the table. */
if (ch < table->table[0].first)		if (ch < table->table[0].first)
return 0;		return 0;
lo = 0;		lo = 0;
Show All 11 Lines	is_in_table(LWCHAR ch, struct wchar_range_table *table)
return 0;		return 0;
}		}

/*		/*
* Is a character a UTF-8 composing character?		* Is a character a UTF-8 composing character?
* If a composing character follows any char, the two combine into one glyph.		* If a composing character follows any char, the two combine into one glyph.
*/		*/
public int		public int
is_composing_char(ch)		is_composing_char(LWCHAR ch)
LWCHAR ch;
{		{
return is_in_table(ch, &compose_table);		return is_in_table(ch, &compose_table);
}		}

/*		/*
* Should this UTF-8 character be treated as binary?		* Should this UTF-8 character be treated as binary?
*/		*/
public int		public int
is_ubin_char(ch)		is_ubin_char(LWCHAR ch)
LWCHAR ch;
{		{
return is_in_table(ch, &ubin_table);		return is_in_table(ch, &ubin_table);
}		}

/*		/*
* Is this a double width UTF-8 character?		* Is this a double width UTF-8 character?
*/		*/
public int		public int
is_wide_char(ch)		is_wide_char(LWCHAR ch)
LWCHAR ch;
{		{
return is_in_table(ch, &wide_table);		return is_in_table(ch, &wide_table);
}		}

/*		/*
* Is a character a UTF-8 combining character?		* Is a character a UTF-8 combining character?
* A combining char acts like an ordinary char, but if it follows		* A combining char acts like an ordinary char, but if it follows
* a specific char (not any char), the two combine into one glyph.		* a specific char (not any char), the two combine into one glyph.
*/		*/
public int		public int
is_combining_char(ch1, ch2)		is_combining_char(LWCHAR ch1, LWCHAR ch2)
LWCHAR ch1;
LWCHAR ch2;
{		{
/* The table is small; use linear search. */		/* The table is small; use linear search. */
int i;		int i;
for (i = 0; i < sizeof(comb_table)/sizeof(*comb_table); i++)		for (i = 0; i < sizeof(comb_table)/sizeof(*comb_table); i++)
{		{
if (ch1 == comb_table[i].first &&		if (ch1 == comb_table[i].first &&
ch2 == comb_table[i].last)		ch2 == comb_table[i].last)
return 1;		return 1;
}		}
return 0;		return 0;
}		}