Index: lib/libc/regex/regcomp.c =================================================================== --- lib/libc/regex/regcomp.c +++ lib/libc/regex/regcomp.c @@ -936,7 +936,7 @@ wint_t i; #ifndef LIBREGEX struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; + (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; #endif /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { @@ -984,15 +984,30 @@ if (start == finish) CHadd(p, cs, start); else { + /* + * Below is the implementation choice we made based on + * POSIX.2 defining range expressions outside of POSIX + * locale to have unspecified behavior. + * + * 1. Do binary (non-collating) comparison in multibyte + * locales -- if character comes from POSIX locale + * range, it will be properly attributed as such by + * CHaddrange(). + * 2. Do collation-based comparison in single-byte + * locales as they do not seem to have the same + * problem as multi-byte ones mixing small/capital + * and other characters in the collation order. + */ #ifndef LIBREGEX if (table->__collate_load_error || MB_CUR_MAX > 1) { #else if (MB_CUR_MAX > 1) { #endif - (void)REQUIRE(start <= finish, REG_ERANGE); + (void) REQUIRE(start <= finish, REG_ERANGE); CHaddrange(p, cs, start, finish); } else { - (void)REQUIRE(p_range_cmp(start, finish) <= 0, REG_ERANGE); + (void) REQUIRE(p_range_cmp(start, finish) <= 0, + REG_ERANGE); for (i = 0; i <= UCHAR_MAX; i++) { if (p_range_cmp(start, i) <= 0 && p_range_cmp(i, finish) <= 0 ) Index: lib/libc/regex/regex.3 =================================================================== --- lib/libc/regex/regex.3 +++ lib/libc/regex/regex.3 @@ -32,7 +32,7 @@ .\" @(#)regex.3 8.4 (Berkeley) 3/20/94 .\" $FreeBSD$ .\" -.Dd May 25, 2016 +.Dd January 23, 2018 .Dt REGEX 3 .Os .Sh NAME @@ -612,6 +612,16 @@ beginning and ending subexpressions in obsolete .Pq Dq basic REs are anchors, not ordinary characters. 
+.Pp +Range expressions outside of the POSIX +.Pq Dq C +locale are defined to have unspecified behavior: conforming applications should +not expect a range expression to be valid, or to match a well-defined set of +collating elements. +The current implementation does collation-based comparison in single-byte +locales, and binary +.Pq non-collating +comparison in multibyte locales. .Sh DIAGNOSTICS Non-zero error codes from .Fn regcomp