Index: include/dirent.h =================================================================== --- include/dirent.h +++ include/dirent.h @@ -105,6 +105,7 @@ __BEGIN_DECLS #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE >= 700 int alphasort(const struct dirent **, const struct dirent **); +int versionsort(const struct dirent **, const struct dirent **); int dirfd(DIR *); #endif #if __BSD_VISIBLE Index: include/string.h =================================================================== --- include/string.h +++ include/string.h @@ -83,6 +83,7 @@ char *strchrnul(const char*, int) __pure; #endif int strcmp(const char *, const char *) __pure; +int strverscmp(const char *, const char *) __pure; int strcoll(const char *, const char *); char *strcpy(char * __restrict, const char * __restrict); size_t strcspn(const char *, const char *) __pure; Index: lib/libc/gen/Symbol.map =================================================================== --- lib/libc/gen/Symbol.map +++ lib/libc/gen/Symbol.map @@ -442,6 +442,7 @@ sched_getaffinity; sched_setaffinity; sched_getcpu; + versionsort; __cpuset_alloc; __cpuset_free; }; Index: lib/libc/gen/scandir.3 =================================================================== --- lib/libc/gen/scandir.3 +++ lib/libc/gen/scandir.3 @@ -28,12 +28,13 @@ .\" @(#)scandir.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd January 3, 2010 +.Dd July 11, 2022 .Dt SCANDIR 3 .Os .Sh NAME .Nm scandir , -.Nm alphasort +.Nm alphasort , +.Nm versionsort .Nd scan a directory .Sh LIBRARY .Lb libc @@ -45,6 +46,8 @@ .Fn scandir_b "const char *dirname" "struct dirent ***namelist" "int \*(lp*select\^(rp\*(lpconst struct dirent *\*(rp" "int \*(lp^compar\*(rp\*(lpconst struct dirent **, const struct dirent **\*(rp" .Ft int .Fn alphasort "const struct dirent **d1" "const struct dirent **d2" +.Ft int +.Fn versionsort "const struct dirent **d1" "const struct dirent **d2" .Sh DESCRIPTION The .Fn scandir @@ -86,6 +89,13 @@ argument to sort the array alphabetically using .Xr strcoll 3 . .Pp +The +.Fn versionsort +function is a routine which can be used for the +.Fa compar +argument to sort the array naturally using +.Xr strverscmp 3 . +.Pp The memory allocated for the array can be deallocated with .Xr free 3 , by freeing each pointer in the array and then the array itself. @@ -107,6 +117,7 @@ .Xr malloc 3 , .Xr qsort 3 , .Xr strcoll 3 , +.Xr strverscmp 3 , .Xr dir 5 .Sh HISTORY The Index: lib/libc/gen/scandir.c =================================================================== --- lib/libc/gen/scandir.c +++ lib/libc/gen/scandir.c @@ -150,6 +150,13 @@ return (strcoll((*d1)->d_name, (*d2)->d_name)); } +int +versionsort(const struct dirent **d1, const struct dirent **d2) +{ + + return strverscmp((*d1)->d_name, (*d2)->d_name); +} + static int alphasort_thunk(void *thunk, const void *p1, const void *p2) { Index: lib/libc/string/Makefile.inc =================================================================== --- lib/libc/string/Makefile.inc +++ lib/libc/string/Makefile.inc @@ -16,7 +16,7 @@ strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \ strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \ strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \ - strxfrm.c swab.c \ + strverscmp.c strxfrm.c swab.c \ timingsafe_bcmp.c \ timingsafe_memcmp.c \ wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \ @@ -46,7 +46,7 @@ memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \ strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \ string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \ - strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \ + strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \ timingsafe_bcmp.3 \ wcscoll.3 wcstok.3 \ wcswidth.3 wcsxfrm.3 wmemchr.3 Index: lib/libc/string/Symbol.map =================================================================== --- lib/libc/string/Symbol.map +++ lib/libc/string/Symbol.map @@ -116,6 +116,7 @@ FBSD_1.7 { mempcpy; + strverscmp; wmempcpy; }; Index: lib/libc/string/strverscmp.3 =================================================================== --- /dev/null +++ lib/libc/string/strverscmp.3 @@ -0,0 +1,51 @@ +.\" SPDX-License-Identifier: BSD-2-Clause +.\" Copyright (c) 2022 Obiwac +.Dd July 11, 2022 +.Dt STRVERSCMP 3 +.Os +.Sh NAME +.Nm strverscmp +.Nd compare strings according to natural order +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In string.h +.Ft int +.Fn strverscmp "const char *s1" "const char *s2" +.Sh DESCRIPTION +The +.Fn strverscmp +function +compares the null-terminated strings +.Fa s1 +and +.Fa s2 +according to their natural order +and returns an integer greater than, equal to, or less than 0, +depending on whether +.Fa s1 +is greater than, equal to, or less than +.Fa s2 . +.Pp +More specifically, this natural order is found by iterating over both +strings until a difference is found. +If the difference is between non-decimal characters, +.Fn strverscmp +acts like +.Xr strcmp 3 +(thus, ordering would be "a", "b", "train"). +If a decimal digit is found, the whole number is read and compared +(thus, ordering would be "9", "10", "420" which is the opposite of lexicographic order, +what +.Xr strcmp 3 +would've done). +Numbers with leading zeroes are interpreted as fractional parts, +and numbers with more leading zeroes are placed before numbers with fewer leading zeroes +(thus, ordering would be "000", "00", "01", "010", "09", "0", "1", "9", "10"). +.Sh SEE ALSO +.Xr strcmp 3 , +.Xr versionsort 3 +.Sh STANDARDS +The +.Fn strverscmp +function is a GNU extension and conforms to no standard. Index: lib/libc/string/strverscmp.c =================================================================== --- /dev/null +++ lib/libc/string/strverscmp.c @@ -0,0 +1,100 @@ +/*- +* SPDX-License-Identifier: BSD-2-Clause +* Copyright (c) 2022 Obiwac +*/ + +#include +#include + +int +strverscmp(const char *s1, const char *s2) +{ + /* if pointers are aliased, no need to go through to process of comparing them */ + + if (s1 == s2) + return 0; + + const unsigned char *u1 = (const void*) s1; + const unsigned char *u2 = (const void*) s2; + + for (; *u1 && *u2; u1++, u2++) { + /* leading zeros; we're dealing with the fractional part of a number */ + + if (*u1 == '0' || *u2 == '0') { + /* count leading zeros (more zeros == smaller number) */ + + size_t n1 = 0; + size_t n2 = 0; + + for (; *u1 == '0'; u1++) + n1++; + + for (; *u2 == '0'; u2++) + n2++; + + if (n1 != n2) + return (n2 - n1); + + /* handle the case where 000 < 09 */ + + if (*u1 == '\0' && isdigit(*u2)) + return (1); + + if (*u2 == '\0' && isdigit(*u1)) + return (-1); + + /* for all other cases, compare each digit until there are none left */ + + for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) { + if (*u1 != *u2) + return (*u1 - *u2); + } + + u1--; + u2--; + + /* no leading zeros; we're simply comparing two numbers */ + + } else if (isdigit(*u1) && isdigit(*u2)) { + const unsigned char *o1 = u1; + const unsigned char *o2 = u2; + + /* count digits (more digits == larger number) */ + + size_t n1 = 0; + size_t n2 = 0; + + for (; isdigit(*u1); u1++) + n1++; + + for (; isdigit(*u2); u2++) + n2++; + + if (n1 != n2) + return (n1 - n2); + + /* + * if there're the same number of digits, + * go back and compare each digit until there are none left + */ + + u1 = o1; + u2 = o2; + + for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) { + if (*u1 != *u2) + return (*u1 - *u2); + } + + u1--; + u2--; + } + + /* for the rest, we can just fallback to a regular strcmp */ + + if (*u1 != *u2) + return (*u1 - *u2); + } + + return (*u1 - *u2); +} Index: lib/libc/tests/string/Makefile =================================================================== --- lib/libc/tests/string/Makefile +++ lib/libc/tests/string/Makefile @@ -4,10 +4,11 @@ ATF_TESTS_C+= memset_s_test ATF_TESTS_C+= stpncpy_test ATF_TESTS_C+= strerror2_test -ATF_TESTS_C+= wcscasecmp_test -ATF_TESTS_C+= wcsnlen_test +ATF_TESTS_C+= strverscmp_test ATF_TESTS_C+= strxfrm_test +ATF_TESTS_C+= wcscasecmp_test ATF_TESTS_C+= wcscoll_test +ATF_TESTS_C+= wcsnlen_test # TODO: popcount, stresep Index: lib/libc/tests/string/strverscmp_test.c =================================================================== --- /dev/null +++ lib/libc/tests/string/strverscmp_test.c @@ -0,0 +1,90 @@ +/*- +* SPDX-License-Identifier: BSD-2-Clause +* Copyright (c) 2022 Obiwac +*/ + +#include +#include + +static void +check_all(size_t len, const char *ordered[len]) +{ + for (size_t i = 0; i < len; i++) { + for (size_t j = 0; j < len; j++) { + const char *a = ordered[i]; + const char *b = ordered[j]; + + if (i == j) + ATF_CHECK_MSG( + strverscmp(a, b) == 0, + "strverscmp(\"%s\", \"%s\") == 0", + a, b); + + else if (i < j) + ATF_CHECK_MSG( + strverscmp(a, b) < 0, + "strverscmp(\"%s\", \"%s\") < 0", + a, b); + + else if (i > j) + ATF_CHECK_MSG( + strverscmp(a, b) > 0, + "strverscmp(\"%s\", \"%s\") > 0", + a, b); + } + } +} + +#define CHECK_ALL(...) do { \ + const char *ordered[] = { __VA_ARGS__ }; \ + check_all(sizeof(ordered) / sizeof(*ordered), ordered); \ +} while (0) + +ATF_TC_WITHOUT_HEAD(strcmp_functionality); +ATF_TC_BODY(strcmp_functionality, tc) +{ + CHECK_ALL("", "a", "b"); +} + +/* from Linux man page strverscmp(3) */ + +ATF_TC_WITHOUT_HEAD(vers_ordering); +ATF_TC_BODY(vers_ordering, tc) +{ + CHECK_ALL("000", "00", "01", "010", "09", "0", "1", "9", "10"); +} + +ATF_TC_WITHOUT_HEAD(natural_ordering); +ATF_TC_BODY(natural_ordering, tc) +{ + CHECK_ALL("jan1", "jan2", "jan9", "jan10", "jan11", "jan19", "jan20"); +} + +/* https://sourceware.org/bugzilla/show_bug.cgi?id=9913 */ + +ATF_TC_WITHOUT_HEAD(glibc_bug_9913); +ATF_TC_BODY(glibc_bug_9913, tc) +{ + CHECK_ALL( + "B0075022800016.gbp.corp.com", + "B007502280067.gbp.corp.com", + "B007502357019.GBP.CORP.COM" + ); +} + +ATF_TC_WITHOUT_HEAD(semver_ordering); +ATF_TC_BODY(semver_ordering, tc) +{ + CHECK_ALL("2.6.20", "2.6.21"); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, strcmp_functionality); + ATF_TP_ADD_TC(tp, vers_ordering); + ATF_TP_ADD_TC(tp, natural_ordering); + ATF_TP_ADD_TC(tp, glibc_bug_9913); + ATF_TP_ADD_TC(tp, semver_ordering); + + return (atf_no_error()); +}