Index: bin/ls/cmp.c =================================================================== --- bin/ls/cmp.c +++ bin/ls/cmp.c @@ -64,6 +64,20 @@ return (strcoll(b->fts_name, a->fts_name)); } +int +verscmp(const FTSENT *a, const FTSENT *b) +{ + + return (strverscmp(a->fts_name, b->fts_name)); +} + +int +revverscmp(const FTSENT *a, const FTSENT *b) +{ + + return (strverscmp(b->fts_name, a->fts_name)); +} + int modcmp(const FTSENT *a, const FTSENT *b) { Index: bin/ls/extern.h =================================================================== --- bin/ls/extern.h +++ bin/ls/extern.h @@ -42,6 +42,8 @@ int revmodcmp(const FTSENT *, const FTSENT *); int namecmp(const FTSENT *, const FTSENT *); int revnamecmp(const FTSENT *, const FTSENT *); +int verscmp(const FTSENT *, const FTSENT *); +int revverscmp(const FTSENT *, const FTSENT *); int statcmp(const FTSENT *, const FTSENT *); int revstatcmp(const FTSENT *, const FTSENT *); int sizecmp(const FTSENT *, const FTSENT *); Index: bin/ls/ls.1 =================================================================== --- bin/ls/ls.1 +++ bin/ls/ls.1 @@ -32,7 +32,7 @@ .\" @(#)ls.1 8.7 (Berkeley) 7/29/94 .\" $FreeBSD$ .\" -.Dd August 31, 2020 +.Dd July 11, 2022 .Dt LS 1 .Os .Sh NAME @@ -40,7 +40,7 @@ .Nd list directory contents .Sh SYNOPSIS .Nm -.Op Fl ABCFGHILPRSTUWZabcdfghiklmnopqrstuwxy1\&, +.Op Fl ABCFGHILPRSTUWZabcdfghiklmnopqrstuvwxy1\&, .Op Fl -color Ns = Ns Ar when .Op Fl D Ar format .Op Ar @@ -399,6 +399,12 @@ .Pq Fl t or printing .Pq Fl l . +.It Fl v +Sort by natural ordering (using +.Xr strverscmp 3 +instead of +.Xr strcoll 3 +as the comparison function). .It Fl w Force raw printing of non-printable characters. This is the default @@ -902,7 +908,7 @@ and .St -p1003.1-2008 . The options -.Fl B , D , G , I , T , U , W , Z , b , h , w , y +.Fl B , D , G , I , T , U , W , Z , b , h , v , w , y and .Fl , are non-standard extensions. Index: bin/ls/ls.c =================================================================== --- bin/ls/ls.c +++ bin/ls/ls.c @@ -136,6 +136,7 @@ int f_octal_escape; /* like f_octal but use C escapes if possible */ static int f_recursive; /* ls subdirectories also */ static int f_reversesort; /* reverse whatever sort is used */ +static int f_verssort; /* sort names using strverscmp(3) rather than strcoll(3) */ int f_samesort; /* sort time and name in same direction */ int f_sectime; /* print full time information */ static int f_singlecol; /* use single column output */ @@ -275,7 +276,7 @@ colorflag = COLORFLAG_AUTO; #endif while ((ch = getopt_long(argc, argv, - "+1ABCD:FGHILPRSTUWXZabcdfghiklmnopqrstuwxy,", long_opts, + "+1ABCD:FGHILPRSTUWXZabcdfghiklmnopqrstuvwxy,", long_opts, NULL)) != -1) { switch (ch) { /* @@ -439,6 +440,9 @@ case 's': f_size = 1; break; + case 'v': + f_verssort = 1; + break; case 'w': f_nonprint = 0; f_octal = 0; @@ -566,10 +570,12 @@ } /* Select a sort function. */ if (f_reversesort) { - if (!f_timesort && !f_sizesort) - sortfcn = revnamecmp; - else if (f_sizesort) + if (f_sizesort) sortfcn = revsizecmp; + else if (f_verssort) + sortfcn = revverscmp; + else if (!f_timesort) + sortfcn = revnamecmp; else if (f_accesstime) sortfcn = revacccmp; else if (f_birthtime) @@ -579,10 +585,12 @@ else /* Use modification time. */ sortfcn = revmodcmp; } else { - if (!f_timesort && !f_sizesort) - sortfcn = namecmp; - else if (f_sizesort) + if (f_sizesort) sortfcn = sizecmp; + else if (f_verssort) + sortfcn = verscmp; + else if (!f_timesort) + sortfcn = namecmp; else if (f_accesstime) sortfcn = acccmp; else if (f_birthtime) Index: bin/ls/tests/ls_tests.sh =================================================================== --- bin/ls/tests/ls_tests.sh +++ bin/ls/tests/ls_tests.sh @@ -846,6 +846,20 @@ atf_check -e empty -o match:'a\.file.*b\.file' -s exit:0 ls -Cu } +atf_test_case v_flag +v_flag_head() +{ + atf_set "descr" "Verify that the output from ls -v sorts based on strverscmp(3)" +} + +v_flag_body() +{ + create_test_dir + + atf_check -e empty -o empty -s exit:0 touch 000 00 01 010 09 0 1 9 10 + atf_check -e empty -o match:"000.00.01.010.09.0.1.9.10" -s exit:0 sh -c 'ls -Cv' +} + atf_test_case x_flag x_flag_head() { @@ -960,6 +974,7 @@ atf_add_test_case s_flag atf_add_test_case t_flag atf_add_test_case u_flag + atf_add_test_case v_flag atf_add_test_case x_flag atf_add_test_case y_flag atf_add_test_case 1_flag Index: bin/ls/util.c =================================================================== --- bin/ls/util.c +++ bin/ls/util.c @@ -227,9 +227,9 @@ { (void)fprintf(stderr, #ifdef COLORLS - "usage: ls [-ABCFGHILPRSTUWZabcdfghiklmnopqrstuwxy1,] [--color=when] [-D format]" + "usage: ls [-ABCFGHILPRSTUWZabcdfghiklmnopqrstuvwxy1,] [--color=when] [-D format]" #else - "usage: ls [-ABCFHILPRSTUWZabcdfghiklmnopqrstuwxy1,] [-D format]" + "usage: ls [-ABCFHILPRSTUWZabcdfghiklmnopqrstuvwxy1,] [-D format]" #endif " [file ...]\n"); exit(1); Index: include/dirent.h =================================================================== --- include/dirent.h +++ include/dirent.h @@ -105,6 +105,7 @@ __BEGIN_DECLS #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE >= 700 int alphasort(const struct dirent **, const struct dirent **); +int versionsort(const struct dirent **, const struct dirent **); int dirfd(DIR *); #endif #if __BSD_VISIBLE Index: include/string.h =================================================================== --- include/string.h +++ include/string.h @@ -83,6 +83,7 @@ char *strchrnul(const char*, int) __pure; #endif int strcmp(const char *, const char *) __pure; +int strverscmp(const char *, const char *) __pure; int strcoll(const char *, const char *); char *strcpy(char * __restrict, const char * __restrict); size_t strcspn(const char *, const char *) __pure; Index: lib/libc/gen/Symbol.map =================================================================== --- lib/libc/gen/Symbol.map +++ lib/libc/gen/Symbol.map @@ -442,6 +442,7 @@ sched_getaffinity; sched_setaffinity; sched_getcpu; + versionsort; __cpuset_alloc; __cpuset_free; }; Index: lib/libc/gen/scandir.3 =================================================================== --- lib/libc/gen/scandir.3 +++ lib/libc/gen/scandir.3 @@ -28,12 +28,13 @@ .\" @(#)scandir.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd January 3, 2010 +.Dd July 11, 2022 .Dt SCANDIR 3 .Os .Sh NAME .Nm scandir , -.Nm alphasort +.Nm alphasort , +.Nm versionsort .Nd scan a directory .Sh LIBRARY .Lb libc @@ -45,6 +46,8 @@ .Fn scandir_b "const char *dirname" "struct dirent ***namelist" "int \*(lp*select\^(rp\*(lpconst struct dirent *\*(rp" "int \*(lp^compar\*(rp\*(lpconst struct dirent **, const struct dirent **\*(rp" .Ft int .Fn alphasort "const struct dirent **d1" "const struct dirent **d2" +.Ft int +.Fn versionsort "const struct dirent **d1" "const struct dirent **d2" .Sh DESCRIPTION The .Fn scandir @@ -86,6 +89,14 @@ argument to sort the array alphabetically using .Xr strcoll 3 . .Pp +The +.Fn versionsort +function +is a routine which can be used for the +.Fa compar +argument to sort the array naturally using +.Xr strverscmp 3 . +.Pp The memory allocated for the array can be deallocated with .Xr free 3 , by freeing each pointer in the array and then the array itself. @@ -107,6 +118,7 @@ .Xr malloc 3 , .Xr qsort 3 , .Xr strcoll 3 , +.Xr strverscmp 3 , .Xr dir 5 .Sh HISTORY The Index: lib/libc/gen/scandir.c =================================================================== --- lib/libc/gen/scandir.c +++ lib/libc/gen/scandir.c @@ -146,10 +146,15 @@ int alphasort(const struct dirent **d1, const struct dirent **d2) { - return (strcoll((*d1)->d_name, (*d2)->d_name)); } +int +versionsort(const struct dirent **d1, const struct dirent **d2) +{ + return strverscmp((*d1)->d_name, (*d2)->d_name); +} + static int alphasort_thunk(void *thunk, const void *p1, const void *p2) { Index: lib/libc/string/Makefile.inc =================================================================== --- lib/libc/string/Makefile.inc +++ lib/libc/string/Makefile.inc @@ -16,7 +16,7 @@ strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \ strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \ strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \ - strxfrm.c swab.c \ + strverscmp.c strxfrm.c swab.c \ timingsafe_bcmp.c \ timingsafe_memcmp.c \ wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \ @@ -46,7 +46,7 @@ memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \ strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \ string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \ - strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \ + strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \ timingsafe_bcmp.3 \ wcscoll.3 wcstok.3 \ wcswidth.3 wcsxfrm.3 wmemchr.3 Index: lib/libc/string/Symbol.map =================================================================== --- lib/libc/string/Symbol.map +++ lib/libc/string/Symbol.map @@ -116,6 +116,7 @@ FBSD_1.7 { mempcpy; + strverscmp; wmempcpy; }; Index: lib/libc/string/strverscmp.3 =================================================================== --- /dev/null +++ lib/libc/string/strverscmp.3 @@ -0,0 +1,76 @@ +.\" Copyright (c) 2022 Obiwac, +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)strverscmp.3 1.0 7/11/22 +.\" $FreeBSD$ +.\" +.Dd July 11, 2022 +.Dt STRVERSCMP 3 +.Os +.Sh NAME +.Nm strverscmp +.Nd compare strings according to natural order +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In string.h +.Ft int +.Fn strverscmp "const char *s1" "const char *s2" +.Sh DESCRIPTION +The +.Fn strverscmp +function +compares the null-terminated strings +.Fa s1 +and +.Fa s2 +according to their natural order +and returns an integer greater than, equal to, or less than 0, +depending on whether +.Fa s1 +is greater than, equal to, or less than +.Fa s2 . +.Pp +More specifically, this natural order is found by iterating over both +strings until a difference is found. +If the difference is between non-decimal characters, +.Fn strverscmp +acts like +.Xr strcmp 3 +(thus, ordering would be "a", "b", "train"). +If a decimal digit is found, the whole number is read and compared +(thus, ordering would be "9", "10", "420" which is the opposite of lexicographic order, +what +.Xr strcmp 3 +would've done). +Numbers with leading zeroes are interpreted as fractional parts, +and numbers with more leading zeroes are placed before numbers with fewer leading zeroes +(thus, ordering would be "000", "00", "01", "010", "09", "0", "1", "9", "10"). +.Sh SEE ALSO +.Xr strcmp 3 , +.Xr versionsort 3 +.Sh STANDARDS +The +.Fn strverscmp +function is a GNU extension and conforms to no standard. Index: lib/libc/string/strverscmp.c =================================================================== --- /dev/null +++ lib/libc/string/strverscmp.c @@ -0,0 +1,120 @@ +/*- +* SPDX-License-Identifier: BSD-2-Clause +* +* Copyright (c) 2022 Obiwac, +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGE. +*/ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)strverscmp.c 1.0 6/30/22"; +#endif /* LIBC_SCCS and not lint */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +int strverscmp(const char* s1, const char* s2) { + // if pointers are aliased, no need to go through to process of comparing them + + if (s1 == s2) { + return 0; + } + + const unsigned char* u1 = (const void*) s1; + const unsigned char* u2 = (const void*) s2; + + for (; *u1 && *u2; u1++, u2++) { + // leading zeroes; we're dealing with the fractional part of a number + + if (*u1 == '0' || *u2 == '0') { + // count leading zeros (more leading zeroes == smaller number) + + unsigned n1 = 0; + unsigned n2 = 0; + + for (; *u1 == '0'; u1++) n1++; + for (; *u2 == '0'; u2++) n2++; + + if (n1 != n2) { + return n2 - n1; + } + + // handle the case where 000 < 09 + + if (!*u1 && isdigit(*u2)) return 1; + if (!*u2 && isdigit(*u1)) return -1; + + // for all other cases, compare each digit until there are none left + + for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) { + if (*u1 != *u2) { + return *u1 - *u2; + } + } + + u1--, u2--; + } + + // no leading; we're simply comparing two numbers + + else if (isdigit(*u1) && isdigit(*u2)) { + const unsigned char* o1 = u1; + const unsigned char* o2 = u2; + + // count digits (more digits == larger number) + + unsigned n1 = 0; + unsigned n2 = 0; + + for (; isdigit(*u1); u1++) n1++; + for (; isdigit(*u2); u2++) n2++; + + if (n1 != n2) { + return n1 - n2; + } + + // if there're the same number of digits, + // go back and compare each digit until there are none left + + u1 = o1, u2 = o2; + + for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) { + if (*u1 != *u2) { + return *u1 - *u2; + } + } + + u1--, u2--; + } + + // for the rest, we can just fallback to a regular strcmp + + if (*u1 != *u2) { + return *u1 - *u2; + } + } + + return *u1 - *u2; +} Index: lib/libc/tests/string/Makefile =================================================================== --- lib/libc/tests/string/Makefile +++ lib/libc/tests/string/Makefile @@ -8,6 +8,7 @@ ATF_TESTS_C+= wcsnlen_test ATF_TESTS_C+= strxfrm_test ATF_TESTS_C+= wcscoll_test +ATF_TESTS_C+= strverscmp_test # TODO: popcount, stresep Index: lib/libc/tests/string/strverscmp_test.c =================================================================== --- /dev/null +++ lib/libc/tests/string/strverscmp_test.c @@ -0,0 +1,110 @@ +/*- +* SPDX-License-Identifier: BSD-2-Clause +* +* Copyright (c) 2022 Obiwac, +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGE. +*/ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +static void check_all(int len, const char* ordered[len]) { + for (size_t i = 0; i < len; i++) { + for (size_t j = 0; j < len; j++) { + const char* a = ordered[i]; + const char* b = ordered[j]; + + if (i == j) { + ATF_CHECK_MSG( + strverscmp(a, b) == 0, + "strverscmp(\"%s\", \"%s\") == 0", + a, b); + } + + else if (i < j) { + ATF_CHECK_MSG( + strverscmp(a, b) < 0, + "strverscmp(\"%s\", \"%s\") < 0", + a, b); + } + + else if (i > j) { + ATF_CHECK_MSG( + strverscmp(a, b) > 0, + "strverscmp(\"%s\", \"%s\") > 0", + a, b); + } + } + } +} + +#define CHECK_ALL(...) \ + const char* ordered[] = { __VA_ARGS__ }; \ + check_all(sizeof(ordered) / sizeof(*ordered), ordered); + +ATF_TC_WITHOUT_HEAD(strcmp_functionality); +ATF_TC_BODY(strcmp_functionality, tc) { + CHECK_ALL("", "a", "b") +} + +// from Linux man page + +ATF_TC_WITHOUT_HEAD(vers_ordering); +ATF_TC_BODY(vers_ordering, tc) { + CHECK_ALL("000", "00", "01", "010", "09", "0", "1", "9", "10") +} + +ATF_TC_WITHOUT_HEAD(natural_ordering); +ATF_TC_BODY(natural_ordering, tc) { + CHECK_ALL("jan1", "jan2", "jan9", "jan10", "jan11", "jan19", "jan20") +} + +// https://sourceware.org/bugzilla/show_bug.cgi?id=9913 + +ATF_TC_WITHOUT_HEAD(glibc_bug_9913); +ATF_TC_BODY(glibc_bug_9913, tc) { + CHECK_ALL( + "B0075022800016.gbp.corp.com", + "B007502280067.gbp.corp.com", + "B007502357019.GBP.CORP.COM" + ) +} + +ATF_TC_WITHOUT_HEAD(semver_ordering); +ATF_TC_BODY(semver_ordering, tc) { + CHECK_ALL("2.6.20", "2.6.21") +} + +ATF_TP_ADD_TCS(tp) { + ATF_TP_ADD_TC(tp, strcmp_functionality); + ATF_TP_ADD_TC(tp, vers_ordering); + ATF_TP_ADD_TC(tp, natural_ordering); + ATF_TP_ADD_TC(tp, glibc_bug_9913); + ATF_TP_ADD_TC(tp, semver_ordering); + + return atf_no_error(); +} Index: sys/sys/exec.h =================================================================== --- sys/sys/exec.h +++ sys/sys/exec.h @@ -90,8 +90,8 @@ #define PROC_PS_STRINGS(p) \ ((p)->p_vmspace->vm_stacktop - (p)->p_sysent->sv_psstringssz) -int exec_map_first_page(struct image_params *); -void exec_unmap_first_page(struct image_params *); +int exec_map_first_page(struct image_params *); +void exec_unmap_first_page(struct image_params *); int exec_register(const struct execsw *); int exec_unregister(const struct execsw *);