Index: head/sys/kern/kern_descrip.c =================================================================== --- head/sys/kern/kern_descrip.c +++ head/sys/kern/kern_descrip.c @@ -1801,8 +1801,12 @@ atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable); /* - * Do not free the old file table, as some threads may still - * reference entries within it. Instead, place it on a freelist + * Free the old file table when not shared by other threads or processes. + * The old file table is considered to be shared when either are true: + * - The process has more than one thread. + * - The file descriptor table has been shared via fdshare(). + * + * When shared, the old file table will be placed on a freelist * which will be processed when the struct filedesc is released. * * Note that if onfiles == NDFILE, we're dealing with the original @@ -1810,10 +1814,14 @@ * which must not be freed. */ if (onfiles > NDFILE) { - ft = (struct freetable *)&otable->fdt_ofiles[onfiles]; - fdp0 = (struct filedesc0 *)fdp; - ft->ft_table = otable; - SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next); + if (curproc->p_numthreads == 1 && fdp->fd_refcnt == 1) + free(otable, M_FILEDESC); + else { + ft = (struct freetable *)&otable->fdt_ofiles[onfiles]; + fdp0 = (struct filedesc0 *)fdp; + ft->ft_table = otable; + SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next); + } } /* * The map does not have the same possibility of threads still Index: head/tests/sys/kern/Makefile =================================================================== --- head/tests/sys/kern/Makefile +++ head/tests/sys/kern/Makefile @@ -10,6 +10,7 @@ #ATF_TESTS_C+= kcov ATF_TESTS_C+= kern_copyin ATF_TESTS_C+= kern_descrip_test +ATF_TESTS_C+= fdgrowtable_test ATF_TESTS_C+= kill_zombie ATF_TESTS_C+= ptrace_test TEST_METADATA.ptrace_test+= timeout="15" @@ -46,6 +47,7 @@ LIBADD.unix_seqpacket_test+= pthread LIBADD.kcov+= pthread LIBADD.sendfile_helper+= pthread +LIBADD.fdgrowtable_test+= util pthread kvm procstat NETBSD_ATF_TESTS_C+= lockf_test NETBSD_ATF_TESTS_C+= mqueue_test Index: head/tests/sys/kern/fdgrowtable_test.c =================================================================== --- head/tests/sys/kern/fdgrowtable_test.c +++ head/tests/sys/kern/fdgrowtable_test.c @@ -0,0 +1,267 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Rob Wing + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* linked libraries */ +#include +#include +#include +#include + +/* test-case macro */ +#define AFILE "afile" + +/* + * The following macros, struct freetable, struct fdescenttbl0 + * and struct filedesc0 are copied from sys/kern/kern_descrip.c + */ +#define NDFILE 20 +#define NDSLOTSIZE sizeof(NDSLOTTYPE) +#define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) +#define NDSLOT(x) ((x) / NDENTRIES) +#define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) +#define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) + +struct freetable { + struct fdescenttbl *ft_table; + SLIST_ENTRY(freetable) ft_next; +}; + +struct fdescenttbl0 { + int fdt_nfiles; + struct filedescent fdt_ofiles[NDFILE]; +}; + +struct filedesc0 { + struct filedesc fd_fd; + SLIST_HEAD(, freetable) fd_free; + struct fdescenttbl0 fd_dfiles; + NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; +}; + +static void +openfiles(int n) +{ + int i, fd; + + ATF_REQUIRE((fd = open(AFILE, O_CREAT, 0644)) != -1); + close(fd); + for (i = 0; i < n; i++) + ATF_REQUIRE((fd = open(AFILE, O_RDONLY, 0644)) != -1); +} + +/* + * Get a count of the old file descriptor tables on the freelist. + */ +static int +old_tables(kvm_t *kd, struct kinfo_proc *kp) +{ + struct filedesc0 fdp0; + struct freetable *ft, tft; + int counter; + + counter = 0; + + ATF_REQUIRE(kvm_read(kd, (unsigned long) kp->ki_fd, &fdp0, sizeof(fdp0)) > 0); + + SLIST_FOREACH(ft, &fdp0.fd_free, ft_next) { + ATF_REQUIRE(kvm_read(kd, (unsigned long) ft, &tft, sizeof(tft)) > 0 ); + ft = &tft; + counter++; + } + + return (counter); +} + +/* + * The returning struct kinfo_proc stores kernel addresses that will be + * used by kvm_read to retrieve information for the current process. + */ +static struct kinfo_proc * +read_kinfo(kvm_t *kd) +{ + struct kinfo_proc *kp; + int procs_found; + + ATF_REQUIRE((kp = kvm_getprocs(kd, KERN_PROC_PID, (int) getpid(), &procs_found)) != NULL); + ATF_REQUIRE(procs_found == 1); + + return (kp); +} + +/* + * Test a single threaded process that doesn't have a shared + * file descriptor table. The old tables should be freed. + */ +ATF_TC(free_oldtables); +ATF_TC_HEAD(free_oldtables, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} + +ATF_TC_BODY(free_oldtables, tc) +{ + kvm_t *kd; + struct kinfo_proc *kp; + + ATF_REQUIRE((kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL)) != NULL); + openfiles(128); + kp = read_kinfo(kd); + ATF_CHECK(old_tables(kd,kp) == 0); +} + +static void * +exec_thread(void *args) +{ + for (;;) + sleep(1); +} + +/* + * Test a process with two threads that doesn't have a shared file + * descriptor table. The old tables should not be freed. + */ +ATF_TC(oldtables_shared_via_threads); +ATF_TC_HEAD(oldtables_shared_via_threads, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} + +ATF_TC_BODY(oldtables_shared_via_threads, tc) +{ + kvm_t *kd; + struct kinfo_proc *kp; + pthread_t thread; + + ATF_REQUIRE((kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL)) != NULL); + ATF_REQUIRE(pthread_create(&thread, NULL, exec_thread, NULL) == 0); + + openfiles(128); + + kp = read_kinfo(kd); + ATF_CHECK(kp->ki_numthreads > 1); + ATF_CHECK(old_tables(kd,kp) > 1); + + ATF_REQUIRE(pthread_cancel(thread) == 0); + ATF_REQUIRE(pthread_join(thread, NULL) == 0); +} + +/* + * Get the reference count of a file descriptor table. + */ +static int +filedesc_refcnt(kvm_t *kd, struct kinfo_proc *kp) +{ + struct filedesc fdp; + + ATF_REQUIRE(kvm_read(kd, (unsigned long) kp->ki_fd, &fdp, sizeof(fdp)) > 0); + + return (fdp.fd_refcnt); +} + +/* + * Test a single threaded process that shares a file descriptor + * table with another process. The old tables should not be freed. + */ +ATF_TC(oldtables_shared_via_process); +ATF_TC_HEAD(oldtables_shared_via_process, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} + +ATF_TC_BODY(oldtables_shared_via_process, tc) +{ + kvm_t *kd; + struct kinfo_proc *kp; + int status; + pid_t child, wpid; + + ATF_REQUIRE((kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL)) != NULL); + + /* share the file descriptor table */ + ATF_REQUIRE((child = rfork(RFPROC)) != -1); + + if (child == 0) { + openfiles(128); + raise(SIGSTOP); + exit(127); + } + + /* let parent process open some files too */ + openfiles(128); + + /* get current status of child */ + wpid = waitpid(child, &status, WUNTRACED); + + /* child should be stopped */ + ATF_REQUIRE(WIFSTOPPED(status)); + + /* + * We want to read kernel data + * before the child exits + * otherwise we'll lose a reference count + * to the file descriptor table + */ + if (child != 0) { + kp = read_kinfo(kd); + + ATF_CHECK(filedesc_refcnt(kd,kp) > 1); + ATF_CHECK(old_tables(kd,kp) > 1); + + kill(child, SIGCONT); + } + + /* child should have exited */ + wpid = waitpid(child, &status, 0); + ATF_REQUIRE(WIFEXITED(status)); + ATF_REQUIRE(WEXITSTATUS(status) == 127); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, free_oldtables); + ATF_TP_ADD_TC(tp, oldtables_shared_via_threads); + ATF_TP_ADD_TC(tp, oldtables_shared_via_process); + return (atf_no_error()); +}