diff --git a/.cirrus.yml b/.cirrus.yml --- a/.cirrus.yml +++ b/.cirrus.yml @@ -3,18 +3,18 @@ task: matrix: + - name: 14.0-RELEASE + freebsd_instance: + image_family: freebsd-14-0 + - name: 14.0-STABLE + freebsd_instance: + image_family: freebsd-14-0-snap - name: 13.2-RELEASE freebsd_instance: - image_family: freebsd-13-1 + image_family: freebsd-13-2 - name: 13.2-STABLE freebsd_instance: image_family: freebsd-13-2-snap - - name: 12.4-STABLE - freebsd_instance: - image_family: freebsd-12-4-snap - - name: 12.4-RELEASE - freebsd_instance: - image_family: freebsd-12-4 env: DO: distcheck install_script: diff --git a/AUTHORS b/AUTHORS --- a/AUTHORS +++ b/AUTHORS @@ -8,4 +8,5 @@ # * Name # * Organization +* The FreeBSD Foundation * Google Inc. diff --git a/doc/kyua.1.in b/doc/kyua.1.in --- a/doc/kyua.1.in +++ b/doc/kyua.1.in @@ -392,6 +392,11 @@ .Xr atf 7 , .Xr tests 7 .Sh AUTHORS +The original author of +.Nm +was +.An Julio Merino . +.Pp For more details on the people that made .Nm possible and the license terms, run: diff --git a/utils/process/Kyuafile b/utils/process/Kyuafile --- a/utils/process/Kyuafile +++ b/utils/process/Kyuafile @@ -6,6 +6,7 @@ atf_test_program{name="deadline_killer_test"} atf_test_program{name="exceptions_test"} atf_test_program{name="executor_test"} +atf_test_program{name="executor_pid_test"} atf_test_program{name="fdstream_test"} atf_test_program{name="isolation_test"} atf_test_program{name="operations_test"} diff --git a/utils/process/Makefile.am.inc b/utils/process/Makefile.am.inc --- a/utils/process/Makefile.am.inc +++ b/utils/process/Makefile.am.inc @@ -83,6 +83,11 @@ utils_process_executor_test_CXXFLAGS = $(UTILS_CFLAGS) $(ATF_CXX_CFLAGS) utils_process_executor_test_LDADD = $(UTILS_LIBS) $(ATF_CXX_LIBS) +tests_utils_process_PROGRAMS += utils/process/executor_pid_test +utils_process_executor_pid_test_SOURCES = utils/process/executor_pid_test.cpp +utils_process_executor_pid_test_CXXFLAGS = $(UTILS_CFLAGS) $(ATF_CXX_CFLAGS) +utils_process_executor_pid_test_LDADD = $(UTILS_LIBS) $(ATF_CXX_LIBS) + tests_utils_process_PROGRAMS += utils/process/fdstream_test utils_process_fdstream_test_SOURCES = utils/process/fdstream_test.cpp utils_process_fdstream_test_CXXFLAGS = $(UTILS_CFLAGS) $(ATF_CXX_CFLAGS) diff --git a/utils/process/executor.cpp b/utils/process/executor.cpp --- a/utils/process/executor.cpp +++ b/utils/process/executor.cpp @@ -39,10 +39,12 @@ #include } +#include #include #include #include #include +#include #include "utils/datetime.hpp" #include "utils/format/macros.hpp" @@ -123,7 +125,7 @@ } -/// Internal implementation for the exit_handle class. +/// Internal implementation for the exec_handle class. struct utils::process::executor::exec_handle::impl : utils::noncopyable { /// PID of the process being run. int pid; @@ -547,6 +549,9 @@ /// Mapping of PIDs to the data required at run time. exec_handles_map all_exec_handles; + /// Former members of all_exec_handles removed due to PID reuse. + std::forward_list stale_exec_handles; + /// Whether the executor state has been cleaned yet or not. /// /// Used to keep track of explicit calls to the public cleanup(). @@ -558,6 +563,8 @@ interrupts_handler(new signals::interrupts_handler()), root_work_directory(new fs::auto_directory( fs::auto_directory::mkdtemp_public(work_directory_template))), + all_exec_handles(), + stale_exec_handles(), cleaned(false) { } @@ -603,6 +610,17 @@ } all_exec_handles.clear(); + for (auto iter : stale_exec_handles) { + // The process already exited, so no need to kill and wait. + try { + fs::rm_r(iter.control_directory()); + } catch (const fs::error& e) { + LE(F("Failed to clean up stale subprocess work directory " + "%s: %s") % iter.control_directory() % e.what()); + } + } + stale_exec_handles.clear(); + try { // The following only causes the work directory to be deleted, not // any of its contents, so we expect this to always succeed. This @@ -611,9 +629,9 @@ // subprocesses. root_work_directory->cleanup(); } catch (const fs::error& e) { - LE(F("Failed to clean up executor work directory %s: %s; this is " - "an internal error") % root_work_directory->directory() - % e.what()); + LE(F("Failed to clean up executor work directory %s: %s; " + "this could be an internal error or a buggy test") % + root_work_directory->directory() % e.what()); } root_work_directory.reset(NULL); @@ -773,12 +791,16 @@ timeout, unprivileged_user, detail::refcnt_t(new detail::refcnt_t::element_type(0))))); - INV_MSG(_pimpl->all_exec_handles.find(handle.pid()) == - _pimpl->all_exec_handles.end(), - F("PID %s already in all_exec_handles; not properly cleaned " - "up or reused too fast") % handle.pid());; - _pimpl->all_exec_handles.insert(exec_handles_map::value_type( - handle.pid(), handle)); + const auto value = exec_handles_map::value_type(handle.pid(), handle); + auto insert_pair = _pimpl->all_exec_handles.insert(value); + if (!insert_pair.second) { + LI(F("PID %s already in all_exec_handles") % handle.pid()); + _pimpl->stale_exec_handles.push_front(insert_pair.first->second); + _pimpl->all_exec_handles.erase(insert_pair.first); + insert_pair = _pimpl->all_exec_handles.insert(value); + INV_MSG(insert_pair.second, F("PID %s still in all_exec_handles") % + handle.pid()); + } LI(F("Spawned subprocess with exec_handle %s") % handle.pid()); return handle; } @@ -816,12 +838,16 @@ timeout, base.unprivileged_user(), base.state_owners()))); - INV_MSG(_pimpl->all_exec_handles.find(handle.pid()) == - _pimpl->all_exec_handles.end(), - F("PID %s already in all_exec_handles; not properly cleaned " - "up or reused too fast") % handle.pid());; - _pimpl->all_exec_handles.insert(exec_handles_map::value_type( - handle.pid(), handle)); + const auto value = exec_handles_map::value_type(handle.pid(), handle); + auto insert_pair = _pimpl->all_exec_handles.insert(value); + if (!insert_pair.second) { + LI(F("PID %s already in all_exec_handles") % handle.pid()); + _pimpl->stale_exec_handles.push_front(insert_pair.first->second); + _pimpl->all_exec_handles.erase(insert_pair.first); + insert_pair = _pimpl->all_exec_handles.insert(value); + INV_MSG(insert_pair.second, F("PID %s still in all_exec_handles") % + handle.pid()); + } LI(F("Spawned subprocess with exec_handle %s") % handle.pid()); return handle; } diff --git a/utils/process/executor_pid_test.cpp b/utils/process/executor_pid_test.cpp new file mode 100644 --- /dev/null +++ b/utils/process/executor_pid_test.cpp @@ -0,0 +1,208 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 Dell Inc. + * Author: Eric van Gyzen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if 0 + +1. Run some "bad" tests that prevent kyua from removing the work directory. + We use "chflags uunlink". Mounting a file system from an md(4) device + is another common use case. +2. Fork a lot, nearly wrapping the PID number space, so step 3 will re-use + a PID from step 1. Running the entire FreeBSD test suite is a more + realistic scenario for this step. +3. Run some more tests. If the stars align, the bug is not fixed yet, and + kyua is built with debugging, kyua will abort with the following messages. + Without debugging, the tests in step 3 will reuse the context from step 1, + including stdout, stderr, and working directory, which are still populated + with stuff from step 1. When I found this bug, step 3 was + __test_cases_list__, which expects a certain format in stdout and failed + when it found something completely unrelated. +4. You can clean up with: chflags -R nouunlink /tmp/kyua.*; rm -rf /tmp/kyua.* + +$ cc -o pid_wrap -latf-c pid_wrap.c +$ kyua test +pid_wrap:leak_0 -> passed [0.001s] +pid_wrap:leak_1 -> passed [0.001s] +pid_wrap:leak_2 -> passed [0.001s] +pid_wrap:leak_3 -> passed [0.001s] +pid_wrap:leak_4 -> passed [0.001s] +pid_wrap:leak_5 -> passed [0.001s] +pid_wrap:leak_6 -> passed [0.001s] +pid_wrap:leak_7 -> passed [0.001s] +pid_wrap:leak_8 -> passed [0.001s] +pid_wrap:leak_9 -> passed [0.001s] +pid_wrap:pid_wrap -> passed [1.113s] +pid_wrap:pid_wrap_0 -> passed [0.001s] +pid_wrap:pid_wrap_1 -> passed [0.001s] +pid_wrap:pid_wrap_2 -> passed [0.001s] +pid_wrap:pid_wrap_3 -> *** /usr/src/main/contrib/kyua/utils/process/executor.cpp:779: Invariant check failed: PID 60876 already in all_exec_handles; not properly cleaned up or reused too fast +*** Fatal signal 6 received +*** Log file is /home/vangyzen/.kyua/logs/kyua.20221006-193544.log +*** Please report this problem to kyua-discuss@googlegroups.com detailing what you were doing before the crash happened; if possible, include the log file mentioned above +Abort trap (core dumped) + +#endif + +#include + +#include + +#include +#include +#include + +#include +#include + +void +leak_work_dir() +{ + int fd; + + ATF_REQUIRE((fd = open("unforgettable", O_CREAT|O_EXCL|O_WRONLY, 0600)) + >= 0); + ATF_REQUIRE_EQ(0, fchflags(fd, UF_NOUNLINK)); + ATF_REQUIRE_EQ(0, close(fd)); +} + +void +wrap_pids() +{ + pid_t begin, current, target; + bool wrapped; + + begin = getpid(); + target = begin - 15; + if (target <= 1) { + target += 99999; // PID_MAX + wrapped = true; + } else { + wrapped = false; + } + + ATF_REQUIRE(signal(SIGCHLD, SIG_IGN) != SIG_ERR); + + do { + current = vfork(); + if (current == 0) { + _exit(0); + } + ATF_REQUIRE(current != -1); + if (current < begin) { + wrapped = true; + } + } while (!wrapped || current < target); +} + +void +test_work_dir_reuse() +{ + // If kyua is built with debugging, it would abort here before the fix. +} + +void +clean_up() +{ + (void)system("chflags -R nouunlink ../.."); +} + +ATF_TEST_CASE_WITHOUT_HEAD(leak_0); +ATF_TEST_CASE_BODY(leak_0) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_1); +ATF_TEST_CASE_BODY(leak_1) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_2); +ATF_TEST_CASE_BODY(leak_2) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_3); +ATF_TEST_CASE_BODY(leak_3) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_4); +ATF_TEST_CASE_BODY(leak_4) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_5); +ATF_TEST_CASE_BODY(leak_5) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_6); +ATF_TEST_CASE_BODY(leak_6) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_7); +ATF_TEST_CASE_BODY(leak_7) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_8); +ATF_TEST_CASE_BODY(leak_8) { leak_work_dir(); } +ATF_TEST_CASE_WITHOUT_HEAD(leak_9); +ATF_TEST_CASE_BODY(leak_9) { leak_work_dir(); } + +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap); +ATF_TEST_CASE_BODY(pid_wrap) { wrap_pids(); } + +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_0); +ATF_TEST_CASE_BODY(pid_wrap_0) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_1); +ATF_TEST_CASE_BODY(pid_wrap_1) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_2); +ATF_TEST_CASE_BODY(pid_wrap_2) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_3); +ATF_TEST_CASE_BODY(pid_wrap_3) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_4); +ATF_TEST_CASE_BODY(pid_wrap_4) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_5); +ATF_TEST_CASE_BODY(pid_wrap_5) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_6); +ATF_TEST_CASE_BODY(pid_wrap_6) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_7); +ATF_TEST_CASE_BODY(pid_wrap_7) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_8); +ATF_TEST_CASE_BODY(pid_wrap_8) { test_work_dir_reuse(); } +ATF_TEST_CASE_WITHOUT_HEAD(pid_wrap_9); +ATF_TEST_CASE_BODY(pid_wrap_9) { test_work_dir_reuse(); } + +ATF_TEST_CASE_WITHOUT_HEAD(really_clean_up); +ATF_TEST_CASE_BODY(really_clean_up) { clean_up(); } + +ATF_INIT_TEST_CASES(tcs) +{ + ATF_ADD_TEST_CASE(tcs, leak_0); + ATF_ADD_TEST_CASE(tcs, leak_1); + ATF_ADD_TEST_CASE(tcs, leak_2); + ATF_ADD_TEST_CASE(tcs, leak_3); + ATF_ADD_TEST_CASE(tcs, leak_4); + ATF_ADD_TEST_CASE(tcs, leak_5); + ATF_ADD_TEST_CASE(tcs, leak_6); + ATF_ADD_TEST_CASE(tcs, leak_7); + ATF_ADD_TEST_CASE(tcs, leak_8); + ATF_ADD_TEST_CASE(tcs, leak_9); + + ATF_ADD_TEST_CASE(tcs, pid_wrap); + + ATF_ADD_TEST_CASE(tcs, pid_wrap_0); + ATF_ADD_TEST_CASE(tcs, pid_wrap_1); + ATF_ADD_TEST_CASE(tcs, pid_wrap_2); + ATF_ADD_TEST_CASE(tcs, pid_wrap_3); + ATF_ADD_TEST_CASE(tcs, pid_wrap_4); + ATF_ADD_TEST_CASE(tcs, pid_wrap_5); + ATF_ADD_TEST_CASE(tcs, pid_wrap_6); + ATF_ADD_TEST_CASE(tcs, pid_wrap_7); + ATF_ADD_TEST_CASE(tcs, pid_wrap_8); + ATF_ADD_TEST_CASE(tcs, pid_wrap_9); + + ATF_ADD_TEST_CASE(tcs, really_clean_up); +}