diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index aa82eafff9b0..a6b327339c80 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,913 +1,913 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 */ #include #include "opt_ddb.h" #include "opt_kdb.h" #include "opt_init_path.h" #include "opt_verbose_sysinit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void mi_startup(void); /* Should be elsewhere */ /* Components of the first process -- never freed. */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; struct thread0_storage thread0_st __aligned(32); struct vmspace vmspace0; struct proc *initproc; int linux_alloc_current_noop(struct thread *td __unused, int flags __unused) { return (0); } int (*lkpi_alloc_current)(struct thread *, int) = linux_alloc_current_noop; #ifndef BOOTHOWTO #define BOOTHOWTO 0 #endif int boothowto = BOOTHOWTO; /* initialized so that it can be patched */ SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "Boot control flags, passed from loader"); #ifndef BOOTVERBOSE #define BOOTVERBOSE 0 #endif int bootverbose = BOOTVERBOSE; SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "Control the output of verbose kernel messages"); #ifdef VERBOSE_SYSINIT /* * We'll use the defined value of VERBOSE_SYSINIT from the kernel config to * dictate the default VERBOSE_SYSINIT behavior. Significant values for this * option and associated tunable are: * - 0, 'compiled in but silent by default' * - 1, 'compiled in but verbose by default' (default) */ int verbose_sysinit = VERBOSE_SYSINIT; TUNABLE_INT("debug.verbose_sysinit", &verbose_sysinit); #endif #ifdef INVARIANTS FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); #endif /* * This ensures that there is at least one entry so that the sysinit_set * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never * executed. */ SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); /* * The sysinit linker set compiled into the kernel. These are placed onto the * sysinit list by mi_startup; sysinit_add can add (e.g., from klds) additional * sysinits to the linked list but the linker set here does not change. */ SET_DECLARE(sysinit_set, struct sysinit); /* * The sysinit list itself. Items are removed as they are run. */ -static SLIST_HEAD(sysinitlist, sysinit) sysinit_list; +static STAILQ_HEAD(sysinitlist, sysinit) sysinit_list; /* * Compare two sysinits; return -1, 0, or 1 if a comes before, at the same time * as, or after b. */ static int sysinit_compar(struct sysinit *a, struct sysinit *b, void *thunk __unused) { if (a->subsystem < b->subsystem) return (-1); if (a->subsystem > b->subsystem) return (1); if (a->order < b->order) return (-1); if (a->order > b->order) return (1); return (0); } static void sysinit_mklist(struct sysinitlist *list, struct sysinit **set, struct sysinit **set_end) { struct sysinit **sipp; TSENTER(); TSENTER2("listify"); - SLIST_INIT(list); + STAILQ_INIT(list); for (sipp = set; sipp < set_end; sipp++) - SLIST_INSERT_HEAD(list, *sipp, next); + STAILQ_INSERT_TAIL(list, *sipp, next); TSEXIT2("listify"); TSENTER2("mergesort"); - SLIST_MERGESORT(list, NULL, sysinit_compar, sysinit, next); + STAILQ_MERGESORT(list, NULL, sysinit_compar, sysinit, next); TSEXIT2("mergesort"); TSEXIT(); } /* * Merge a new sysinit set into the sysinit list. */ void sysinit_add(struct sysinit **set, struct sysinit **set_end) { struct sysinitlist new_list; TSENTER(); /* Construct a sorted list from the new sysinits. */ sysinit_mklist(&new_list, set, set_end); /* Merge the new list into the existing one. */ - TSENTER2("SLIST_MERGE"); - SLIST_MERGE(&sysinit_list, &new_list, NULL, sysinit_compar, sysinit, next); - TSEXIT2("SLIST_MERGE"); + TSENTER2("STAILQ_MERGE"); + STAILQ_MERGE(&sysinit_list, &new_list, NULL, sysinit_compar, sysinit, next); + TSEXIT2("STAILQ_MERGE"); TSEXIT(); } #if defined (DDB) && defined(VERBOSE_SYSINIT) static const char * symbol_name(vm_offset_t va, db_strategy_t strategy) { const char *name; c_db_sym_t sym; db_expr_t offset; if (va == 0) return (NULL); sym = db_search_symbol(va, strategy, &offset); if (offset != 0) return (NULL); db_symbol_values(sym, &name, NULL); return (name); } #endif /* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the * hard work is done in the lower-level initialization routines including * startup(), which does memory initialization and autoconfiguration. * * This allows simple addition of new kernel subsystems that require * boot time initialization. It also allows substitution of subsystem * (for instance, a scheduler, kernel profiler, or VM system) by object * module. Finally, it allows for optional "kernel threads". */ void mi_startup(void) { struct sysinit *sip; int last; #if defined(VERBOSE_SYSINIT) int verbose; #endif TSENTER(); if (boothowto & RB_VERBOSE) bootverbose++; /* Construct and sort sysinit list. */ sysinit_mklist(&sysinit_list, SET_BEGIN(sysinit_set), SET_LIMIT(sysinit_set)); last = SI_SUB_COPYRIGHT; #if defined(VERBOSE_SYSINIT) verbose = 0; #if !defined(DDB) printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); #endif #endif /* * Perform each system initialization task from the ordered list. Note * that if sysinit_list is modified (e.g. by a KLD) we will nonetheless * always perform the earlist-sorted sysinit at each step; using the - * SLIST_FOREACH macro would result in items being skipped if inserted + * STAILQ_FOREACH macro would result in items being skipped if inserted * earlier than the "current item". */ - while ((sip = SLIST_FIRST(&sysinit_list)) != NULL) { - SLIST_REMOVE_HEAD(&sysinit_list, next); + while ((sip = STAILQ_FIRST(&sysinit_list)) != NULL) { + STAILQ_REMOVE_HEAD(&sysinit_list, next); if (sip->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s)*/ if (sip->subsystem > last) BOOTTRACE_INIT("sysinit 0x%7x", sip->subsystem); #if defined(VERBOSE_SYSINIT) if (sip->subsystem > last && verbose_sysinit != 0) { verbose = 1; printf("subsystem %x\n", last); } if (verbose) { #if defined(DDB) const char *func, *data; func = symbol_name((vm_offset_t)sip->func, DB_STGY_PROC); data = symbol_name((vm_offset_t)sip->udata, DB_STGY_ANY); if (func != NULL && data != NULL) printf(" %s(&%s)... ", func, data); else if (func != NULL) printf(" %s(%p)... ", func, sip->udata); else #endif printf(" %p(%p)... ", sip->func, sip->udata); } #endif /* Call function */ (*(sip->func))(sip->udata); #if defined(VERBOSE_SYSINIT) if (verbose) printf("done.\n"); #endif /* Check off the one we're just done */ last = sip->subsystem; } TSEXIT(); /* Here so we don't overlap with start_init. */ BOOTTRACE("mi_startup done"); mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); mtx_unlock(&Giant); /* * Now hand over this thread to swapper. */ swapper(); /* NOTREACHED*/ } static void print_caddr_t(void *data) { printf("%s", (char *)data); } static void print_version(void *data __unused) { int len; /* Strip a trailing newline from version. */ len = strlen(version); while (len > 0 && version[len - 1] == '\n') len--; printf("%.*s %s\n", len, version, machine); printf("%s\n", compiler_version); } SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright); SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, trademark); SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); #ifdef WITNESS static char wit_warn[] = "WARNING: WITNESS option enabled, expect reduced performance.\n"; SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_FOURTH, print_caddr_t, wit_warn); SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_FOURTH, print_caddr_t, wit_warn); #endif #ifdef DIAGNOSTIC static char diag_warn[] = "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH, print_caddr_t, diag_warn); SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_FIFTH, print_caddr_t, diag_warn); #endif #if __SIZEOF_LONG__ == 4 static char ilp32_warn[] = "WARNING: 32-bit kernels are deprecated and may be removed in FreeBSD 15.0.\n"; SYSINIT(ilp32warn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH, print_caddr_t, ilp32_warn); SYSINIT(ilp32warn2, SI_SUB_LAST, SI_ORDER_FIFTH, print_caddr_t, ilp32_warn); #endif static int null_fetch_syscall_args(struct thread *td __unused) { panic("null_fetch_syscall_args"); } static void null_set_syscall_retval(struct thread *td __unused, int error __unused) { panic("null_set_syscall_retval"); } static void null_set_fork_retval(struct thread *td __unused) { } struct sysentvec null_sysvec = { .sv_size = 0, .sv_table = NULL, .sv_fixup = NULL, .sv_sendsig = NULL, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_name = "null", .sv_coredump = NULL, .sv_minsigstksz = 0, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_psstringssz = sizeof(struct ps_strings), .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = NULL, .sv_setregs = NULL, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = 0, .sv_set_syscall_retval = null_set_syscall_retval, .sv_fetch_syscall_args = null_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_set_fork_retval = null_set_fork_retval, .sv_regset_begin = NULL, .sv_regset_end = NULL, }; /* * The two following SYSINIT's are proc0 specific glue code. I am not * convinced that they can not be safely combined, but their order of * operation has been maintained as the same as the original init_main.c * for right now. */ /* ARGSUSED*/ static void proc0_init(void *dummy __unused) { struct proc *p; struct thread *td; struct ucred *newcred; struct uidinfo tmpuinfo; struct loginclass tmplc = { .lc_name = "", }; vm_paddr_t pageablemem; int i; GIANT_REQUIRED; p = &proc0; td = &thread0; /* * Initialize magic number and osrel. */ p->p_magic = P_MAGIC; p->p_osrel = osreldate; /* * Initialize thread and process structures. */ procinit(); /* set up proc zone */ threadinit(); /* set up UMA zones */ /* * Initialise scheduler resources. * Add scheduler specific parts to proc, thread as needed. */ schedinit(); /* scheduler gets its house in order */ /* * Create process 0 (the swapper). */ LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); sx_init(&pgrp0.pg_killsx, "killpg racer"); p->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); pgrp0.pg_session = &session0; mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); refcount_init(&session0.s_count, 1); session0.s_leader = p; p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM | P_KPROC; p->p_flag2 = 0; p->p_state = PRS_NORMAL; p->p_klist = knlist_alloc(&p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; td->td_tid = THREAD0_TID; tidhash_add(td); TD_SET_STATE(td, TDS_RUNNING); td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PVM; td->td_oncpu = curcpu; td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); td->td_domain.dr_policy = td->td_cpuset->cs_domain; prison0_init(); p->p_peers = 0; p->p_leader = p; p->p_reaper = p; p->p_treeflag |= P_TREE_REAPER; LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); callout_init(&td->td_slpcallout, 1); TAILQ_INIT(&p->p_kqtim_stop); /* Create credentials. */ newcred = crget(); newcred->cr_ngroups = 1; /* group 0 */ /* A hack to prevent uifind from tripping over NULL pointers. */ curthread->td_ucred = newcred; tmpuinfo.ui_uid = 1; newcred->cr_uidinfo = newcred->cr_ruidinfo = &tmpuinfo; newcred->cr_uidinfo = uifind(0); newcred->cr_ruidinfo = uifind(0); newcred->cr_loginclass = &tmplc; newcred->cr_loginclass = loginclass_find("default"); /* End hack. creds get properly set later with thread_cow_get_proc */ curthread->td_ucred = NULL; newcred->cr_prison = &prison0; newcred->cr_users++; /* avoid assertion failure */ proc_set_cred_init(p, newcred); newcred->cr_users--; crfree(newcred); #ifdef AUDIT audit_cred_kproc0(newcred); #endif #ifdef MAC mac_cred_create_swapper(newcred); #endif /* Create sigacts. */ p->p_sigacts = sigacts_alloc(); /* Initialize signal state for process 0. */ siginit(&proc0); /* Create the file descriptor table. */ p->p_pd = pdinit(NULL, false); p->p_fd = fdinit(); p->p_fdtol = NULL; /* Create the limits structures. */ p->p_limit = lim_alloc(); for (i = 0; i < RLIM_NLIMITS; i++) p->p_limit->pl_rlimit[i].rlim_cur = p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ pageablemem = ptoa((vm_paddr_t)vm_free_count()); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; PROC_LOCK(p); thread_cow_get_proc(td, p); PROC_UNLOCK(p); /* Initialize resource accounting structures. */ racct_create(&p->p_racct); p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ p->p_vmspace = &vmspace0; refcount_init(&vmspace0.vm_refcnt, 1); pmap_pinit0(vmspace_pmap(&vmspace0)); /* * proc0 is not expected to enter usermode, so there is no special * handling for sv_minuser here, like is done for exec_new_vmspace(). */ vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); /* * Call the init and ctor for the new thread and proc. We wait * to do this until all other structures are fairly sane. */ EVENTHANDLER_DIRECT_INVOKE(process_init, p); EVENTHANDLER_DIRECT_INVOKE(thread_init, td); #ifdef KDTRACE_HOOKS kdtrace_proc_ctor(p); kdtrace_thread_ctor(td); #endif EVENTHANDLER_DIRECT_INVOKE(process_ctor, p); EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); /* * Charge root for one process. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); PROC_LOCK(p); racct_add_force(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); /* ARGSUSED*/ static void proc0_post(void *dummy __unused) { struct proc *p; struct rusage ru; struct thread *td; /* * Now we can look at the time, having had a chance to verify the * time from the filesystem. Pretend that proc0 started now. */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } microuptime(&p->p_stats->p_start); PROC_STATLOCK(p); rufetch(p, &ru); /* Clears thread stats */ p->p_rux.rux_runtime = 0; p->p_rux.rux_uticks = 0; p->p_rux.rux_sticks = 0; p->p_rux.rux_iticks = 0; PROC_STATUNLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { td->td_runtime = 0; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); } SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); /* *************************************************************************** **** **** The following SYSINIT's and glue code should be moved to the **** respective files on a per subsystem basis. **** *************************************************************************** */ /* * List of paths to try when searching for "init". */ static char init_path[MAXPATHLEN] = #ifdef INIT_PATH __XSTRING(INIT_PATH); #else "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init"; #endif SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, "Path used to search the init process"); /* * Shutdown timeout of init(8). * Unused within kernel, but used to control init(8), hence do not remove. */ #ifndef INIT_SHUTDOWN_TIMEOUT #define INIT_SHUTDOWN_TIMEOUT 120 #endif static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " "Unused within kernel, but used to control init(8)"); /* * Start the initial user process; try exec'ing each pathname in init_path. * The program is invoked with one argument containing the boot flags. */ static void start_init(void *dummy) { struct image_args args; int error; char *var, *path; char *free_init_path, *tmp_init_path; struct thread *td; struct proc *p; struct vmspace *oldvmspace; TSENTER(); /* Here so we don't overlap with mi_startup. */ td = curthread; p = td->td_proc; vfs_mountroot(); /* Wipe GELI passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); /* For Multicons, report which console is primary to both */ if (boothowto & RB_MULTIPLE) { if (boothowto & RB_SERIAL) printf("Dual Console: Serial Primary, Video Secondary\n"); else printf("Dual Console: Video Primary, Serial Secondary\n"); } if ((var = kern_getenv("init_path")) != NULL) { strlcpy(init_path, var, sizeof(init_path)); freeenv(var); } free_init_path = tmp_init_path = strdup(init_path, M_TEMP); while ((path = strsep(&tmp_init_path, ":")) != NULL) { if (bootverbose) printf("start_init: trying %s\n", path); memset(&args, 0, sizeof(args)); error = exec_alloc_args(&args); if (error != 0) panic("%s: Can't allocate space for init arguments %d", __func__, error); error = exec_args_add_fname(&args, path, UIO_SYSSPACE); if (error != 0) panic("%s: Can't add fname %d", __func__, error); error = exec_args_add_arg(&args, path, UIO_SYSSPACE); if (error != 0) panic("%s: Can't add argv[0] %d", __func__, error); if (boothowto & RB_SINGLE) error = exec_args_add_arg(&args, "-s", UIO_SYSSPACE); if (error != 0) panic("%s: Can't add argv[0] %d", __func__, error); /* * Now try to exec the program. If can't for any reason * other than it doesn't exist, complain. * * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); oldvmspace = p->p_vmspace; error = kern_execve(td, &args, NULL, oldvmspace); KASSERT(error != 0, ("kern_execve returned success, not EJUSTRETURN")); if (error == EJUSTRETURN) { exec_cleanup(td, oldvmspace); free(free_init_path, M_TEMP); TSEXIT(); return; } if (error != ENOENT) printf("exec %s: error %d\n", path, error); } free(free_init_path, M_TEMP); printf("init: not found in path %s\n", init_path); panic("no init"); } /* * Like kproc_create(), but runs in its own address space. We do this * early to reserve pid 1. Note special case - do not make it * runnable yet, init execution is started when userspace can be served. */ static void create_init(const void *udata __unused) { struct fork_req fr; struct ucred *newcred, *oldcred; struct thread *td; int error; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; fr.fr_procp = &initproc; error = fork1(&thread0, &fr); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); /* divorce init's credentials from the kernel's */ newcred = crget(); sx_xlock(&proctree_lock); PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; initproc->p_treeflag |= P_TREE_REAPER; oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC mac_cred_create_init(newcred); #endif #ifdef AUDIT audit_cred_proc1(newcred); #endif proc_set_cred(initproc, newcred); td = FIRST_THREAD_IN_PROC(initproc); crcowfree(td); td->td_realucred = crcowget(initproc->p_ucred); td->td_ucred = td->td_realucred; PROC_UNLOCK(initproc); sx_xunlock(&proctree_lock); crfree(oldcred); cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); /* * Make it runnable now. */ static void kick_init(const void *udata __unused) { struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL); /* * DDB(4). */ #ifdef DDB static void db_show_print_syinit(struct sysinit *sip, bool ddb) { const char *sname, *funcname; c_db_sym_t sym; db_expr_t offset; #define xprint(...) \ if (ddb) \ db_printf(__VA_ARGS__); \ else \ printf(__VA_ARGS__) if (sip == NULL) { xprint("%s: no sysinit * given\n", __func__); return; } sym = db_search_symbol((vm_offset_t)sip, DB_STGY_ANY, &offset); db_symbol_values(sym, &sname, NULL); sym = db_search_symbol((vm_offset_t)sip->func, DB_STGY_PROC, &offset); db_symbol_values(sym, &funcname, NULL); xprint("%s(%p)\n", (sname != NULL) ? sname : "", sip); xprint(" %#08x %#08x\n", sip->subsystem, sip->order); xprint(" %p(%s)(%p)\n", sip->func, (funcname != NULL) ? funcname : "", sip->udata); #undef xprint } DB_SHOW_COMMAND_FLAGS(sysinit, db_show_sysinit, DB_CMD_MEMSAFE) { struct sysinit *sip; db_printf("SYSINIT vs Name(Ptr)\n"); db_printf(" Subsystem Order\n"); db_printf(" Function(Name)(Arg)\n"); - SLIST_FOREACH(sip, &sysinit_list, next) { + STAILQ_FOREACH(sip, &sysinit_list, next) { db_show_print_syinit(sip, true); if (db_pager_quit) break; } } #endif /* DDB */ diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index 12e346514d1a..62b9e6a02b3c 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -1,487 +1,487 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernel.h 8.3 (Berkeley) 1/21/94 */ #ifndef _SYS_KERNEL_H_ #define _SYS_KERNEL_H_ #include #ifdef _KERNEL /* for intrhook and sysinit linked list below */ #include /* for timestamping SYSINITs; other files may assume this is included here */ #include /* Global variables for the kernel. */ /* 1.1 */ extern char kernelname[MAXPATHLEN]; extern int tick; /* usec per tick (1000000 / hz) */ extern int hz; /* system clock's frequency */ extern int psratio; /* ratio: prof / stat */ extern int stathz; /* statistics clock's frequency */ extern int profhz; /* profiling clock's frequency */ extern int profprocs; /* number of process's profiling */ extern volatile int ticks; #endif /* _KERNEL */ /* * Enumerated types for known system startup interfaces. * * Startup occurs in ascending numeric order; the list entries are * sorted prior to attempting startup to guarantee order. Items * of the same level are arbitrated for order based on the 'order' * element. * * These numbers are arbitrary and are chosen ONLY for ordering; the * enumeration values are explicit rather than implicit to provide * for binary compatibility with inserted elements. * * The SI_SUB_LAST value must have the highest lexical value. */ enum sysinit_sub_id { SI_SUB_DUMMY = 0x0000000, /* not executed; for linker*/ SI_SUB_DONE = 0x0000001, /* processed*/ SI_SUB_TUNABLES = 0x0700000, /* establish tunable values */ SI_SUB_COPYRIGHT = 0x0800001, /* first use of console*/ SI_SUB_VM = 0x1000000, /* virtual memory system init */ SI_SUB_COUNTER = 0x1100000, /* counter(9) is initialized */ SI_SUB_KMEM = 0x1800000, /* kernel memory*/ SI_SUB_HYPERVISOR = 0x1A40000, /* * Hypervisor detection and * virtualization support * setup. */ SI_SUB_WITNESS = 0x1A80000, /* witness initialization */ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */ SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_KHELP = 0x2080000, /* khelp modules */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ SI_SUB_RACCT = 0x2110000, /* resource accounting */ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_RANDOM = 0x2160000, /* random number generator */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */ SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */ SI_SUB_VNET = 0x21E0000, /* vnet 0 */ SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/ SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/ SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */ SI_SUB_RUN_QUEUE = 0x2400000, /* set up run queue*/ SI_SUB_KTRACE = 0x2480000, /* ktrace */ SI_SUB_OPENSOLARIS = 0x2490000, /* OpenSolaris compatibility */ SI_SUB_AUDIT = 0x24C0000, /* audit */ SI_SUB_CREATE_INIT = 0x2500000, /* create init process*/ SI_SUB_SCHED_IDLE = 0x2600000, /* required idle procs */ SI_SUB_MBUF = 0x2700000, /* mbuf subsystem */ SI_SUB_INTR = 0x2800000, /* interrupt threads */ SI_SUB_TASKQ = 0x2880000, /* task queues */ SI_SUB_EPOCH = 0x2888000, /* epoch subsystem */ #ifdef EARLY_AP_STARTUP SI_SUB_SMP = 0x2900000, /* start the APs*/ #endif SI_SUB_SOFTINTR = 0x2A00000, /* start soft interrupt thread */ SI_SUB_DEVFS = 0x2F00000, /* devfs ready for devices */ SI_SUB_INIT_IF = 0x3000000, /* prep for net interfaces */ SI_SUB_NETGRAPH = 0x3010000, /* Let Netgraph initialize */ SI_SUB_DTRACE = 0x3020000, /* DTrace subsystem */ SI_SUB_DTRACE_PROVIDER = 0x3048000, /* DTrace providers */ SI_SUB_DTRACE_ANON = 0x308C000, /* DTrace anon enabling */ SI_SUB_DRIVERS = 0x3100000, /* Let Drivers initialize */ SI_SUB_CONFIGURE = 0x3800000, /* Configure devices */ SI_SUB_VFS = 0x4000000, /* virtual filesystem*/ SI_SUB_CLOCKS = 0x4800000, /* real time and stat clocks*/ SI_SUB_SYSV_SHM = 0x6400000, /* System V shared memory*/ SI_SUB_SYSV_SEM = 0x6800000, /* System V semaphores*/ SI_SUB_SYSV_MSG = 0x6C00000, /* System V message queues*/ SI_SUB_P1003_1B = 0x6E00000, /* P1003.1B realtime */ SI_SUB_PSEUDO = 0x7000000, /* pseudo devices*/ SI_SUB_EXEC = 0x7400000, /* execve() handlers */ SI_SUB_PROTO_BEGIN = 0x8000000, /* VNET initialization */ SI_SUB_PROTO_PFIL = 0x8100000, /* Initialize pfil before FWs */ SI_SUB_PROTO_IF = 0x8400000, /* interfaces*/ SI_SUB_PROTO_DOMAININIT = 0x8600000, /* domain registration system */ SI_SUB_PROTO_MC = 0x8700000, /* Multicast */ SI_SUB_PROTO_DOMAIN = 0x8800000, /* domains (address families?)*/ SI_SUB_PROTO_FIREWALL = 0x8806000, /* Firewalls */ SI_SUB_PROTO_IFATTACHDOMAIN = 0x8808000,/* domain dependent data init */ SI_SUB_PROTO_END = 0x8ffffff, /* VNET helper functions */ SI_SUB_KPROF = 0x9000000, /* kernel profiling*/ SI_SUB_KICK_SCHEDULER = 0xa000000, /* start the timeout events*/ SI_SUB_INT_CONFIG_HOOKS = 0xa800000, /* Interrupts enabled config */ SI_SUB_ROOT_CONF = 0xb000000, /* Find root devices */ SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/ SI_SUB_SYSCALLS = 0xd800000, /* register system calls */ SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */ SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/ SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/ SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/ SI_SUB_KTHREAD_BUF = 0xea00000, /* buffer daemon*/ SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/ SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/ #ifndef EARLY_AP_STARTUP SI_SUB_SMP = 0xf000000, /* start the APs*/ #endif SI_SUB_RACCTD = 0xf100000, /* start racctd*/ SI_SUB_LAST = 0xfffffff /* final initialization */ }; /* * Some enumerated orders; "ANY" sorts last. */ enum sysinit_elem_order { SI_ORDER_FIRST = 0x0000000, /* first*/ SI_ORDER_SECOND = 0x0000001, /* second*/ SI_ORDER_THIRD = 0x0000002, /* third*/ SI_ORDER_FOURTH = 0x0000003, /* fourth*/ SI_ORDER_FIFTH = 0x0000004, /* fifth*/ SI_ORDER_SIXTH = 0x0000005, /* sixth*/ SI_ORDER_SEVENTH = 0x0000006, /* seventh*/ SI_ORDER_EIGHTH = 0x0000007, /* eighth*/ SI_ORDER_MIDDLE = 0x1000000, /* somewhere in the middle */ SI_ORDER_ANY = 0xfffffff /* last*/ }; /* * A system initialization call instance * * At the moment there is one instance of sysinit. We probably do not * want two which is why this code is if'd out, but we definitely want * to discern SYSINIT's which take non-constant data pointers and * SYSINIT's which take constant data pointers, * * The C_* macros take functions expecting const void * arguments * while the non-C_* macros take functions expecting just void * arguments. * * With -Wcast-qual on, the compiler issues warnings: * - if we pass non-const data or functions taking non-const data * to a C_* macro. * * - if we pass const data to the normal macros * * However, no warning is issued if we pass a function taking const data * through a normal non-const macro. This is ok because the function is * saying it won't modify the data so we don't care whether the data is * modifiable or not. */ typedef void (*sysinit_nfunc_t)(void *); typedef void (*sysinit_cfunc_t)(const void *); struct sysinit { enum sysinit_sub_id subsystem; /* subsystem identifier*/ enum sysinit_elem_order order; /* init order within subsystem*/ - SLIST_ENTRY(sysinit) next; /* singly-linked list */ + STAILQ_ENTRY(sysinit) next; /* singly-linked list */ sysinit_cfunc_t func; /* function */ const void *udata; /* multiplexer/argument */ }; /* * Default: no special processing * * The C_ version of SYSINIT is for data pointers to const * data ( and functions taking data pointers to const data ). * At the moment it is no different from SYSINIT and thus * still results in warnings. * * The casts are necessary to have the compiler produce the * correct warnings when -Wcast-qual is used. * */ #ifdef TSLOG struct sysinit_tslog { sysinit_cfunc_t func; const void * data; const char * name; }; static inline void sysinit_tslog_shim(const void * data) { const struct sysinit_tslog * x = data; TSRAW(curthread, TS_ENTER, "SYSINIT", x->name); (x->func)(x->data); TSRAW(curthread, TS_EXIT, "SYSINIT", x->name); } #define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit_tslog uniquifier ## _sys_init_tslog = { \ func, \ (ident), \ #uniquifier \ }; \ static struct sysinit uniquifier ## _sys_init = { \ subsystem, \ order, \ { NULL }, \ sysinit_tslog_shim, \ &uniquifier ## _sys_init_tslog \ }; \ DATA_WSET(sysinit_set,uniquifier ## _sys_init) #else #define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_init = { \ subsystem, \ order, \ { NULL }, \ func, \ (ident) \ }; \ DATA_WSET(sysinit_set,uniquifier ## _sys_init) #endif #define SYSINIT(uniquifier, subsystem, order, func, ident) \ C_SYSINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) /* * Called on module unload: no special processing */ #define C_SYSUNINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_uninit = { \ subsystem, \ order, \ { NULL }, \ func, \ (ident) \ }; \ DATA_WSET(sysuninit_set,uniquifier ## _sys_uninit) #define SYSUNINIT(uniquifier, subsystem, order, func, ident) \ C_SYSUNINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) void sysinit_add(struct sysinit **set, struct sysinit **set_end); #ifdef _KERNEL /* * Infrastructure for tunable 'constants'. Value may be specified at compile * time or kernel load time. Rules relating tunables together can be placed * in a SYSINIT function at SI_SUB_TUNABLES with SI_ORDER_ANY. * * WARNING: developers should never use the reserved suffixes specified in * loader.conf(5) for any tunables or conflicts will result. */ /* * int * please avoid using for new tunables! */ extern void tunable_int_init(void *); struct tunable_int { const char *path; int *var; }; #define TUNABLE_INT(path, var) \ static struct tunable_int __CONCAT(__tunable_int_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int_init, \ &__CONCAT(__tunable_int_, __LINE__)) #define TUNABLE_INT_FETCH(path, var) getenv_int((path), (var)) /* * long */ extern void tunable_long_init(void *); struct tunable_long { const char *path; long *var; }; #define TUNABLE_LONG(path, var) \ static struct tunable_long __CONCAT(__tunable_long_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_long_init,\ &__CONCAT(__tunable_long_, __LINE__)) #define TUNABLE_LONG_FETCH(path, var) getenv_long((path), (var)) /* * unsigned long */ extern void tunable_ulong_init(void *); struct tunable_ulong { const char *path; unsigned long *var; }; #define TUNABLE_ULONG(path, var) \ static struct tunable_ulong __CONCAT(__tunable_ulong_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_ulong_init, \ &__CONCAT(__tunable_ulong_, __LINE__)) #define TUNABLE_ULONG_FETCH(path, var) getenv_ulong((path), (var)) /* * int64_t */ extern void tunable_int64_init(void *); struct tunable_int64 { const char *path; int64_t *var; }; #define TUNABLE_INT64(path, var) \ static struct tunable_int64 __CONCAT(__tunable_int64_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int64_init, \ &__CONCAT(__tunable_int64_, __LINE__)) #define TUNABLE_INT64_FETCH(path, var) getenv_int64((path), (var)) /* * uint64_t */ extern void tunable_uint64_init(void *); struct tunable_uint64 { const char *path; uint64_t *var; }; #define TUNABLE_UINT64(path, var) \ static struct tunable_uint64 __CONCAT(__tunable_uint64_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_uint64_init, \ &__CONCAT(__tunable_uint64_, __LINE__)) #define TUNABLE_UINT64_FETCH(path, var) getenv_uint64((path), (var)) /* * quad */ extern void tunable_quad_init(void *); struct tunable_quad { const char *path; quad_t *var; }; #define TUNABLE_QUAD(path, var) \ static struct tunable_quad __CONCAT(__tunable_quad_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_quad_init, \ &__CONCAT(__tunable_quad_, __LINE__)) #define TUNABLE_QUAD_FETCH(path, var) getenv_quad((path), (var)) /* * bool */ extern void tunable_bool_init(void *); struct tunable_bool { const char *path; bool *var; }; #define TUNABLE_BOOL(path, var) \ static struct tunable_bool __CONCAT(__tunable_bool_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_bool_init, \ &__CONCAT(__tunable_bool_, __LINE__)) #define TUNABLE_BOOL_FETCH(path, var) getenv_bool((path), (var)) extern void tunable_str_init(void *); struct tunable_str { const char *path; char *var; int size; }; #define TUNABLE_STR(path, var, size) \ static struct tunable_str __CONCAT(__tunable_str_, __LINE__) = { \ (path), \ (var), \ (size), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_str_init, \ &__CONCAT(__tunable_str_, __LINE__)) #define TUNABLE_STR_FETCH(path, var, size) \ getenv_string((path), (var), (size)) #endif /* _KERNEL */ typedef void (*ich_func_t)(void *_arg); struct intr_config_hook { STAILQ_ENTRY(intr_config_hook) ich_links; uintptr_t ich_state; #define ICHS_QUEUED 0x1 #define ICHS_RUNNING 0x2 #define ICHS_DONE 0x3 ich_func_t ich_func; void *ich_arg; }; int config_intrhook_establish(struct intr_config_hook *hook); void config_intrhook_disestablish(struct intr_config_hook *hook); int config_intrhook_drain(struct intr_config_hook *hook); void config_intrhook_oneshot(ich_func_t _func, void *_arg); #endif /* !_SYS_KERNEL_H_*/