diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index a1073512e856..2367bf35da76 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -1,2350 +1,2353 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1997-2000 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ddb.h" #include "opt_kld.h" #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include "linker_if.h" #ifdef HWPMC_HOOKS #include #endif #ifdef KLD_DEBUG int kld_debug = 0; SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RWTUN, &kld_debug, 0, "Set various levels of KLD debug"); #endif /* These variables are used by kernel debuggers to enumerate loaded files. */ const int kld_off_address = offsetof(struct linker_file, address); const int kld_off_filename = offsetof(struct linker_file, filename); const int kld_off_pathname = offsetof(struct linker_file, pathname); const int kld_off_next = offsetof(struct linker_file, link.tqe_next); /* * static char *linker_search_path(const char *name, struct mod_depend * *verinfo); */ static const char *linker_basename(const char *path); /* * Find a currently loaded file given its filename. */ static linker_file_t linker_find_file_by_name(const char* _filename); /* * Find a currently loaded file given its file id. */ static linker_file_t linker_find_file_by_id(int _fileid); /* Metadata from the static kernel */ SET_DECLARE(modmetadata_set, struct mod_metadata); MALLOC_DEFINE(M_LINKER, "linker", "kernel linker"); linker_file_t linker_kernel_file; static struct sx kld_sx; /* kernel linker lock */ static u_int kld_busy; static struct thread *kld_busy_owner; /* * Load counter used by clients to determine if a linker file has been * re-loaded. This counter is incremented for each file load. */ static int loadcnt; static linker_class_list_t classes; static linker_file_list_t linker_files; static int next_file_id = 1; static int linker_no_more_classes = 0; #define LINKER_GET_NEXT_FILE_ID(a) do { \ linker_file_t lftmp; \ \ if (!cold) \ sx_assert(&kld_sx, SA_XLOCKED); \ retry: \ TAILQ_FOREACH(lftmp, &linker_files, link) { \ if (next_file_id == lftmp->id) { \ next_file_id++; \ goto retry; \ } \ } \ (a) = next_file_id; \ } while (0) /* XXX wrong name; we're looking at version provision tags here, not modules */ typedef TAILQ_HEAD(, modlist) modlisthead_t; struct modlist { TAILQ_ENTRY(modlist) link; /* chain together all modules */ linker_file_t container; const char *name; int version; }; typedef struct modlist *modlist_t; static modlisthead_t found_modules; static int linker_file_add_dependency(linker_file_t file, linker_file_t dep); static caddr_t linker_file_lookup_symbol_internal(linker_file_t file, const char* name, int deps); static int linker_load_module(const char *kldname, const char *modname, struct linker_file *parent, const struct mod_depend *verinfo, struct linker_file **lfpp); static modlist_t modlist_lookup2(const char *name, const struct mod_depend *verinfo); static void linker_init(void *arg) { sx_init(&kld_sx, "kernel linker"); TAILQ_INIT(&classes); TAILQ_INIT(&linker_files); } SYSINIT(linker, SI_SUB_KLD, SI_ORDER_FIRST, linker_init, NULL); static void linker_stop_class_add(void *arg) { linker_no_more_classes = 1; } SYSINIT(linker_class, SI_SUB_KLD, SI_ORDER_ANY, linker_stop_class_add, NULL); int linker_add_class(linker_class_t lc) { /* * We disallow any class registration past SI_ORDER_ANY * of SI_SUB_KLD. We bump the reference count to keep the * ops from being freed. */ if (linker_no_more_classes == 1) return (EPERM); kobj_class_compile((kobj_class_t) lc); ((kobj_class_t)lc)->refs++; /* XXX: kobj_mtx */ TAILQ_INSERT_TAIL(&classes, lc, link); return (0); } static void linker_file_sysinit(linker_file_t lf) { struct sysinit **start, **stop, **sipp, **xipp, *save; int last; KLD_DPF(FILE, ("linker_file_sysinit: calling SYSINITs for %s\n", lf->filename)); sx_assert(&kld_sx, SA_XLOCKED); if (linker_file_lookup_set(lf, "sysinit_set", &start, &stop, NULL) != 0) return; /* * Perform a bubble sort of the system initialization objects by * their subsystem (primary key) and order (secondary key). * * Since some things care about execution order, this is the operation * which ensures continued function. */ for (sipp = start; sipp < stop; sipp++) { for (xipp = sipp + 1; xipp < stop; xipp++) { if ((*sipp)->subsystem < (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order <= (*xipp)->order)) continue; /* skip */ save = *sipp; *sipp = *xipp; *xipp = save; } } /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ last = SI_SUB_DUMMY; sx_xunlock(&kld_sx); mtx_lock(&Giant); for (sipp = start; sipp < stop; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s) */ if ((*sipp)->subsystem > last) BOOTTRACE("%s: sysinit 0x%7x", lf->filename, (*sipp)->subsystem); /* Call function */ (*((*sipp)->func)) ((*sipp)->udata); last = (*sipp)->subsystem; } mtx_unlock(&Giant); sx_xlock(&kld_sx); } static void linker_file_sysuninit(linker_file_t lf) { struct sysinit **start, **stop, **sipp, **xipp, *save; int last; KLD_DPF(FILE, ("linker_file_sysuninit: calling SYSUNINITs for %s\n", lf->filename)); sx_assert(&kld_sx, SA_XLOCKED); if (linker_file_lookup_set(lf, "sysuninit_set", &start, &stop, NULL) != 0) return; /* * Perform a reverse bubble sort of the system initialization objects * by their subsystem (primary key) and order (secondary key). * * Since some things care about execution order, this is the operation * which ensures continued function. */ for (sipp = start; sipp < stop; sipp++) { for (xipp = sipp + 1; xipp < stop; xipp++) { if ((*sipp)->subsystem > (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order >= (*xipp)->order)) continue; /* skip */ save = *sipp; *sipp = *xipp; *xipp = save; } } /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ sx_xunlock(&kld_sx); mtx_lock(&Giant); last = SI_SUB_DUMMY; for (sipp = start; sipp < stop; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s) */ if ((*sipp)->subsystem > last) BOOTTRACE("%s: sysuninit 0x%7x", lf->filename, (*sipp)->subsystem); /* Call function */ (*((*sipp)->func)) ((*sipp)->udata); last = (*sipp)->subsystem; } mtx_unlock(&Giant); sx_xlock(&kld_sx); } static void linker_file_register_sysctls(linker_file_t lf, bool enable) { struct sysctl_oid **start, **stop, **oidp; KLD_DPF(FILE, ("linker_file_register_sysctls: registering SYSCTLs for %s\n", lf->filename)); sx_assert(&kld_sx, SA_XLOCKED); if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0) return; sx_xunlock(&kld_sx); sysctl_wlock(); for (oidp = start; oidp < stop; oidp++) { if (enable) sysctl_register_oid(*oidp); else sysctl_register_disabled_oid(*oidp); } sysctl_wunlock(); sx_xlock(&kld_sx); } static void linker_file_enable_sysctls(linker_file_t lf) { struct sysctl_oid **start, **stop, **oidp; KLD_DPF(FILE, ("linker_file_enable_sysctls: enable SYSCTLs for %s\n", lf->filename)); sx_assert(&kld_sx, SA_XLOCKED); if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0) return; sx_xunlock(&kld_sx); sysctl_wlock(); for (oidp = start; oidp < stop; oidp++) sysctl_enable_oid(*oidp); sysctl_wunlock(); sx_xlock(&kld_sx); } static void linker_file_unregister_sysctls(linker_file_t lf) { struct sysctl_oid **start, **stop, **oidp; KLD_DPF(FILE, ("linker_file_unregister_sysctls: unregistering SYSCTLs" " for %s\n", lf->filename)); sx_assert(&kld_sx, SA_XLOCKED); if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0) return; sx_xunlock(&kld_sx); sysctl_wlock(); for (oidp = start; oidp < stop; oidp++) sysctl_unregister_oid(*oidp); sysctl_wunlock(); sx_xlock(&kld_sx); } static int linker_file_register_modules(linker_file_t lf) { struct mod_metadata **start, **stop, **mdp; const moduledata_t *moddata; int first_error, error; KLD_DPF(FILE, ("linker_file_register_modules: registering modules" " in %s\n", lf->filename)); sx_assert(&kld_sx, SA_XLOCKED); if (linker_file_lookup_set(lf, MDT_SETNAME, &start, &stop, NULL) != 0) { /* * This fallback should be unnecessary, but if we get booted * from boot2 instead of loader and we are missing our * metadata then we have to try the best we can. */ if (lf == linker_kernel_file) { start = SET_BEGIN(modmetadata_set); stop = SET_LIMIT(modmetadata_set); } else return (0); } first_error = 0; for (mdp = start; mdp < stop; mdp++) { if ((*mdp)->md_type != MDT_MODULE) continue; moddata = (*mdp)->md_data; KLD_DPF(FILE, ("Registering module %s in %s\n", moddata->name, lf->filename)); error = module_register(moddata, lf); if (error) { printf("Module %s failed to register: %d\n", moddata->name, error); if (first_error == 0) first_error = error; } } return (first_error); } static void linker_init_kernel_modules(void) { sx_xlock(&kld_sx); linker_file_register_modules(linker_kernel_file); sx_xunlock(&kld_sx); } SYSINIT(linker_kernel, SI_SUB_KLD, SI_ORDER_ANY, linker_init_kernel_modules, NULL); static int linker_load_file(const char *filename, linker_file_t *result) { linker_class_t lc; linker_file_t lf; int foundfile, error, modules; /* Refuse to load modules if securelevel raised */ if (prison0.pr_securelevel > 0) return (EPERM); sx_assert(&kld_sx, SA_XLOCKED); lf = linker_find_file_by_name(filename); if (lf) { KLD_DPF(FILE, ("linker_load_file: file %s is already loaded," " incrementing refs\n", filename)); *result = lf; lf->refs++; return (0); } foundfile = 0; error = 0; /* * We do not need to protect (lock) classes here because there is * no class registration past startup (SI_SUB_KLD, SI_ORDER_ANY) * and there is no class deregistration mechanism at this time. */ TAILQ_FOREACH(lc, &classes, link) { KLD_DPF(FILE, ("linker_load_file: trying to load %s\n", filename)); error = LINKER_LOAD_FILE(lc, filename, &lf); /* * If we got something other than ENOENT, then it exists but * we cannot load it for some other reason. */ if (error != ENOENT) foundfile = 1; if (lf) { error = linker_file_register_modules(lf); if (error == EEXIST) { linker_file_unload(lf, LINKER_UNLOAD_FORCE); return (error); } modules = !TAILQ_EMPTY(&lf->modules); linker_file_register_sysctls(lf, false); +#ifdef VIMAGE + LINKER_PROPAGATE_VNETS(lf); +#endif linker_file_sysinit(lf); lf->flags |= LINKER_FILE_LINKED; /* * If all of the modules in this file failed * to load, unload the file and return an * error of ENOEXEC. */ if (modules && TAILQ_EMPTY(&lf->modules)) { linker_file_unload(lf, LINKER_UNLOAD_FORCE); return (ENOEXEC); } linker_file_enable_sysctls(lf); EVENTHANDLER_INVOKE(kld_load, lf); *result = lf; return (0); } } /* * Less than ideal, but tells the user whether it failed to load or * the module was not found. */ if (foundfile) { /* * If the file type has not been recognized by the last try * printout a message before to fail. */ if (error == ENOSYS) printf("%s: %s - unsupported file type\n", __func__, filename); /* * Format not recognized or otherwise unloadable. * When loading a module that is statically built into * the kernel EEXIST percolates back up as the return * value. Preserve this so that apps like sysinstall * can recognize this special case and not post bogus * dialog boxes. */ if (error != EEXIST) error = ENOEXEC; } else error = ENOENT; /* Nothing found */ return (error); } int linker_reference_module(const char *modname, struct mod_depend *verinfo, linker_file_t *result) { modlist_t mod; int error; sx_xlock(&kld_sx); if ((mod = modlist_lookup2(modname, verinfo)) != NULL) { *result = mod->container; (*result)->refs++; sx_xunlock(&kld_sx); return (0); } error = linker_load_module(NULL, modname, NULL, verinfo, result); sx_xunlock(&kld_sx); return (error); } int linker_release_module(const char *modname, struct mod_depend *verinfo, linker_file_t lf) { modlist_t mod; int error; sx_xlock(&kld_sx); if (lf == NULL) { KASSERT(modname != NULL, ("linker_release_module: no file or name")); mod = modlist_lookup2(modname, verinfo); if (mod == NULL) { sx_xunlock(&kld_sx); return (ESRCH); } lf = mod->container; } else KASSERT(modname == NULL && verinfo == NULL, ("linker_release_module: both file and name")); error = linker_file_unload(lf, LINKER_UNLOAD_NORMAL); sx_xunlock(&kld_sx); return (error); } static linker_file_t linker_find_file_by_name(const char *filename) { linker_file_t lf; char *koname; koname = malloc(strlen(filename) + 4, M_LINKER, M_WAITOK); sprintf(koname, "%s.ko", filename); sx_assert(&kld_sx, SA_XLOCKED); TAILQ_FOREACH(lf, &linker_files, link) { if (strcmp(lf->filename, koname) == 0) break; if (strcmp(lf->filename, filename) == 0) break; } free(koname, M_LINKER); return (lf); } static linker_file_t linker_find_file_by_id(int fileid) { linker_file_t lf; sx_assert(&kld_sx, SA_XLOCKED); TAILQ_FOREACH(lf, &linker_files, link) if (lf->id == fileid && lf->flags & LINKER_FILE_LINKED) break; return (lf); } int linker_file_foreach(linker_predicate_t *predicate, void *context) { linker_file_t lf; int retval = 0; sx_xlock(&kld_sx); TAILQ_FOREACH(lf, &linker_files, link) { retval = predicate(lf, context); if (retval != 0) break; } sx_xunlock(&kld_sx); return (retval); } linker_file_t linker_make_file(const char *pathname, linker_class_t lc) { linker_file_t lf; const char *filename; if (!cold) sx_assert(&kld_sx, SA_XLOCKED); filename = linker_basename(pathname); KLD_DPF(FILE, ("linker_make_file: new file, filename='%s' for pathname='%s'\n", filename, pathname)); lf = (linker_file_t)kobj_create((kobj_class_t)lc, M_LINKER, M_WAITOK); if (lf == NULL) return (NULL); lf->ctors_addr = 0; lf->ctors_size = 0; lf->dtors_addr = 0; lf->dtors_size = 0; lf->refs = 1; lf->userrefs = 0; lf->flags = 0; lf->filename = strdup(filename, M_LINKER); lf->pathname = strdup(pathname, M_LINKER); LINKER_GET_NEXT_FILE_ID(lf->id); lf->ndeps = 0; lf->deps = NULL; lf->loadcnt = ++loadcnt; #ifdef __arm__ lf->exidx_addr = 0; lf->exidx_size = 0; #endif STAILQ_INIT(&lf->common); TAILQ_INIT(&lf->modules); TAILQ_INSERT_TAIL(&linker_files, lf, link); return (lf); } int linker_file_unload(linker_file_t file, int flags) { module_t mod, next; modlist_t ml, nextml; struct common_symbol *cp; int error, i; /* Refuse to unload modules if securelevel raised. */ if (prison0.pr_securelevel > 0) return (EPERM); sx_assert(&kld_sx, SA_XLOCKED); KLD_DPF(FILE, ("linker_file_unload: lf->refs=%d\n", file->refs)); /* Easy case of just dropping a reference. */ if (file->refs > 1) { file->refs--; return (0); } /* Give eventhandlers a chance to prevent the unload. */ error = 0; EVENTHANDLER_INVOKE(kld_unload_try, file, &error); if (error != 0) return (EBUSY); KLD_DPF(FILE, ("linker_file_unload: file is unloading," " informing modules\n")); /* * Quiesce all the modules to give them a chance to veto the unload. */ MOD_SLOCK; for (mod = TAILQ_FIRST(&file->modules); mod; mod = module_getfnext(mod)) { error = module_quiesce(mod); if (error != 0 && flags != LINKER_UNLOAD_FORCE) { KLD_DPF(FILE, ("linker_file_unload: module %s" " vetoed unload\n", module_getname(mod))); /* * XXX: Do we need to tell all the quiesced modules * that they can resume work now via a new module * event? */ MOD_SUNLOCK; return (error); } } MOD_SUNLOCK; /* * Inform any modules associated with this file that they are * being unloaded. */ MOD_XLOCK; for (mod = TAILQ_FIRST(&file->modules); mod; mod = next) { next = module_getfnext(mod); MOD_XUNLOCK; /* * Give the module a chance to veto the unload. */ if ((error = module_unload(mod)) != 0) { #ifdef KLD_DEBUG MOD_SLOCK; KLD_DPF(FILE, ("linker_file_unload: module %s" " failed unload\n", module_getname(mod))); MOD_SUNLOCK; #endif return (error); } MOD_XLOCK; module_release(mod); } MOD_XUNLOCK; TAILQ_FOREACH_SAFE(ml, &found_modules, link, nextml) { if (ml->container == file) { TAILQ_REMOVE(&found_modules, ml, link); free(ml, M_LINKER); } } /* * Don't try to run SYSUNINITs if we are unloaded due to a * link error. */ if (file->flags & LINKER_FILE_LINKED) { file->flags &= ~LINKER_FILE_LINKED; linker_file_unregister_sysctls(file); linker_file_sysuninit(file); } TAILQ_REMOVE(&linker_files, file, link); if (file->deps) { for (i = 0; i < file->ndeps; i++) linker_file_unload(file->deps[i], flags); free(file->deps, M_LINKER); file->deps = NULL; } while ((cp = STAILQ_FIRST(&file->common)) != NULL) { STAILQ_REMOVE_HEAD(&file->common, link); free(cp, M_LINKER); } LINKER_UNLOAD(file); EVENTHANDLER_INVOKE(kld_unload, file->filename, file->address, file->size); if (file->filename) { free(file->filename, M_LINKER); file->filename = NULL; } if (file->pathname) { free(file->pathname, M_LINKER); file->pathname = NULL; } kobj_delete((kobj_t) file, M_LINKER); return (0); } int linker_ctf_get(linker_file_t file, linker_ctf_t *lc) { return (LINKER_CTF_GET(file, lc)); } static int linker_file_add_dependency(linker_file_t file, linker_file_t dep) { linker_file_t *newdeps; sx_assert(&kld_sx, SA_XLOCKED); file->deps = realloc(file->deps, (file->ndeps + 1) * sizeof(*newdeps), M_LINKER, M_WAITOK | M_ZERO); file->deps[file->ndeps] = dep; file->ndeps++; KLD_DPF(FILE, ("linker_file_add_dependency:" " adding %s as dependency for %s\n", dep->filename, file->filename)); return (0); } /* * Locate a linker set and its contents. This is a helper function to avoid * linker_if.h exposure elsewhere. Note: firstp and lastp are really void **. * This function is used in this file so we can avoid having lots of (void **) * casts. */ int linker_file_lookup_set(linker_file_t file, const char *name, void *firstp, void *lastp, int *countp) { sx_assert(&kld_sx, SA_LOCKED); return (LINKER_LOOKUP_SET(file, name, firstp, lastp, countp)); } /* * List all functions in a file. */ int linker_file_function_listall(linker_file_t lf, linker_function_nameval_callback_t callback_func, void *arg) { return (LINKER_EACH_FUNCTION_NAMEVAL(lf, callback_func, arg)); } caddr_t linker_file_lookup_symbol(linker_file_t file, const char *name, int deps) { caddr_t sym; int locked; locked = sx_xlocked(&kld_sx); if (!locked) sx_xlock(&kld_sx); sym = linker_file_lookup_symbol_internal(file, name, deps); if (!locked) sx_xunlock(&kld_sx); return (sym); } static caddr_t linker_file_lookup_symbol_internal(linker_file_t file, const char *name, int deps) { c_linker_sym_t sym; linker_symval_t symval; caddr_t address; size_t common_size = 0; int i; sx_assert(&kld_sx, SA_XLOCKED); KLD_DPF(SYM, ("linker_file_lookup_symbol: file=%p, name=%s, deps=%d\n", file, name, deps)); if (LINKER_LOOKUP_SYMBOL(file, name, &sym) == 0) { LINKER_SYMBOL_VALUES(file, sym, &symval); if (symval.value == 0) /* * For commons, first look them up in the * dependencies and only allocate space if not found * there. */ common_size = symval.size; else { KLD_DPF(SYM, ("linker_file_lookup_symbol: symbol" ".value=%p\n", symval.value)); return (symval.value); } } if (deps) { for (i = 0; i < file->ndeps; i++) { address = linker_file_lookup_symbol_internal( file->deps[i], name, 0); if (address) { KLD_DPF(SYM, ("linker_file_lookup_symbol:" " deps value=%p\n", address)); return (address); } } } if (common_size > 0) { /* * This is a common symbol which was not found in the * dependencies. We maintain a simple common symbol table in * the file object. */ struct common_symbol *cp; STAILQ_FOREACH(cp, &file->common, link) { if (strcmp(cp->name, name) == 0) { KLD_DPF(SYM, ("linker_file_lookup_symbol:" " old common value=%p\n", cp->address)); return (cp->address); } } /* * Round the symbol size up to align. */ common_size = (common_size + sizeof(int) - 1) & -sizeof(int); cp = malloc(sizeof(struct common_symbol) + common_size + strlen(name) + 1, M_LINKER, M_WAITOK | M_ZERO); cp->address = (caddr_t)(cp + 1); cp->name = cp->address + common_size; strcpy(cp->name, name); bzero(cp->address, common_size); STAILQ_INSERT_TAIL(&file->common, cp, link); KLD_DPF(SYM, ("linker_file_lookup_symbol: new common" " value=%p\n", cp->address)); return (cp->address); } KLD_DPF(SYM, ("linker_file_lookup_symbol: fail\n")); return (0); } /* * Both DDB and stack(9) rely on the kernel linker to provide forward and * backward lookup of symbols. However, DDB and sometimes stack(9) need to * do this in a lockfree manner. We provide a set of internal helper * routines to perform these operations without locks, and then wrappers that * optionally lock. * * linker_debug_lookup() is ifdef DDB as currently it's only used by DDB. */ #ifdef DDB static int linker_debug_lookup(const char *symstr, c_linker_sym_t *sym) { linker_file_t lf; TAILQ_FOREACH(lf, &linker_files, link) { if (LINKER_LOOKUP_DEBUG_SYMBOL(lf, symstr, sym) == 0) return (0); } return (ENOENT); } #endif static int linker_debug_search_symbol(caddr_t value, c_linker_sym_t *sym, long *diffp) { linker_file_t lf; c_linker_sym_t best, es; u_long diff, bestdiff, off; best = 0; off = (uintptr_t)value; bestdiff = off; TAILQ_FOREACH(lf, &linker_files, link) { if (LINKER_SEARCH_SYMBOL(lf, value, &es, &diff) != 0) continue; if (es != 0 && diff < bestdiff) { best = es; bestdiff = diff; } if (bestdiff == 0) break; } if (best) { *sym = best; *diffp = bestdiff; return (0); } else { *sym = 0; *diffp = off; return (ENOENT); } } static int linker_debug_symbol_values(c_linker_sym_t sym, linker_symval_t *symval) { linker_file_t lf; TAILQ_FOREACH(lf, &linker_files, link) { if (LINKER_DEBUG_SYMBOL_VALUES(lf, sym, symval) == 0) return (0); } return (ENOENT); } static int linker_debug_search_symbol_name(caddr_t value, char *buf, u_int buflen, long *offset) { linker_symval_t symval; c_linker_sym_t sym; int error; *offset = 0; error = linker_debug_search_symbol(value, &sym, offset); if (error) return (error); error = linker_debug_symbol_values(sym, &symval); if (error) return (error); strlcpy(buf, symval.name, buflen); return (0); } /* * DDB Helpers. DDB has to look across multiple files with their own symbol * tables and string tables. * * Note that we do not obey list locking protocols here. We really don't need * DDB to hang because somebody's got the lock held. We'll take the chance * that the files list is inconsistent instead. */ #ifdef DDB int linker_ddb_lookup(const char *symstr, c_linker_sym_t *sym) { return (linker_debug_lookup(symstr, sym)); } #endif int linker_ddb_search_symbol(caddr_t value, c_linker_sym_t *sym, long *diffp) { return (linker_debug_search_symbol(value, sym, diffp)); } int linker_ddb_symbol_values(c_linker_sym_t sym, linker_symval_t *symval) { return (linker_debug_symbol_values(sym, symval)); } int linker_ddb_search_symbol_name(caddr_t value, char *buf, u_int buflen, long *offset) { return (linker_debug_search_symbol_name(value, buf, buflen, offset)); } /* * stack(9) helper for non-debugging environemnts. Unlike DDB helpers, we do * obey locking protocols, and offer a significantly less complex interface. */ int linker_search_symbol_name_flags(caddr_t value, char *buf, u_int buflen, long *offset, int flags) { int error; KASSERT((flags & (M_NOWAIT | M_WAITOK)) != 0 && (flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK), ("%s: bad flags: 0x%x", __func__, flags)); if (flags & M_NOWAIT) { if (!sx_try_slock(&kld_sx)) return (EWOULDBLOCK); } else sx_slock(&kld_sx); error = linker_debug_search_symbol_name(value, buf, buflen, offset); sx_sunlock(&kld_sx); return (error); } int linker_search_symbol_name(caddr_t value, char *buf, u_int buflen, long *offset) { return (linker_search_symbol_name_flags(value, buf, buflen, offset, M_WAITOK)); } int linker_kldload_busy(int flags) { int error; MPASS((flags & ~(LINKER_UB_UNLOCK | LINKER_UB_LOCKED | LINKER_UB_PCATCH)) == 0); if ((flags & LINKER_UB_LOCKED) != 0) sx_assert(&kld_sx, SA_XLOCKED); if ((flags & LINKER_UB_LOCKED) == 0) sx_xlock(&kld_sx); while (kld_busy > 0) { if (kld_busy_owner == curthread) break; error = sx_sleep(&kld_busy, &kld_sx, (flags & LINKER_UB_PCATCH) != 0 ? PCATCH : 0, "kldbusy", 0); if (error != 0) { if ((flags & LINKER_UB_UNLOCK) != 0) sx_xunlock(&kld_sx); return (error); } } kld_busy++; kld_busy_owner = curthread; if ((flags & LINKER_UB_UNLOCK) != 0) sx_xunlock(&kld_sx); return (0); } void linker_kldload_unbusy(int flags) { MPASS((flags & ~LINKER_UB_LOCKED) == 0); if ((flags & LINKER_UB_LOCKED) != 0) sx_assert(&kld_sx, SA_XLOCKED); if ((flags & LINKER_UB_LOCKED) == 0) sx_xlock(&kld_sx); MPASS(kld_busy > 0); if (kld_busy_owner != curthread) panic("linker_kldload_unbusy done by not owning thread %p", kld_busy_owner); kld_busy--; if (kld_busy == 0) { kld_busy_owner = NULL; wakeup(&kld_busy); } sx_xunlock(&kld_sx); } /* * Syscalls. */ int kern_kldload(struct thread *td, const char *file, int *fileid) { const char *kldname, *modname; linker_file_t lf; int error; if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); if ((error = priv_check(td, PRIV_KLD_LOAD)) != 0) return (error); /* * If file does not contain a qualified name or any dot in it * (kldname.ko, or kldname.ver.ko) treat it as an interface * name. */ if (strchr(file, '/') || strchr(file, '.')) { kldname = file; modname = NULL; } else { kldname = NULL; modname = file; } error = linker_kldload_busy(LINKER_UB_PCATCH); if (error != 0) { sx_xunlock(&kld_sx); return (error); } /* * It is possible that kldloaded module will attach a new ifnet, * so vnet context must be set when this ocurs. */ CURVNET_SET(TD_TO_VNET(td)); error = linker_load_module(kldname, modname, NULL, NULL, &lf); CURVNET_RESTORE(); if (error == 0) { lf->userrefs++; if (fileid != NULL) *fileid = lf->id; } linker_kldload_unbusy(LINKER_UB_LOCKED); return (error); } int sys_kldload(struct thread *td, struct kldload_args *uap) { char *pathname = NULL; int error, fileid; td->td_retval[0] = -1; pathname = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); error = copyinstr(uap->file, pathname, MAXPATHLEN, NULL); if (error == 0) { error = kern_kldload(td, pathname, &fileid); if (error == 0) td->td_retval[0] = fileid; } free(pathname, M_TEMP); return (error); } int kern_kldunload(struct thread *td, int fileid, int flags) { linker_file_t lf; int error = 0; if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); if ((error = priv_check(td, PRIV_KLD_UNLOAD)) != 0) return (error); error = linker_kldload_busy(LINKER_UB_PCATCH); if (error != 0) { sx_xunlock(&kld_sx); return (error); } CURVNET_SET(TD_TO_VNET(td)); lf = linker_find_file_by_id(fileid); if (lf) { KLD_DPF(FILE, ("kldunload: lf->userrefs=%d\n", lf->userrefs)); if (lf->userrefs == 0) { /* * XXX: maybe LINKER_UNLOAD_FORCE should override ? */ printf("kldunload: attempt to unload file that was" " loaded by the kernel\n"); error = EBUSY; } else { lf->userrefs--; error = linker_file_unload(lf, flags); if (error) lf->userrefs++; } } else error = ENOENT; CURVNET_RESTORE(); linker_kldload_unbusy(LINKER_UB_LOCKED); return (error); } int sys_kldunload(struct thread *td, struct kldunload_args *uap) { return (kern_kldunload(td, uap->fileid, LINKER_UNLOAD_NORMAL)); } int sys_kldunloadf(struct thread *td, struct kldunloadf_args *uap) { if (uap->flags != LINKER_UNLOAD_NORMAL && uap->flags != LINKER_UNLOAD_FORCE) return (EINVAL); return (kern_kldunload(td, uap->fileid, uap->flags)); } int sys_kldfind(struct thread *td, struct kldfind_args *uap) { char *pathname; const char *filename; linker_file_t lf; int error; #ifdef MAC error = mac_kld_check_stat(td->td_ucred); if (error) return (error); #endif td->td_retval[0] = -1; pathname = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); if ((error = copyinstr(uap->file, pathname, MAXPATHLEN, NULL)) != 0) goto out; filename = linker_basename(pathname); sx_xlock(&kld_sx); lf = linker_find_file_by_name(filename); if (lf) td->td_retval[0] = lf->id; else error = ENOENT; sx_xunlock(&kld_sx); out: free(pathname, M_TEMP); return (error); } int sys_kldnext(struct thread *td, struct kldnext_args *uap) { linker_file_t lf; int error = 0; #ifdef MAC error = mac_kld_check_stat(td->td_ucred); if (error) return (error); #endif sx_xlock(&kld_sx); if (uap->fileid == 0) lf = TAILQ_FIRST(&linker_files); else { lf = linker_find_file_by_id(uap->fileid); if (lf == NULL) { error = ENOENT; goto out; } lf = TAILQ_NEXT(lf, link); } /* Skip partially loaded files. */ while (lf != NULL && !(lf->flags & LINKER_FILE_LINKED)) lf = TAILQ_NEXT(lf, link); if (lf) td->td_retval[0] = lf->id; else td->td_retval[0] = 0; out: sx_xunlock(&kld_sx); return (error); } int sys_kldstat(struct thread *td, struct kldstat_args *uap) { struct kld_file_stat *stat; int error, version; /* * Check the version of the user's structure. */ if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld_file_stat_1) && version != sizeof(struct kld_file_stat)) return (EINVAL); stat = malloc(sizeof(*stat), M_TEMP, M_WAITOK | M_ZERO); error = kern_kldstat(td, uap->fileid, stat); if (error == 0) error = copyout(stat, uap->stat, version); free(stat, M_TEMP); return (error); } int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat) { linker_file_t lf; int namelen; #ifdef MAC int error; error = mac_kld_check_stat(td->td_ucred); if (error) return (error); #endif sx_xlock(&kld_sx); lf = linker_find_file_by_id(fileid); if (lf == NULL) { sx_xunlock(&kld_sx); return (ENOENT); } /* Version 1 fields: */ namelen = strlen(lf->filename) + 1; if (namelen > sizeof(stat->name)) namelen = sizeof(stat->name); bcopy(lf->filename, &stat->name[0], namelen); stat->refs = lf->refs; stat->id = lf->id; stat->address = lf->address; stat->size = lf->size; /* Version 2 fields: */ namelen = strlen(lf->pathname) + 1; if (namelen > sizeof(stat->pathname)) namelen = sizeof(stat->pathname); bcopy(lf->pathname, &stat->pathname[0], namelen); sx_xunlock(&kld_sx); td->td_retval[0] = 0; return (0); } #ifdef DDB DB_COMMAND_FLAGS(kldstat, db_kldstat, DB_CMD_MEMSAFE) { linker_file_t lf; #define POINTER_WIDTH ((int)(sizeof(void *) * 2 + 2)) db_printf("Id Refs Address%*c Size Name\n", POINTER_WIDTH - 7, ' '); #undef POINTER_WIDTH TAILQ_FOREACH(lf, &linker_files, link) { if (db_pager_quit) return; db_printf("%2d %4d %p %-8zx %s\n", lf->id, lf->refs, lf->address, lf->size, lf->filename); } } #endif /* DDB */ int sys_kldfirstmod(struct thread *td, struct kldfirstmod_args *uap) { linker_file_t lf; module_t mp; int error = 0; #ifdef MAC error = mac_kld_check_stat(td->td_ucred); if (error) return (error); #endif sx_xlock(&kld_sx); lf = linker_find_file_by_id(uap->fileid); if (lf) { MOD_SLOCK; mp = TAILQ_FIRST(&lf->modules); if (mp != NULL) td->td_retval[0] = module_getid(mp); else td->td_retval[0] = 0; MOD_SUNLOCK; } else error = ENOENT; sx_xunlock(&kld_sx); return (error); } int sys_kldsym(struct thread *td, struct kldsym_args *uap) { char *symstr = NULL; c_linker_sym_t sym; linker_symval_t symval; linker_file_t lf; struct kld_sym_lookup lookup; int error = 0; #ifdef MAC error = mac_kld_check_stat(td->td_ucred); if (error) return (error); #endif if ((error = copyin(uap->data, &lookup, sizeof(lookup))) != 0) return (error); if (lookup.version != sizeof(lookup) || uap->cmd != KLDSYM_LOOKUP) return (EINVAL); symstr = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); if ((error = copyinstr(lookup.symname, symstr, MAXPATHLEN, NULL)) != 0) goto out; sx_xlock(&kld_sx); if (uap->fileid != 0) { lf = linker_find_file_by_id(uap->fileid); if (lf == NULL) error = ENOENT; else if (LINKER_LOOKUP_SYMBOL(lf, symstr, &sym) == 0 && LINKER_SYMBOL_VALUES(lf, sym, &symval) == 0) { lookup.symvalue = (uintptr_t) symval.value; lookup.symsize = symval.size; error = copyout(&lookup, uap->data, sizeof(lookup)); } else error = ENOENT; } else { TAILQ_FOREACH(lf, &linker_files, link) { if (LINKER_LOOKUP_SYMBOL(lf, symstr, &sym) == 0 && LINKER_SYMBOL_VALUES(lf, sym, &symval) == 0) { lookup.symvalue = (uintptr_t)symval.value; lookup.symsize = symval.size; error = copyout(&lookup, uap->data, sizeof(lookup)); break; } } if (lf == NULL) error = ENOENT; } sx_xunlock(&kld_sx); out: free(symstr, M_TEMP); return (error); } /* * Preloaded module support */ static modlist_t modlist_lookup(const char *name, int ver) { modlist_t mod; TAILQ_FOREACH(mod, &found_modules, link) { if (strcmp(mod->name, name) == 0 && (ver == 0 || mod->version == ver)) return (mod); } return (NULL); } static modlist_t modlist_lookup2(const char *name, const struct mod_depend *verinfo) { modlist_t mod, bestmod; int ver; if (verinfo == NULL) return (modlist_lookup(name, 0)); bestmod = NULL; TAILQ_FOREACH(mod, &found_modules, link) { if (strcmp(mod->name, name) != 0) continue; ver = mod->version; if (ver == verinfo->md_ver_preferred) return (mod); if (ver >= verinfo->md_ver_minimum && ver <= verinfo->md_ver_maximum && (bestmod == NULL || ver > bestmod->version)) bestmod = mod; } return (bestmod); } static modlist_t modlist_newmodule(const char *modname, int version, linker_file_t container) { modlist_t mod; mod = malloc(sizeof(struct modlist), M_LINKER, M_NOWAIT | M_ZERO); if (mod == NULL) panic("no memory for module list"); mod->container = container; mod->name = modname; mod->version = version; TAILQ_INSERT_TAIL(&found_modules, mod, link); return (mod); } static void linker_addmodules(linker_file_t lf, struct mod_metadata **start, struct mod_metadata **stop, int preload) { struct mod_metadata *mp, **mdp; const char *modname; int ver; for (mdp = start; mdp < stop; mdp++) { mp = *mdp; if (mp->md_type != MDT_VERSION) continue; modname = mp->md_cval; ver = ((const struct mod_version *)mp->md_data)->mv_version; if (modlist_lookup(modname, ver) != NULL) { printf("module %s already present!\n", modname); /* XXX what can we do? this is a build error. :-( */ continue; } modlist_newmodule(modname, ver, lf); } } static void linker_preload(void *arg) { caddr_t modptr; const char *modname, *nmodname; char *modtype; linker_file_t lf, nlf; linker_class_t lc; int error; linker_file_list_t loaded_files; linker_file_list_t depended_files; struct mod_metadata *mp, *nmp; struct mod_metadata **start, **stop, **mdp, **nmdp; const struct mod_depend *verinfo; int nver; int resolves; modlist_t mod; struct sysinit **si_start, **si_stop; TAILQ_INIT(&loaded_files); TAILQ_INIT(&depended_files); TAILQ_INIT(&found_modules); error = 0; modptr = NULL; sx_xlock(&kld_sx); while ((modptr = preload_search_next_name(modptr)) != NULL) { modname = (char *)preload_search_info(modptr, MODINFO_NAME); modtype = (char *)preload_search_info(modptr, MODINFO_TYPE); if (modname == NULL) { printf("Preloaded module at %p does not have a" " name!\n", modptr); continue; } if (modtype == NULL) { printf("Preloaded module at %p does not have a type!\n", modptr); continue; } if (bootverbose) printf("Preloaded %s \"%s\" at %p.\n", modtype, modname, modptr); lf = NULL; TAILQ_FOREACH(lc, &classes, link) { error = LINKER_LINK_PRELOAD(lc, modname, &lf); if (!error) break; lf = NULL; } if (lf) TAILQ_INSERT_TAIL(&loaded_files, lf, loaded); } /* * First get a list of stuff in the kernel. */ if (linker_file_lookup_set(linker_kernel_file, MDT_SETNAME, &start, &stop, NULL) == 0) linker_addmodules(linker_kernel_file, start, stop, 1); /* * This is a once-off kinky bubble sort to resolve relocation * dependency requirements. */ restart: TAILQ_FOREACH(lf, &loaded_files, loaded) { error = linker_file_lookup_set(lf, MDT_SETNAME, &start, &stop, NULL); /* * First, look to see if we would successfully link with this * stuff. */ resolves = 1; /* unless we know otherwise */ if (!error) { for (mdp = start; mdp < stop; mdp++) { mp = *mdp; if (mp->md_type != MDT_DEPEND) continue; modname = mp->md_cval; verinfo = mp->md_data; for (nmdp = start; nmdp < stop; nmdp++) { nmp = *nmdp; if (nmp->md_type != MDT_VERSION) continue; nmodname = nmp->md_cval; if (strcmp(modname, nmodname) == 0) break; } if (nmdp < stop) /* it's a self reference */ continue; /* * ok, the module isn't here yet, we * are not finished */ if (modlist_lookup2(modname, verinfo) == NULL) resolves = 0; } } /* * OK, if we found our modules, we can link. So, "provide" * the modules inside and add it to the end of the link order * list. */ if (resolves) { if (!error) { for (mdp = start; mdp < stop; mdp++) { mp = *mdp; if (mp->md_type != MDT_VERSION) continue; modname = mp->md_cval; nver = ((const struct mod_version *) mp->md_data)->mv_version; if (modlist_lookup(modname, nver) != NULL) { printf("module %s already" " present!\n", modname); TAILQ_REMOVE(&loaded_files, lf, loaded); linker_file_unload(lf, LINKER_UNLOAD_FORCE); /* we changed tailq next ptr */ goto restart; } modlist_newmodule(modname, nver, lf); } } TAILQ_REMOVE(&loaded_files, lf, loaded); TAILQ_INSERT_TAIL(&depended_files, lf, loaded); /* * Since we provided modules, we need to restart the * sort so that the previous files that depend on us * have a chance. Also, we've busted the tailq next * pointer with the REMOVE. */ goto restart; } } /* * At this point, we check to see what could not be resolved.. */ while ((lf = TAILQ_FIRST(&loaded_files)) != NULL) { TAILQ_REMOVE(&loaded_files, lf, loaded); printf("KLD file %s is missing dependencies\n", lf->filename); linker_file_unload(lf, LINKER_UNLOAD_FORCE); } /* * We made it. Finish off the linking in the order we determined. */ TAILQ_FOREACH_SAFE(lf, &depended_files, loaded, nlf) { if (linker_kernel_file) { linker_kernel_file->refs++; error = linker_file_add_dependency(lf, linker_kernel_file); if (error) panic("cannot add dependency"); } error = linker_file_lookup_set(lf, MDT_SETNAME, &start, &stop, NULL); if (!error) { for (mdp = start; mdp < stop; mdp++) { mp = *mdp; if (mp->md_type != MDT_DEPEND) continue; modname = mp->md_cval; verinfo = mp->md_data; mod = modlist_lookup2(modname, verinfo); if (mod == NULL) { printf("KLD file %s - cannot find " "dependency \"%s\"\n", lf->filename, modname); goto fail; } /* Don't count self-dependencies */ if (lf == mod->container) continue; mod->container->refs++; error = linker_file_add_dependency(lf, mod->container); if (error) panic("cannot add dependency"); } } /* * Now do relocation etc using the symbol search paths * established by the dependencies */ error = LINKER_LINK_PRELOAD_FINISH(lf); if (error) { printf("KLD file %s - could not finalize loading\n", lf->filename); goto fail; } linker_file_register_modules(lf); if (!TAILQ_EMPTY(&lf->modules)) lf->flags |= LINKER_FILE_MODULES; if (linker_file_lookup_set(lf, "sysinit_set", &si_start, &si_stop, NULL) == 0) sysinit_add(si_start, si_stop); linker_file_register_sysctls(lf, true); lf->flags |= LINKER_FILE_LINKED; continue; fail: TAILQ_REMOVE(&depended_files, lf, loaded); linker_file_unload(lf, LINKER_UNLOAD_FORCE); } sx_xunlock(&kld_sx); /* woohoo! we made it! */ } SYSINIT(preload, SI_SUB_KLD, SI_ORDER_MIDDLE, linker_preload, NULL); /* * Handle preload files that failed to load any modules. */ static void linker_preload_finish(void *arg) { linker_file_t lf, nlf; sx_xlock(&kld_sx); TAILQ_FOREACH_SAFE(lf, &linker_files, link, nlf) { /* * If all of the modules in this file failed to load, unload * the file and return an error of ENOEXEC. (Parity with * linker_load_file.) */ if ((lf->flags & LINKER_FILE_MODULES) != 0 && TAILQ_EMPTY(&lf->modules)) { linker_file_unload(lf, LINKER_UNLOAD_FORCE); continue; } lf->flags &= ~LINKER_FILE_MODULES; lf->userrefs++; /* so we can (try to) kldunload it */ } sx_xunlock(&kld_sx); } /* * Attempt to run after all DECLARE_MODULE SYSINITs. Unfortunately they can be * scheduled at any subsystem and order, so run this as late as possible. init * becomes runnable in SI_SUB_KTHREAD_INIT, so go slightly before that. */ SYSINIT(preload_finish, SI_SUB_KTHREAD_INIT - 100, SI_ORDER_MIDDLE, linker_preload_finish, NULL); /* * Search for a not-loaded module by name. * * Modules may be found in the following locations: * * - preloaded (result is just the module name) - on disk (result is full path * to module) * * If the module name is qualified in any way (contains path, etc.) the we * simply return a copy of it. * * The search path can be manipulated via sysctl. Note that we use the ';' * character as a separator to be consistent with the bootloader. */ static char linker_hintfile[] = "linker.hints"; static char linker_path[MAXPATHLEN] = "/boot/kernel;/boot/modules"; SYSCTL_STRING(_kern, OID_AUTO, module_path, CTLFLAG_RWTUN, linker_path, sizeof(linker_path), "module load search path"); TUNABLE_STR("module_path", linker_path, sizeof(linker_path)); static const char * const linker_ext_list[] = { "", ".ko", NULL }; /* * Check if file actually exists either with or without extension listed in * the linker_ext_list. (probably should be generic for the rest of the * kernel) */ static char * linker_lookup_file(const char *path, int pathlen, const char *name, int namelen, struct vattr *vap) { struct nameidata nd; struct thread *td = curthread; /* XXX */ const char * const *cpp, *sep; char *result; int error, len, extlen, reclen, flags; __enum_uint8(vtype) type; extlen = 0; for (cpp = linker_ext_list; *cpp; cpp++) { len = strlen(*cpp); if (len > extlen) extlen = len; } extlen++; /* trailing '\0' */ sep = (path[pathlen - 1] != '/') ? "/" : ""; reclen = pathlen + strlen(sep) + namelen + extlen + 1; result = malloc(reclen, M_LINKER, M_WAITOK); for (cpp = linker_ext_list; *cpp; cpp++) { snprintf(result, reclen, "%.*s%s%.*s%s", pathlen, path, sep, namelen, name, *cpp); /* * Attempt to open the file, and return the path if * we succeed and it's a regular file. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, result); flags = FREAD; error = vn_open(&nd, &flags, 0, NULL); if (error == 0) { NDFREE_PNBUF(&nd); type = nd.ni_vp->v_type; if (vap) VOP_GETATTR(nd.ni_vp, vap, td->td_ucred); VOP_UNLOCK(nd.ni_vp); vn_close(nd.ni_vp, FREAD, td->td_ucred, td); if (type == VREG) return (result); } } free(result, M_LINKER); return (NULL); } #define INT_ALIGN(base, ptr) ptr = \ (base) + roundup2((ptr) - (base), sizeof(int)) /* * Lookup KLD which contains requested module in the "linker.hints" file. If * version specification is available, then try to find the best KLD. * Otherwise just find the latest one. */ static char * linker_hints_lookup(const char *path, int pathlen, const char *modname, int modnamelen, const struct mod_depend *verinfo) { struct thread *td = curthread; /* XXX */ struct ucred *cred = td ? td->td_ucred : NULL; struct nameidata nd; struct vattr vattr, mattr; const char *best, *sep; u_char *hints = NULL; u_char *cp, *recptr, *bufend, *result, *pathbuf; int error, ival, bestver, *intp, found, flags, clen, blen; ssize_t reclen; result = NULL; bestver = found = 0; sep = (path[pathlen - 1] != '/') ? "/" : ""; reclen = imax(modnamelen, strlen(linker_hintfile)) + pathlen + strlen(sep) + 1; pathbuf = malloc(reclen, M_LINKER, M_WAITOK); snprintf(pathbuf, reclen, "%.*s%s%s", pathlen, path, sep, linker_hintfile); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf); flags = FREAD; error = vn_open(&nd, &flags, 0, NULL); if (error) goto bad; NDFREE_PNBUF(&nd); if (nd.ni_vp->v_type != VREG) goto bad; best = cp = NULL; error = VOP_GETATTR(nd.ni_vp, &vattr, cred); if (error) goto bad; /* * XXX: we need to limit this number to some reasonable value */ if (vattr.va_size > LINKER_HINTS_MAX) { printf("linker.hints file too large %ld\n", (long)vattr.va_size); goto bad; } hints = malloc(vattr.va_size, M_TEMP, M_WAITOK); error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)hints, vattr.va_size, 0, UIO_SYSSPACE, IO_NODELOCKED, cred, NOCRED, &reclen, td); if (error) goto bad; VOP_UNLOCK(nd.ni_vp); vn_close(nd.ni_vp, FREAD, cred, td); nd.ni_vp = NULL; if (reclen != 0) { printf("can't read %zd\n", reclen); goto bad; } intp = (int *)hints; ival = *intp++; if (ival != LINKER_HINTS_VERSION) { printf("linker.hints file version mismatch %d\n", ival); goto bad; } bufend = hints + vattr.va_size; recptr = (u_char *)intp; clen = blen = 0; while (recptr < bufend && !found) { intp = (int *)recptr; reclen = *intp++; ival = *intp++; cp = (char *)intp; switch (ival) { case MDT_VERSION: clen = *cp++; if (clen != modnamelen || bcmp(cp, modname, clen) != 0) break; cp += clen; INT_ALIGN(hints, cp); ival = *(int *)cp; cp += sizeof(int); clen = *cp++; if (verinfo == NULL || ival == verinfo->md_ver_preferred) { found = 1; break; } if (ival >= verinfo->md_ver_minimum && ival <= verinfo->md_ver_maximum && ival > bestver) { bestver = ival; best = cp; blen = clen; } break; default: break; } recptr += reclen + sizeof(int); } /* * Finally check if KLD is in the place */ if (found) result = linker_lookup_file(path, pathlen, cp, clen, &mattr); else if (best) result = linker_lookup_file(path, pathlen, best, blen, &mattr); /* * KLD is newer than hints file. What we should do now? */ if (result && timespeccmp(&mattr.va_mtime, &vattr.va_mtime, >)) printf("warning: KLD '%s' is newer than the linker.hints" " file\n", result); bad: free(pathbuf, M_LINKER); if (hints) free(hints, M_TEMP); if (nd.ni_vp != NULL) { VOP_UNLOCK(nd.ni_vp); vn_close(nd.ni_vp, FREAD, cred, td); } /* * If nothing found or hints is absent - fallback to the old * way by using "kldname[.ko]" as module name. */ if (!found && !bestver && result == NULL) result = linker_lookup_file(path, pathlen, modname, modnamelen, NULL); return (result); } /* * Lookup KLD which contains requested module in the all directories. */ static char * linker_search_module(const char *modname, int modnamelen, const struct mod_depend *verinfo) { char *cp, *ep, *result; /* * traverse the linker path */ for (cp = linker_path; *cp; cp = ep + 1) { /* find the end of this component */ for (ep = cp; (*ep != 0) && (*ep != ';'); ep++); result = linker_hints_lookup(cp, ep - cp, modname, modnamelen, verinfo); if (result != NULL) return (result); if (*ep == 0) break; } return (NULL); } /* * Search for module in all directories listed in the linker_path. */ static char * linker_search_kld(const char *name) { char *cp, *ep, *result; int len; /* qualified at all? */ if (strchr(name, '/')) return (strdup(name, M_LINKER)); /* traverse the linker path */ len = strlen(name); for (ep = linker_path; *ep; ep++) { cp = ep; /* find the end of this component */ for (; *ep != 0 && *ep != ';'; ep++); result = linker_lookup_file(cp, ep - cp, name, len, NULL); if (result != NULL) return (result); } return (NULL); } static const char * linker_basename(const char *path) { const char *filename; filename = strrchr(path, '/'); if (filename == NULL) return path; if (filename[1]) filename++; return (filename); } #ifdef HWPMC_HOOKS /* * Inform hwpmc about the set of kernel modules currently loaded. */ void * linker_hwpmc_list_objects(void) { linker_file_t lf; struct pmckern_map_in *kobase; int i, nmappings; nmappings = 0; sx_slock(&kld_sx); TAILQ_FOREACH(lf, &linker_files, link) nmappings++; /* Allocate nmappings + 1 entries. */ kobase = malloc((nmappings + 1) * sizeof(struct pmckern_map_in), M_LINKER, M_WAITOK | M_ZERO); i = 0; TAILQ_FOREACH(lf, &linker_files, link) { /* Save the info for this linker file. */ kobase[i].pm_file = lf->pathname; kobase[i].pm_address = (uintptr_t)lf->address; i++; } sx_sunlock(&kld_sx); KASSERT(i > 0, ("linker_hpwmc_list_objects: no kernel objects?")); /* The last entry of the malloced area comprises of all zeros. */ KASSERT(kobase[i].pm_file == NULL, ("linker_hwpmc_list_objects: last object not NULL")); return ((void *)kobase); } #endif /* check if root file system is not mounted */ static bool linker_root_mounted(void) { struct pwd *pwd; bool ret; if (rootvnode == NULL) return (false); pwd = pwd_hold(curthread); ret = pwd->pwd_rdir != NULL; pwd_drop(pwd); return (ret); } /* * Find a file which contains given module and load it, if "parent" is not * NULL, register a reference to it. */ static int linker_load_module(const char *kldname, const char *modname, struct linker_file *parent, const struct mod_depend *verinfo, struct linker_file **lfpp) { linker_file_t lfdep; const char *filename; char *pathname; int error; sx_assert(&kld_sx, SA_XLOCKED); if (modname == NULL) { /* * We have to load KLD */ KASSERT(verinfo == NULL, ("linker_load_module: verinfo" " is not NULL")); if (!linker_root_mounted()) return (ENXIO); pathname = linker_search_kld(kldname); } else { if (modlist_lookup2(modname, verinfo) != NULL) return (EEXIST); if (!linker_root_mounted()) return (ENXIO); if (kldname != NULL) pathname = strdup(kldname, M_LINKER); else /* * Need to find a KLD with required module */ pathname = linker_search_module(modname, strlen(modname), verinfo); } if (pathname == NULL) return (ENOENT); /* * Can't load more than one file with the same basename XXX: * Actually it should be possible to have multiple KLDs with * the same basename but different path because they can * provide different versions of the same modules. */ filename = linker_basename(pathname); if (linker_find_file_by_name(filename)) error = EEXIST; else do { error = linker_load_file(pathname, &lfdep); if (error) break; if (modname && verinfo && modlist_lookup2(modname, verinfo) == NULL) { linker_file_unload(lfdep, LINKER_UNLOAD_FORCE); error = ENOENT; break; } if (parent) { error = linker_file_add_dependency(parent, lfdep); if (error) break; } if (lfpp) *lfpp = lfdep; } while (0); free(pathname, M_LINKER); return (error); } /* * This routine is responsible for finding dependencies of userland initiated * kldload(2)'s of files. */ int linker_load_dependencies(linker_file_t lf) { linker_file_t lfdep; struct mod_metadata **start, **stop, **mdp, **nmdp; struct mod_metadata *mp, *nmp; const struct mod_depend *verinfo; modlist_t mod; const char *modname, *nmodname; int ver, error = 0; /* * All files are dependent on /kernel. */ sx_assert(&kld_sx, SA_XLOCKED); if (linker_kernel_file) { linker_kernel_file->refs++; error = linker_file_add_dependency(lf, linker_kernel_file); if (error) return (error); } if (linker_file_lookup_set(lf, MDT_SETNAME, &start, &stop, NULL) != 0) return (0); for (mdp = start; mdp < stop; mdp++) { mp = *mdp; if (mp->md_type != MDT_VERSION) continue; modname = mp->md_cval; ver = ((const struct mod_version *)mp->md_data)->mv_version; mod = modlist_lookup(modname, ver); if (mod != NULL) { printf("interface %s.%d already present in the KLD" " '%s'!\n", modname, ver, mod->container->filename); return (EEXIST); } } for (mdp = start; mdp < stop; mdp++) { mp = *mdp; if (mp->md_type != MDT_DEPEND) continue; modname = mp->md_cval; verinfo = mp->md_data; nmodname = NULL; for (nmdp = start; nmdp < stop; nmdp++) { nmp = *nmdp; if (nmp->md_type != MDT_VERSION) continue; nmodname = nmp->md_cval; if (strcmp(modname, nmodname) == 0) break; } if (nmdp < stop)/* early exit, it's a self reference */ continue; mod = modlist_lookup2(modname, verinfo); if (mod) { /* woohoo, it's loaded already */ lfdep = mod->container; lfdep->refs++; error = linker_file_add_dependency(lf, lfdep); if (error) break; continue; } error = linker_load_module(NULL, modname, lf, verinfo, NULL); if (error) { printf("KLD %s: depends on %s - not available or" " version mismatch\n", lf->filename, modname); break; } } if (error) return (error); linker_addmodules(lf, start, stop, 0); return (error); } static int sysctl_kern_function_list_iterate(const char *name, void *opaque) { struct sysctl_req *req; req = opaque; return (SYSCTL_OUT(req, name, strlen(name) + 1)); } /* * Export a nul-separated, double-nul-terminated list of all function names * in the kernel. */ static int sysctl_kern_function_list(SYSCTL_HANDLER_ARGS) { linker_file_t lf; int error; #ifdef MAC error = mac_kld_check_stat(req->td->td_ucred); if (error) return (error); #endif error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sx_xlock(&kld_sx); TAILQ_FOREACH(lf, &linker_files, link) { error = LINKER_EACH_FUNCTION_NAME(lf, sysctl_kern_function_list_iterate, req); if (error) { sx_xunlock(&kld_sx); return (error); } } sx_xunlock(&kld_sx); return (SYSCTL_OUT(req, "", 1)); } SYSCTL_PROC(_kern, OID_AUTO, function_list, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_function_list, "", "kernel function list"); diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 814579a80f5a..a1d502d58bff 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,3019 +1,3016 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD * project, to make these variables more userfriendly. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 */ #include #include "opt_capsicum.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_sysctl.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef DDB #include #include #endif #include #include #include #include static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); RB_GENERATE(sysctl_oid_list, sysctl_oid, oid_link, cmp_sysctl_oid); /* * The sysctllock protects the MIB tree. It also protects sysctl * contexts used with dynamic sysctls. The sysctl_register_oid() and * sysctl_unregister_oid() routines require the sysctllock to already * be held, so the sysctl_wlock() and sysctl_wunlock() routines are * provided for the few places in the kernel which need to use that * API rather than using the dynamic API. Use of the dynamic API is * strongly encouraged for most code. * * The sysctlmemlock is used to limit the amount of user memory wired for * sysctl requests. This is implemented by serializing any userland * sysctl requests larger than a single page via an exclusive lock. * * The sysctlstringlock is used to protect concurrent access to writable * string nodes in sysctl_handle_string(). */ static struct rmlock sysctllock; static struct sx __exclusive_cache_line sysctlmemlock; static struct sx sysctlstringlock; #define SYSCTL_WLOCK() rm_wlock(&sysctllock) #define SYSCTL_WUNLOCK() rm_wunlock(&sysctllock) #define SYSCTL_RLOCK(tracker) rm_rlock(&sysctllock, (tracker)) #define SYSCTL_RUNLOCK(tracker) rm_runlock(&sysctllock, (tracker)) #define SYSCTL_WLOCKED() rm_wowned(&sysctllock) #define SYSCTL_ASSERT_LOCKED() rm_assert(&sysctllock, RA_LOCKED) #define SYSCTL_ASSERT_WLOCKED() rm_assert(&sysctllock, RA_WLOCKED) #define SYSCTL_ASSERT_RLOCKED() rm_assert(&sysctllock, RA_RLOCKED) #define SYSCTL_INIT() rm_init_flags(&sysctllock, "sysctl lock", \ RM_SLEEPABLE) #define SYSCTL_SLEEP(ch, wmesg, timo) \ rm_sleep(ch, &sysctllock, 0, wmesg, timo) static int sysctl_root(SYSCTL_HANDLER_ARGS); /* Root list */ struct sysctl_oid_list sysctl__children = RB_INITIALIZER(&sysctl__children); static char* sysctl_escape_name(const char*); static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse); static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t); static int sysctl_new_kernel(struct sysctl_req *, void *, size_t); static struct sysctl_oid * sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) { struct sysctl_oid *oidp; SYSCTL_ASSERT_LOCKED(); SYSCTL_FOREACH(oidp, list) { if (strcmp(oidp->oid_name, name) == 0) { return (oidp); } } return (NULL); } /* * Initialization of the MIB tree. * * Order by number in each list. */ void sysctl_wlock(void) { SYSCTL_WLOCK(); } void sysctl_wunlock(void) { SYSCTL_WUNLOCK(); } static int sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intmax_t arg2, struct sysctl_req *req, struct rm_priotracker *tracker) { int error; if (oid->oid_kind & CTLFLAG_DYN) atomic_add_int(&oid->oid_running, 1); if (tracker != NULL) SYSCTL_RUNLOCK(tracker); else SYSCTL_WUNLOCK(); /* * Treat set CTLFLAG_NEEDGIANT and unset CTLFLAG_MPSAFE flags the same, * untill we're ready to remove all traces of Giant from sysctl(9). */ if ((oid->oid_kind & CTLFLAG_NEEDGIANT) || (!(oid->oid_kind & CTLFLAG_MPSAFE))) mtx_lock(&Giant); error = oid->oid_handler(oid, arg1, arg2, req); if ((oid->oid_kind & CTLFLAG_NEEDGIANT) || (!(oid->oid_kind & CTLFLAG_MPSAFE))) mtx_unlock(&Giant); KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); if (tracker != NULL) SYSCTL_RLOCK(tracker); else SYSCTL_WLOCK(); if (oid->oid_kind & CTLFLAG_DYN) { if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 && (oid->oid_kind & CTLFLAG_DYING) != 0) wakeup(&oid->oid_running); } return (error); } static void sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) { struct sysctl_req req; struct sysctl_oid *curr; char *penv = NULL; char path[96]; ssize_t rem = sizeof(path); ssize_t len; uint8_t data[512] __aligned(sizeof(uint64_t)); int size; int error; path[--rem] = 0; for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) { len = strlen(curr->oid_name); rem -= len; if (curr != oidp) rem -= 1; if (rem < 0) { printf("OID path exceeds %d bytes\n", (int)sizeof(path)); return; } memcpy(path + rem, curr->oid_name, len); if (curr != oidp) path[rem + len] = '.'; } memset(&req, 0, sizeof(req)); req.td = curthread; req.oldfunc = sysctl_old_kernel; req.newfunc = sysctl_new_kernel; req.lock = REQ_UNWIRED; switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_INT: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_UINT: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_LONG: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(long), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_ULONG: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(long), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S8: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int8_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S16: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int16_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S32: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int32_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_S64: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(int64_t), GETENV_SIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U8: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint8_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U16: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint16_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U32: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint32_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_U64: if (getenv_array(path + rem, data, sizeof(data), &size, sizeof(uint64_t), GETENV_UNSIGNED) == 0) return; req.newlen = size; req.newptr = data; break; case CTLTYPE_STRING: penv = kern_getenv(path + rem); if (penv == NULL) return; req.newlen = strlen(penv); req.newptr = penv; break; default: return; } error = sysctl_root_handler_locked(oidp, oidp->oid_arg1, oidp->oid_arg2, &req, NULL); if (error != 0) printf("Setting sysctl %s failed: %d\n", path + rem, error); if (penv != NULL) freeenv(penv); } /* * Locate the path to a given oid. Returns the length of the resulting path, * or -1 if the oid was not found. nodes must have room for CTL_MAXNAME * elements. */ static int sysctl_search_oid(struct sysctl_oid **nodes, struct sysctl_oid *needle) { int indx; SYSCTL_ASSERT_LOCKED(); indx = 0; /* * Do a depth-first search of the oid tree, looking for 'needle'. Start * with the first child of the root. */ nodes[indx] = RB_MIN(sysctl_oid_list, &sysctl__children); for (;;) { if (nodes[indx] == needle) return (indx + 1); if (nodes[indx] == NULL) { /* Node has no more siblings, so back up to parent. */ if (indx-- == 0) { /* Retreat to root, so give up. */ break; } } else if ((nodes[indx]->oid_kind & CTLTYPE) == CTLTYPE_NODE) { /* Node has children. */ if (++indx == CTL_MAXNAME) { /* Max search depth reached, so give up. */ break; } /* Start with the first child. */ nodes[indx] = RB_MIN(sysctl_oid_list, &nodes[indx - 1]->oid_children); continue; } /* Consider next sibling. */ nodes[indx] = RB_NEXT(sysctl_oid_list, NULL, nodes[indx]); } return (-1); } static void sysctl_warn_reuse(const char *func, struct sysctl_oid *leaf) { struct sysctl_oid *nodes[CTL_MAXNAME]; char buf[128]; struct sbuf sb; int rc, i; (void)sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); sbuf_printf(&sb, "%s: can't re-use a leaf (", __func__); rc = sysctl_search_oid(nodes, leaf); if (rc > 0) { for (i = 0; i < rc; i++) sbuf_printf(&sb, "%s%.*s", nodes[i]->oid_name, i != (rc - 1), "."); } else { sbuf_printf(&sb, "%s", leaf->oid_name); } sbuf_printf(&sb, ")!\n"); (void)sbuf_finish(&sb); } #ifdef SYSCTL_DEBUG static int sysctl_reuse_test(SYSCTL_HANDLER_ARGS) { struct rm_priotracker tracker; SYSCTL_RLOCK(&tracker); sysctl_warn_reuse(__func__, oidp); SYSCTL_RUNLOCK(&tracker); return (0); } SYSCTL_PROC(_sysctl, OID_AUTO, reuse_test, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_reuse_test, "-", ""); #endif void sysctl_register_oid(struct sysctl_oid *oidp) { struct sysctl_oid_list *parent = oidp->oid_parent; struct sysctl_oid *p, key; int oid_number; int timeout = 2; /* * First check if another oid with the same name already * exists in the parent's list. */ SYSCTL_ASSERT_WLOCKED(); p = sysctl_find_oidname(oidp->oid_name, parent); if (p != NULL) { if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { p->oid_refcnt++; return; } else { sysctl_warn_reuse(__func__, p); return; } } /* get current OID number */ oid_number = oidp->oid_number; #if (OID_AUTO >= 0) #error "OID_AUTO is expected to be a negative value" #endif /* * Any negative OID number qualifies as OID_AUTO. Valid OID * numbers should always be positive. * * NOTE: DO NOT change the starting value here, change it in * , and make sure it is at least 256 to * accommodate e.g. net.inet.raw as a static sysctl node. */ if (oid_number < 0) { static int newoid; /* * By decrementing the next OID number we spend less * time inserting the OIDs into a sorted list. */ if (--newoid < CTL_AUTO_START) newoid = 0x7fffffff; oid_number = newoid; } /* * Insert the OID into the parent's list sorted by OID number. */ key.oid_number = oid_number; p = RB_NFIND(sysctl_oid_list, parent, &key); while (p != NULL && oid_number == p->oid_number) { /* get the next valid OID number */ if (oid_number < CTL_AUTO_START || oid_number == 0x7fffffff) { /* wraparound - restart */ oid_number = CTL_AUTO_START; /* don't loop forever */ if (!timeout--) panic("sysctl: Out of OID numbers\n"); key.oid_number = oid_number; p = RB_NFIND(sysctl_oid_list, parent, &key); continue; } p = RB_NEXT(sysctl_oid_list, NULL, p); oid_number++; } /* check for non-auto OID number collision */ if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START && oid_number >= CTL_AUTO_START) { printf("sysctl: OID number(%d) is already in use for '%s'\n", oidp->oid_number, oidp->oid_name); } /* update the OID number, if any */ oidp->oid_number = oid_number; RB_INSERT(sysctl_oid_list, parent, oidp); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && -#ifdef VIMAGE - (oidp->oid_kind & CTLFLAG_VNET) == 0 && -#endif (oidp->oid_kind & CTLFLAG_TUN) != 0 && (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { /* only fetch value once */ oidp->oid_kind |= CTLFLAG_NOFETCH; /* try to fetch value from kernel environment */ sysctl_load_tunable_by_oid_locked(oidp); } } void sysctl_register_disabled_oid(struct sysctl_oid *oidp) { /* * Mark the leaf as dormant if it's not to be immediately enabled. * We do not disable nodes as they can be shared between modules * and it is always safe to access a node. */ KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0, ("internal flag is set in oid_kind")); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) oidp->oid_kind |= CTLFLAG_DORMANT; sysctl_register_oid(oidp); } void sysctl_enable_oid(struct sysctl_oid *oidp) { SYSCTL_ASSERT_WLOCKED(); if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0, ("sysctl node is marked as dormant")); return; } KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) != 0, ("enabling already enabled sysctl oid")); oidp->oid_kind &= ~CTLFLAG_DORMANT; } void sysctl_unregister_oid(struct sysctl_oid *oidp) { int error; SYSCTL_ASSERT_WLOCKED(); if (oidp->oid_number == OID_AUTO) { error = EINVAL; } else { error = ENOENT; if (RB_REMOVE(sysctl_oid_list, oidp->oid_parent, oidp)) error = 0; } /* * This can happen when a module fails to register and is * being unloaded afterwards. It should not be a panic() * for normal use. */ if (error) { printf("%s: failed(%d) to unregister sysctl(%s)\n", __func__, error, oidp->oid_name); } } /* Initialize a new context to keep track of dynamically added sysctls. */ int sysctl_ctx_init(struct sysctl_ctx_list *c) { if (c == NULL) { return (EINVAL); } /* * No locking here, the caller is responsible for not adding * new nodes to a context until after this function has * returned. */ TAILQ_INIT(c); return (0); } /* Free the context, and destroy all dynamic oids registered in this context */ int sysctl_ctx_free(struct sysctl_ctx_list *clist) { struct sysctl_ctx_entry *e, *e1; int error; error = 0; /* * First perform a "dry run" to check if it's ok to remove oids. * XXX FIXME * XXX This algorithm is a hack. But I don't know any * XXX better solution for now... */ SYSCTL_WLOCK(); TAILQ_FOREACH(e, clist, link) { error = sysctl_remove_oid_locked(e->entry, 0, 0); if (error) break; } /* * Restore deregistered entries, either from the end, * or from the place where error occurred. * e contains the entry that was not unregistered */ if (error) e1 = TAILQ_PREV(e, sysctl_ctx_list, link); else e1 = TAILQ_LAST(clist, sysctl_ctx_list); while (e1 != NULL) { sysctl_register_oid(e1->entry); e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); } if (error) { SYSCTL_WUNLOCK(); return(EBUSY); } /* Now really delete the entries */ e = TAILQ_FIRST(clist); while (e != NULL) { e1 = TAILQ_NEXT(e, link); error = sysctl_remove_oid_locked(e->entry, 1, 0); if (error) panic("sysctl_remove_oid: corrupt tree, entry: %s", e->entry->oid_name); free(e, M_SYSCTLOID); e = e1; } SYSCTL_WUNLOCK(); return (error); } /* Add an entry to the context */ struct sysctl_ctx_entry * sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; SYSCTL_ASSERT_WLOCKED(); if (clist == NULL || oidp == NULL) return(NULL); e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); e->entry = oidp; TAILQ_INSERT_HEAD(clist, e, link); return (e); } /* Find an entry in the context */ struct sysctl_ctx_entry * sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; SYSCTL_ASSERT_WLOCKED(); if (clist == NULL || oidp == NULL) return(NULL); TAILQ_FOREACH(e, clist, link) { if (e->entry == oidp) return(e); } return (e); } /* * Delete an entry from the context. * NOTE: this function doesn't free oidp! You have to remove it * with sysctl_remove_oid(). */ int sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; if (clist == NULL || oidp == NULL) return (EINVAL); SYSCTL_WLOCK(); e = sysctl_ctx_entry_find(clist, oidp); if (e != NULL) { TAILQ_REMOVE(clist, e, link); SYSCTL_WUNLOCK(); free(e, M_SYSCTLOID); return (0); } else { SYSCTL_WUNLOCK(); return (ENOENT); } } /* * Remove dynamically created sysctl trees. * oidp - top of the tree to be removed * del - if 0 - just deregister, otherwise free up entries as well * recurse - if != 0 traverse the subtree to be deleted */ int sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) { int error; SYSCTL_WLOCK(); error = sysctl_remove_oid_locked(oidp, del, recurse); SYSCTL_WUNLOCK(); return (error); } int sysctl_remove_name(struct sysctl_oid *parent, const char *name, int del, int recurse) { struct sysctl_oid *p; int error; error = ENOENT; SYSCTL_WLOCK(); p = sysctl_find_oidname(name, &parent->oid_children); if (p) error = sysctl_remove_oid_locked(p, del, recurse); SYSCTL_WUNLOCK(); return (error); } /* * Duplicate the provided string, escaping any illegal characters. The result * must be freed when no longer in use. * * The list of illegal characters is ".". */ static char* sysctl_escape_name(const char* orig) { int i, s = 0, d = 0, nillegals = 0; char *new; /* First count the number of illegal characters */ for (i = 0; orig[i] != '\0'; i++) { if (orig[i] == '.') nillegals++; } /* Allocate storage for new string */ new = malloc(i + 2 * nillegals + 1, M_SYSCTLOID, M_WAITOK); /* Copy the name, escaping characters as we go */ while (orig[s] != '\0') { if (orig[s] == '.') { /* %25 is the hexadecimal representation of '.' */ new[d++] = '%'; new[d++] = '2'; new[d++] = '5'; s++; } else { new[d++] = orig[s++]; } } /* Finally, nul-terminate */ new[d] = '\0'; return (new); } static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) { struct sysctl_oid *p, *tmp; int error; SYSCTL_ASSERT_WLOCKED(); if (oidp == NULL) return(EINVAL); if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { printf("Warning: can't remove non-dynamic nodes (%s)!\n", oidp->oid_name); return (EINVAL); } /* * WARNING: normal method to do this should be through * sysctl_ctx_free(). Use recursing as the last resort * method to purge your sysctl tree of leftovers... * However, if some other code still references these nodes, * it will panic. */ if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oidp->oid_refcnt == 1) { for(p = RB_MIN(sysctl_oid_list, &oidp->oid_children); p != NULL; p = tmp) { if (!recurse) { printf("Warning: failed attempt to " "remove oid %s with child %s\n", oidp->oid_name, p->oid_name); return (ENOTEMPTY); } tmp = RB_NEXT(sysctl_oid_list, &oidp->oid_children, p); error = sysctl_remove_oid_locked(p, del, recurse); if (error) return (error); } } } if (oidp->oid_refcnt > 1 ) { oidp->oid_refcnt--; } else { if (oidp->oid_refcnt == 0) { printf("Warning: bad oid_refcnt=%u (%s)!\n", oidp->oid_refcnt, oidp->oid_name); return (EINVAL); } sysctl_unregister_oid(oidp); if (del) { /* * Wait for all threads running the handler to drain. * This preserves the previous behavior when the * sysctl lock was held across a handler invocation, * and is necessary for module unload correctness. */ while (oidp->oid_running > 0) { oidp->oid_kind |= CTLFLAG_DYING; SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); } if (oidp->oid_descr) free(__DECONST(char *, oidp->oid_descr), M_SYSCTLOID); if (oidp->oid_label) free(__DECONST(char *, oidp->oid_label), M_SYSCTLOID); free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); free(oidp, M_SYSCTLOID); } } return (0); } /* * Create new sysctls at run time. * clist may point to a valid context initialized with sysctl_ctx_init(). */ struct sysctl_oid * sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int number, const char *name, int kind, void *arg1, intmax_t arg2, int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr, const char *label) { struct sysctl_oid *oidp; char *escaped; /* You have to hook up somewhere.. */ if (parent == NULL) return(NULL); escaped = sysctl_escape_name(name); /* Check if the node already exists, otherwise create it */ SYSCTL_WLOCK(); oidp = sysctl_find_oidname(escaped, parent); if (oidp != NULL) { free(escaped, M_SYSCTLOID); if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { oidp->oid_refcnt++; /* Update the context */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); SYSCTL_WUNLOCK(); return (oidp); } else { sysctl_warn_reuse(__func__, oidp); SYSCTL_WUNLOCK(); return (NULL); } } oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); oidp->oid_parent = parent; RB_INIT(&oidp->oid_children); oidp->oid_number = number; oidp->oid_refcnt = 1; oidp->oid_name = escaped; oidp->oid_handler = handler; oidp->oid_kind = CTLFLAG_DYN | kind; oidp->oid_arg1 = arg1; oidp->oid_arg2 = arg2; oidp->oid_fmt = fmt; if (descr != NULL) oidp->oid_descr = strdup(descr, M_SYSCTLOID); if (label != NULL) oidp->oid_label = strdup(label, M_SYSCTLOID); /* Update the context, if used */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); /* Register this oid */ sysctl_register_oid(oidp); SYSCTL_WUNLOCK(); return (oidp); } /* * Rename an existing oid. */ void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) { char *newname; char *oldname; newname = strdup(name, M_SYSCTLOID); SYSCTL_WLOCK(); oldname = __DECONST(char *, oidp->oid_name); oidp->oid_name = newname; SYSCTL_WUNLOCK(); free(oldname, M_SYSCTLOID); } /* * Reparent an existing oid. */ int sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent) { struct sysctl_oid *oidp; SYSCTL_WLOCK(); if (oid->oid_parent == parent) { SYSCTL_WUNLOCK(); return (0); } oidp = sysctl_find_oidname(oid->oid_name, parent); if (oidp != NULL) { SYSCTL_WUNLOCK(); return (EEXIST); } sysctl_unregister_oid(oid); oid->oid_parent = parent; oid->oid_number = OID_AUTO; sysctl_register_oid(oid); SYSCTL_WUNLOCK(); return (0); } /* * Register the kernel's oids on startup. */ SET_DECLARE(sysctl_set, struct sysctl_oid); static void sysctl_register_all(void *arg) { struct sysctl_oid **oidp; sx_init(&sysctlmemlock, "sysctl mem"); sx_init(&sysctlstringlock, "sysctl string handler"); SYSCTL_INIT(); SYSCTL_WLOCK(); SET_FOREACH(oidp, sysctl_set) sysctl_register_oid(*oidp); SYSCTL_WUNLOCK(); } SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL); /* * "Staff-functions" * * These functions implement a presently undocumented interface * used by the sysctl program to walk the tree, and get the type * so it can print the value. * This interface is under work and consideration, and should probably * be killed with a big axe by the first person who can find the time. * (be aware though, that the proper interface isn't as obvious as it * may seem, there are various conflicting requirements. * * {CTL_SYSCTL, CTL_SYSCTL_DEBUG} printf the entire MIB-tree. * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...} return the name of the "..." * OID. * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...} return the next OID, honoring * CTLFLAG_SKIP. * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID} return the OID of the name in * "new" * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...} return the kind & format info * for the "..." OID. * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...} return the description of the * "..." OID. * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...} return the aggregation label of * the "..." OID. * {CTL_SYSCTL, CTL_SYSCTL_NEXTNOSKIP, ...} return the next OID, ignoring * CTLFLAG_SKIP. */ #ifdef SYSCTL_DEBUG static void sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) { int k; struct sysctl_oid *oidp; SYSCTL_ASSERT_LOCKED(); SYSCTL_FOREACH(oidp, l) { for (k=0; koid_number, oidp->oid_name); printf("%c%c", oidp->oid_kind & CTLFLAG_RD ? 'R':' ', oidp->oid_kind & CTLFLAG_WR ? 'W':' '); if (oidp->oid_handler) printf(" *Handler"); switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_NODE: printf(" Node\n"); if (!oidp->oid_handler) { sysctl_sysctl_debug_dump_node( SYSCTL_CHILDREN(oidp), i + 2); } break; case CTLTYPE_INT: printf(" Int\n"); break; case CTLTYPE_UINT: printf(" u_int\n"); break; case CTLTYPE_LONG: printf(" Long\n"); break; case CTLTYPE_ULONG: printf(" u_long\n"); break; case CTLTYPE_STRING: printf(" String\n"); break; case CTLTYPE_S8: printf(" int8_t\n"); break; case CTLTYPE_S16: printf(" int16_t\n"); break; case CTLTYPE_S32: printf(" int32_t\n"); break; case CTLTYPE_S64: printf(" int64_t\n"); break; case CTLTYPE_U8: printf(" uint8_t\n"); break; case CTLTYPE_U16: printf(" uint16_t\n"); break; case CTLTYPE_U32: printf(" uint32_t\n"); break; case CTLTYPE_U64: printf(" uint64_t\n"); break; case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; default: printf("\n"); } } } static int sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) { struct rm_priotracker tracker; int error; error = priv_check(req->td, PRIV_SYSCTL_DEBUG); if (error) return (error); SYSCTL_RLOCK(&tracker); sysctl_sysctl_debug_dump_node(&sysctl__children, 0); SYSCTL_RUNLOCK(&tracker); return (ENOENT); } SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", ""); #endif static int sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; int error; struct sysctl_oid *oid, key; struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; struct rm_priotracker tracker; char buf[10]; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); SYSCTL_RLOCK(&tracker); while (namelen) { if (!lsp) { snprintf(buf,sizeof(buf),"%d",*name); if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) error = SYSCTL_OUT(req, buf, strlen(buf)); if (error) goto out; namelen--; name++; continue; } lsp2 = NULL; key.oid_number = *name; oid = RB_FIND(sysctl_oid_list, lsp, &key); if (oid) { if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) error = SYSCTL_OUT(req, oid->oid_name, strlen(oid->oid_name)); if (error) goto out; namelen--; name++; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE && !oid->oid_handler) lsp2 = SYSCTL_CHILDREN(oid); } lsp = lsp2; } error = SYSCTL_OUT(req, "", 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } /* * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in * capability mode. */ static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, ""); enum sysctl_iter_action { ITER_SIBLINGS, /* Not matched, continue iterating siblings */ ITER_CHILDREN, /* Node has children we need to iterate over them */ ITER_FOUND, /* Matching node was found */ }; /* * Tries to find the next node for @name and @namelen. * * Returns next action to take. */ static enum sysctl_iter_action sysctl_sysctl_next_node(struct sysctl_oid *oidp, int *name, unsigned int namelen, bool honor_skip) { if ((oidp->oid_kind & CTLFLAG_DORMANT) != 0) return (ITER_SIBLINGS); if (honor_skip && (oidp->oid_kind & CTLFLAG_SKIP) != 0) return (ITER_SIBLINGS); if (namelen == 0) { /* * We have reached a node with a full name match and are * looking for the next oid in its children. * * For CTL_SYSCTL_NEXTNOSKIP we are done. * * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it * has a handler) and move on to the children. */ if (!honor_skip) return (ITER_FOUND); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return (ITER_FOUND); /* If node does not have an iterator, treat it as leaf */ if (oidp->oid_handler) return (ITER_FOUND); /* Report oid as a node to iterate */ return (ITER_CHILDREN); } /* * No match yet. Continue seeking the given name. * * We are iterating in order by oid_number, so skip oids lower * than the one we are looking for. * * When the current oid_number is higher than the one we seek, * that means we have reached the next oid in the sequence and * should return it. * * If the oid_number matches the name at this level then we * have to find a node to continue searching at the next level. */ if (oidp->oid_number < *name) return (ITER_SIBLINGS); if (oidp->oid_number > *name) { /* * We have reached the next oid. * * For CTL_SYSCTL_NEXTNOSKIP we are done. * * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it * has a handler) and move on to the children. */ if (!honor_skip) return (ITER_FOUND); if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return (ITER_FOUND); /* If node does not have an iterator, treat it as leaf */ if (oidp->oid_handler) return (ITER_FOUND); return (ITER_CHILDREN); } /* match at a current level */ if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) return (ITER_SIBLINGS); if (oidp->oid_handler) return (ITER_SIBLINGS); return (ITER_CHILDREN); } /* * Recursively walk the sysctl subtree at lsp until we find the given name. * Returns true and fills in next oid data in @next and @len if oid is found. */ static bool sysctl_sysctl_next_action(struct sysctl_oid_list *lsp, int *name, u_int namelen, int *next, int *len, int level, bool honor_skip) { struct sysctl_oid_list *next_lsp; struct sysctl_oid *oidp = NULL, key; bool success = false; enum sysctl_iter_action action; SYSCTL_ASSERT_LOCKED(); /* * Start the search at the requested oid. But if not found, then scan * through all children. */ if (namelen > 0) { key.oid_number = *name; oidp = RB_FIND(sysctl_oid_list, lsp, &key); } if (!oidp) oidp = RB_MIN(sysctl_oid_list, lsp); for(; oidp != NULL; oidp = RB_NEXT(sysctl_oid_list, lsp, oidp)) { action = sysctl_sysctl_next_node(oidp, name, namelen, honor_skip); if (action == ITER_SIBLINGS) continue; if (action == ITER_FOUND) { success = true; break; } KASSERT((action== ITER_CHILDREN), ("ret(%d)!=ITER_CHILDREN", action)); next_lsp = SYSCTL_CHILDREN(oidp); if (namelen == 0) { success = sysctl_sysctl_next_action(next_lsp, NULL, 0, next + 1, len, level + 1, honor_skip); } else { success = sysctl_sysctl_next_action(next_lsp, name + 1, namelen - 1, next + 1, len, level + 1, honor_skip); if (!success) { /* * We maintain the invariant that current node oid * is >= the oid provided in @name. * As there are no usable children at this node, * current node oid is strictly > than the requested * oid. * Hence, reduce namelen to 0 to allow for picking first * nodes/leafs in the next node in list. */ namelen = 0; } } if (success) break; } if (success) { *next = oidp->oid_number; if (level > *len) *len = level; } return (success); } static int sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) { int *name = (int *) arg1; u_int namelen = arg2; int len, error; bool success; struct sysctl_oid_list *lsp = &sysctl__children; struct rm_priotracker tracker; int next[CTL_MAXNAME]; len = 0; SYSCTL_RLOCK(&tracker); success = sysctl_sysctl_next_action(lsp, name, namelen, next, &len, 1, oidp->oid_number == CTL_SYSCTL_NEXT); SYSCTL_RUNLOCK(&tracker); if (!success) return (ENOENT); error = SYSCTL_OUT(req, next, len * sizeof (int)); return (error); } /* * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in * capability mode. */ static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, ""); static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXTNOSKIP, nextnoskip, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, ""); static int name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) { struct sysctl_oid *oidp; struct sysctl_oid_list *lsp = &sysctl__children; SYSCTL_ASSERT_LOCKED(); for (*len = 0; *len < CTL_MAXNAME;) { oidp = sysctl_find_oidname(strsep(&name, "."), lsp); if (oidp == NULL) return (ENOENT); *oid++ = oidp->oid_number; (*len)++; if (name == NULL || *name == '\0') { if (oidpp) *oidpp = oidp; return (0); } if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) break; if (oidp->oid_handler) break; lsp = SYSCTL_CHILDREN(oidp); } return (ENOENT); } static int sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) { char *p; int error, oid[CTL_MAXNAME], len = 0; struct sysctl_oid *op = NULL; struct rm_priotracker tracker; char buf[32]; if (!req->newlen) return (ENOENT); if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ return (ENAMETOOLONG); p = buf; if (req->newlen >= sizeof(buf)) p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); error = SYSCTL_IN(req, p, req->newlen); if (error) { if (p != buf) free(p, M_SYSCTL); return (error); } p [req->newlen] = '\0'; SYSCTL_RLOCK(&tracker); error = name2oid(p, oid, &len, &op); SYSCTL_RUNLOCK(&tracker); if (p != buf) free(p, M_SYSCTL); if (error) return (error); error = SYSCTL_OUT(req, oid, len * sizeof *oid); return (error); } /* * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in * capability mode. */ SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); static int sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_fmt == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); if (error) goto out; error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, ""); static int sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_descr == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD | CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, ""); static int sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) goto out; if (oid->oid_label == NULL) { error = ENOENT; goto out; } error = SYSCTL_OUT(req, oid->oid_label, strlen(oid->oid_label) + 1); out: SYSCTL_RUNLOCK(&tracker); return (error); } static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, ""); /* * Default "handler" functions. */ /* * Handle a bool. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_bool(SYSCTL_HANDLER_ARGS) { uint8_t temp; int error; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) temp = *(bool *)arg1 ? 1 : 0; else temp = arg2 ? 1 : 0; error = SYSCTL_OUT(req, &temp, sizeof(temp)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else { error = SYSCTL_IN(req, &temp, sizeof(temp)); if (!error) *(bool *)arg1 = temp ? 1 : 0; } return (error); } /* * Handle an int8_t, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_8(SYSCTL_HANDLER_ARGS) { int8_t tmpout; int error = 0; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(int8_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(tmpout)); return (error); } /* * Handle an int16_t, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_16(SYSCTL_HANDLER_ARGS) { int16_t tmpout; int error = 0; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(int16_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(tmpout)); return (error); } /* * Handle an int32_t, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_32(SYSCTL_HANDLER_ARGS) { int32_t tmpout; int error = 0; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(int32_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(tmpout)); return (error); } /* * Handle an int, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_int(SYSCTL_HANDLER_ARGS) { int tmpout, error = 0; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(int *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(int)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(int)); return (error); } /* * Based on sysctl_handle_int() convert milliseconds into ticks. * Note: this is used by TCP. */ int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) { int error, s, tt; tt = *(int *)arg1; s = (int)((int64_t)tt * 1000 / hz); error = sysctl_handle_int(oidp, &s, 0, req); if (error || !req->newptr) return (error); tt = (int)((int64_t)s * hz / 1000); if (tt < 1) return (EINVAL); *(int *)arg1 = tt; return (0); } /* * Handle a long, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_long(SYSCTL_HANDLER_ARGS) { int error = 0; long tmplong; #ifdef SCTL_MASK32 int tmpint; #endif /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmplong = *(long *)arg1; else tmplong = arg2; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { tmpint = tmplong; error = SYSCTL_OUT(req, &tmpint, sizeof(int)); } else #endif error = SYSCTL_OUT(req, &tmplong, sizeof(long)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; #ifdef SCTL_MASK32 else if (req->flags & SCTL_MASK32) { error = SYSCTL_IN(req, &tmpint, sizeof(int)); *(long *)arg1 = (long)tmpint; } #endif else error = SYSCTL_IN(req, arg1, sizeof(long)); return (error); } /* * Handle a 64 bit int, signed or unsigned. * Two cases: * a variable: point arg1 at it. * a constant: pass it in arg2. */ int sysctl_handle_64(SYSCTL_HANDLER_ARGS) { int error = 0; uint64_t tmpout; /* * Attempt to get a coherent snapshot by making a copy of the data. */ if (arg1) tmpout = *(uint64_t *)arg1; else tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); return (error); } /* * Handle our generic '\0' terminated 'C' string. * Two cases: * a variable string: point arg1 at it, arg2 is max length. * a constant string: point arg1 at it, arg2 is zero. */ int sysctl_handle_string(SYSCTL_HANDLER_ARGS) { char *tmparg; size_t outlen; int error = 0, ro_string = 0; /* * If the sysctl isn't writable and isn't a preallocated tunable that * can be modified by kenv(2), microoptimise and treat it as a * read-only string. * A zero-length buffer indicates a fixed size read-only * string. In ddb, don't worry about trying to make a malloced * snapshot. */ if ((oidp->oid_kind & (CTLFLAG_WR | CTLFLAG_TUN)) == 0 || arg2 == 0 || kdb_active) { arg2 = strlen((char *)arg1) + 1; ro_string = 1; } if (req->oldptr != NULL) { if (ro_string) { tmparg = arg1; outlen = strlen(tmparg) + 1; } else { tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); sx_slock(&sysctlstringlock); memcpy(tmparg, arg1, arg2); sx_sunlock(&sysctlstringlock); outlen = strlen(tmparg) + 1; } error = SYSCTL_OUT(req, tmparg, outlen); if (!ro_string) free(tmparg, M_SYSCTLTMP); } else { if (!ro_string) sx_slock(&sysctlstringlock); outlen = strlen((char *)arg1) + 1; if (!ro_string) sx_sunlock(&sysctlstringlock); error = SYSCTL_OUT(req, NULL, outlen); } if (error || !req->newptr) return (error); if (req->newlen - req->newidx >= arg2 || req->newlen - req->newidx < 0) { error = EINVAL; } else if (req->newlen - req->newidx == 0) { sx_xlock(&sysctlstringlock); ((char *)arg1)[0] = '\0'; sx_xunlock(&sysctlstringlock); } else if (req->newfunc == sysctl_new_kernel) { arg2 = req->newlen - req->newidx; sx_xlock(&sysctlstringlock); error = SYSCTL_IN(req, arg1, arg2); if (error == 0) { ((char *)arg1)[arg2] = '\0'; req->newidx += arg2; } sx_xunlock(&sysctlstringlock); } else { arg2 = req->newlen - req->newidx; tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); error = SYSCTL_IN(req, tmparg, arg2); if (error) { free(tmparg, M_SYSCTLTMP); return (error); } sx_xlock(&sysctlstringlock); memcpy(arg1, tmparg, arg2); ((char *)arg1)[arg2] = '\0'; sx_xunlock(&sysctlstringlock); free(tmparg, M_SYSCTLTMP); req->newidx += arg2; } return (error); } /* * Handle any kind of opaque data. * arg1 points to it, arg2 is the size. */ int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) { int error, tries; u_int generation; struct sysctl_req req2; /* * Attempt to get a coherent snapshot, by using the thread * pre-emption counter updated from within mi_switch() to * determine if we were pre-empted during a bcopy() or * copyout(). Make 3 attempts at doing this before giving up. * If we encounter an error, stop immediately. */ tries = 0; req2 = *req; retry: generation = curthread->td_generation; error = SYSCTL_OUT(req, arg1, arg2); if (error) return (error); tries++; if (generation != curthread->td_generation && tries < 3) { *req = req2; goto retry; } error = SYSCTL_IN(req, arg1, arg2); return (error); } /* * Based on sysctl_handle_64() convert microseconds to a sbintime. */ int sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS) { int error; int64_t usec; usec = sbttous(*(sbintime_t *)arg1); error = sysctl_handle_64(oidp, &usec, 0, req); if (error || !req->newptr) return (error); *(sbintime_t *)arg1 = ustosbt(usec); return (0); } /* * Based on sysctl_handle_64() convert milliseconds to a sbintime. */ int sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS) { int error; int64_t msec; msec = sbttoms(*(sbintime_t *)arg1); error = sysctl_handle_64(oidp, &msec, 0, req); if (error || !req->newptr) return (error); *(sbintime_t *)arg1 = mstosbt(msec); return (0); } /* * Convert seconds to a struct timeval. Intended for use with * intervals and thus does not permit negative seconds. */ int sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS) { struct timeval *tv; int error, secs; tv = arg1; secs = tv->tv_sec; error = sysctl_handle_int(oidp, &secs, 0, req); if (error || req->newptr == NULL) return (error); if (secs < 0) return (EINVAL); tv->tv_sec = secs; return (0); } /* * Transfer functions to/from kernel space. * XXX: rather untested at this point */ static int sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) { size_t i = 0; if (req->oldptr) { i = l; if (req->oldlen <= req->oldidx) i = 0; else if (i > req->oldlen - req->oldidx) i = req->oldlen - req->oldidx; if (i > 0) bcopy(p, (char *)req->oldptr + req->oldidx, i); } req->oldidx += l; if (req->oldptr && i != l) return (ENOMEM); return (0); } static int sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) { if (!req->newptr) return (0); if (req->newlen - req->newidx < l) return (EINVAL); bcopy((const char *)req->newptr + req->newidx, p, l); req->newidx += l; return (0); } int kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) { int error = 0; struct sysctl_req req; bzero(&req, sizeof req); req.td = td; req.flags = flags; if (oldlenp) { req.oldlen = *oldlenp; } req.validlen = req.oldlen; if (old) { req.oldptr= old; } if (new != NULL) { req.newlen = newlen; req.newptr = new; } req.oldfunc = sysctl_old_kernel; req.newfunc = sysctl_new_kernel; req.lock = REQ_UNWIRED; error = sysctl_root(0, name, namelen, &req); if (req.lock == REQ_WIRED && req.validlen > 0) vsunlock(req.oldptr, req.validlen); if (error && error != ENOMEM) return (error); if (retval) { if (req.oldptr && req.oldidx > req.validlen) *retval = req.validlen; else *retval = req.oldidx; } return (error); } int kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) { int oid[CTL_MAXNAME]; size_t oidlen, plen; int error; oid[0] = CTL_SYSCTL; oid[1] = CTL_SYSCTL_NAME2OID; oidlen = sizeof(oid); error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, strlen(name), &plen, flags); if (error) return (error); error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, new, newlen, retval, flags); return (error); } /* * Transfer function to/from user space. */ static int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) { size_t i, len, origidx; int error; origidx = req->oldidx; req->oldidx += l; if (req->oldptr == NULL) return (0); /* * If we have not wired the user supplied buffer and we are currently * holding locks, drop a witness warning, as it's possible that * write operations to the user page can sleep. */ if (req->lock != REQ_WIRED) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "sysctl_old_user()"); i = l; len = req->validlen; if (len <= origidx) i = 0; else { if (i > len - origidx) i = len - origidx; if (req->lock == REQ_WIRED) { error = copyout_nofault(p, (char *)req->oldptr + origidx, i); } else error = copyout(p, (char *)req->oldptr + origidx, i); if (error != 0) return (error); } if (i < l) return (ENOMEM); return (0); } static int sysctl_new_user(struct sysctl_req *req, void *p, size_t l) { int error; if (!req->newptr) return (0); if (req->newlen - req->newidx < l) return (EINVAL); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "sysctl_new_user()"); error = copyin((const char *)req->newptr + req->newidx, p, l); req->newidx += l; return (error); } /* * Wire the user space destination buffer. If set to a value greater than * zero, the len parameter limits the maximum amount of wired memory. */ int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) { int ret; size_t wiredlen; wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; ret = 0; if (req->lock != REQ_WIRED && req->oldptr && req->oldfunc == sysctl_old_user) { if (wiredlen != 0) { ret = vslock(req->oldptr, wiredlen); if (ret != 0) { if (ret != ENOMEM) return (ret); wiredlen = 0; } } req->lock = REQ_WIRED; req->validlen = wiredlen; } return (0); } int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req) { struct sysctl_oid_list *lsp; struct sysctl_oid *oid; struct sysctl_oid key; int indx; SYSCTL_ASSERT_LOCKED(); lsp = &sysctl__children; indx = 0; while (indx < CTL_MAXNAME) { key.oid_number = name[indx]; oid = RB_FIND(sysctl_oid_list, lsp, &key); if (oid == NULL) return (ENOENT); indx++; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oid->oid_handler != NULL || indx == namelen) { *noid = oid; if (nindx != NULL) *nindx = indx; KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, ("%s found DYING node %p", __func__, oid)); return (0); } lsp = SYSCTL_CHILDREN(oid); } else if (indx == namelen) { if ((oid->oid_kind & CTLFLAG_DORMANT) != 0) return (ENOENT); *noid = oid; if (nindx != NULL) *nindx = indx; KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, ("%s found DYING node %p", __func__, oid)); return (0); } else { return (ENOTDIR); } } return (ENOENT); } /* * Traverse our tree, and find the right node, execute whatever it points * to, and return the resulting error code. */ static int sysctl_root(SYSCTL_HANDLER_ARGS) { struct sysctl_oid *oid; struct rm_priotracker tracker; int error, indx, lvl; SYSCTL_RLOCK(&tracker); error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); if (error) goto out; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { /* * You can't call a sysctl when it's a node, but has * no handler. Inform the user that it's a node. * The indx may or may not be the same as namelen. */ if (oid->oid_handler == NULL) { error = EISDIR; goto out; } } /* Is this sysctl writable? */ if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) { error = EPERM; goto out; } KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); #ifdef CAPABILITY_MODE /* * If the process is in capability mode, then don't permit reading or * writing unless specifically granted for the node. */ if (IN_CAPABILITY_MODE(req->td)) { if ((req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) || (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))) { error = EPERM; goto out; } } #endif /* Is this sysctl sensitive to securelevels? */ if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; error = securelevel_gt(req->td->td_ucred, lvl); if (error) goto out; } /* Is this sysctl writable by only privileged users? */ if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { int priv; if (oid->oid_kind & CTLFLAG_PRISON) priv = PRIV_SYSCTL_WRITEJAIL; #ifdef VIMAGE else if ((oid->oid_kind & CTLFLAG_VNET) && prison_owns_vnet(req->td->td_ucred)) priv = PRIV_SYSCTL_WRITEJAIL; #endif else priv = PRIV_SYSCTL_WRITE; error = priv_check(req->td, priv); if (error) goto out; } if (!oid->oid_handler) { error = EINVAL; goto out; } if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { arg1 = (int *)arg1 + indx; arg2 -= indx; } else { arg1 = oid->oid_arg1; arg2 = oid->oid_arg2; } #ifdef MAC error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, req); if (error != 0) goto out; #endif #ifdef VIMAGE if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); #endif error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker); out: SYSCTL_RUNLOCK(&tracker); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __sysctl_args { int *name; u_int namelen; void *old; size_t *oldlenp; void *new; size_t newlen; }; #endif int sys___sysctl(struct thread *td, struct __sysctl_args *uap) { int error, i, name[CTL_MAXNAME]; size_t j; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, &name, uap->namelen * sizeof(int)); if (error) return (error); error = userland_sysctl(td, name, uap->namelen, uap->old, uap->oldlenp, 0, uap->new, uap->newlen, &j, 0); if (error && error != ENOMEM) return (error); if (uap->oldlenp) { i = copyout(&j, uap->oldlenp, sizeof(j)); if (i) return (i); } return (error); } int kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags, bool inkernel) { int oid[CTL_MAXNAME]; char namebuf[16]; char *name; size_t oidlen; int error; if (namelen > MAXPATHLEN || namelen == 0) return (EINVAL); name = namebuf; if (namelen > sizeof(namebuf)) name = malloc(namelen, M_SYSCTL, M_WAITOK); error = copyin(oname, name, namelen); if (error != 0) goto out; oid[0] = CTL_SYSCTL; oid[1] = CTL_SYSCTL_NAME2OID; oidlen = sizeof(oid); error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, namelen, retval, flags); if (error != 0) goto out; error = userland_sysctl(td, oid, *retval / sizeof(int), old, oldlenp, inkernel, new, newlen, retval, flags); out: if (namelen > sizeof(namebuf)) free(name, M_SYSCTL); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __sysctlbyname_args { const char *name; size_t namelen; void *old; size_t *oldlenp; void *new; size_t newlen; }; #endif int sys___sysctlbyname(struct thread *td, struct __sysctlbyname_args *uap) { size_t rv; int error; error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old, uap->oldlenp, uap->new, uap->newlen, &rv, 0, 0); if (error != 0) return (error); if (uap->oldlenp != NULL) error = copyout(&rv, uap->oldlenp, sizeof(rv)); return (error); } /* * This is used from various compatibility syscalls too. That's why name * must be in kernel space. */ int userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, const void *new, size_t newlen, size_t *retval, int flags) { int error = 0, memlocked; struct sysctl_req req; bzero(&req, sizeof req); req.td = td; req.flags = flags; if (oldlenp) { if (inkernel) { req.oldlen = *oldlenp; } else { error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); if (error) return (error); } } req.validlen = req.oldlen; req.oldptr = old; if (new != NULL) { req.newlen = newlen; req.newptr = new; } req.oldfunc = sysctl_old_user; req.newfunc = sysctl_new_user; req.lock = REQ_UNWIRED; #ifdef KTRACE if (KTRPOINT(curthread, KTR_SYSCTL)) ktrsysctl(name, namelen); #endif memlocked = 0; if (req.oldptr && req.oldlen > 4 * PAGE_SIZE) { memlocked = 1; sx_xlock(&sysctlmemlock); } CURVNET_SET(TD_TO_VNET(td)); for (;;) { req.oldidx = 0; req.newidx = 0; error = sysctl_root(0, name, namelen, &req); if (error != EAGAIN) break; kern_yield(PRI_USER); } CURVNET_RESTORE(); if (req.lock == REQ_WIRED && req.validlen > 0) vsunlock(req.oldptr, req.validlen); if (memlocked) sx_xunlock(&sysctlmemlock); if (error && error != ENOMEM) return (error); if (retval) { if (req.oldptr && req.oldidx > req.validlen) *retval = req.validlen; else *retval = req.oldidx; } return (error); } /* * Drain into a sysctl struct. The user buffer should be wired if a page * fault would cause issue. */ static int sbuf_sysctl_drain(void *arg, const char *data, int len) { struct sysctl_req *req = arg; int error; error = SYSCTL_OUT(req, data, len); KASSERT(error >= 0, ("Got unexpected negative value %d", error)); return (error == 0 ? len : -error); } struct sbuf * sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, struct sysctl_req *req) { /* Supply a default buffer size if none given. */ if (buf == NULL && length == 0) length = 64; s = sbuf_new(s, buf, length, SBUF_FIXEDLEN | SBUF_INCLUDENUL); sbuf_set_drain(s, sbuf_sysctl_drain, req); return (s); } #ifdef DDB /* The current OID the debugger is working with */ static struct sysctl_oid *g_ddb_oid; /* The current flags specified by the user */ static int g_ddb_sysctl_flags; /* Check to see if the last sysctl printed */ static int g_ddb_sysctl_printed; static const int ctl_sign[CTLTYPE+1] = { [CTLTYPE_INT] = 1, [CTLTYPE_LONG] = 1, [CTLTYPE_S8] = 1, [CTLTYPE_S16] = 1, [CTLTYPE_S32] = 1, [CTLTYPE_S64] = 1, }; static const int ctl_size[CTLTYPE+1] = { [CTLTYPE_INT] = sizeof(int), [CTLTYPE_UINT] = sizeof(u_int), [CTLTYPE_LONG] = sizeof(long), [CTLTYPE_ULONG] = sizeof(u_long), [CTLTYPE_S8] = sizeof(int8_t), [CTLTYPE_S16] = sizeof(int16_t), [CTLTYPE_S32] = sizeof(int32_t), [CTLTYPE_S64] = sizeof(int64_t), [CTLTYPE_U8] = sizeof(uint8_t), [CTLTYPE_U16] = sizeof(uint16_t), [CTLTYPE_U32] = sizeof(uint32_t), [CTLTYPE_U64] = sizeof(uint64_t), }; #define DB_SYSCTL_NAME_ONLY 0x001 /* Compare with -N */ #define DB_SYSCTL_VALUE_ONLY 0x002 /* Compare with -n */ #define DB_SYSCTL_OPAQUE 0x004 /* Compare with -o */ #define DB_SYSCTL_HEX 0x008 /* Compare with -x */ #define DB_SYSCTL_SAFE_ONLY 0x100 /* Only simple types */ static const char db_sysctl_modifs[] = { 'N', 'n', 'o', 'x', }; static const int db_sysctl_modif_values[] = { DB_SYSCTL_NAME_ONLY, DB_SYSCTL_VALUE_ONLY, DB_SYSCTL_OPAQUE, DB_SYSCTL_HEX, }; /* Handlers considered safe to print while recursing */ static int (* const db_safe_handlers[])(SYSCTL_HANDLER_ARGS) = { sysctl_handle_bool, sysctl_handle_8, sysctl_handle_16, sysctl_handle_32, sysctl_handle_64, sysctl_handle_int, sysctl_handle_long, sysctl_handle_string, sysctl_handle_opaque, }; /* * Use in place of sysctl_old_kernel to print sysctl values. * * Compare to the output handling in show_var from sbin/sysctl/sysctl.c */ static int sysctl_old_ddb(struct sysctl_req *req, const void *ptr, size_t len) { const u_char *val, *p; const char *sep1; size_t intlen, slen; uintmax_t umv; intmax_t mv; int sign, ctltype, hexlen, xflag, error; /* Suppress false-positive GCC uninitialized variable warnings */ mv = 0; umv = 0; slen = len; val = p = ptr; if (ptr == NULL) { error = 0; goto out; } /* We are going to print */ g_ddb_sysctl_printed = 1; xflag = g_ddb_sysctl_flags & DB_SYSCTL_HEX; ctltype = (g_ddb_oid->oid_kind & CTLTYPE); sign = ctl_sign[ctltype]; intlen = ctl_size[ctltype]; switch (ctltype) { case CTLTYPE_NODE: case CTLTYPE_STRING: db_printf("%.*s", (int) len, (const char *) p); error = 0; goto out; case CTLTYPE_INT: case CTLTYPE_UINT: case CTLTYPE_LONG: case CTLTYPE_ULONG: case CTLTYPE_S8: case CTLTYPE_S16: case CTLTYPE_S32: case CTLTYPE_S64: case CTLTYPE_U8: case CTLTYPE_U16: case CTLTYPE_U32: case CTLTYPE_U64: hexlen = 2 + (intlen * CHAR_BIT + 3) / 4; sep1 = ""; while (len >= intlen) { switch (ctltype) { case CTLTYPE_INT: case CTLTYPE_UINT: umv = *(const u_int *)p; mv = *(const int *)p; break; case CTLTYPE_LONG: case CTLTYPE_ULONG: umv = *(const u_long *)p; mv = *(const long *)p; break; case CTLTYPE_S8: case CTLTYPE_U8: umv = *(const uint8_t *)p; mv = *(const int8_t *)p; break; case CTLTYPE_S16: case CTLTYPE_U16: umv = *(const uint16_t *)p; mv = *(const int16_t *)p; break; case CTLTYPE_S32: case CTLTYPE_U32: umv = *(const uint32_t *)p; mv = *(const int32_t *)p; break; case CTLTYPE_S64: case CTLTYPE_U64: umv = *(const uint64_t *)p; mv = *(const int64_t *)p; break; } db_printf("%s", sep1); if (xflag) db_printf("%#0*jx", hexlen, umv); else if (!sign) db_printf("%ju", umv); else if (g_ddb_oid->oid_fmt[1] == 'K') { /* Kelvins are currently unsupported. */ error = EOPNOTSUPP; goto out; } else db_printf("%jd", mv); sep1 = " "; len -= intlen; p += intlen; } error = 0; goto out; case CTLTYPE_OPAQUE: /* TODO: Support struct functions. */ /* FALLTHROUGH */ default: db_printf("Format:%s Length:%zu Dump:0x", g_ddb_oid->oid_fmt, len); while (len-- && (xflag || p < val + 16)) db_printf("%02x", *p++); if (!xflag && len > 16) db_printf("..."); error = 0; goto out; } out: req->oldidx += slen; return (error); } /* * Avoid setting new sysctl values from the debugger */ static int sysctl_new_ddb(struct sysctl_req *req, void *p, size_t l) { if (!req->newptr) return (0); /* Changing sysctls from the debugger is currently unsupported */ return (EPERM); } /* * Run a sysctl handler with the DDB oldfunc and newfunc attached. * Instead of copying any output to a buffer we'll dump it right to * the console. */ static int db_sysctl(struct sysctl_oid *oidp, int *name, u_int namelen, void *old, size_t *oldlenp, size_t *retval, int flags) { struct sysctl_req req; int error; /* Setup the request */ bzero(&req, sizeof req); req.td = kdb_thread; req.oldfunc = sysctl_old_ddb; req.newfunc = sysctl_new_ddb; req.lock = REQ_UNWIRED; if (oldlenp) { req.oldlen = *oldlenp; } req.validlen = req.oldlen; if (old) { req.oldptr = old; } /* Setup our globals for sysctl_old_ddb */ g_ddb_oid = oidp; g_ddb_sysctl_flags = flags; g_ddb_sysctl_printed = 0; error = sysctl_root(0, name, namelen, &req); /* Reset globals */ g_ddb_oid = NULL; g_ddb_sysctl_flags = 0; if (retval) { if (req.oldptr && req.oldidx > req.validlen) *retval = req.validlen; else *retval = req.oldidx; } return (error); } /* * Show a sysctl's name */ static void db_show_oid_name(int *oid, size_t nlen) { struct sysctl_oid *oidp; int qoid[CTL_MAXNAME + 2]; int error; qoid[0] = CTL_SYSCTL; qoid[1] = CTL_SYSCTL_NAME; memcpy(qoid + 2, oid, nlen * sizeof(int)); error = sysctl_find_oid(qoid, nlen + 2, &oidp, NULL, NULL); if (error) db_error("sysctl name oid"); error = db_sysctl(oidp, qoid, nlen + 2, NULL, NULL, NULL, 0); if (error) db_error("sysctl name"); } /* * Check to see if an OID is safe to print from ddb. */ static bool db_oid_safe(const struct sysctl_oid *oidp) { for (unsigned int i = 0; i < nitems(db_safe_handlers); ++i) { if (oidp->oid_handler == db_safe_handlers[i]) return (true); } return (false); } /* * Show a sysctl at a specific OID * Compare to the input handling in show_var from sbin/sysctl/sysctl.c */ static int db_show_oid(struct sysctl_oid *oidp, int *oid, size_t nlen, int flags) { int error, xflag, oflag, Nflag, nflag; size_t len; xflag = flags & DB_SYSCTL_HEX; oflag = flags & DB_SYSCTL_OPAQUE; nflag = flags & DB_SYSCTL_VALUE_ONLY; Nflag = flags & DB_SYSCTL_NAME_ONLY; if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_OPAQUE && (!xflag && !oflag)) return (0); if (Nflag) { db_show_oid_name(oid, nlen); error = 0; goto out; } if (!nflag) { db_show_oid_name(oid, nlen); db_printf(": "); } if ((flags & DB_SYSCTL_SAFE_ONLY) && !db_oid_safe(oidp)) { db_printf("Skipping, unsafe to print while recursing."); error = 0; goto out; } /* Try once, and ask about the size */ len = 0; error = db_sysctl(oidp, oid, nlen, NULL, NULL, &len, flags); if (error) goto out; if (!g_ddb_sysctl_printed) /* Lie about the size */ error = db_sysctl(oidp, oid, nlen, (void *) 1, &len, NULL, flags); out: db_printf("\n"); return (error); } /* * Show all sysctls under a specific OID * Compare to sysctl_all from sbin/sysctl/sysctl.c */ static int db_show_sysctl_all(int *oid, size_t len, int flags) { struct sysctl_oid *oidp; int qoid[CTL_MAXNAME + 2], next[CTL_MAXNAME]; size_t nlen; qoid[0] = CTL_SYSCTL; qoid[1] = CTL_SYSCTL_NEXT; if (len) { nlen = len; memcpy(&qoid[2], oid, nlen * sizeof(int)); } else { nlen = 1; qoid[2] = CTL_KERN; } for (;;) { int error; size_t nextsize = sizeof(next); error = kernel_sysctl(kdb_thread, qoid, nlen + 2, next, &nextsize, NULL, 0, &nlen, 0); if (error != 0) { if (error == ENOENT) return (0); else db_error("sysctl(next)"); } nlen /= sizeof(int); if (nlen < (unsigned int)len) return (0); if (memcmp(&oid[0], &next[0], len * sizeof(int)) != 0) return (0); /* Find the OID in question */ error = sysctl_find_oid(next, nlen, &oidp, NULL, NULL); if (error) return (error); (void)db_show_oid(oidp, next, nlen, flags | DB_SYSCTL_SAFE_ONLY); if (db_pager_quit) return (0); memcpy(&qoid[2 + len], &next[len], (nlen - len) * sizeof(int)); } } /* * Show a sysctl by its user facing string */ static int db_sysctlbyname(char *name, int flags) { struct sysctl_oid *oidp; int oid[CTL_MAXNAME]; int error, nlen; error = name2oid(name, oid, &nlen, &oidp); if (error) { return (error); } if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { db_show_sysctl_all(oid, nlen, flags); } else { error = db_show_oid(oidp, oid, nlen, flags); } return (error); } static void db_sysctl_cmd_usage(void) { db_printf( " sysctl [/Nnox] \n" " \n" " The name of the sysctl to show. \n" " \n" " Show a sysctl by hooking into SYSCTL_IN and SYSCTL_OUT. \n" " This will work for most sysctls, but should not be used \n" " with sysctls that are known to malloc. \n" " \n" " While recursing any \"unsafe\" sysctls will be skipped. \n" " Call sysctl directly on the sysctl to try printing the \n" " skipped sysctl. This is unsafe and may make the ddb \n" " session unusable. \n" " \n" " Arguments: \n" " /N Display only the name of the sysctl. \n" " /n Display only the value of the sysctl. \n" " /o Display opaque values. \n" " /x Display the sysctl in hex. \n" " \n" "For example: \n" "sysctl vm.v_free_min \n" "vn.v_free_min: 12669 \n" ); } /* * Show a specific sysctl similar to sysctl (8). */ DB_COMMAND_FLAGS(sysctl, db_sysctl_cmd, CS_OWN) { char name[TOK_STRING_SIZE]; int error, i, t, flags; /* Parse the modifiers */ t = db_read_token(); if (t == tSLASH || t == tMINUS) { t = db_read_token(); if (t != tIDENT) { db_printf("Bad modifier\n"); error = EINVAL; goto out; } db_strcpy(modif, db_tok_string); } else { db_unread_token(t); modif[0] = '\0'; } flags = 0; for (i = 0; i < nitems(db_sysctl_modifs); i++) { if (strchr(modif, db_sysctl_modifs[i])) { flags |= db_sysctl_modif_values[i]; } } /* Parse the sysctl names */ t = db_read_token(); if (t != tIDENT) { db_printf("Need sysctl name\n"); error = EINVAL; goto out; } /* Copy the name into a temporary buffer */ db_strcpy(name, db_tok_string); /* Ensure there is no trailing cruft */ t = db_read_token(); if (t != tEOL) { db_printf("Unexpected sysctl argument\n"); error = EINVAL; goto out; } error = db_sysctlbyname(name, flags); if (error == ENOENT) { db_printf("unknown oid: '%s'\n", db_tok_string); goto out; } else if (error) { db_printf("%s: error: %d\n", db_tok_string, error); goto out; } out: /* Ensure we eat all of our text */ db_flush_lex(); if (error == EINVAL) { db_sysctl_cmd_usage(); } } #endif /* DDB */ diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c index 5be2db9d32bf..3e9998f27baa 100644 --- a/sys/kern/link_elf.c +++ b/sys/kern/link_elf.c @@ -1,1989 +1,2009 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1998-2000 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ddb.h" #include "opt_gdb.h" #include #include #include #include #include #ifdef SPARSE_MAPPING #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SPARSE_MAPPING #include #include #include #endif #include #include #include #include "linker_if.h" #define MAXSEGS 4 typedef struct elf_file { struct linker_file lf; /* Common fields */ int preloaded; /* Was file pre-loaded */ caddr_t address; /* Relocation address */ #ifdef SPARSE_MAPPING vm_object_t object; /* VM object to hold file pages */ #endif Elf_Dyn *dynamic; /* Symbol table etc. */ Elf_Hashelt nbuckets; /* DT_HASH info */ Elf_Hashelt nchains; const Elf_Hashelt *buckets; const Elf_Hashelt *chains; caddr_t hash; caddr_t strtab; /* DT_STRTAB */ int strsz; /* DT_STRSZ */ const Elf_Sym *symtab; /* DT_SYMTAB */ Elf_Addr *got; /* DT_PLTGOT */ const Elf_Rel *pltrel; /* DT_JMPREL */ int pltrelsize; /* DT_PLTRELSZ */ const Elf_Rela *pltrela; /* DT_JMPREL */ int pltrelasize; /* DT_PLTRELSZ */ const Elf_Rel *rel; /* DT_REL */ int relsize; /* DT_RELSZ */ const Elf_Rela *rela; /* DT_RELA */ int relasize; /* DT_RELASZ */ caddr_t modptr; const Elf_Sym *ddbsymtab; /* The symbol table we are using */ long ddbsymcnt; /* Number of symbols */ caddr_t ddbstrtab; /* String table */ long ddbstrcnt; /* number of bytes in string table */ caddr_t symbase; /* malloc'ed symbold base */ caddr_t strbase; /* malloc'ed string base */ caddr_t ctftab; /* CTF table */ long ctfcnt; /* number of bytes in CTF table */ caddr_t ctfoff; /* CTF offset table */ caddr_t typoff; /* Type offset table */ long typlen; /* Number of type entries. */ Elf_Addr pcpu_start; /* Pre-relocation pcpu set start. */ Elf_Addr pcpu_stop; /* Pre-relocation pcpu set stop. */ Elf_Addr pcpu_base; /* Relocated pcpu set address. */ #ifdef VIMAGE Elf_Addr vnet_start; /* Pre-relocation vnet set start. */ Elf_Addr vnet_stop; /* Pre-relocation vnet set stop. */ Elf_Addr vnet_base; /* Relocated vnet set address. */ #endif #ifdef GDB struct link_map gdb; /* hooks for gdb */ #endif } *elf_file_t; struct elf_set { Elf_Addr es_start; Elf_Addr es_stop; Elf_Addr es_base; TAILQ_ENTRY(elf_set) es_link; }; TAILQ_HEAD(elf_set_head, elf_set); #include static int link_elf_link_common_finish(linker_file_t); static int link_elf_link_preload(linker_class_t cls, const char *, linker_file_t *); static int link_elf_link_preload_finish(linker_file_t); static int link_elf_load_file(linker_class_t, const char *, linker_file_t *); static int link_elf_lookup_symbol(linker_file_t, const char *, c_linker_sym_t *); static int link_elf_lookup_debug_symbol(linker_file_t, const char *, c_linker_sym_t *); static int link_elf_symbol_values(linker_file_t, c_linker_sym_t, linker_symval_t *); static int link_elf_debug_symbol_values(linker_file_t, c_linker_sym_t, linker_symval_t*); static int link_elf_search_symbol(linker_file_t, caddr_t, c_linker_sym_t *, long *); static void link_elf_unload_file(linker_file_t); static void link_elf_unload_preload(linker_file_t); static int link_elf_lookup_set(linker_file_t, const char *, void ***, void ***, int *); static int link_elf_each_function_name(linker_file_t, int (*)(const char *, void *), void *); static int link_elf_each_function_nameval(linker_file_t, linker_function_nameval_callback_t, void *); static void link_elf_reloc_local(linker_file_t); static long link_elf_symtab_get(linker_file_t, const Elf_Sym **); static long link_elf_strtab_get(linker_file_t, caddr_t *); +#ifdef VIMAGE +static void link_elf_propagate_vnets(linker_file_t); +#endif static int elf_lookup(linker_file_t, Elf_Size, int, Elf_Addr *); static kobj_method_t link_elf_methods[] = { KOBJMETHOD(linker_lookup_symbol, link_elf_lookup_symbol), KOBJMETHOD(linker_lookup_debug_symbol, link_elf_lookup_debug_symbol), KOBJMETHOD(linker_symbol_values, link_elf_symbol_values), KOBJMETHOD(linker_debug_symbol_values, link_elf_debug_symbol_values), KOBJMETHOD(linker_search_symbol, link_elf_search_symbol), KOBJMETHOD(linker_unload, link_elf_unload_file), KOBJMETHOD(linker_load_file, link_elf_load_file), KOBJMETHOD(linker_link_preload, link_elf_link_preload), KOBJMETHOD(linker_link_preload_finish, link_elf_link_preload_finish), KOBJMETHOD(linker_lookup_set, link_elf_lookup_set), KOBJMETHOD(linker_each_function_name, link_elf_each_function_name), KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval), KOBJMETHOD(linker_ctf_get, link_elf_ctf_get), KOBJMETHOD(linker_symtab_get, link_elf_symtab_get), KOBJMETHOD(linker_strtab_get, link_elf_strtab_get), +#ifdef VIMAGE + KOBJMETHOD(linker_propagate_vnets, link_elf_propagate_vnets), +#endif KOBJMETHOD_END }; static struct linker_class link_elf_class = { #if ELF_TARG_CLASS == ELFCLASS32 "elf32", #else "elf64", #endif link_elf_methods, sizeof(struct elf_file) }; static bool link_elf_leak_locals = true; SYSCTL_BOOL(_debug, OID_AUTO, link_elf_leak_locals, CTLFLAG_RWTUN, &link_elf_leak_locals, 0, "Allow local symbols to participate in global module symbol resolution"); typedef int (*elf_reloc_fn)(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup); static int parse_dynamic(elf_file_t); static int relocate_file(elf_file_t); static int relocate_file1(elf_file_t ef, elf_lookup_fn lookup, elf_reloc_fn reloc, bool ifuncs); static int link_elf_preload_parse_symbols(elf_file_t); static struct elf_set_head set_pcpu_list; #ifdef VIMAGE static struct elf_set_head set_vnet_list; #endif static void elf_set_add(struct elf_set_head *list, Elf_Addr start, Elf_Addr stop, Elf_Addr base) { struct elf_set *set, *iter; set = malloc(sizeof(*set), M_LINKER, M_WAITOK); set->es_start = start; set->es_stop = stop; set->es_base = base; TAILQ_FOREACH(iter, list, es_link) { KASSERT((set->es_start < iter->es_start && set->es_stop < iter->es_stop) || (set->es_start > iter->es_start && set->es_stop > iter->es_stop), ("linker sets intersection: to insert: 0x%jx-0x%jx; inserted: 0x%jx-0x%jx", (uintmax_t)set->es_start, (uintmax_t)set->es_stop, (uintmax_t)iter->es_start, (uintmax_t)iter->es_stop)); if (iter->es_start > set->es_start) { TAILQ_INSERT_BEFORE(iter, set, es_link); break; } } if (iter == NULL) TAILQ_INSERT_TAIL(list, set, es_link); } static int elf_set_find(struct elf_set_head *list, Elf_Addr addr, Elf_Addr *start, Elf_Addr *base) { struct elf_set *set; TAILQ_FOREACH(set, list, es_link) { if (addr < set->es_start) return (0); if (addr < set->es_stop) { *start = set->es_start; *base = set->es_base; return (1); } } return (0); } static void elf_set_delete(struct elf_set_head *list, Elf_Addr start) { struct elf_set *set; TAILQ_FOREACH(set, list, es_link) { if (start < set->es_start) break; if (start == set->es_start) { TAILQ_REMOVE(list, set, es_link); free(set, M_LINKER); return; } } KASSERT(0, ("deleting unknown linker set (start = 0x%jx)", (uintmax_t)start)); } #ifdef GDB static void r_debug_state(struct r_debug *, struct link_map *); /* * A list of loaded modules for GDB to use for loading symbols. */ struct r_debug r_debug; #define GDB_STATE(s) do { \ r_debug.r_state = s; r_debug_state(NULL, NULL); \ } while (0) /* * Function for the debugger to set a breakpoint on to gain control. */ static void r_debug_state(struct r_debug *dummy_one __unused, struct link_map *dummy_two __unused) { } static void link_elf_add_gdb(struct link_map *l) { struct link_map *prev; l->l_next = NULL; if (r_debug.r_map == NULL) { /* Add first. */ l->l_prev = NULL; r_debug.r_map = l; } else { /* Append to list. */ for (prev = r_debug.r_map; prev->l_next != NULL; prev = prev->l_next) ; l->l_prev = prev; prev->l_next = l; } } static void link_elf_delete_gdb(struct link_map *l) { if (l->l_prev == NULL) { /* Remove first. */ if ((r_debug.r_map = l->l_next) != NULL) l->l_next->l_prev = NULL; } else { /* Remove any but first. */ if ((l->l_prev->l_next = l->l_next) != NULL) l->l_next->l_prev = l->l_prev; } } #endif /* GDB */ /* * The kernel symbol table starts here. */ extern struct _dynamic _DYNAMIC; static void link_elf_error(const char *filename, const char *s) { if (filename == NULL) printf("kldload: %s\n", s); else printf("kldload: %s: %s\n", filename, s); } static void link_elf_invoke_ctors(caddr_t addr, size_t size) { void (**ctor)(void); size_t i, cnt; if (addr == NULL || size == 0) return; cnt = size / sizeof(*ctor); ctor = (void *)addr; for (i = 0; i < cnt; i++) { if (ctor[i] != NULL) (*ctor[i])(); } } /* * Actions performed after linking/loading both the preloaded kernel and any * modules; whether preloaded or dynamicly loaded. */ static int link_elf_link_common_finish(linker_file_t lf) { #ifdef GDB elf_file_t ef = (elf_file_t)lf; char *newfilename; #endif int error; /* Notify MD code that a module is being loaded. */ error = elf_cpu_load_file(lf); if (error != 0) return (error); #ifdef GDB GDB_STATE(RT_ADD); ef->gdb.l_addr = lf->address; newfilename = malloc(strlen(lf->filename) + 1, M_LINKER, M_WAITOK); strcpy(newfilename, lf->filename); ef->gdb.l_name = newfilename; ef->gdb.l_ld = ef->dynamic; link_elf_add_gdb(&ef->gdb); GDB_STATE(RT_CONSISTENT); #endif /* Invoke .ctors */ link_elf_invoke_ctors(lf->ctors_addr, lf->ctors_size); return (0); } #ifdef RELOCATABLE_KERNEL /* * __startkernel and __endkernel are symbols set up as relocation canaries. * * They are defined in locore to reference linker script symbols at the * beginning and end of the LOAD area. This has the desired side effect of * giving us variables that have relative relocations pointing at them, so * relocation of the kernel object will cause the variables to be updated * automatically by the runtime linker when we initialize. * * There are two main reasons to relocate the kernel: * 1) If the loader needed to load the kernel at an alternate load address. * 2) If the kernel is switching address spaces on machines like POWER9 * under Radix where the high bits of the effective address are used to * differentiate between hypervisor, host, guest, and problem state. */ extern vm_offset_t __startkernel, __endkernel; #endif static unsigned long kern_relbase = KERNBASE; SYSCTL_ULONG(_kern, OID_AUTO, base_address, CTLFLAG_RD, SYSCTL_NULL_ULONG_PTR, KERNBASE, "Kernel base address"); SYSCTL_ULONG(_kern, OID_AUTO, relbase_address, CTLFLAG_RD, &kern_relbase, 0, "Kernel relocated base address"); static void link_elf_init(void* arg) { Elf_Dyn *dp; Elf_Addr *ctors_addrp; Elf_Size *ctors_sizep; caddr_t modptr, baseptr, sizeptr; elf_file_t ef; const char *modname; linker_add_class(&link_elf_class); dp = (Elf_Dyn *)&_DYNAMIC; modname = NULL; modptr = preload_search_by_type("elf" __XSTRING(__ELF_WORD_SIZE) " kernel"); if (modptr == NULL) modptr = preload_search_by_type("elf kernel"); modname = (char *)preload_search_info(modptr, MODINFO_NAME); if (modname == NULL) modname = "kernel"; linker_kernel_file = linker_make_file(modname, &link_elf_class); if (linker_kernel_file == NULL) panic("%s: Can't create linker structures for kernel", __func__); ef = (elf_file_t) linker_kernel_file; ef->preloaded = 1; #ifdef RELOCATABLE_KERNEL /* Compute relative displacement */ ef->address = (caddr_t) (__startkernel - KERNBASE); #else ef->address = 0; #endif #ifdef SPARSE_MAPPING ef->object = NULL; #endif ef->dynamic = dp; if (dp != NULL) parse_dynamic(ef); #ifdef RELOCATABLE_KERNEL linker_kernel_file->address = (caddr_t)__startkernel; linker_kernel_file->size = (intptr_t)(__endkernel - __startkernel); kern_relbase = (unsigned long)__startkernel; #else linker_kernel_file->address += KERNBASE; linker_kernel_file->size = -(intptr_t)linker_kernel_file->address; #endif if (modptr != NULL) { ef->modptr = modptr; baseptr = preload_search_info(modptr, MODINFO_ADDR); if (baseptr != NULL) linker_kernel_file->address = *(caddr_t *)baseptr; sizeptr = preload_search_info(modptr, MODINFO_SIZE); if (sizeptr != NULL) linker_kernel_file->size = *(size_t *)sizeptr; ctors_addrp = (Elf_Addr *)preload_search_info(modptr, MODINFO_METADATA | MODINFOMD_CTORS_ADDR); ctors_sizep = (Elf_Size *)preload_search_info(modptr, MODINFO_METADATA | MODINFOMD_CTORS_SIZE); if (ctors_addrp != NULL && ctors_sizep != NULL) { linker_kernel_file->ctors_addr = ef->address + *ctors_addrp; linker_kernel_file->ctors_size = *ctors_sizep; } } (void)link_elf_preload_parse_symbols(ef); #ifdef GDB r_debug.r_map = NULL; r_debug.r_brk = r_debug_state; r_debug.r_state = RT_CONSISTENT; #endif (void)link_elf_link_common_finish(linker_kernel_file); linker_kernel_file->flags |= LINKER_FILE_LINKED; TAILQ_INIT(&set_pcpu_list); #ifdef VIMAGE TAILQ_INIT(&set_vnet_list); #endif } SYSINIT(link_elf, SI_SUB_KLD, SI_ORDER_THIRD, link_elf_init, NULL); static int link_elf_preload_parse_symbols(elf_file_t ef) { caddr_t pointer; caddr_t ssym, esym, base; caddr_t strtab; int strcnt; Elf_Sym *symtab; int symcnt; if (ef->modptr == NULL) return (0); pointer = preload_search_info(ef->modptr, MODINFO_METADATA | MODINFOMD_SSYM); if (pointer == NULL) return (0); ssym = *(caddr_t *)pointer; pointer = preload_search_info(ef->modptr, MODINFO_METADATA | MODINFOMD_ESYM); if (pointer == NULL) return (0); esym = *(caddr_t *)pointer; base = ssym; symcnt = *(long *)base; base += sizeof(long); symtab = (Elf_Sym *)base; base += roundup(symcnt, sizeof(long)); if (base > esym || base < ssym) { printf("Symbols are corrupt!\n"); return (EINVAL); } strcnt = *(long *)base; base += sizeof(long); strtab = base; base += roundup(strcnt, sizeof(long)); if (base > esym || base < ssym) { printf("Symbols are corrupt!\n"); return (EINVAL); } ef->ddbsymtab = symtab; ef->ddbsymcnt = symcnt / sizeof(Elf_Sym); ef->ddbstrtab = strtab; ef->ddbstrcnt = strcnt; return (0); } static int parse_dynamic(elf_file_t ef) { Elf_Dyn *dp; int plttype = DT_REL; for (dp = ef->dynamic; dp->d_tag != DT_NULL; dp++) { switch (dp->d_tag) { case DT_HASH: { /* From src/libexec/rtld-elf/rtld.c */ const Elf_Hashelt *hashtab = (const Elf_Hashelt *) (ef->address + dp->d_un.d_ptr); ef->nbuckets = hashtab[0]; ef->nchains = hashtab[1]; ef->buckets = hashtab + 2; ef->chains = ef->buckets + ef->nbuckets; break; } case DT_STRTAB: ef->strtab = (caddr_t) (ef->address + dp->d_un.d_ptr); break; case DT_STRSZ: ef->strsz = dp->d_un.d_val; break; case DT_SYMTAB: ef->symtab = (Elf_Sym*) (ef->address + dp->d_un.d_ptr); break; case DT_SYMENT: if (dp->d_un.d_val != sizeof(Elf_Sym)) return (ENOEXEC); break; case DT_PLTGOT: ef->got = (Elf_Addr *) (ef->address + dp->d_un.d_ptr); break; case DT_REL: ef->rel = (const Elf_Rel *) (ef->address + dp->d_un.d_ptr); break; case DT_RELSZ: ef->relsize = dp->d_un.d_val; break; case DT_RELENT: if (dp->d_un.d_val != sizeof(Elf_Rel)) return (ENOEXEC); break; case DT_JMPREL: ef->pltrel = (const Elf_Rel *) (ef->address + dp->d_un.d_ptr); break; case DT_PLTRELSZ: ef->pltrelsize = dp->d_un.d_val; break; case DT_RELA: ef->rela = (const Elf_Rela *) (ef->address + dp->d_un.d_ptr); break; case DT_RELASZ: ef->relasize = dp->d_un.d_val; break; case DT_RELAENT: if (dp->d_un.d_val != sizeof(Elf_Rela)) return (ENOEXEC); break; case DT_PLTREL: plttype = dp->d_un.d_val; if (plttype != DT_REL && plttype != DT_RELA) return (ENOEXEC); break; #ifdef GDB case DT_DEBUG: dp->d_un.d_ptr = (Elf_Addr)&r_debug; break; #endif } } if (plttype == DT_RELA) { ef->pltrela = (const Elf_Rela *)ef->pltrel; ef->pltrel = NULL; ef->pltrelasize = ef->pltrelsize; ef->pltrelsize = 0; } ef->ddbsymtab = ef->symtab; ef->ddbsymcnt = ef->nchains; ef->ddbstrtab = ef->strtab; ef->ddbstrcnt = ef->strsz; return elf_cpu_parse_dynamic(ef->address, ef->dynamic); } #define LS_PADDING 0x90909090 static int parse_dpcpu(elf_file_t ef) { int error, size; #if defined(__i386__) uint32_t pad; #endif ef->pcpu_start = 0; ef->pcpu_stop = 0; error = link_elf_lookup_set(&ef->lf, "pcpu", (void ***)&ef->pcpu_start, (void ***)&ef->pcpu_stop, NULL); /* Error just means there is no pcpu set to relocate. */ if (error != 0) return (0); size = (uintptr_t)ef->pcpu_stop - (uintptr_t)ef->pcpu_start; /* Empty set? */ if (size < 1) return (0); #if defined(__i386__) /* In case we do find __start/stop_set_ symbols double-check. */ if (size < 4) { uprintf("Kernel module '%s' must be recompiled with " "linker script\n", ef->lf.pathname); return (ENOEXEC); } /* Padding from linker-script correct? */ pad = *(uint32_t *)((uintptr_t)ef->pcpu_stop - sizeof(pad)); if (pad != LS_PADDING) { uprintf("Kernel module '%s' must be recompiled with " "linker script, invalid padding %#04x (%#04x)\n", ef->lf.pathname, pad, LS_PADDING); return (ENOEXEC); } /* If we only have valid padding, nothing to do. */ if (size == 4) return (0); #endif /* * Allocate space in the primary pcpu area. Copy in our * initialization from the data section and then initialize * all per-cpu storage from that. */ ef->pcpu_base = (Elf_Addr)(uintptr_t)dpcpu_alloc(size); if (ef->pcpu_base == 0) { printf("%s: pcpu module space is out of space; " "cannot allocate %d for %s\n", __func__, size, ef->lf.pathname); return (ENOSPC); } memcpy((void *)ef->pcpu_base, (void *)ef->pcpu_start, size); dpcpu_copy((void *)ef->pcpu_base, size); elf_set_add(&set_pcpu_list, ef->pcpu_start, ef->pcpu_stop, ef->pcpu_base); return (0); } #ifdef VIMAGE static int parse_vnet(elf_file_t ef) { int error, size; #if defined(__i386__) uint32_t pad; #endif ef->vnet_start = 0; ef->vnet_stop = 0; error = link_elf_lookup_set(&ef->lf, "vnet", (void ***)&ef->vnet_start, (void ***)&ef->vnet_stop, NULL); /* Error just means there is no vnet data set to relocate. */ if (error != 0) return (0); size = (uintptr_t)ef->vnet_stop - (uintptr_t)ef->vnet_start; /* Empty set? */ if (size < 1) return (0); #if defined(__i386__) /* In case we do find __start/stop_set_ symbols double-check. */ if (size < 4) { uprintf("Kernel module '%s' must be recompiled with " "linker script\n", ef->lf.pathname); return (ENOEXEC); } /* Padding from linker-script correct? */ pad = *(uint32_t *)((uintptr_t)ef->vnet_stop - sizeof(pad)); if (pad != LS_PADDING) { uprintf("Kernel module '%s' must be recompiled with " "linker script, invalid padding %#04x (%#04x)\n", ef->lf.pathname, pad, LS_PADDING); return (ENOEXEC); } /* If we only have valid padding, nothing to do. */ if (size == 4) return (0); #endif /* * Allocate space in the primary vnet area. Copy in our * initialization from the data section and then initialize * all per-vnet storage from that. */ ef->vnet_base = (Elf_Addr)(uintptr_t)vnet_data_alloc(size); if (ef->vnet_base == 0) { printf("%s: vnet module space is out of space; " "cannot allocate %d for %s\n", __func__, size, ef->lf.pathname); return (ENOSPC); } memcpy((void *)ef->vnet_base, (void *)ef->vnet_start, size); vnet_data_copy((void *)ef->vnet_base, size); elf_set_add(&set_vnet_list, ef->vnet_start, ef->vnet_stop, ef->vnet_base); return (0); } #endif #undef LS_PADDING /* * Apply the specified protection to the loadable segments of a preloaded linker * file. */ static int preload_protect(elf_file_t ef, vm_prot_t prot) { #if defined(__aarch64__) || defined(__amd64__) Elf_Ehdr *hdr; Elf_Phdr *phdr, *phlimit; vm_prot_t nprot; int error; error = 0; hdr = (Elf_Ehdr *)ef->address; phdr = (Elf_Phdr *)(ef->address + hdr->e_phoff); phlimit = phdr + hdr->e_phnum; for (; phdr < phlimit; phdr++) { if (phdr->p_type != PT_LOAD) continue; nprot = prot | VM_PROT_READ; if ((phdr->p_flags & PF_W) != 0) nprot |= VM_PROT_WRITE; if ((phdr->p_flags & PF_X) != 0) nprot |= VM_PROT_EXECUTE; error = pmap_change_prot((vm_offset_t)ef->address + phdr->p_vaddr, round_page(phdr->p_memsz), nprot); if (error != 0) break; } return (error); #else return (0); #endif } #ifdef __arm__ /* * Locate the ARM exception/unwind table info for DDB and stack(9) use by * searching for the section header that describes it. There may be no unwind * info, for example in a module containing only data. */ static void link_elf_locate_exidx(linker_file_t lf, Elf_Shdr *shdr, int nhdr) { int i; for (i = 0; i < nhdr; i++) { if (shdr[i].sh_type == SHT_ARM_EXIDX) { lf->exidx_addr = shdr[i].sh_addr + lf->address; lf->exidx_size = shdr[i].sh_size; break; } } } /* * Locate the section headers metadata in a preloaded module, then use it to * locate the exception/unwind table in the module. The size of the metadata * block is stored in a uint32 word immediately before the data itself, and a * comment in preload_search_info() says it is safe to rely on that. */ static void link_elf_locate_exidx_preload(struct linker_file *lf, caddr_t modptr) { uint32_t *modinfo; Elf_Shdr *shdr; uint32_t nhdr; modinfo = (uint32_t *)preload_search_info(modptr, MODINFO_METADATA | MODINFOMD_SHDR); if (modinfo != NULL) { shdr = (Elf_Shdr *)modinfo; nhdr = modinfo[-1] / sizeof(Elf_Shdr); link_elf_locate_exidx(lf, shdr, nhdr); } } #endif /* __arm__ */ static int link_elf_link_preload(linker_class_t cls, const char *filename, linker_file_t *result) { Elf_Addr *ctors_addrp; Elf_Size *ctors_sizep; caddr_t modptr, baseptr, sizeptr, dynptr; char *type; elf_file_t ef; linker_file_t lf; int error; vm_offset_t dp; /* Look to see if we have the file preloaded */ modptr = preload_search_by_name(filename); if (modptr == NULL) return (ENOENT); type = (char *)preload_search_info(modptr, MODINFO_TYPE); baseptr = preload_search_info(modptr, MODINFO_ADDR); sizeptr = preload_search_info(modptr, MODINFO_SIZE); dynptr = preload_search_info(modptr, MODINFO_METADATA | MODINFOMD_DYNAMIC); if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE) " module") != 0 && strcmp(type, "elf module") != 0)) return (EFTYPE); if (baseptr == NULL || sizeptr == NULL || dynptr == NULL) return (EINVAL); lf = linker_make_file(filename, &link_elf_class); if (lf == NULL) return (ENOMEM); ef = (elf_file_t) lf; ef->preloaded = 1; ef->modptr = modptr; ef->address = *(caddr_t *)baseptr; #ifdef SPARSE_MAPPING ef->object = NULL; #endif dp = (vm_offset_t)ef->address + *(vm_offset_t *)dynptr; ef->dynamic = (Elf_Dyn *)dp; lf->address = ef->address; lf->size = *(size_t *)sizeptr; ctors_addrp = (Elf_Addr *)preload_search_info(modptr, MODINFO_METADATA | MODINFOMD_CTORS_ADDR); ctors_sizep = (Elf_Size *)preload_search_info(modptr, MODINFO_METADATA | MODINFOMD_CTORS_SIZE); if (ctors_addrp != NULL && ctors_sizep != NULL) { lf->ctors_addr = ef->address + *ctors_addrp; lf->ctors_size = *ctors_sizep; } #ifdef __arm__ link_elf_locate_exidx_preload(lf, modptr); #endif error = parse_dynamic(ef); if (error == 0) error = parse_dpcpu(ef); #ifdef VIMAGE if (error == 0) error = parse_vnet(ef); #endif if (error == 0) error = preload_protect(ef, VM_PROT_ALL); if (error != 0) { linker_file_unload(lf, LINKER_UNLOAD_FORCE); return (error); } link_elf_reloc_local(lf); *result = lf; return (0); } static int link_elf_link_preload_finish(linker_file_t lf) { elf_file_t ef; int error; ef = (elf_file_t) lf; error = relocate_file(ef); if (error == 0) error = preload_protect(ef, VM_PROT_NONE); if (error != 0) return (error); (void)link_elf_preload_parse_symbols(ef); return (link_elf_link_common_finish(lf)); } static int link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* result) { struct nameidata nd; struct thread* td = curthread; /* XXX */ Elf_Ehdr *hdr; caddr_t firstpage, segbase; int nbytes, i; Elf_Phdr *phdr; Elf_Phdr *phlimit; Elf_Phdr *segs[MAXSEGS]; int nsegs; Elf_Phdr *phdyn; caddr_t mapbase; size_t mapsize; Elf_Addr base_vaddr; Elf_Addr base_vlimit; int error = 0; ssize_t resid; int flags; elf_file_t ef; linker_file_t lf; Elf_Shdr *shdr; int symtabindex; int symstrindex; int shstrindex; int symcnt; int strcnt; char *shstrs; shdr = NULL; lf = NULL; shstrs = NULL; NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename); flags = FREAD; error = vn_open(&nd, &flags, 0, NULL); if (error != 0) return (error); NDFREE_PNBUF(&nd); if (nd.ni_vp->v_type != VREG) { error = ENOEXEC; firstpage = NULL; goto out; } #ifdef MAC error = mac_kld_check_load(curthread->td_ucred, nd.ni_vp); if (error != 0) { firstpage = NULL; goto out; } #endif /* * Read the elf header from the file. */ firstpage = malloc(PAGE_SIZE, M_LINKER, M_WAITOK); hdr = (Elf_Ehdr *)firstpage; error = vn_rdwr(UIO_READ, nd.ni_vp, firstpage, PAGE_SIZE, 0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); nbytes = PAGE_SIZE - resid; if (error != 0) goto out; if (!IS_ELF(*hdr)) { error = ENOEXEC; goto out; } if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) { link_elf_error(filename, "Unsupported file layout"); error = ENOEXEC; goto out; } if (hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_version != EV_CURRENT) { link_elf_error(filename, "Unsupported file version"); error = ENOEXEC; goto out; } if (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN) { error = ENOSYS; goto out; } if (hdr->e_machine != ELF_TARG_MACH) { link_elf_error(filename, "Unsupported machine"); error = ENOEXEC; goto out; } /* * We rely on the program header being in the first page. * This is not strictly required by the ABI specification, but * it seems to always true in practice. And, it simplifies * things considerably. */ if (!((hdr->e_phentsize == sizeof(Elf_Phdr)) && (hdr->e_phoff + hdr->e_phnum*sizeof(Elf_Phdr) <= PAGE_SIZE) && (hdr->e_phoff + hdr->e_phnum*sizeof(Elf_Phdr) <= nbytes))) link_elf_error(filename, "Unreadable program headers"); /* * Scan the program header entries, and save key information. * * We rely on there being exactly two load segments, text and data, * in that order. */ phdr = (Elf_Phdr *) (firstpage + hdr->e_phoff); phlimit = phdr + hdr->e_phnum; nsegs = 0; phdyn = NULL; while (phdr < phlimit) { switch (phdr->p_type) { case PT_LOAD: if (nsegs == MAXSEGS) { link_elf_error(filename, "Too many sections"); error = ENOEXEC; goto out; } /* * XXX: We just trust they come in right order ?? */ segs[nsegs] = phdr; ++nsegs; break; case PT_DYNAMIC: phdyn = phdr; break; case PT_INTERP: error = ENOSYS; goto out; } ++phdr; } if (phdyn == NULL) { link_elf_error(filename, "Object is not dynamically-linked"); error = ENOEXEC; goto out; } if (nsegs == 0) { link_elf_error(filename, "No sections"); error = ENOEXEC; goto out; } /* * Allocate the entire address space of the object, to stake * out our contiguous region, and to establish the base * address for relocation. */ base_vaddr = trunc_page(segs[0]->p_vaddr); base_vlimit = round_page(segs[nsegs - 1]->p_vaddr + segs[nsegs - 1]->p_memsz); mapsize = base_vlimit - base_vaddr; lf = linker_make_file(filename, &link_elf_class); if (lf == NULL) { error = ENOMEM; goto out; } ef = (elf_file_t) lf; #ifdef SPARSE_MAPPING ef->object = vm_pager_allocate(OBJT_PHYS, NULL, mapsize, VM_PROT_ALL, 0, thread0.td_ucred); if (ef->object == NULL) { error = ENOMEM; goto out; } #ifdef __amd64__ mapbase = (caddr_t)KERNBASE; #else mapbase = (caddr_t)vm_map_min(kernel_map); #endif /* * Mapping protections are downgraded after relocation processing. */ error = vm_map_find(kernel_map, ef->object, 0, (vm_offset_t *)&mapbase, mapsize, 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != 0) { vm_object_deallocate(ef->object); ef->object = NULL; goto out; } #else mapbase = malloc_exec(mapsize, M_LINKER, M_WAITOK); #endif ef->address = mapbase; /* * Read the text and data sections and zero the bss. */ for (i = 0; i < nsegs; i++) { segbase = mapbase + segs[i]->p_vaddr - base_vaddr; #ifdef SPARSE_MAPPING /* * Consecutive segments may have different mapping permissions, * so be strict and verify that their mappings do not overlap. */ if (((vm_offset_t)segbase & PAGE_MASK) != 0) { error = EINVAL; goto out; } error = vm_map_wire(kernel_map, (vm_offset_t)segbase, (vm_offset_t)segbase + round_page(segs[i]->p_memsz), VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { error = ENOMEM; goto out; } #endif error = vn_rdwr(UIO_READ, nd.ni_vp, segbase, segs[i]->p_filesz, segs[i]->p_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error != 0) goto out; bzero(segbase + segs[i]->p_filesz, segs[i]->p_memsz - segs[i]->p_filesz); } ef->dynamic = (Elf_Dyn *) (mapbase + phdyn->p_vaddr - base_vaddr); lf->address = ef->address; lf->size = mapsize; error = parse_dynamic(ef); if (error != 0) goto out; error = parse_dpcpu(ef); if (error != 0) goto out; #ifdef VIMAGE error = parse_vnet(ef); if (error != 0) goto out; #endif link_elf_reloc_local(lf); VOP_UNLOCK(nd.ni_vp); error = linker_load_dependencies(lf); vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) goto out; error = relocate_file(ef); if (error != 0) goto out; #ifdef SPARSE_MAPPING /* * Downgrade permissions on text segment mappings now that relocation * processing is complete. Restrict permissions on read-only segments. */ for (i = 0; i < nsegs; i++) { vm_prot_t prot; if (segs[i]->p_type != PT_LOAD) continue; prot = VM_PROT_READ; if ((segs[i]->p_flags & PF_W) != 0) prot |= VM_PROT_WRITE; if ((segs[i]->p_flags & PF_X) != 0) prot |= VM_PROT_EXECUTE; segbase = mapbase + segs[i]->p_vaddr - base_vaddr; error = vm_map_protect(kernel_map, (vm_offset_t)segbase, (vm_offset_t)segbase + round_page(segs[i]->p_memsz), prot, 0, VM_MAP_PROTECT_SET_PROT); if (error != KERN_SUCCESS) { error = ENOMEM; goto out; } } #endif /* * Try and load the symbol table if it's present. (you can * strip it!) */ nbytes = hdr->e_shnum * hdr->e_shentsize; if (nbytes == 0 || hdr->e_shoff == 0) goto nosyms; shdr = malloc(nbytes, M_LINKER, M_WAITOK | M_ZERO); error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)shdr, nbytes, hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error != 0) goto out; /* Read section string table */ shstrindex = hdr->e_shstrndx; if (shstrindex != 0 && shdr[shstrindex].sh_type == SHT_STRTAB && shdr[shstrindex].sh_size != 0) { nbytes = shdr[shstrindex].sh_size; shstrs = malloc(nbytes, M_LINKER, M_WAITOK | M_ZERO); error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)shstrs, nbytes, shdr[shstrindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; } symtabindex = -1; symstrindex = -1; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_type == SHT_SYMTAB) { symtabindex = i; symstrindex = shdr[i].sh_link; } else if (shstrs != NULL && shdr[i].sh_name != 0 && strcmp(shstrs + shdr[i].sh_name, ".ctors") == 0) { /* Record relocated address and size of .ctors. */ lf->ctors_addr = mapbase + shdr[i].sh_addr - base_vaddr; lf->ctors_size = shdr[i].sh_size; } } if (symtabindex < 0 || symstrindex < 0) goto nosyms; symcnt = shdr[symtabindex].sh_size; ef->symbase = malloc(symcnt, M_LINKER, M_WAITOK); strcnt = shdr[symstrindex].sh_size; ef->strbase = malloc(strcnt, M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, nd.ni_vp, ef->symbase, symcnt, shdr[symtabindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error != 0) goto out; error = vn_rdwr(UIO_READ, nd.ni_vp, ef->strbase, strcnt, shdr[symstrindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error != 0) goto out; ef->ddbsymcnt = symcnt / sizeof(Elf_Sym); ef->ddbsymtab = (const Elf_Sym *)ef->symbase; ef->ddbstrcnt = strcnt; ef->ddbstrtab = ef->strbase; nosyms: #ifdef __arm__ link_elf_locate_exidx(lf, shdr, hdr->e_shnum); #endif error = link_elf_link_common_finish(lf); if (error != 0) goto out; *result = lf; out: VOP_UNLOCK(nd.ni_vp); vn_close(nd.ni_vp, FREAD, td->td_ucred, td); if (error != 0 && lf != NULL) linker_file_unload(lf, LINKER_UNLOAD_FORCE); free(shdr, M_LINKER); free(firstpage, M_LINKER); free(shstrs, M_LINKER); return (error); } Elf_Addr elf_relocaddr(linker_file_t lf, Elf_Addr x) { elf_file_t ef; KASSERT(lf->ops->cls == (kobj_class_t)&link_elf_class, ("elf_relocaddr: unexpected linker file %p", lf)); ef = (elf_file_t)lf; if (x >= ef->pcpu_start && x < ef->pcpu_stop) return ((x - ef->pcpu_start) + ef->pcpu_base); #ifdef VIMAGE if (x >= ef->vnet_start && x < ef->vnet_stop) return ((x - ef->vnet_start) + ef->vnet_base); #endif return (x); } static void link_elf_unload_file(linker_file_t file) { elf_file_t ef = (elf_file_t) file; if (ef->pcpu_base != 0) { dpcpu_free((void *)ef->pcpu_base, ef->pcpu_stop - ef->pcpu_start); elf_set_delete(&set_pcpu_list, ef->pcpu_start); } #ifdef VIMAGE if (ef->vnet_base != 0) { vnet_data_free((void *)ef->vnet_base, ef->vnet_stop - ef->vnet_start); elf_set_delete(&set_vnet_list, ef->vnet_start); } #endif #ifdef GDB if (ef->gdb.l_ld != NULL) { GDB_STATE(RT_DELETE); free((void *)(uintptr_t)ef->gdb.l_name, M_LINKER); link_elf_delete_gdb(&ef->gdb); GDB_STATE(RT_CONSISTENT); } #endif /* Notify MD code that a module is being unloaded. */ elf_cpu_unload_file(file); if (ef->preloaded) { link_elf_unload_preload(file); return; } #ifdef SPARSE_MAPPING if (ef->object != NULL) { vm_map_remove(kernel_map, (vm_offset_t) ef->address, (vm_offset_t) ef->address + (ef->object->size << PAGE_SHIFT)); } #else free(ef->address, M_LINKER); #endif free(ef->symbase, M_LINKER); free(ef->strbase, M_LINKER); free(ef->ctftab, M_LINKER); free(ef->ctfoff, M_LINKER); free(ef->typoff, M_LINKER); } static void link_elf_unload_preload(linker_file_t file) { if (file->pathname != NULL) preload_delete_name(file->pathname); } static const char * symbol_name(elf_file_t ef, Elf_Size r_info) { const Elf_Sym *ref; if (ELF_R_SYM(r_info)) { ref = ef->symtab + ELF_R_SYM(r_info); return (ef->strtab + ref->st_name); } return (NULL); } static int symbol_type(elf_file_t ef, Elf_Size r_info) { const Elf_Sym *ref; if (ELF_R_SYM(r_info)) { ref = ef->symtab + ELF_R_SYM(r_info); return (ELF_ST_TYPE(ref->st_info)); } return (STT_NOTYPE); } static int relocate_file1(elf_file_t ef, elf_lookup_fn lookup, elf_reloc_fn reloc, bool ifuncs) { const Elf_Rel *rel; const Elf_Rela *rela; const char *symname; TSENTER(); #define APPLY_RELOCS(iter, tbl, tblsize, type) do { \ for ((iter) = (tbl); (iter) != NULL && \ (iter) < (tbl) + (tblsize) / sizeof(*(iter)); (iter)++) { \ if ((symbol_type(ef, (iter)->r_info) == \ STT_GNU_IFUNC || \ elf_is_ifunc_reloc((iter)->r_info)) != ifuncs) \ continue; \ if (reloc(&ef->lf, (Elf_Addr)ef->address, \ (iter), (type), lookup)) { \ symname = symbol_name(ef, (iter)->r_info); \ printf("link_elf: symbol %s undefined\n", \ symname); \ return (ENOENT); \ } \ } \ } while (0) APPLY_RELOCS(rel, ef->rel, ef->relsize, ELF_RELOC_REL); TSENTER2("ef->rela"); APPLY_RELOCS(rela, ef->rela, ef->relasize, ELF_RELOC_RELA); TSEXIT2("ef->rela"); APPLY_RELOCS(rel, ef->pltrel, ef->pltrelsize, ELF_RELOC_REL); APPLY_RELOCS(rela, ef->pltrela, ef->pltrelasize, ELF_RELOC_RELA); #undef APPLY_RELOCS TSEXIT(); return (0); } static int relocate_file(elf_file_t ef) { int error; error = relocate_file1(ef, elf_lookup, elf_reloc, false); if (error == 0) error = relocate_file1(ef, elf_lookup, elf_reloc, true); return (error); } /* * SysV hash function for symbol table lookup. It is specified by the * System V ABI. */ static Elf32_Word elf_hash(const char *name) { const unsigned char *p = (const unsigned char *)name; Elf32_Word h = 0; while (*p != '\0') { h = (h << 4) + *p++; h ^= (h >> 24) & 0xf0; } return (h & 0x0fffffff); } static int link_elf_lookup_symbol1(linker_file_t lf, const char *name, c_linker_sym_t *sym, bool see_local) { elf_file_t ef = (elf_file_t) lf; unsigned long symnum; const Elf_Sym* symp; const char *strp; Elf32_Word hash; /* If we don't have a hash, bail. */ if (ef->buckets == NULL || ef->nbuckets == 0) { printf("link_elf_lookup_symbol: missing symbol hash table\n"); return (ENOENT); } /* First, search hashed global symbols */ hash = elf_hash(name); symnum = ef->buckets[hash % ef->nbuckets]; while (symnum != STN_UNDEF) { if (symnum >= ef->nchains) { printf("%s: corrupt symbol table\n", __func__); return (ENOENT); } symp = ef->symtab + symnum; if (symp->st_name == 0) { printf("%s: corrupt symbol table\n", __func__); return (ENOENT); } strp = ef->strtab + symp->st_name; if (strcmp(name, strp) == 0) { if (symp->st_shndx != SHN_UNDEF || (symp->st_value != 0 && (ELF_ST_TYPE(symp->st_info) == STT_FUNC || ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC))) { if (see_local || ELF_ST_BIND(symp->st_info) != STB_LOCAL) { *sym = (c_linker_sym_t) symp; return (0); } } return (ENOENT); } symnum = ef->chains[symnum]; } return (ENOENT); } static int link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym) { if (link_elf_leak_locals) return (link_elf_lookup_debug_symbol(lf, name, sym)); return (link_elf_lookup_symbol1(lf, name, sym, false)); } static int link_elf_lookup_debug_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym) { elf_file_t ef = (elf_file_t)lf; const Elf_Sym* symp; const char *strp; int i; if (link_elf_lookup_symbol1(lf, name, sym, true) == 0) return (0); for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { strp = ef->ddbstrtab + symp->st_name; if (strcmp(name, strp) == 0) { if (symp->st_shndx != SHN_UNDEF || (symp->st_value != 0 && (ELF_ST_TYPE(symp->st_info) == STT_FUNC || ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC))) { *sym = (c_linker_sym_t) symp; return (0); } return (ENOENT); } } return (ENOENT); } static int link_elf_symbol_values1(linker_file_t lf, c_linker_sym_t sym, linker_symval_t *symval, bool see_local) { elf_file_t ef; const Elf_Sym *es; caddr_t val; ef = (elf_file_t)lf; es = (const Elf_Sym *)sym; if (es >= ef->symtab && es < ef->symtab + ef->nchains) { if (!see_local && ELF_ST_BIND(es->st_info) == STB_LOCAL) return (ENOENT); symval->name = ef->strtab + es->st_name; val = (caddr_t)ef->address + es->st_value; if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC) val = ((caddr_t (*)(void))val)(); symval->value = val; symval->size = es->st_size; return (0); } return (ENOENT); } static int link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym, linker_symval_t *symval) { if (link_elf_leak_locals) return (link_elf_debug_symbol_values(lf, sym, symval)); return (link_elf_symbol_values1(lf, sym, symval, false)); } static int link_elf_debug_symbol_values(linker_file_t lf, c_linker_sym_t sym, linker_symval_t *symval) { elf_file_t ef = (elf_file_t)lf; const Elf_Sym *es = (const Elf_Sym *)sym; caddr_t val; if (link_elf_symbol_values1(lf, sym, symval, true) == 0) return (0); if (ef->symtab == ef->ddbsymtab) return (ENOENT); if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) { symval->name = ef->ddbstrtab + es->st_name; val = (caddr_t)ef->address + es->st_value; if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC) val = ((caddr_t (*)(void))val)(); symval->value = val; symval->size = es->st_size; return (0); } return (ENOENT); } static int link_elf_search_symbol(linker_file_t lf, caddr_t value, c_linker_sym_t *sym, long *diffp) { elf_file_t ef = (elf_file_t)lf; u_long off = (uintptr_t)(void *)value; u_long diff = off; u_long st_value; const Elf_Sym *es; const Elf_Sym *best = NULL; int i; for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) { if (es->st_name == 0) continue; st_value = es->st_value + (uintptr_t) (void *) ef->address; if (off >= st_value) { if (off - st_value < diff) { diff = off - st_value; best = es; if (diff == 0) break; } else if (off - st_value == diff) { best = es; } } } if (best == NULL) *diffp = off; else *diffp = diff; *sym = (c_linker_sym_t) best; return (0); } /* * Look up a linker set on an ELF system. */ static int link_elf_lookup_set(linker_file_t lf, const char *name, void ***startp, void ***stopp, int *countp) { c_linker_sym_t sym; linker_symval_t symval; char *setsym; void **start, **stop; int len, error = 0, count; len = strlen(name) + sizeof("__start_set_"); /* sizeof includes \0 */ setsym = malloc(len, M_LINKER, M_WAITOK); /* get address of first entry */ snprintf(setsym, len, "%s%s", "__start_set_", name); error = link_elf_lookup_symbol(lf, setsym, &sym); if (error != 0) goto out; link_elf_symbol_values(lf, sym, &symval); if (symval.value == 0) { error = ESRCH; goto out; } start = (void **)symval.value; /* get address of last entry */ snprintf(setsym, len, "%s%s", "__stop_set_", name); error = link_elf_lookup_symbol(lf, setsym, &sym); if (error != 0) goto out; link_elf_symbol_values(lf, sym, &symval); if (symval.value == 0) { error = ESRCH; goto out; } stop = (void **)symval.value; /* and the number of entries */ count = stop - start; /* and copy out */ if (startp != NULL) *startp = start; if (stopp != NULL) *stopp = stop; if (countp != NULL) *countp = count; out: free(setsym, M_LINKER); return (error); } static int link_elf_each_function_name(linker_file_t file, int (*callback)(const char *, void *), void *opaque) { elf_file_t ef = (elf_file_t)file; const Elf_Sym *symp; int i, error; /* Exhaustive search */ for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { if (symp->st_value != 0 && (ELF_ST_TYPE(symp->st_info) == STT_FUNC || ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) { error = callback(ef->ddbstrtab + symp->st_name, opaque); if (error != 0) return (error); } } return (0); } static int link_elf_each_function_nameval(linker_file_t file, linker_function_nameval_callback_t callback, void *opaque) { linker_symval_t symval; elf_file_t ef = (elf_file_t)file; const Elf_Sym *symp; int i, error; /* Exhaustive search */ for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { if (symp->st_value != 0 && (ELF_ST_TYPE(symp->st_info) == STT_FUNC || ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) { error = link_elf_debug_symbol_values(file, (c_linker_sym_t) symp, &symval); if (error == 0) error = callback(file, i, &symval, opaque); if (error != 0) return (error); } } return (0); } const Elf_Sym * elf_get_sym(linker_file_t lf, Elf_Size symidx) { elf_file_t ef = (elf_file_t)lf; if (symidx >= ef->nchains) return (NULL); return (ef->symtab + symidx); } const char * elf_get_symname(linker_file_t lf, Elf_Size symidx) { elf_file_t ef = (elf_file_t)lf; const Elf_Sym *sym; if (symidx >= ef->nchains) return (NULL); sym = ef->symtab + symidx; return (ef->strtab + sym->st_name); } /* * Symbol lookup function that can be used when the symbol index is known (ie * in relocations). It uses the symbol index instead of doing a fully fledged * hash table based lookup when such is valid. For example for local symbols. * This is not only more efficient, it's also more correct. It's not always * the case that the symbol can be found through the hash table. */ static int elf_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res) { elf_file_t ef = (elf_file_t)lf; const Elf_Sym *sym; const char *symbol; Elf_Addr addr, start, base; /* Don't even try to lookup the symbol if the index is bogus. */ if (symidx >= ef->nchains) { *res = 0; return (EINVAL); } sym = ef->symtab + symidx; /* * Don't do a full lookup when the symbol is local. It may even * fail because it may not be found through the hash table. */ if (ELF_ST_BIND(sym->st_info) == STB_LOCAL) { /* Force lookup failure when we have an insanity. */ if (sym->st_shndx == SHN_UNDEF || sym->st_value == 0) { *res = 0; return (EINVAL); } *res = ((Elf_Addr)ef->address + sym->st_value); return (0); } /* * XXX we can avoid doing a hash table based lookup for global * symbols as well. This however is not always valid, so we'll * just do it the hard way for now. Performance tweaks can * always be added. */ symbol = ef->strtab + sym->st_name; /* Force a lookup failure if the symbol name is bogus. */ if (*symbol == 0) { *res = 0; return (EINVAL); } addr = ((Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps)); if (addr == 0 && ELF_ST_BIND(sym->st_info) != STB_WEAK) { *res = 0; return (EINVAL); } if (elf_set_find(&set_pcpu_list, addr, &start, &base)) addr = addr - start + base; #ifdef VIMAGE else if (elf_set_find(&set_vnet_list, addr, &start, &base)) addr = addr - start + base; #endif *res = addr; return (0); } static void link_elf_reloc_local(linker_file_t lf) { const Elf_Rel *rellim; const Elf_Rel *rel; const Elf_Rela *relalim; const Elf_Rela *rela; elf_file_t ef = (elf_file_t)lf; /* Perform relocations without addend if there are any: */ if ((rel = ef->rel) != NULL) { rellim = (const Elf_Rel *)((const char *)ef->rel + ef->relsize); while (rel < rellim) { elf_reloc_local(lf, (Elf_Addr)ef->address, rel, ELF_RELOC_REL, elf_lookup); rel++; } } /* Perform relocations with addend if there are any: */ if ((rela = ef->rela) != NULL) { relalim = (const Elf_Rela *) ((const char *)ef->rela + ef->relasize); while (rela < relalim) { elf_reloc_local(lf, (Elf_Addr)ef->address, rela, ELF_RELOC_RELA, elf_lookup); rela++; } } } static long link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab) { elf_file_t ef = (elf_file_t)lf; *symtab = ef->ddbsymtab; if (*symtab == NULL) return (0); return (ef->ddbsymcnt); } static long link_elf_strtab_get(linker_file_t lf, caddr_t *strtab) { elf_file_t ef = (elf_file_t)lf; *strtab = ef->ddbstrtab; if (*strtab == NULL) return (0); return (ef->ddbstrcnt); } +#ifdef VIMAGE +static void +link_elf_propagate_vnets(linker_file_t lf) +{ + elf_file_t ef = (elf_file_t)lf; + int size; + + if (ef->vnet_base != 0) { + size = (uintptr_t)ef->vnet_stop - (uintptr_t)ef->vnet_start; + vnet_data_copy((void *)ef->vnet_base, size); + } +} +#endif + #if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) || defined(__powerpc__) /* * Use this lookup routine when performing relocations early during boot. * The generic lookup routine depends on kobj, which is not initialized * at that point. */ static int elf_lookup_ifunc(linker_file_t lf, Elf_Size symidx, int deps __unused, Elf_Addr *res) { elf_file_t ef; const Elf_Sym *symp; caddr_t val; ef = (elf_file_t)lf; symp = ef->symtab + symidx; if (ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC) { val = (caddr_t)ef->address + symp->st_value; *res = ((Elf_Addr (*)(void))val)(); return (0); } return (ENOENT); } void link_elf_ireloc(caddr_t kmdp) { struct elf_file eff; elf_file_t ef; TSENTER(); ef = &eff; bzero_early(ef, sizeof(*ef)); ef->modptr = kmdp; ef->dynamic = (Elf_Dyn *)&_DYNAMIC; #ifdef RELOCATABLE_KERNEL ef->address = (caddr_t) (__startkernel - KERNBASE); #else ef->address = 0; #endif parse_dynamic(ef); link_elf_preload_parse_symbols(ef); relocate_file1(ef, elf_lookup_ifunc, elf_reloc, true); TSEXIT(); } #if defined(__aarch64__) || defined(__amd64__) void link_elf_late_ireloc(void) { elf_file_t ef; KASSERT(linker_kernel_file != NULL, ("link_elf_late_ireloc: No kernel linker file found")); ef = (elf_file_t)linker_kernel_file; relocate_file1(ef, elf_lookup_ifunc, elf_reloc_late, true); } #endif #endif diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c index b853ca284f7d..768808d2102e 100644 --- a/sys/kern/link_elf_obj.c +++ b/sys/kern/link_elf_obj.c @@ -1,1861 +1,1889 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1998-2000 Doug Rabson * Copyright (c) 2004 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB_CTF #include #endif #include "linker_if.h" typedef struct { void *addr; Elf_Off size; int flags; /* Section flags. */ int sec; /* Original section number. */ char *name; } Elf_progent; typedef struct { Elf_Rel *rel; int nrel; int sec; } Elf_relent; typedef struct { Elf_Rela *rela; int nrela; int sec; } Elf_relaent; typedef struct elf_file { struct linker_file lf; /* Common fields */ int preloaded; caddr_t address; /* Relocation address */ vm_object_t object; /* VM object to hold file pages */ Elf_Shdr *e_shdr; Elf_progent *progtab; u_int nprogtab; Elf_relaent *relatab; u_int nrelatab; Elf_relent *reltab; int nreltab; Elf_Sym *ddbsymtab; /* The symbol table we are using */ long ddbsymcnt; /* Number of symbols */ caddr_t ddbstrtab; /* String table */ long ddbstrcnt; /* number of bytes in string table */ caddr_t shstrtab; /* Section name string table */ long shstrcnt; /* number of bytes in string table */ caddr_t ctftab; /* CTF table */ long ctfcnt; /* number of bytes in CTF table */ caddr_t ctfoff; /* CTF offset table */ caddr_t typoff; /* Type offset table */ long typlen; /* Number of type entries. */ } *elf_file_t; #include static int link_elf_link_preload(linker_class_t cls, const char *, linker_file_t *); static int link_elf_link_preload_finish(linker_file_t); static int link_elf_load_file(linker_class_t, const char *, linker_file_t *); static int link_elf_lookup_symbol(linker_file_t, const char *, c_linker_sym_t *); static int link_elf_lookup_debug_symbol(linker_file_t, const char *, c_linker_sym_t *); static int link_elf_symbol_values(linker_file_t, c_linker_sym_t, linker_symval_t *); static int link_elf_debug_symbol_values(linker_file_t, c_linker_sym_t, linker_symval_t *); static int link_elf_search_symbol(linker_file_t, caddr_t value, c_linker_sym_t *sym, long *diffp); static void link_elf_unload_file(linker_file_t); static int link_elf_lookup_set(linker_file_t, const char *, void ***, void ***, int *); static int link_elf_each_function_name(linker_file_t, int (*)(const char *, void *), void *); static int link_elf_each_function_nameval(linker_file_t, linker_function_nameval_callback_t, void *); static int link_elf_reloc_local(linker_file_t, bool); static long link_elf_symtab_get(linker_file_t, const Elf_Sym **); static long link_elf_strtab_get(linker_file_t, caddr_t *); +#ifdef VIMAGE +static void link_elf_propagate_vnets(linker_file_t); +#endif static int elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *); static kobj_method_t link_elf_methods[] = { KOBJMETHOD(linker_lookup_symbol, link_elf_lookup_symbol), KOBJMETHOD(linker_lookup_debug_symbol, link_elf_lookup_debug_symbol), KOBJMETHOD(linker_symbol_values, link_elf_symbol_values), KOBJMETHOD(linker_debug_symbol_values, link_elf_debug_symbol_values), KOBJMETHOD(linker_search_symbol, link_elf_search_symbol), KOBJMETHOD(linker_unload, link_elf_unload_file), KOBJMETHOD(linker_load_file, link_elf_load_file), KOBJMETHOD(linker_link_preload, link_elf_link_preload), KOBJMETHOD(linker_link_preload_finish, link_elf_link_preload_finish), KOBJMETHOD(linker_lookup_set, link_elf_lookup_set), KOBJMETHOD(linker_each_function_name, link_elf_each_function_name), KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval), KOBJMETHOD(linker_ctf_get, link_elf_ctf_get), KOBJMETHOD(linker_symtab_get, link_elf_symtab_get), KOBJMETHOD(linker_strtab_get, link_elf_strtab_get), +#ifdef VIMAGE + KOBJMETHOD(linker_propagate_vnets, link_elf_propagate_vnets), +#endif KOBJMETHOD_END }; static struct linker_class link_elf_class = { #if ELF_TARG_CLASS == ELFCLASS32 "elf32_obj", #else "elf64_obj", #endif link_elf_methods, sizeof(struct elf_file) }; static bool link_elf_obj_leak_locals = true; SYSCTL_BOOL(_debug, OID_AUTO, link_elf_obj_leak_locals, CTLFLAG_RWTUN, &link_elf_obj_leak_locals, 0, "Allow local symbols to participate in global module symbol resolution"); static int relocate_file(elf_file_t ef); static void elf_obj_cleanup_globals_cache(elf_file_t); static void link_elf_error(const char *filename, const char *s) { if (filename == NULL) printf("kldload: %s\n", s); else printf("kldload: %s: %s\n", filename, s); } static void link_elf_init(void *arg) { linker_add_class(&link_elf_class); } SYSINIT(link_elf_obj, SI_SUB_KLD, SI_ORDER_SECOND, link_elf_init, NULL); static void link_elf_protect_range(elf_file_t ef, vm_offset_t start, vm_offset_t end, vm_prot_t prot) { int error __unused; KASSERT(start <= end && start >= (vm_offset_t)ef->address && end <= round_page((vm_offset_t)ef->address + ef->lf.size), ("link_elf_protect_range: invalid range %#jx-%#jx", (uintmax_t)start, (uintmax_t)end)); if (start == end) return; if (ef->preloaded) { #ifdef __amd64__ error = pmap_change_prot(start, end - start, prot); KASSERT(error == 0, ("link_elf_protect_range: pmap_change_prot() returned %d", error)); #endif return; } error = vm_map_protect(kernel_map, start, end, prot, 0, VM_MAP_PROTECT_SET_PROT); KASSERT(error == KERN_SUCCESS, ("link_elf_protect_range: vm_map_protect() returned %d", error)); } /* * Restrict permissions on linker file memory based on section flags. * Sections need not be page-aligned, so overlap within a page is possible. */ static void link_elf_protect(elf_file_t ef) { vm_offset_t end, segend, segstart, start; vm_prot_t gapprot, prot, segprot; int i; /* * If the file was preloaded, the last page may contain other preloaded * data which may need to be writeable. ELF files are always * page-aligned, but other preloaded data, such as entropy or CPU * microcode may be loaded with a smaller alignment. */ gapprot = ef->preloaded ? VM_PROT_RW : VM_PROT_READ; start = end = (vm_offset_t)ef->address; prot = VM_PROT_READ; for (i = 0; i < ef->nprogtab; i++) { /* * VNET and DPCPU sections have their memory allocated by their * respective subsystems. */ if (ef->progtab[i].name != NULL && ( #ifdef VIMAGE strcmp(ef->progtab[i].name, VNET_SETNAME) == 0 || #endif strcmp(ef->progtab[i].name, DPCPU_SETNAME) == 0)) continue; segstart = trunc_page((vm_offset_t)ef->progtab[i].addr); segend = round_page((vm_offset_t)ef->progtab[i].addr + ef->progtab[i].size); segprot = VM_PROT_READ; if ((ef->progtab[i].flags & SHF_WRITE) != 0) segprot |= VM_PROT_WRITE; if ((ef->progtab[i].flags & SHF_EXECINSTR) != 0) segprot |= VM_PROT_EXECUTE; if (end <= segstart) { /* * Case 1: there is no overlap between the previous * segment and this one. Apply protections to the * previous segment, and protect the gap between the * previous and current segments, if any. */ link_elf_protect_range(ef, start, end, prot); link_elf_protect_range(ef, end, segstart, gapprot); start = segstart; end = segend; prot = segprot; } else if (start < segstart && end == segend) { /* * Case 2: the current segment is a subrange of the * previous segment. Apply protections to the * non-overlapping portion of the previous segment. */ link_elf_protect_range(ef, start, segstart, prot); start = segstart; prot |= segprot; } else if (end < segend) { /* * Case 3: there is partial overlap between the previous * and current segments. Apply protections to the * non-overlapping portion of the previous segment, and * then the overlap, which must use the union of the two * segments' protections. */ link_elf_protect_range(ef, start, segstart, prot); link_elf_protect_range(ef, segstart, end, prot | segprot); start = end; end = segend; prot = segprot; } else { /* * Case 4: the two segments reside in the same page. */ prot |= segprot; } } /* * Fix up the last unprotected segment and trailing data. */ link_elf_protect_range(ef, start, end, prot); link_elf_protect_range(ef, end, round_page((vm_offset_t)ef->address + ef->lf.size), gapprot); } static int link_elf_link_preload(linker_class_t cls, const char *filename, linker_file_t *result) { Elf_Ehdr *hdr; Elf_Shdr *shdr; Elf_Sym *es; void *modptr, *baseptr, *sizeptr; char *type; elf_file_t ef; linker_file_t lf; Elf_Addr off; int error, i, j, pb, ra, rl, shstrindex, symstrindex, symtabindex; /* Look to see if we have the file preloaded */ modptr = preload_search_by_name(filename); if (modptr == NULL) return ENOENT; type = (char *)preload_search_info(modptr, MODINFO_TYPE); baseptr = preload_search_info(modptr, MODINFO_ADDR); sizeptr = preload_search_info(modptr, MODINFO_SIZE); hdr = (Elf_Ehdr *)preload_search_info(modptr, MODINFO_METADATA | MODINFOMD_ELFHDR); shdr = (Elf_Shdr *)preload_search_info(modptr, MODINFO_METADATA | MODINFOMD_SHDR); if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE) " obj module") != 0 && strcmp(type, "elf obj module") != 0)) { return (EFTYPE); } if (baseptr == NULL || sizeptr == NULL || hdr == NULL || shdr == NULL) return (EINVAL); lf = linker_make_file(filename, &link_elf_class); if (lf == NULL) return (ENOMEM); ef = (elf_file_t)lf; ef->preloaded = 1; ef->address = *(caddr_t *)baseptr; lf->address = *(caddr_t *)baseptr; lf->size = *(size_t *)sizeptr; if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA || hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_version != EV_CURRENT || hdr->e_type != ET_REL || hdr->e_machine != ELF_TARG_MACH) { error = EFTYPE; goto out; } ef->e_shdr = shdr; /* Scan the section header for information and table sizing. */ symtabindex = -1; symstrindex = -1; for (i = 0; i < hdr->e_shnum; i++) { switch (shdr[i].sh_type) { case SHT_PROGBITS: case SHT_NOBITS: #ifdef __amd64__ case SHT_X86_64_UNWIND: #endif case SHT_INIT_ARRAY: case SHT_FINI_ARRAY: /* Ignore sections not loaded by the loader. */ if (shdr[i].sh_addr == 0) break; ef->nprogtab++; break; case SHT_SYMTAB: symtabindex = i; symstrindex = shdr[i].sh_link; break; case SHT_REL: /* * Ignore relocation tables for sections not * loaded by the loader. */ if (shdr[shdr[i].sh_info].sh_addr == 0) break; ef->nreltab++; break; case SHT_RELA: if (shdr[shdr[i].sh_info].sh_addr == 0) break; ef->nrelatab++; break; } } shstrindex = hdr->e_shstrndx; if (ef->nprogtab == 0 || symstrindex < 0 || symstrindex >= hdr->e_shnum || shdr[symstrindex].sh_type != SHT_STRTAB || shstrindex == 0 || shstrindex >= hdr->e_shnum || shdr[shstrindex].sh_type != SHT_STRTAB) { printf("%s: bad/missing section headers\n", filename); error = ENOEXEC; goto out; } /* Allocate space for tracking the load chunks */ if (ef->nprogtab != 0) ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab), M_LINKER, M_WAITOK | M_ZERO); if (ef->nreltab != 0) ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab), M_LINKER, M_WAITOK | M_ZERO); if (ef->nrelatab != 0) ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab), M_LINKER, M_WAITOK | M_ZERO); if ((ef->nprogtab != 0 && ef->progtab == NULL) || (ef->nreltab != 0 && ef->reltab == NULL) || (ef->nrelatab != 0 && ef->relatab == NULL)) { error = ENOMEM; goto out; } /* XXX, relocate the sh_addr fields saved by the loader. */ off = 0; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_addr != 0 && (off == 0 || shdr[i].sh_addr < off)) off = shdr[i].sh_addr; } for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_addr != 0) shdr[i].sh_addr = shdr[i].sh_addr - off + (Elf_Addr)ef->address; } ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym); ef->ddbsymtab = (Elf_Sym *)shdr[symtabindex].sh_addr; ef->ddbstrcnt = shdr[symstrindex].sh_size; ef->ddbstrtab = (char *)shdr[symstrindex].sh_addr; ef->shstrcnt = shdr[shstrindex].sh_size; ef->shstrtab = (char *)shdr[shstrindex].sh_addr; /* Now fill out progtab and the relocation tables. */ pb = 0; rl = 0; ra = 0; for (i = 0; i < hdr->e_shnum; i++) { switch (shdr[i].sh_type) { case SHT_PROGBITS: case SHT_NOBITS: #ifdef __amd64__ case SHT_X86_64_UNWIND: #endif case SHT_INIT_ARRAY: case SHT_FINI_ARRAY: if (shdr[i].sh_addr == 0) break; ef->progtab[pb].addr = (void *)shdr[i].sh_addr; if (shdr[i].sh_type == SHT_PROGBITS) ef->progtab[pb].name = "<>"; #ifdef __amd64__ else if (shdr[i].sh_type == SHT_X86_64_UNWIND) ef->progtab[pb].name = "<>"; #endif else if (shdr[i].sh_type == SHT_INIT_ARRAY) ef->progtab[pb].name = "<>"; else if (shdr[i].sh_type == SHT_FINI_ARRAY) ef->progtab[pb].name = "<>"; else ef->progtab[pb].name = "<>"; ef->progtab[pb].size = shdr[i].sh_size; ef->progtab[pb].flags = shdr[i].sh_flags; ef->progtab[pb].sec = i; if (ef->shstrtab && shdr[i].sh_name != 0) ef->progtab[pb].name = ef->shstrtab + shdr[i].sh_name; if (ef->progtab[pb].name != NULL && !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) { void *dpcpu; dpcpu = dpcpu_alloc(shdr[i].sh_size); if (dpcpu == NULL) { printf("%s: pcpu module space is out " "of space; cannot allocate %#jx " "for %s\n", __func__, (uintmax_t)shdr[i].sh_size, filename); error = ENOSPC; goto out; } memcpy(dpcpu, ef->progtab[pb].addr, ef->progtab[pb].size); dpcpu_copy(dpcpu, shdr[i].sh_size); ef->progtab[pb].addr = dpcpu; #ifdef VIMAGE } else if (ef->progtab[pb].name != NULL && !strcmp(ef->progtab[pb].name, VNET_SETNAME)) { void *vnet_data; vnet_data = vnet_data_alloc(shdr[i].sh_size); if (vnet_data == NULL) { printf("%s: vnet module space is out " "of space; cannot allocate %#jx " "for %s\n", __func__, (uintmax_t)shdr[i].sh_size, filename); error = ENOSPC; goto out; } memcpy(vnet_data, ef->progtab[pb].addr, ef->progtab[pb].size); vnet_data_copy(vnet_data, shdr[i].sh_size); ef->progtab[pb].addr = vnet_data; #endif } else if ((ef->progtab[pb].name != NULL && strcmp(ef->progtab[pb].name, ".ctors") == 0) || shdr[i].sh_type == SHT_INIT_ARRAY) { if (lf->ctors_addr != 0) { printf( "%s: multiple ctor sections in %s\n", __func__, filename); } else { lf->ctors_addr = ef->progtab[pb].addr; lf->ctors_size = shdr[i].sh_size; } } else if ((ef->progtab[pb].name != NULL && strcmp(ef->progtab[pb].name, ".dtors") == 0) || shdr[i].sh_type == SHT_FINI_ARRAY) { if (lf->dtors_addr != 0) { printf( "%s: multiple dtor sections in %s\n", __func__, filename); } else { lf->dtors_addr = ef->progtab[pb].addr; lf->dtors_size = shdr[i].sh_size; } } /* Update all symbol values with the offset. */ for (j = 0; j < ef->ddbsymcnt; j++) { es = &ef->ddbsymtab[j]; if (es->st_shndx != i) continue; es->st_value += (Elf_Addr)ef->progtab[pb].addr; } pb++; break; case SHT_REL: if (shdr[shdr[i].sh_info].sh_addr == 0) break; ef->reltab[rl].rel = (Elf_Rel *)shdr[i].sh_addr; ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel); ef->reltab[rl].sec = shdr[i].sh_info; rl++; break; case SHT_RELA: if (shdr[shdr[i].sh_info].sh_addr == 0) break; ef->relatab[ra].rela = (Elf_Rela *)shdr[i].sh_addr; ef->relatab[ra].nrela = shdr[i].sh_size / sizeof(Elf_Rela); ef->relatab[ra].sec = shdr[i].sh_info; ra++; break; } } if (pb != ef->nprogtab) { printf("%s: lost progbits\n", filename); error = ENOEXEC; goto out; } if (rl != ef->nreltab) { printf("%s: lost reltab\n", filename); error = ENOEXEC; goto out; } if (ra != ef->nrelatab) { printf("%s: lost relatab\n", filename); error = ENOEXEC; goto out; } /* * The file needs to be writeable and executable while applying * relocations. Mapping protections are applied once relocation * processing is complete. */ link_elf_protect_range(ef, (vm_offset_t)ef->address, round_page((vm_offset_t)ef->address + ef->lf.size), VM_PROT_ALL); /* Local intra-module relocations */ error = link_elf_reloc_local(lf, false); if (error != 0) goto out; *result = lf; return (0); out: /* preload not done this way */ linker_file_unload(lf, LINKER_UNLOAD_FORCE); return (error); } static void link_elf_invoke_cbs(caddr_t addr, size_t size) { void (**ctor)(void); size_t i, cnt; if (addr == NULL || size == 0) return; cnt = size / sizeof(*ctor); ctor = (void *)addr; for (i = 0; i < cnt; i++) { if (ctor[i] != NULL) (*ctor[i])(); } } static int link_elf_link_preload_finish(linker_file_t lf) { elf_file_t ef; int error; ef = (elf_file_t)lf; error = relocate_file(ef); if (error) return (error); /* Notify MD code that a module is being loaded. */ error = elf_cpu_load_file(lf); if (error) return (error); #if defined(__i386__) || defined(__amd64__) /* Now ifuncs. */ error = link_elf_reloc_local(lf, true); if (error != 0) return (error); #endif /* Apply protections now that relocation processing is complete. */ link_elf_protect(ef); link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size); return (0); } static int link_elf_load_file(linker_class_t cls, const char *filename, linker_file_t *result) { struct nameidata *nd; struct thread *td = curthread; /* XXX */ Elf_Ehdr *hdr; Elf_Shdr *shdr; Elf_Sym *es; int nbytes, i, j; vm_offset_t mapbase; size_t mapsize; int error = 0; ssize_t resid; int flags; elf_file_t ef; linker_file_t lf; int symtabindex; int symstrindex; int shstrindex; int nsym; int pb, rl, ra; int alignmask; shdr = NULL; lf = NULL; mapsize = 0; hdr = NULL; nd = malloc(sizeof(struct nameidata), M_TEMP, M_WAITOK); NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename); flags = FREAD; error = vn_open(nd, &flags, 0, NULL); if (error) { free(nd, M_TEMP); return error; } NDFREE_PNBUF(nd); if (nd->ni_vp->v_type != VREG) { error = ENOEXEC; goto out; } #ifdef MAC error = mac_kld_check_load(td->td_ucred, nd->ni_vp); if (error) { goto out; } #endif /* Read the elf header from the file. */ hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)hdr, sizeof(*hdr), 0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; if (resid != 0){ error = ENOEXEC; goto out; } if (!IS_ELF(*hdr)) { error = ENOEXEC; goto out; } if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) { link_elf_error(filename, "Unsupported file layout"); error = ENOEXEC; goto out; } if (hdr->e_ident[EI_VERSION] != EV_CURRENT || hdr->e_version != EV_CURRENT) { link_elf_error(filename, "Unsupported file version"); error = ENOEXEC; goto out; } if (hdr->e_type != ET_REL) { error = ENOSYS; goto out; } if (hdr->e_machine != ELF_TARG_MACH) { link_elf_error(filename, "Unsupported machine"); error = ENOEXEC; goto out; } lf = linker_make_file(filename, &link_elf_class); if (!lf) { error = ENOMEM; goto out; } ef = (elf_file_t) lf; ef->nprogtab = 0; ef->e_shdr = 0; ef->nreltab = 0; ef->nrelatab = 0; /* Allocate and read in the section header */ nbytes = hdr->e_shnum * hdr->e_shentsize; if (nbytes == 0 || hdr->e_shoff == 0 || hdr->e_shentsize != sizeof(Elf_Shdr)) { error = ENOEXEC; goto out; } shdr = malloc(nbytes, M_LINKER, M_WAITOK); ef->e_shdr = shdr; error = vn_rdwr(UIO_READ, nd->ni_vp, (caddr_t)shdr, nbytes, hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; if (resid) { error = ENOEXEC; goto out; } /* Scan the section header for information and table sizing. */ nsym = 0; symtabindex = -1; symstrindex = -1; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_size == 0) continue; switch (shdr[i].sh_type) { case SHT_PROGBITS: case SHT_NOBITS: #ifdef __amd64__ case SHT_X86_64_UNWIND: #endif case SHT_INIT_ARRAY: case SHT_FINI_ARRAY: if ((shdr[i].sh_flags & SHF_ALLOC) == 0) break; ef->nprogtab++; break; case SHT_SYMTAB: nsym++; symtabindex = i; symstrindex = shdr[i].sh_link; break; case SHT_REL: /* * Ignore relocation tables for unallocated * sections. */ if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0) break; ef->nreltab++; break; case SHT_RELA: if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0) break; ef->nrelatab++; break; case SHT_STRTAB: break; } } if (ef->nprogtab == 0) { link_elf_error(filename, "file has no contents"); error = ENOEXEC; goto out; } if (nsym != 1) { /* Only allow one symbol table for now */ link_elf_error(filename, "file must have exactly one symbol table"); error = ENOEXEC; goto out; } if (symstrindex < 0 || symstrindex > hdr->e_shnum || shdr[symstrindex].sh_type != SHT_STRTAB) { link_elf_error(filename, "file has invalid symbol strings"); error = ENOEXEC; goto out; } /* Allocate space for tracking the load chunks */ if (ef->nprogtab != 0) ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab), M_LINKER, M_WAITOK | M_ZERO); if (ef->nreltab != 0) ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab), M_LINKER, M_WAITOK | M_ZERO); if (ef->nrelatab != 0) ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab), M_LINKER, M_WAITOK | M_ZERO); if (symtabindex == -1) { link_elf_error(filename, "lost symbol table index"); error = ENOEXEC; goto out; } /* Allocate space for and load the symbol table */ ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym); ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->ddbsymtab, shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; if (resid != 0){ error = EINVAL; goto out; } /* Allocate space for and load the symbol strings */ ef->ddbstrcnt = shdr[symstrindex].sh_size; ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, nd->ni_vp, ef->ddbstrtab, shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; if (resid != 0){ error = EINVAL; goto out; } /* Do we have a string table for the section names? */ shstrindex = -1; if (hdr->e_shstrndx != 0 && shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) { shstrindex = hdr->e_shstrndx; ef->shstrcnt = shdr[shstrindex].sh_size; ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER, M_WAITOK); error = vn_rdwr(UIO_READ, nd->ni_vp, ef->shstrtab, shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; if (resid != 0){ error = EINVAL; goto out; } } /* Size up code/data(progbits) and bss(nobits). */ alignmask = 0; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_size == 0) continue; switch (shdr[i].sh_type) { case SHT_PROGBITS: case SHT_NOBITS: #ifdef __amd64__ case SHT_X86_64_UNWIND: #endif case SHT_INIT_ARRAY: case SHT_FINI_ARRAY: if ((shdr[i].sh_flags & SHF_ALLOC) == 0) break; alignmask = shdr[i].sh_addralign - 1; mapsize += alignmask; mapsize &= ~alignmask; mapsize += shdr[i].sh_size; break; } } /* * We know how much space we need for the text/data/bss/etc. * This stuff needs to be in a single chunk so that profiling etc * can get the bounds and gdb can associate offsets with modules */ ef->object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(mapsize), VM_PROT_ALL, 0, thread0.td_ucred); if (ef->object == NULL) { error = ENOMEM; goto out; } #if VM_NRESERVLEVEL > 0 vm_object_color(ef->object, 0); #endif /* * In order to satisfy amd64's architectural requirements on the * location of code and data in the kernel's address space, request a * mapping that is above the kernel. * * Protections will be restricted once relocations are applied. */ #ifdef __amd64__ mapbase = KERNBASE; #else mapbase = VM_MIN_KERNEL_ADDRESS; #endif error = vm_map_find(kernel_map, ef->object, 0, &mapbase, round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) { vm_object_deallocate(ef->object); ef->object = NULL; error = ENOMEM; goto out; } /* Wire the pages */ error = vm_map_wire(kernel_map, mapbase, mapbase + round_page(mapsize), VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { error = ENOMEM; goto out; } /* Inform the kld system about the situation */ lf->address = ef->address = (caddr_t)mapbase; lf->size = mapsize; /* * Now load code/data(progbits), zero bss(nobits), allocate space for * and load relocs */ pb = 0; rl = 0; ra = 0; alignmask = 0; for (i = 0; i < hdr->e_shnum; i++) { if (shdr[i].sh_size == 0) continue; switch (shdr[i].sh_type) { case SHT_PROGBITS: case SHT_NOBITS: #ifdef __amd64__ case SHT_X86_64_UNWIND: #endif case SHT_INIT_ARRAY: case SHT_FINI_ARRAY: if ((shdr[i].sh_flags & SHF_ALLOC) == 0) break; alignmask = shdr[i].sh_addralign - 1; mapbase += alignmask; mapbase &= ~alignmask; if (ef->shstrtab != NULL && shdr[i].sh_name != 0) { ef->progtab[pb].name = ef->shstrtab + shdr[i].sh_name; if (!strcmp(ef->progtab[pb].name, ".ctors") || shdr[i].sh_type == SHT_INIT_ARRAY) { if (lf->ctors_addr != 0) { printf( "%s: multiple ctor sections in %s\n", __func__, filename); } else { lf->ctors_addr = (caddr_t)mapbase; lf->ctors_size = shdr[i].sh_size; } } else if (!strcmp(ef->progtab[pb].name, ".dtors") || shdr[i].sh_type == SHT_FINI_ARRAY) { if (lf->dtors_addr != 0) { printf( "%s: multiple dtor sections in %s\n", __func__, filename); } else { lf->dtors_addr = (caddr_t)mapbase; lf->dtors_size = shdr[i].sh_size; } } } else if (shdr[i].sh_type == SHT_PROGBITS) ef->progtab[pb].name = "<>"; #ifdef __amd64__ else if (shdr[i].sh_type == SHT_X86_64_UNWIND) ef->progtab[pb].name = "<>"; #endif else ef->progtab[pb].name = "<>"; if (ef->progtab[pb].name != NULL && !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) { ef->progtab[pb].addr = dpcpu_alloc(shdr[i].sh_size); if (ef->progtab[pb].addr == NULL) { printf("%s: pcpu module space is out " "of space; cannot allocate %#jx " "for %s\n", __func__, (uintmax_t)shdr[i].sh_size, filename); } } #ifdef VIMAGE else if (ef->progtab[pb].name != NULL && !strcmp(ef->progtab[pb].name, VNET_SETNAME)) { ef->progtab[pb].addr = vnet_data_alloc(shdr[i].sh_size); if (ef->progtab[pb].addr == NULL) { printf("%s: vnet module space is out " "of space; cannot allocate %#jx " "for %s\n", __func__, (uintmax_t)shdr[i].sh_size, filename); } } #endif else ef->progtab[pb].addr = (void *)(uintptr_t)mapbase; if (ef->progtab[pb].addr == NULL) { error = ENOSPC; goto out; } ef->progtab[pb].size = shdr[i].sh_size; ef->progtab[pb].flags = shdr[i].sh_flags; ef->progtab[pb].sec = i; if (shdr[i].sh_type == SHT_PROGBITS #ifdef __amd64__ || shdr[i].sh_type == SHT_X86_64_UNWIND #endif ) { error = vn_rdwr(UIO_READ, nd->ni_vp, ef->progtab[pb].addr, shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; if (resid != 0){ error = EINVAL; goto out; } /* Initialize the per-cpu or vnet area. */ if (ef->progtab[pb].addr != (void *)mapbase && !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) dpcpu_copy(ef->progtab[pb].addr, shdr[i].sh_size); #ifdef VIMAGE else if (ef->progtab[pb].addr != (void *)mapbase && !strcmp(ef->progtab[pb].name, VNET_SETNAME)) vnet_data_copy(ef->progtab[pb].addr, shdr[i].sh_size); #endif } else bzero(ef->progtab[pb].addr, shdr[i].sh_size); /* Update all symbol values with the offset. */ for (j = 0; j < ef->ddbsymcnt; j++) { es = &ef->ddbsymtab[j]; if (es->st_shndx != i) continue; es->st_value += (Elf_Addr)ef->progtab[pb].addr; } mapbase += shdr[i].sh_size; pb++; break; case SHT_REL: if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0) break; ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER, M_WAITOK); ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel); ef->reltab[rl].sec = shdr[i].sh_info; error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->reltab[rl].rel, shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; if (resid != 0){ error = EINVAL; goto out; } rl++; break; case SHT_RELA: if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0) break; ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER, M_WAITOK); ef->relatab[ra].nrela = shdr[i].sh_size / sizeof(Elf_Rela); ef->relatab[ra].sec = shdr[i].sh_info; error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->relatab[ra].rela, shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid, td); if (error) goto out; if (resid != 0){ error = EINVAL; goto out; } ra++; break; } } if (pb != ef->nprogtab) { link_elf_error(filename, "lost progbits"); error = ENOEXEC; goto out; } if (rl != ef->nreltab) { link_elf_error(filename, "lost reltab"); error = ENOEXEC; goto out; } if (ra != ef->nrelatab) { link_elf_error(filename, "lost relatab"); error = ENOEXEC; goto out; } if (mapbase != (vm_offset_t)ef->address + mapsize) { printf( "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n", filename != NULL ? filename : "", (u_long)mapbase, ef->address, (u_long)mapsize, (u_long)(vm_offset_t)ef->address + mapsize); error = ENOMEM; goto out; } /* Local intra-module relocations */ error = link_elf_reloc_local(lf, false); if (error != 0) goto out; /* Pull in dependencies */ VOP_UNLOCK(nd->ni_vp); error = linker_load_dependencies(lf); vn_lock(nd->ni_vp, LK_EXCLUSIVE | LK_RETRY); if (error) goto out; /* External relocations */ error = relocate_file(ef); if (error) goto out; /* Notify MD code that a module is being loaded. */ error = elf_cpu_load_file(lf); if (error) goto out; #if defined(__i386__) || defined(__amd64__) /* Now ifuncs. */ error = link_elf_reloc_local(lf, true); if (error != 0) goto out; #endif link_elf_protect(ef); link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size); *result = lf; out: VOP_UNLOCK(nd->ni_vp); vn_close(nd->ni_vp, FREAD, td->td_ucred, td); free(nd, M_TEMP); if (error && lf) linker_file_unload(lf, LINKER_UNLOAD_FORCE); free(hdr, M_LINKER); return error; } static void link_elf_unload_file(linker_file_t file) { elf_file_t ef = (elf_file_t) file; u_int i; link_elf_invoke_cbs(file->dtors_addr, file->dtors_size); /* Notify MD code that a module is being unloaded. */ elf_cpu_unload_file(file); if (ef->progtab) { for (i = 0; i < ef->nprogtab; i++) { if (ef->progtab[i].size == 0) continue; if (ef->progtab[i].name == NULL) continue; if (!strcmp(ef->progtab[i].name, DPCPU_SETNAME)) dpcpu_free(ef->progtab[i].addr, ef->progtab[i].size); #ifdef VIMAGE else if (!strcmp(ef->progtab[i].name, VNET_SETNAME)) vnet_data_free(ef->progtab[i].addr, ef->progtab[i].size); #endif } } if (ef->preloaded) { free(ef->reltab, M_LINKER); free(ef->relatab, M_LINKER); free(ef->progtab, M_LINKER); free(ef->ctftab, M_LINKER); free(ef->ctfoff, M_LINKER); free(ef->typoff, M_LINKER); if (file->pathname != NULL) preload_delete_name(file->pathname); return; } for (i = 0; i < ef->nreltab; i++) free(ef->reltab[i].rel, M_LINKER); for (i = 0; i < ef->nrelatab; i++) free(ef->relatab[i].rela, M_LINKER); free(ef->reltab, M_LINKER); free(ef->relatab, M_LINKER); free(ef->progtab, M_LINKER); if (ef->object != NULL) vm_map_remove(kernel_map, (vm_offset_t)ef->address, (vm_offset_t)ef->address + ptoa(ef->object->size)); free(ef->e_shdr, M_LINKER); free(ef->ddbsymtab, M_LINKER); free(ef->ddbstrtab, M_LINKER); free(ef->shstrtab, M_LINKER); free(ef->ctftab, M_LINKER); free(ef->ctfoff, M_LINKER); free(ef->typoff, M_LINKER); } static const char * symbol_name(elf_file_t ef, Elf_Size r_info) { const Elf_Sym *ref; if (ELF_R_SYM(r_info)) { ref = ef->ddbsymtab + ELF_R_SYM(r_info); return ef->ddbstrtab + ref->st_name; } else return NULL; } static Elf_Addr findbase(elf_file_t ef, int sec) { int i; Elf_Addr base = 0; for (i = 0; i < ef->nprogtab; i++) { if (sec == ef->progtab[i].sec) { base = (Elf_Addr)ef->progtab[i].addr; break; } } return base; } static int relocate_file1(elf_file_t ef, bool ifuncs) { const Elf_Rel *rellim; const Elf_Rel *rel; const Elf_Rela *relalim; const Elf_Rela *rela; const char *symname; const Elf_Sym *sym; int i; Elf_Size symidx; Elf_Addr base; /* Perform relocations without addend if there are any: */ for (i = 0; i < ef->nreltab; i++) { rel = ef->reltab[i].rel; if (rel == NULL) { link_elf_error(ef->lf.filename, "lost a reltab!"); return (ENOEXEC); } rellim = rel + ef->reltab[i].nrel; base = findbase(ef, ef->reltab[i].sec); if (base == 0) { link_elf_error(ef->lf.filename, "lost base for reltab"); return (ENOEXEC); } for ( ; rel < rellim; rel++) { symidx = ELF_R_SYM(rel->r_info); if (symidx >= ef->ddbsymcnt) continue; sym = ef->ddbsymtab + symidx; /* Local relocs are already done */ if (ELF_ST_BIND(sym->st_info) == STB_LOCAL) continue; if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC || elf_is_ifunc_reloc(rel->r_info)) != ifuncs) continue; if (elf_reloc(&ef->lf, base, rel, ELF_RELOC_REL, elf_obj_lookup)) { symname = symbol_name(ef, rel->r_info); printf("link_elf_obj: symbol %s undefined\n", symname); return (ENOENT); } } } /* Perform relocations with addend if there are any: */ for (i = 0; i < ef->nrelatab; i++) { rela = ef->relatab[i].rela; if (rela == NULL) { link_elf_error(ef->lf.filename, "lost a relatab!"); return (ENOEXEC); } relalim = rela + ef->relatab[i].nrela; base = findbase(ef, ef->relatab[i].sec); if (base == 0) { link_elf_error(ef->lf.filename, "lost base for relatab"); return (ENOEXEC); } for ( ; rela < relalim; rela++) { symidx = ELF_R_SYM(rela->r_info); if (symidx >= ef->ddbsymcnt) continue; sym = ef->ddbsymtab + symidx; /* Local relocs are already done */ if (ELF_ST_BIND(sym->st_info) == STB_LOCAL) continue; if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC || elf_is_ifunc_reloc(rela->r_info)) != ifuncs) continue; if (elf_reloc(&ef->lf, base, rela, ELF_RELOC_RELA, elf_obj_lookup)) { symname = symbol_name(ef, rela->r_info); printf("link_elf_obj: symbol %s undefined\n", symname); return (ENOENT); } } } /* * Only clean SHN_FBSD_CACHED for successful return. If we * modified symbol table for the object but found an * unresolved symbol, there is no reason to roll back. */ elf_obj_cleanup_globals_cache(ef); return (0); } static int relocate_file(elf_file_t ef) { int error; error = relocate_file1(ef, false); if (error == 0) error = relocate_file1(ef, true); return (error); } static int link_elf_lookup_symbol1(linker_file_t lf, const char *name, c_linker_sym_t *sym, bool see_local) { elf_file_t ef = (elf_file_t)lf; const Elf_Sym *symp; const char *strp; int i; for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { strp = ef->ddbstrtab + symp->st_name; if (symp->st_shndx != SHN_UNDEF && strcmp(name, strp) == 0) { if (see_local || ELF_ST_BIND(symp->st_info) == STB_GLOBAL) { *sym = (c_linker_sym_t) symp; return (0); } return (ENOENT); } } return (ENOENT); } static int link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym) { return (link_elf_lookup_symbol1(lf, name, sym, link_elf_obj_leak_locals)); } static int link_elf_lookup_debug_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym) { return (link_elf_lookup_symbol1(lf, name, sym, true)); } static int link_elf_symbol_values1(linker_file_t lf, c_linker_sym_t sym, linker_symval_t *symval, bool see_local) { elf_file_t ef; const Elf_Sym *es; caddr_t val; ef = (elf_file_t) lf; es = (const Elf_Sym*) sym; val = (caddr_t)es->st_value; if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) { if (!see_local && ELF_ST_BIND(es->st_info) == STB_LOCAL) return (ENOENT); symval->name = ef->ddbstrtab + es->st_name; val = (caddr_t)es->st_value; if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC) val = ((caddr_t (*)(void))val)(); symval->value = val; symval->size = es->st_size; return (0); } return (ENOENT); } static int link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym, linker_symval_t *symval) { return (link_elf_symbol_values1(lf, sym, symval, link_elf_obj_leak_locals)); } static int link_elf_debug_symbol_values(linker_file_t lf, c_linker_sym_t sym, linker_symval_t *symval) { return (link_elf_symbol_values1(lf, sym, symval, true)); } static int link_elf_search_symbol(linker_file_t lf, caddr_t value, c_linker_sym_t *sym, long *diffp) { elf_file_t ef = (elf_file_t)lf; u_long off = (uintptr_t)(void *)value; u_long diff = off; u_long st_value; const Elf_Sym *es; const Elf_Sym *best = NULL; int i; for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) { if (es->st_name == 0) continue; st_value = es->st_value; if (off >= st_value) { if (off - st_value < diff) { diff = off - st_value; best = es; if (diff == 0) break; } else if (off - st_value == diff) { best = es; } } } if (best == NULL) *diffp = off; else *diffp = diff; *sym = (c_linker_sym_t) best; return (0); } /* * Look up a linker set on an ELF system. */ static int link_elf_lookup_set(linker_file_t lf, const char *name, void ***startp, void ***stopp, int *countp) { elf_file_t ef = (elf_file_t)lf; void **start, **stop; int i, count; /* Relative to section number */ for (i = 0; i < ef->nprogtab; i++) { if ((strncmp(ef->progtab[i].name, "set_", 4) == 0) && strcmp(ef->progtab[i].name + 4, name) == 0) { start = (void **)ef->progtab[i].addr; stop = (void **)((char *)ef->progtab[i].addr + ef->progtab[i].size); count = stop - start; if (startp) *startp = start; if (stopp) *stopp = stop; if (countp) *countp = count; return (0); } } return (ESRCH); } static int link_elf_each_function_name(linker_file_t file, int (*callback)(const char *, void *), void *opaque) { elf_file_t ef = (elf_file_t)file; const Elf_Sym *symp; int i, error; /* Exhaustive search */ for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { if (symp->st_value != 0 && (ELF_ST_TYPE(symp->st_info) == STT_FUNC || ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) { error = callback(ef->ddbstrtab + symp->st_name, opaque); if (error) return (error); } } return (0); } static int link_elf_each_function_nameval(linker_file_t file, linker_function_nameval_callback_t callback, void *opaque) { linker_symval_t symval; elf_file_t ef = (elf_file_t)file; const Elf_Sym *symp; int i, error; /* Exhaustive search */ for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) { if (symp->st_value != 0 && (ELF_ST_TYPE(symp->st_info) == STT_FUNC || ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) { error = link_elf_debug_symbol_values(file, (c_linker_sym_t)symp, &symval); if (error == 0) error = callback(file, i, &symval, opaque); if (error != 0) return (error); } } return (0); } static void elf_obj_cleanup_globals_cache(elf_file_t ef) { Elf_Sym *sym; Elf_Size i; for (i = 0; i < ef->ddbsymcnt; i++) { sym = ef->ddbsymtab + i; if (sym->st_shndx == SHN_FBSD_CACHED) { sym->st_shndx = SHN_UNDEF; sym->st_value = 0; } } } /* * Symbol lookup function that can be used when the symbol index is known (ie * in relocations). It uses the symbol index instead of doing a fully fledged * hash table based lookup when such is valid. For example for local symbols. * This is not only more efficient, it's also more correct. It's not always * the case that the symbol can be found through the hash table. */ static int elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res) { elf_file_t ef = (elf_file_t)lf; Elf_Sym *sym; const char *symbol; Elf_Addr res1; /* Don't even try to lookup the symbol if the index is bogus. */ if (symidx >= ef->ddbsymcnt) { *res = 0; return (EINVAL); } sym = ef->ddbsymtab + symidx; /* Quick answer if there is a definition included. */ if (sym->st_shndx != SHN_UNDEF) { res1 = (Elf_Addr)sym->st_value; if (ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC) res1 = ((Elf_Addr (*)(void))res1)(); *res = res1; return (0); } /* If we get here, then it is undefined and needs a lookup. */ switch (ELF_ST_BIND(sym->st_info)) { case STB_LOCAL: /* Local, but undefined? huh? */ *res = 0; return (EINVAL); case STB_GLOBAL: case STB_WEAK: /* Relative to Data or Function name */ symbol = ef->ddbstrtab + sym->st_name; /* Force a lookup failure if the symbol name is bogus. */ if (*symbol == 0) { *res = 0; return (EINVAL); } res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps); /* * Cache global lookups during module relocation. The failure * case is particularly expensive for callers, who must scan * through the entire globals table doing strcmp(). Cache to * avoid doing such work repeatedly. * * After relocation is complete, undefined globals will be * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(), * above. */ if (res1 != 0) { sym->st_shndx = SHN_FBSD_CACHED; sym->st_value = res1; *res = res1; return (0); } else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { sym->st_value = 0; *res = 0; return (0); } return (EINVAL); default: return (EINVAL); } } static void link_elf_fix_link_set(elf_file_t ef) { static const char startn[] = "__start_"; static const char stopn[] = "__stop_"; Elf_Sym *sym; const char *sym_name, *linkset_name; Elf_Addr startp, stopp; Elf_Size symidx; int start, i; startp = stopp = 0; for (symidx = 1 /* zero entry is special */; symidx < ef->ddbsymcnt; symidx++) { sym = ef->ddbsymtab + symidx; if (sym->st_shndx != SHN_UNDEF) continue; sym_name = ef->ddbstrtab + sym->st_name; if (strncmp(sym_name, startn, sizeof(startn) - 1) == 0) { start = 1; linkset_name = sym_name + sizeof(startn) - 1; } else if (strncmp(sym_name, stopn, sizeof(stopn) - 1) == 0) { start = 0; linkset_name = sym_name + sizeof(stopn) - 1; } else continue; for (i = 0; i < ef->nprogtab; i++) { if (strcmp(ef->progtab[i].name, linkset_name) == 0) { startp = (Elf_Addr)ef->progtab[i].addr; stopp = (Elf_Addr)(startp + ef->progtab[i].size); break; } } if (i == ef->nprogtab) continue; sym->st_value = start ? startp : stopp; sym->st_shndx = i; } } static int link_elf_reloc_local(linker_file_t lf, bool ifuncs) { elf_file_t ef = (elf_file_t)lf; const Elf_Rel *rellim; const Elf_Rel *rel; const Elf_Rela *relalim; const Elf_Rela *rela; const Elf_Sym *sym; Elf_Addr base; int i; Elf_Size symidx; link_elf_fix_link_set(ef); /* Perform relocations without addend if there are any: */ for (i = 0; i < ef->nreltab; i++) { rel = ef->reltab[i].rel; if (rel == NULL) { link_elf_error(ef->lf.filename, "lost a reltab"); return (ENOEXEC); } rellim = rel + ef->reltab[i].nrel; base = findbase(ef, ef->reltab[i].sec); if (base == 0) { link_elf_error(ef->lf.filename, "lost base for reltab"); return (ENOEXEC); } for ( ; rel < rellim; rel++) { symidx = ELF_R_SYM(rel->r_info); if (symidx >= ef->ddbsymcnt) continue; sym = ef->ddbsymtab + symidx; /* Only do local relocs */ if (ELF_ST_BIND(sym->st_info) != STB_LOCAL) continue; if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC || elf_is_ifunc_reloc(rel->r_info)) != ifuncs) continue; if (elf_reloc_local(lf, base, rel, ELF_RELOC_REL, elf_obj_lookup) != 0) return (ENOEXEC); } } /* Perform relocations with addend if there are any: */ for (i = 0; i < ef->nrelatab; i++) { rela = ef->relatab[i].rela; if (rela == NULL) { link_elf_error(ef->lf.filename, "lost a relatab!"); return (ENOEXEC); } relalim = rela + ef->relatab[i].nrela; base = findbase(ef, ef->relatab[i].sec); if (base == 0) { link_elf_error(ef->lf.filename, "lost base for reltab"); return (ENOEXEC); } for ( ; rela < relalim; rela++) { symidx = ELF_R_SYM(rela->r_info); if (symidx >= ef->ddbsymcnt) continue; sym = ef->ddbsymtab + symidx; /* Only do local relocs */ if (ELF_ST_BIND(sym->st_info) != STB_LOCAL) continue; if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC || elf_is_ifunc_reloc(rela->r_info)) != ifuncs) continue; if (elf_reloc_local(lf, base, rela, ELF_RELOC_RELA, elf_obj_lookup) != 0) return (ENOEXEC); } } return (0); } static long link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab) { elf_file_t ef = (elf_file_t)lf; *symtab = ef->ddbsymtab; if (*symtab == NULL) return (0); return (ef->ddbsymcnt); } - + static long link_elf_strtab_get(linker_file_t lf, caddr_t *strtab) { elf_file_t ef = (elf_file_t)lf; *strtab = ef->ddbstrtab; if (*strtab == NULL) return (0); return (ef->ddbstrcnt); } + +#ifdef VIMAGE +static void +link_elf_propagate_vnets(linker_file_t lf) +{ + elf_file_t ef = (elf_file_t) lf; + + if (ef->progtab) { + for (int i = 0; i < ef->nprogtab; i++) { + if (ef->progtab[i].size == 0) + continue; + if (ef->progtab[i].name == NULL) + continue; + if (strcmp(ef->progtab[i].name, VNET_SETNAME) == 0) { + vnet_data_copy(ef->progtab[i].addr, + ef->progtab[i].size); + break; + } + } + } +} +#endif diff --git a/sys/kern/linker_if.m b/sys/kern/linker_if.m index 0722390d4e20..a50ed1ea84a3 100644 --- a/sys/kern/linker_if.m +++ b/sys/kern/linker_if.m @@ -1,156 +1,165 @@ #- # Copyright (c) 2000 Doug Rabson # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # #include INTERFACE linker; # # Lookup a symbol in the file's symbol table. If the symbol is not # found then return ENOENT, otherwise zero. # METHOD int lookup_symbol { linker_file_t file; const char* name; c_linker_sym_t* symp; }; METHOD int lookup_debug_symbol { linker_file_t file; const char* name; c_linker_sym_t* symp; }; METHOD int symbol_values { linker_file_t file; c_linker_sym_t sym; linker_symval_t* valp; }; METHOD int debug_symbol_values { linker_file_t file; c_linker_sym_t sym; linker_symval_t* valp; }; METHOD int search_symbol { linker_file_t file; caddr_t value; c_linker_sym_t* symp; long* diffp; }; # # Call the callback with each specified function defined in the file. # Stop and return the error if the callback returns an error. # METHOD int each_function_name { linker_file_t file; linker_function_name_callback_t callback; void* opaque; }; # # Call the callback with each specified function and it's value # defined in the file. # Stop and return the error if the callback returns an error. # METHOD int each_function_nameval { linker_file_t file; linker_function_nameval_callback_t callback; void* opaque; }; # # Search for a linker set in a file. Return a pointer to the first # entry (which is itself a pointer), and the number of entries. # "stop" points to the entry beyond the last valid entry. # If count, start or stop are NULL, they are not returned. # METHOD int lookup_set { linker_file_t file; const char* name; void*** start; void*** stop; int* count; }; # # Unload a file, releasing dependencies and freeing storage. # METHOD void unload { linker_file_t file; }; # # Load CTF data if necessary and if there is a .SUNW_ctf section # in the ELF file, returning info in the linker CTF structure. # METHOD int ctf_get { linker_file_t file; linker_ctf_t *lc; }; # # Get the symbol table, returning it in **symtab. Return the # number of symbols, otherwise zero. # METHOD long symtab_get { linker_file_t file; const Elf_Sym **symtab; }; # # Get the string table, returning it in *strtab. Return the # size (in bytes) of the string table, otherwise zero. # METHOD long strtab_get { linker_file_t file; caddr_t *strtab; }; # # Load a file, returning the new linker_file_t in *result. If # the class does not recognise the file type, zero should be # returned, without modifying *result. If the file is # recognised, the file should be loaded, *result set to the new # file and zero returned. If some other error is detected an # appropriate errno should be returned. # STATICMETHOD int load_file { linker_class_t cls; const char* filename; linker_file_t* result; }; STATICMETHOD int link_preload { linker_class_t cls; const char* filename; linker_file_t* result; }; METHOD int link_preload_finish { linker_file_t file; }; + +#ifdef VIMAGE +# +# Propagate system tunable values to all vnets. +# +METHOD void propagate_vnets { + linker_file_t file; +}; +#endif