diff --git a/stand/common/boot.c b/stand/common/boot.c index 2103f6dc240c..5ce6fe8a6760 100644 --- a/stand/common/boot.c +++ b/stand/common/boot.c @@ -1,426 +1,429 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Loading modules, booting the system */ #include #include #include #include #include "bootstrap.h" static int autoboot(int timeout, char *prompt); static char *getbootfile(int try); static int loadakernel(int try, int argc, char* argv[]); /* List of kernel names to try (may be overwritten by boot.config) XXX should move from here? */ static const char *default_bootfiles = "kernel"; static int autoboot_tried; /* * The user wants us to boot. */ COMMAND_SET(boot, "boot", "boot a file or loaded kernel", command_boot); static int command_boot(int argc, char *argv[]) { struct preloaded_file *fp; /* * See if the user has specified an explicit kernel to boot. */ if ((argc > 1) && (argv[1][0] != '-')) { /* XXX maybe we should discard everything and start again? */ if (file_findfile(NULL, NULL) != NULL) { snprintf(command_errbuf, sizeof(command_errbuf), "can't boot '%s', kernel module already loaded", argv[1]); return(CMD_ERROR); } /* find/load the kernel module */ if (mod_loadkld(argv[1], argc - 2, argv + 2) != 0) return(CMD_ERROR); /* we have consumed all arguments */ argc = 1; } /* * See if there is a kernel module already loaded */ if (file_findfile(NULL, NULL) == NULL) if (loadakernel(0, argc - 1, argv + 1)) /* we have consumed all arguments */ argc = 1; /* * Loaded anything yet? */ if ((fp = file_findfile(NULL, NULL)) == NULL) { command_errmsg = "no bootable kernel"; return(CMD_ERROR); } /* * If we were given arguments, discard any previous. * XXX should we merge arguments? Hard to DWIM. */ if (argc > 1) { if (fp->f_args != NULL) free(fp->f_args); fp->f_args = unargv(argc - 1, argv + 1); } /* Hook for platform-specific autoloading of modules */ if (archsw.arch_autoload() != 0) return(CMD_ERROR); #ifdef LOADER_VERIEXEC verify_pcr_export(); /* for measured boot */ #ifdef LOADER_VERIEXEC_PASS_MANIFEST pass_manifest_export_envs(); #endif #endif + /* Pass the tslog buffer to the kernel as a preloaded module. */ + tslog_publish(); + /* Call the exec handler from the loader matching the kernel */ file_formats[fp->f_loader]->l_exec(fp); return(CMD_ERROR); } /* * Autoboot after a delay */ COMMAND_SET(autoboot, "autoboot", "boot automatically after a delay", command_autoboot); static int command_autoboot(int argc, char *argv[]) { int howlong; char *cp, *prompt; prompt = NULL; howlong = -1; switch(argc) { case 3: prompt = argv[2]; /* FALLTHROUGH */ case 2: howlong = strtol(argv[1], &cp, 0); if (*cp != 0) { snprintf(command_errbuf, sizeof(command_errbuf), "bad delay '%s'", argv[1]); return(CMD_ERROR); } /* FALLTHROUGH */ case 1: return(autoboot(howlong, prompt)); } command_errmsg = "too many arguments"; return(CMD_ERROR); } /* * Called before we go interactive. If we think we can autoboot, and * we haven't tried already, try now. */ void autoboot_maybe() { char *cp; cp = getenv("autoboot_delay"); if ((autoboot_tried == 0) && ((cp == NULL) || strcasecmp(cp, "NO"))) autoboot(-1, NULL); /* try to boot automatically */ } static int autoboot(int timeout, char *prompt) { time_t when, otime, ntime; int c, yes; char *argv[2], *cp, *ep; char *kernelname; #ifdef BOOT_PROMPT_123 const char *seq = "123", *p = seq; #endif autoboot_tried = 1; if (timeout == -1) { timeout = 10; /* try to get a delay from the environment */ if ((cp = getenv("autoboot_delay"))) { timeout = strtol(cp, &ep, 0); if (cp == ep) timeout = 10; /* Unparseable? Set default! */ } } kernelname = getenv("kernelname"); if (kernelname == NULL) { argv[0] = NULL; loadakernel(0, 0, argv); kernelname = getenv("kernelname"); if (kernelname == NULL) { command_errmsg = "no valid kernel found"; return(CMD_ERROR); } } if (timeout >= 0) { otime = -1; ntime = time(NULL); when = ntime + timeout; /* when to boot */ yes = 0; #ifdef BOOT_PROMPT_123 printf("%s\n", (prompt == NULL) ? "Hit [Enter] to boot immediately, or " "1 2 3 sequence for command prompt." : prompt); #else printf("%s\n", (prompt == NULL) ? "Hit [Enter] to boot immediately, or any other key for command prompt." : prompt); #endif for (;;) { if (ischar()) { c = getchar(); #ifdef BOOT_PROMPT_123 if ((c == '\r') || (c == '\n')) { yes = 1; break; } else if (c != *p++) p = seq; if (*p == 0) break; #else if ((c == '\r') || (c == '\n')) yes = 1; break; #endif } ntime = time(NULL); if (ntime >= when) { yes = 1; break; } if (ntime != otime) { printf("\rBooting [%s] in %d second%s... ", kernelname, (int)(when - ntime), (when-ntime)==1?"":"s"); otime = ntime; } } } else { yes = 1; } if (yes) printf("\rBooting [%s]... ", kernelname); putchar('\n'); if (yes) { argv[0] = "boot"; argv[1] = NULL; return(command_boot(1, argv)); } return(CMD_OK); } /* * Scrounge for the name of the (try)'th file we will try to boot. */ static char * getbootfile(int try) { static char *name = NULL; const char *spec, *ep; size_t len; /* we use dynamic storage */ if (name != NULL) { free(name); name = NULL; } /* * Try $bootfile, then try our builtin default */ if ((spec = getenv("bootfile")) == NULL) spec = default_bootfiles; while ((try > 0) && (spec != NULL)) { spec = strchr(spec, ';'); if (spec) spec++; /* skip over the leading ';' */ try--; } if (spec != NULL) { if ((ep = strchr(spec, ';')) != NULL) { len = ep - spec; } else { len = strlen(spec); } name = malloc(len + 1); strncpy(name, spec, len); name[len] = 0; } if (name && name[0] == 0) { free(name); name = NULL; } return(name); } /* * Try to find the /etc/fstab file on the filesystem (rootdev), * which should be be the root filesystem, and parse it to find * out what the kernel ought to think the root filesystem is. * * If we're successful, set vfs.root.mountfrom to : * so that the kernel can tell both which VFS and which node to use * to mount the device. If this variable's already set, don't * overwrite it. */ int getrootmount(char *rootdev) { char lbuf[128], *cp, *ep, *dev, *fstyp, *options; int fd, error; if (getenv("vfs.root.mountfrom") != NULL) return(0); error = 1; sprintf(lbuf, "%s/etc/fstab", rootdev); if ((fd = open(lbuf, O_RDONLY)) < 0) goto notfound; /* loop reading lines from /etc/fstab What was that about sscanf again? */ fstyp = NULL; dev = NULL; while (fgetstr(lbuf, sizeof(lbuf), fd) >= 0) { if ((lbuf[0] == 0) || (lbuf[0] == '#')) continue; /* skip device name */ for (cp = lbuf; (*cp != 0) && !isspace(*cp); cp++) ; if (*cp == 0) /* misformatted */ continue; /* delimit and save */ *cp++ = 0; free(dev); dev = strdup(lbuf); /* skip whitespace up to mountpoint */ while ((*cp != 0) && isspace(*cp)) cp++; /* must have / to be root */ if ((*cp == 0) || (*cp != '/') || !isspace(*(cp + 1))) continue; /* skip whitespace up to fstype */ cp += 2; while ((*cp != 0) && isspace(*cp)) cp++; if (*cp == 0) /* misformatted */ continue; /* skip text to end of fstype and delimit */ ep = cp; while ((*cp != 0) && !isspace(*cp)) cp++; *cp = 0; free(fstyp); fstyp = strdup(ep); /* skip whitespace up to mount options */ cp += 1; while ((*cp != 0) && isspace(*cp)) cp++; if (*cp == 0) /* misformatted */ continue; /* skip text to end of mount options and delimit */ ep = cp; while ((*cp != 0) && !isspace(*cp)) cp++; *cp = 0; options = strdup(ep); /* Build the : and save it in vfs.root.mountfrom */ sprintf(lbuf, "%s:%s", fstyp, dev); setenv("vfs.root.mountfrom", lbuf, 0); /* Don't override vfs.root.mountfrom.options if it is already set */ if (getenv("vfs.root.mountfrom.options") == NULL) { /* save mount options */ setenv("vfs.root.mountfrom.options", options, 0); } free(options); error = 0; break; } close(fd); free(dev); free(fstyp); notfound: if (error) { const char *currdev; currdev = getenv("currdev"); if (currdev != NULL && strncmp("zfs:", currdev, 4) == 0) { cp = strdup(currdev); cp[strlen(cp) - 1] = '\0'; setenv("vfs.root.mountfrom", cp, 0); error = 0; free(cp); } } return(error); } static int loadakernel(int try, int argc, char* argv[]) { char *cp; for (try = 0; (cp = getbootfile(try)) != NULL; try++) if (mod_loadkld(cp, argc - 1, argv + 1) != 0) printf("can't load '%s'\n", cp); else return 1; return 0; } diff --git a/stand/common/bootstrap.h b/stand/common/bootstrap.h index 80a306f1d3e7..eb4e50203133 100644 --- a/stand/common/bootstrap.h +++ b/stand/common/bootstrap.h @@ -1,404 +1,407 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _BOOTSTRAP_H_ #define _BOOTSTRAP_H_ #include #include #include #include #include "readin.h" /* Commands and return values; nonzero return sets command_errmsg != NULL */ typedef int (bootblk_cmd_t)(int argc, char *argv[]); #define COMMAND_ERRBUFSZ (256) extern const char *command_errmsg; extern char command_errbuf[COMMAND_ERRBUFSZ]; #define CMD_OK 0 #define CMD_WARN 1 #define CMD_ERROR 2 #define CMD_CRIT 3 #define CMD_FATAL 4 /* interp.c */ void interact(void); void interp_emit_prompt(void); int interp_builtin_cmd(int argc, char *argv[]); /* Called by interp.c for interp_*.c embedded interpreters */ int interp_include(const char *); /* Execute commands from filename */ void interp_init(void); /* Initialize interpreater */ int interp_run(const char *); /* Run a single command */ /* interp_backslash.c */ char *backslash(const char *str); /* interp_parse.c */ int parse(int *argc, char ***argv, const char *str); /* boot.c */ void autoboot_maybe(void); int getrootmount(char *rootdev); /* misc.c */ char *unargv(int argc, char *argv[]); size_t strlenout(vm_offset_t str); char *strdupout(vm_offset_t str); void kern_bzero(vm_offset_t dest, size_t len); int kern_pread(readin_handle_t fd, vm_offset_t dest, size_t len, off_t off); void *alloc_pread(readin_handle_t fd, off_t off, size_t len); /* bcache.c */ void bcache_init(size_t nblks, size_t bsize); void bcache_add_dev(int); void *bcache_allocate(void); void bcache_free(void *); int bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize); /* * Disk block cache */ struct bcache_devdata { int (*dv_strategy)(void *, int, daddr_t, size_t, char *, size_t *); void *dv_devdata; void *dv_cache; }; /* * Modular console support. */ struct console { const char *c_name; const char *c_desc; int c_flags; #define C_PRESENTIN (1<<0) /* console can provide input */ #define C_PRESENTOUT (1<<1) /* console can provide output */ #define C_ACTIVEIN (1<<2) /* user wants input from console */ #define C_ACTIVEOUT (1<<3) /* user wants output to console */ #define C_WIDEOUT (1<<4) /* c_out routine groks wide chars */ /* set c_flags to match hardware */ void (* c_probe)(struct console *cp); /* reinit XXX may need more args */ int (* c_init)(int arg); /* emit c */ void (* c_out)(int c); /* wait for and return input */ int (* c_in)(void); /* return nonzero if input waiting */ int (* c_ready)(void); }; extern struct console *consoles[]; void cons_probe(void); bool cons_update_mode(bool); void autoload_font(bool); /* * Plug-and-play enumerator/configurator interface. */ struct pnphandler { const char *pp_name; /* handler/bus name */ void (*pp_enumerate)(void); /* enumerate PnP devices, add to chain */ }; struct pnpident { /* ASCII identifier, actual format varies with bus/handler */ char *id_ident; STAILQ_ENTRY(pnpident) id_link; }; struct pnpinfo { char *pi_desc; /* ASCII description, optional */ int pi_revision; /* optional revision (or -1) if not supported */ char *pi_module; /* module/args nominated to handle device */ int pi_argc; /* module arguments */ char **pi_argv; struct pnphandler *pi_handler; /* handler which detected this device */ STAILQ_HEAD(, pnpident) pi_ident; /* list of identifiers */ STAILQ_ENTRY(pnpinfo) pi_link; }; STAILQ_HEAD(pnpinfo_stql, pnpinfo); extern struct pnphandler *pnphandlers[]; /* provided by MD code */ void pnp_addident(struct pnpinfo *pi, char *ident); struct pnpinfo *pnp_allocinfo(void); void pnp_freeinfo(struct pnpinfo *pi); void pnp_addinfo(struct pnpinfo *pi); char *pnp_eisaformat(uint8_t *data); /* * < 0 - No ISA in system * == 0 - Maybe ISA, search for read data port * > 0 - ISA in system, value is read data port address */ extern int isapnp_readport; /* * Version information */ extern char bootprog_info[]; /* * Interpreter information */ extern const char bootprog_interp[]; #define INTERP_DEFINE(interpstr) \ const char bootprog_interp[] = "$Interpreter:" interpstr /* * Preloaded file metadata header. * * Metadata are allocated on our heap, and copied into kernel space * before executing the kernel. */ struct file_metadata { size_t md_size; uint16_t md_type; struct file_metadata *md_next; char md_data[1]; /* data are immediately appended */ }; struct preloaded_file; struct mod_depend; struct kernel_module { char *m_name; /* module name */ int m_version; /* module version */ /* char *m_args; */ /* arguments for the module */ struct preloaded_file *m_fp; struct kernel_module *m_next; }; /* * Preloaded file information. Depending on type, file can contain * additional units called 'modules'. * * At least one file (the kernel) must be loaded in order to boot. * The kernel is always loaded first. * * String fields (m_name, m_type) should be dynamically allocated. */ struct preloaded_file { char *f_name; /* file name */ char *f_type; /* verbose file type, eg 'ELF kernel', 'pnptable', etc. */ char *f_args; /* arguments for the file */ /* metadata that will be placed in the module directory */ struct file_metadata *f_metadata; int f_loader; /* index of the loader that read the file */ vm_offset_t f_addr; /* load address */ size_t f_size; /* file size */ struct kernel_module *f_modules; /* list of modules if any */ struct preloaded_file *f_next; /* next file */ #ifdef __amd64__ bool f_kernphys_relocatable; #endif }; struct file_format { /* * Load function must return EFTYPE if it can't handle * the module supplied */ int (*l_load)(char *, uint64_t, struct preloaded_file **); /* * Only a loader that will load a kernel (first module) * should have an exec handler */ int (*l_exec)(struct preloaded_file *); }; extern struct file_format *file_formats[]; /* supplied by consumer */ extern struct preloaded_file *preloaded_files; int mod_load(char *name, struct mod_depend *verinfo, int argc, char *argv[]); int mod_loadkld(const char *name, int argc, char *argv[]); void unload(void); struct preloaded_file *file_alloc(void); struct preloaded_file *file_findfile(const char *name, const char *type); struct file_metadata *file_findmetadata(struct preloaded_file *fp, int type); struct preloaded_file *file_loadraw(const char *name, char *type, int insert); void file_discard(struct preloaded_file *fp); void file_addmetadata(struct preloaded_file *, int, size_t, void *); int file_addmodule(struct preloaded_file *, char *, int, struct kernel_module **); void file_removemetadata(struct preloaded_file *fp); +int file_addbuf(const char *name, const char *type, size_t len, void *buf); +int tslog_init(void); +int tslog_publish(void); vm_offset_t build_font_module(vm_offset_t); /* MI module loaders */ #ifdef __elfN /* Relocation types. */ #define ELF_RELOC_REL 1 #define ELF_RELOC_RELA 2 /* Relocation offset for some architectures */ extern uint64_t __elfN(relocation_offset); struct elf_file; typedef Elf_Addr (symaddr_fn)(struct elf_file *ef, Elf_Size symidx); int __elfN(loadfile)(char *, uint64_t, struct preloaded_file **); int __elfN(obj_loadfile)(char *, uint64_t, struct preloaded_file **); int __elfN(reloc)(struct elf_file *ef, symaddr_fn *symaddr, const void *reldata, int reltype, Elf_Addr relbase, Elf_Addr dataaddr, void *data, size_t len); int __elfN(loadfile_raw)(char *, uint64_t, struct preloaded_file **, int); int __elfN(load_modmetadata)(struct preloaded_file *, uint64_t); #endif /* * Support for commands */ struct bootblk_command { const char *c_name; const char *c_desc; bootblk_cmd_t *c_fn; }; #define COMMAND_SET(tag, key, desc, func) \ static bootblk_cmd_t func; \ static struct bootblk_command _cmd_ ## tag = { key, desc, func }; \ DATA_SET(Xcommand_set, _cmd_ ## tag) SET_DECLARE(Xcommand_set, struct bootblk_command); /* * The intention of the architecture switch is to provide a convenient * encapsulation of the interface between the bootstrap MI and MD code. * MD code may selectively populate the switch at runtime based on the * actual configuration of the target system. */ struct arch_switch { /* Automatically load modules as required by detected hardware */ int (*arch_autoload)(void); /* Locate the device for (name), return pointer to tail in (*path) */ int (*arch_getdev)(void **dev, const char *name, const char **path); /* * Copy from local address space to module address space, * similar to bcopy() */ ssize_t (*arch_copyin)(const void *, vm_offset_t, const size_t); /* * Copy to local address space from module address space, * similar to bcopy() */ ssize_t (*arch_copyout)(const vm_offset_t, void *, const size_t); /* Read from file to module address space, same semantics as read() */ ssize_t (*arch_readin)(readin_handle_t, vm_offset_t, const size_t); /* Perform ISA byte port I/O (only for systems with ISA) */ int (*arch_isainb)(int port); void (*arch_isaoutb)(int port, int value); /* * Interface to adjust the load address according to the "object" * being loaded. */ uint64_t (*arch_loadaddr)(u_int type, void *data, uint64_t addr); #define LOAD_ELF 1 /* data points to the ELF header. */ #define LOAD_RAW 2 /* data points to the file name. */ /* * Interface to inform MD code about a loaded (ELF) segment. This * can be used to flush caches and/or set up translations. */ #ifdef __elfN void (*arch_loadseg)(Elf_Ehdr *eh, Elf_Phdr *ph, uint64_t delta); #else void (*arch_loadseg)(void *eh, void *ph, uint64_t delta); #endif /* Probe ZFS pool(s), if needed. */ void (*arch_zfs_probe)(void); /* Return the hypervisor name/type or NULL if not virtualized. */ const char *(*arch_hypervisor)(void); /* For kexec-type loaders, get ksegment structure */ void (*arch_kexec_kseg_get)(int *nseg, void **kseg); }; extern struct arch_switch archsw; /* This must be provided by the MD code, but should it be in the archsw? */ void delay(int delay); void dev_cleanup(void); /* * nvstore API. */ typedef int (nvstore_getter_cb_t)(void *, const char *, void **); typedef int (nvstore_setter_cb_t)(void *, int, const char *, const void *, size_t); typedef int (nvstore_setter_str_cb_t)(void *, const char *, const char *, const char *); typedef int (nvstore_unset_cb_t)(void *, const char *); typedef int (nvstore_print_cb_t)(void *, void *); typedef int (nvstore_iterate_cb_t)(void *, int (*)(void *, void *)); typedef struct nvs_callbacks { nvstore_getter_cb_t *nvs_getter; nvstore_setter_cb_t *nvs_setter; nvstore_setter_str_cb_t *nvs_setter_str; nvstore_unset_cb_t *nvs_unset; nvstore_print_cb_t *nvs_print; nvstore_iterate_cb_t *nvs_iterate; } nvs_callbacks_t; int nvstore_init(const char *, nvs_callbacks_t *, void *); int nvstore_fini(const char *); void *nvstore_get_store(const char *); int nvstore_print(void *); int nvstore_get_var(void *, const char *, void **); int nvstore_set_var(void *, int, const char *, void *, size_t); int nvstore_set_var_from_string(void *, const char *, const char *, const char *); int nvstore_unset_var(void *, const char *); #ifndef CTASSERT #define CTASSERT(x) _Static_assert(x, "compile-time assertion failed") #endif #endif /* !_BOOTSTRAP_H_ */ diff --git a/stand/common/console.c b/stand/common/console.c index 5b2ddfb10f69..7886f9386c14 100644 --- a/stand/common/console.c +++ b/stand/common/console.c @@ -1,316 +1,320 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include "bootstrap.h" /* * Core console support */ static int cons_set(struct env_var *ev, int flags, const void *value); static int cons_find(const char *name); static int cons_check(const char *string); static int cons_change(const char *string); static int twiddle_set(struct env_var *ev, int flags, const void *value); /* * Detect possible console(s) to use. If preferred console(s) have been * specified, mark them as active. Else, mark the first probed console * as active. Also create the console variable. */ void cons_probe(void) { int cons; int active; char *prefconsole; + TSENTER(); + /* We want a callback to install the new value when this var changes. */ env_setenv("twiddle_divisor", EV_VOLATILE, "1", twiddle_set, env_nounset); /* Do all console probes */ for (cons = 0; consoles[cons] != NULL; cons++) { consoles[cons]->c_flags = 0; consoles[cons]->c_probe(consoles[cons]); } /* Now find the first working one */ active = -1; for (cons = 0; consoles[cons] != NULL && active == -1; cons++) { consoles[cons]->c_flags = 0; consoles[cons]->c_probe(consoles[cons]); if (consoles[cons]->c_flags == (C_PRESENTIN | C_PRESENTOUT)) active = cons; } /* Force a console even if all probes failed */ if (active == -1) active = 0; /* Check to see if a console preference has already been registered */ prefconsole = getenv("console"); if (prefconsole != NULL) prefconsole = strdup(prefconsole); if (prefconsole != NULL) { unsetenv("console"); /* we want to replace this */ cons_change(prefconsole); } else { consoles[active]->c_flags |= C_ACTIVEIN | C_ACTIVEOUT; consoles[active]->c_init(0); prefconsole = strdup(consoles[active]->c_name); } printf("Consoles: "); for (cons = 0; consoles[cons] != NULL; cons++) if (consoles[cons]->c_flags & (C_ACTIVEIN | C_ACTIVEOUT)) printf("%s ", consoles[cons]->c_desc); printf("\n"); if (prefconsole != NULL) { env_setenv("console", EV_VOLATILE, prefconsole, cons_set, env_nounset); free(prefconsole); } + + TSEXIT(); } int getchar(void) { int cons; int rv; /* Loop forever polling all active consoles */ for (;;) { for (cons = 0; consoles[cons] != NULL; cons++) { if ((consoles[cons]->c_flags & (C_PRESENTIN | C_ACTIVEIN)) == (C_PRESENTIN | C_ACTIVEIN) && ((rv = consoles[cons]->c_in()) != -1)) return (rv); } } } int ischar(void) { int cons; for (cons = 0; consoles[cons] != NULL; cons++) if ((consoles[cons]->c_flags & (C_PRESENTIN | C_ACTIVEIN)) == (C_PRESENTIN | C_ACTIVEIN) && (consoles[cons]->c_ready() != 0)) return (1); return (0); } void putchar(int c) { int cons; /* Expand newlines */ if (c == '\n') putchar('\r'); for (cons = 0; consoles[cons] != NULL; cons++) { if ((consoles[cons]->c_flags & (C_PRESENTOUT | C_ACTIVEOUT)) == (C_PRESENTOUT | C_ACTIVEOUT)) consoles[cons]->c_out(c); } } /* * Find the console with the specified name. */ static int cons_find(const char *name) { int cons; for (cons = 0; consoles[cons] != NULL; cons++) if (strcmp(consoles[cons]->c_name, name) == 0) return (cons); return (-1); } /* * Select one or more consoles. */ static int cons_set(struct env_var *ev, int flags, const void *value) { int ret; if ((value == NULL) || (cons_check(value) == 0)) { /* * Return CMD_OK instead of CMD_ERROR to prevent forth syntax * error, which would prevent it processing any further * loader.conf entries. */ return (CMD_OK); } ret = cons_change(value); if (ret != CMD_OK) return (ret); env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); return (CMD_OK); } /* * Check that at least one the consoles listed in *string is valid */ static int cons_check(const char *string) { int cons, found, failed; char *curpos, *dup, *next; dup = next = strdup(string); found = failed = 0; while (next != NULL) { curpos = strsep(&next, " ,"); if (*curpos != '\0') { cons = cons_find(curpos); if (cons == -1) { printf("console %s is invalid!\n", curpos); failed++; } else { found++; } } } free(dup); if (found == 0) printf("no valid consoles!\n"); if (found == 0 || failed != 0) { printf("Available consoles:\n"); for (cons = 0; consoles[cons] != NULL; cons++) printf(" %s\n", consoles[cons]->c_name); } return (found); } /* * Activate all the valid consoles listed in *string and disable all others. */ static int cons_change(const char *string) { int cons, active; char *curpos, *dup, *next; /* Disable all consoles */ for (cons = 0; consoles[cons] != NULL; cons++) { consoles[cons]->c_flags &= ~(C_ACTIVEIN | C_ACTIVEOUT); } /* Enable selected consoles */ dup = next = strdup(string); active = 0; while (next != NULL) { curpos = strsep(&next, " ,"); if (*curpos == '\0') continue; cons = cons_find(curpos); if (cons >= 0) { consoles[cons]->c_flags |= C_ACTIVEIN | C_ACTIVEOUT; consoles[cons]->c_init(0); if ((consoles[cons]->c_flags & (C_PRESENTIN | C_PRESENTOUT)) == (C_PRESENTIN | C_PRESENTOUT)) { active++; continue; } if (active != 0) { /* * If no consoles have initialised we * wouldn't see this. */ printf("console %s failed to initialize\n", consoles[cons]->c_name); } } } free(dup); if (active == 0) { /* * All requested consoles failed to initialise, * try to recover. */ for (cons = 0; consoles[cons] != NULL; cons++) { consoles[cons]->c_flags |= C_ACTIVEIN | C_ACTIVEOUT; consoles[cons]->c_init(0); if ((consoles[cons]->c_flags & (C_PRESENTIN | C_PRESENTOUT)) == (C_PRESENTIN | C_PRESENTOUT)) active++; } if (active == 0) return (CMD_ERROR); /* Recovery failed. */ } return (CMD_OK); } /* * Change the twiddle divisor. * * The user can set the twiddle_divisor variable to directly control how fast * the progress twiddle spins, useful for folks with slow serial consoles. The * code to monitor changes to the variable and propagate them to the twiddle * routines has to live somewhere. Twiddling is console-related so it's here. */ static int twiddle_set(struct env_var *ev, int flags, const void *value) { u_long tdiv; char *eptr; tdiv = strtoul(value, &eptr, 0); if (*(const char *)value == 0 || *eptr != 0) { printf("invalid twiddle_divisor '%s'\n", (const char *)value); return (CMD_ERROR); } twiddle_divisor((u_int)tdiv); env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); return (CMD_OK); } diff --git a/stand/common/gfx_fb.c b/stand/common/gfx_fb.c index a9fcec9942f5..f14242b70170 100644 --- a/stand/common/gfx_fb.c +++ b/stand/common/gfx_fb.c @@ -1,2844 +1,2858 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 2020 Toomas Soome * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. * Copyright 2020 RackTop Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * The workhorse here is gfxfb_blt(). It is implemented to mimic UEFI * GOP Blt, and allows us to fill the rectangle on screen, copy * rectangle from video to buffer and buffer to video and video to video. * Such implementation does allow us to have almost identical implementation * for both BIOS VBE and UEFI. * * ALL pixel data is assumed to be 32-bit BGRA (byte order Blue, Green, Red, * Alpha) format, this allows us to only handle RGB data and not to worry * about mixing RGB with indexed colors. * Data exchange between memory buffer and video will translate BGRA * and native format as following: * * 32-bit to/from 32-bit is trivial case. * 32-bit to/from 24-bit is also simple - we just drop the alpha channel. * 32-bit to/from 16-bit is more complicated, because we nee to handle * data loss from 32-bit to 16-bit. While reading/writing from/to video, we * need to apply masks of 16-bit color components. This will preserve * colors for terminal text. For 32-bit truecolor PMG images, we need to * translate 32-bit colors to 15/16 bit colors and this means data loss. * There are different algorithms how to perform such color space reduction, * we are currently using bitwise right shift to reduce color space and so far * this technique seems to be sufficient (see also gfx_fb_putimage(), the * end of for loop). * 32-bit to/from 8-bit is the most troublesome because 8-bit colors are * indexed. From video, we do get color indexes, and we do translate * color index values to RGB. To write to video, we again need to translate * RGB to color index. Additionally, we need to translate between VGA and * console colors. * * Our internal color data is represented using BGRA format. But the hardware * used indexed colors for 8-bit colors (0-255) and for this mode we do * need to perform translation to/from BGRA and index values. * * - paletteentry RGB <-> index - * BGRA BUFFER <----/ \ - VIDEO * \ / * - RGB (16/24/32) - * * To perform index to RGB translation, we use palette table generated * from when we set up 8-bit mode video. We cannot read palette data from * the hardware, because not all hardware supports reading it. * * BGRA to index is implemented in rgb_to_color_index() by searching * palette array for closest match of RBG values. * * Note: In 8-bit mode, We do store first 16 colors to palette registers * in VGA color order, this serves two purposes; firstly, * if palette update is not supported, we still have correct 16 colors. * Secondly, the kernel does get correct 16 colors when some other boot * loader is used. However, the palette map for 8-bit colors is using * console color ordering - this does allow us to skip translation * from VGA colors to console colors, while we are reading RGB data. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(EFI) #include #include #else #include #endif /* VGA text mode does use bold font. */ #if !defined(VGA_8X16_FONT) #define VGA_8X16_FONT "/boot/fonts/8x16b.fnt" #endif #if !defined(DEFAULT_8X16_FONT) #define DEFAULT_8X16_FONT "/boot/fonts/8x16.fnt" #endif /* * Must be sorted by font size in descending order */ font_list_t fonts = STAILQ_HEAD_INITIALIZER(fonts); #define DEFAULT_FONT_DATA font_data_8x16 extern vt_font_bitmap_data_t font_data_8x16; teken_gfx_t gfx_state = { 0 }; static struct { unsigned char r; /* Red percentage value. */ unsigned char g; /* Green percentage value. */ unsigned char b; /* Blue percentage value. */ } color_def[NCOLORS] = { {0, 0, 0}, /* black */ {50, 0, 0}, /* dark red */ {0, 50, 0}, /* dark green */ {77, 63, 0}, /* dark yellow */ {20, 40, 64}, /* dark blue */ {50, 0, 50}, /* dark magenta */ {0, 50, 50}, /* dark cyan */ {75, 75, 75}, /* light gray */ {18, 20, 21}, /* dark gray */ {100, 0, 0}, /* light red */ {0, 100, 0}, /* light green */ {100, 100, 0}, /* light yellow */ {45, 62, 81}, /* light blue */ {100, 0, 100}, /* light magenta */ {0, 100, 100}, /* light cyan */ {100, 100, 100}, /* white */ }; uint32_t cmap[NCMAP]; /* * Between console's palette and VGA's one: * - blue and red are swapped (1 <-> 4) * - yellow and cyan are swapped (3 <-> 6) */ const int cons_to_vga_colors[NCOLORS] = { 0, 4, 2, 6, 1, 5, 3, 7, 8, 12, 10, 14, 9, 13, 11, 15 }; static const int vga_to_cons_colors[NCOLORS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; struct text_pixel *screen_buffer; #if defined(EFI) static EFI_GRAPHICS_OUTPUT_BLT_PIXEL *GlyphBuffer; #else static struct paletteentry *GlyphBuffer; #endif static size_t GlyphBufferSize; static bool insert_font(char *, FONT_FLAGS); static int font_set(struct env_var *, int, const void *); static void * allocate_glyphbuffer(uint32_t, uint32_t); static void gfx_fb_cursor_draw(teken_gfx_t *, const teken_pos_t *, bool); /* * Initialize gfx framework. */ void gfx_framework_init(void) { /* * Setup font list to have builtin font. */ (void) insert_font(NULL, FONT_BUILTIN); } static uint8_t * gfx_get_fb_address(void) { return (ptov((uint32_t)gfx_state.tg_fb.fb_addr)); } /* * Utility function to parse gfx mode line strings. */ bool gfx_parse_mode_str(char *str, int *x, int *y, int *depth) { char *p, *end; errno = 0; p = str; *x = strtoul(p, &end, 0); if (*x == 0 || errno != 0) return (false); if (*end != 'x') return (false); p = end + 1; *y = strtoul(p, &end, 0); if (*y == 0 || errno != 0) return (false); if (*end != 'x') { *depth = -1; /* auto select */ } else { p = end + 1; *depth = strtoul(p, &end, 0); if (*depth == 0 || errno != 0 || *end != '\0') return (false); } return (true); } static uint32_t rgb_color_map(uint8_t index, uint32_t rmax, int roffset, uint32_t gmax, int goffset, uint32_t bmax, int boffset) { uint32_t color, code, gray, level; if (index < NCOLORS) { #define CF(_f, _i) ((_f ## max * color_def[(_i)]._f / 100) << _f ## offset) return (CF(r, index) | CF(g, index) | CF(b, index)); #undef CF } #define CF(_f, _c) ((_f ## max & _c) << _f ## offset) /* 6x6x6 color cube */ if (index > 15 && index < 232) { uint32_t red, green, blue; for (red = 0; red < 6; red++) { for (green = 0; green < 6; green++) { for (blue = 0; blue < 6; blue++) { code = 16 + (red * 36) + (green * 6) + blue; if (code != index) continue; red = red ? (red * 40 + 55) : 0; green = green ? (green * 40 + 55) : 0; blue = blue ? (blue * 40 + 55) : 0; color = CF(r, red); color |= CF(g, green); color |= CF(b, blue); return (color); } } } } /* colors 232-255 are a grayscale ramp */ for (gray = 0; gray < 24; gray++) { level = (gray * 10) + 8; code = 232 + gray; if (code == index) break; } return (CF(r, level) | CF(g, level) | CF(b, level)); #undef CF } /* * Support for color mapping. * For 8, 24 and 32 bit depth, use mask size 8. * 15/16 bit depth needs to use mask size from mode, * or we will lose color information from 32-bit to 15/16 bit translation. */ uint32_t gfx_fb_color_map(uint8_t index) { int rmask, gmask, bmask; int roff, goff, boff, bpp; roff = ffs(gfx_state.tg_fb.fb_mask_red) - 1; goff = ffs(gfx_state.tg_fb.fb_mask_green) - 1; boff = ffs(gfx_state.tg_fb.fb_mask_blue) - 1; bpp = roundup2(gfx_state.tg_fb.fb_bpp, 8) >> 3; if (bpp == 2) rmask = gfx_state.tg_fb.fb_mask_red >> roff; else rmask = 0xff; if (bpp == 2) gmask = gfx_state.tg_fb.fb_mask_green >> goff; else gmask = 0xff; if (bpp == 2) bmask = gfx_state.tg_fb.fb_mask_blue >> boff; else bmask = 0xff; return (rgb_color_map(index, rmask, 16, gmask, 8, bmask, 0)); } /* * Get indexed color from RGB. This function is used to write data to video * memory when the adapter is set to use indexed colors. * Since UEFI does only support 32-bit colors, we do not implement it for * UEFI because there is no need for it and we do not have palette array * for UEFI. */ static uint8_t rgb_to_color_index(uint8_t r, uint8_t g, uint8_t b) { #if !defined(EFI) uint32_t color, best, dist, k; int diff; color = 0; best = 255 * 255 * 255; for (k = 0; k < NCMAP; k++) { diff = r - pe8[k].Red; dist = diff * diff; diff = g - pe8[k].Green; dist += diff * diff; diff = b - pe8[k].Blue; dist += diff * diff; /* Exact match, exit the loop */ if (dist == 0) break; if (dist < best) { color = k; best = dist; } } if (k == NCMAP) k = color; return (k); #else (void) r; (void) g; (void) b; return (0); #endif } int generate_cons_palette(uint32_t *palette, int format, uint32_t rmax, int roffset, uint32_t gmax, int goffset, uint32_t bmax, int boffset) { int i; switch (format) { case COLOR_FORMAT_VGA: for (i = 0; i < NCOLORS; i++) palette[i] = cons_to_vga_colors[i]; for (; i < NCMAP; i++) palette[i] = i; break; case COLOR_FORMAT_RGB: for (i = 0; i < NCMAP; i++) palette[i] = rgb_color_map(i, rmax, roffset, gmax, goffset, bmax, boffset); break; default: return (ENODEV); } return (0); } static void gfx_mem_wr1(uint8_t *base, size_t size, uint32_t o, uint8_t v) { if (o >= size) return; *(uint8_t *)(base + o) = v; } static void gfx_mem_wr2(uint8_t *base, size_t size, uint32_t o, uint16_t v) { if (o >= size) return; *(uint16_t *)(base + o) = v; } static void gfx_mem_wr4(uint8_t *base, size_t size, uint32_t o, uint32_t v) { if (o >= size) return; *(uint32_t *)(base + o) = v; } static int gfxfb_blt_fill(void *BltBuffer, uint32_t DestinationX, uint32_t DestinationY, uint32_t Width, uint32_t Height) { #if defined(EFI) EFI_GRAPHICS_OUTPUT_BLT_PIXEL *p; #else struct paletteentry *p; #endif uint32_t data, bpp, pitch, y, x; int roff, goff, boff; size_t size; off_t off; uint8_t *destination; if (BltBuffer == NULL) return (EINVAL); if (DestinationY + Height > gfx_state.tg_fb.fb_height) return (EINVAL); if (DestinationX + Width > gfx_state.tg_fb.fb_width) return (EINVAL); if (Width == 0 || Height == 0) return (EINVAL); p = BltBuffer; roff = ffs(gfx_state.tg_fb.fb_mask_red) - 1; goff = ffs(gfx_state.tg_fb.fb_mask_green) - 1; boff = ffs(gfx_state.tg_fb.fb_mask_blue) - 1; if (gfx_state.tg_fb.fb_bpp == 8) { data = rgb_to_color_index(p->Red, p->Green, p->Blue); } else { data = (p->Red & (gfx_state.tg_fb.fb_mask_red >> roff)) << roff; data |= (p->Green & (gfx_state.tg_fb.fb_mask_green >> goff)) << goff; data |= (p->Blue & (gfx_state.tg_fb.fb_mask_blue >> boff)) << boff; } bpp = roundup2(gfx_state.tg_fb.fb_bpp, 8) >> 3; pitch = gfx_state.tg_fb.fb_stride * bpp; destination = gfx_get_fb_address(); size = gfx_state.tg_fb.fb_size; for (y = DestinationY; y < Height + DestinationY; y++) { off = y * pitch + DestinationX * bpp; for (x = 0; x < Width; x++) { switch (bpp) { case 1: gfx_mem_wr1(destination, size, off, (data < NCOLORS) ? cons_to_vga_colors[data] : data); break; case 2: gfx_mem_wr2(destination, size, off, data); break; case 3: gfx_mem_wr1(destination, size, off, (data >> 16) & 0xff); gfx_mem_wr1(destination, size, off + 1, (data >> 8) & 0xff); gfx_mem_wr1(destination, size, off + 2, data & 0xff); break; case 4: gfx_mem_wr4(destination, size, off, data); break; default: return (EINVAL); } off += bpp; } } return (0); } static int gfxfb_blt_video_to_buffer(void *BltBuffer, uint32_t SourceX, uint32_t SourceY, uint32_t DestinationX, uint32_t DestinationY, uint32_t Width, uint32_t Height, uint32_t Delta) { #if defined(EFI) EFI_GRAPHICS_OUTPUT_BLT_PIXEL *p; #else struct paletteentry *p; #endif uint32_t x, sy, dy; uint32_t bpp, pitch, copybytes; off_t off; uint8_t *source, *destination, *sb; uint8_t rm, rp, gm, gp, bm, bp; bool bgra; if (BltBuffer == NULL) return (EINVAL); if (SourceY + Height > gfx_state.tg_fb.fb_height) return (EINVAL); if (SourceX + Width > gfx_state.tg_fb.fb_width) return (EINVAL); if (Width == 0 || Height == 0) return (EINVAL); if (Delta == 0) Delta = Width * sizeof (*p); bpp = roundup2(gfx_state.tg_fb.fb_bpp, 8) >> 3; pitch = gfx_state.tg_fb.fb_stride * bpp; copybytes = Width * bpp; rp = ffs(gfx_state.tg_fb.fb_mask_red) - 1; gp = ffs(gfx_state.tg_fb.fb_mask_green) - 1; bp = ffs(gfx_state.tg_fb.fb_mask_blue) - 1; rm = gfx_state.tg_fb.fb_mask_red >> rp; gm = gfx_state.tg_fb.fb_mask_green >> gp; bm = gfx_state.tg_fb.fb_mask_blue >> bp; /* If FB pixel format is BGRA, we can use direct copy. */ bgra = bpp == 4 && ffs(rm) - 1 == 8 && rp == 16 && ffs(gm) - 1 == 8 && gp == 8 && ffs(bm) - 1 == 8 && bp == 0; for (sy = SourceY, dy = DestinationY; dy < Height + DestinationY; sy++, dy++) { off = sy * pitch + SourceX * bpp; source = gfx_get_fb_address() + off; destination = (uint8_t *)BltBuffer + dy * Delta + DestinationX * sizeof (*p); if (bgra) { bcopy(source, destination, copybytes); } else { for (x = 0; x < Width; x++) { uint32_t c = 0; p = (void *)(destination + x * sizeof (*p)); sb = source + x * bpp; switch (bpp) { case 1: c = *sb; break; case 2: c = *(uint16_t *)sb; break; case 3: c = sb[0] << 16 | sb[1] << 8 | sb[2]; break; case 4: c = *(uint32_t *)sb; break; default: return (EINVAL); } if (bpp == 1) { *(uint32_t *)p = gfx_fb_color_map( (c < 16) ? vga_to_cons_colors[c] : c); } else { p->Red = (c >> rp) & rm; p->Green = (c >> gp) & gm; p->Blue = (c >> bp) & bm; p->Reserved = 0; } } } } return (0); } static int gfxfb_blt_buffer_to_video(void *BltBuffer, uint32_t SourceX, uint32_t SourceY, uint32_t DestinationX, uint32_t DestinationY, uint32_t Width, uint32_t Height, uint32_t Delta) { #if defined(EFI) EFI_GRAPHICS_OUTPUT_BLT_PIXEL *p; #else struct paletteentry *p; #endif uint32_t x, sy, dy; uint32_t bpp, pitch, copybytes; off_t off; uint8_t *source, *destination; uint8_t rm, rp, gm, gp, bm, bp; bool bgra; if (BltBuffer == NULL) return (EINVAL); if (DestinationY + Height > gfx_state.tg_fb.fb_height) return (EINVAL); if (DestinationX + Width > gfx_state.tg_fb.fb_width) return (EINVAL); if (Width == 0 || Height == 0) return (EINVAL); if (Delta == 0) Delta = Width * sizeof (*p); bpp = roundup2(gfx_state.tg_fb.fb_bpp, 8) >> 3; pitch = gfx_state.tg_fb.fb_stride * bpp; copybytes = Width * bpp; rp = ffs(gfx_state.tg_fb.fb_mask_red) - 1; gp = ffs(gfx_state.tg_fb.fb_mask_green) - 1; bp = ffs(gfx_state.tg_fb.fb_mask_blue) - 1; rm = gfx_state.tg_fb.fb_mask_red >> rp; gm = gfx_state.tg_fb.fb_mask_green >> gp; bm = gfx_state.tg_fb.fb_mask_blue >> bp; /* If FB pixel format is BGRA, we can use direct copy. */ bgra = bpp == 4 && ffs(rm) - 1 == 8 && rp == 16 && ffs(gm) - 1 == 8 && gp == 8 && ffs(bm) - 1 == 8 && bp == 0; for (sy = SourceY, dy = DestinationY; sy < Height + SourceY; sy++, dy++) { off = dy * pitch + DestinationX * bpp; destination = gfx_get_fb_address() + off; if (bgra) { source = (uint8_t *)BltBuffer + sy * Delta + SourceX * sizeof (*p); bcopy(source, destination, copybytes); } else { for (x = 0; x < Width; x++) { uint32_t c; p = (void *)((uint8_t *)BltBuffer + sy * Delta + (SourceX + x) * sizeof (*p)); if (bpp == 1) { c = rgb_to_color_index(p->Red, p->Green, p->Blue); } else { c = (p->Red & rm) << rp | (p->Green & gm) << gp | (p->Blue & bm) << bp; } off = x * bpp; switch (bpp) { case 1: gfx_mem_wr1(destination, copybytes, off, (c < 16) ? cons_to_vga_colors[c] : c); break; case 2: gfx_mem_wr2(destination, copybytes, off, c); break; case 3: gfx_mem_wr1(destination, copybytes, off, (c >> 16) & 0xff); gfx_mem_wr1(destination, copybytes, off + 1, (c >> 8) & 0xff); gfx_mem_wr1(destination, copybytes, off + 2, c & 0xff); break; case 4: gfx_mem_wr4(destination, copybytes, x * bpp, c); break; default: return (EINVAL); } } } } return (0); } static int gfxfb_blt_video_to_video(uint32_t SourceX, uint32_t SourceY, uint32_t DestinationX, uint32_t DestinationY, uint32_t Width, uint32_t Height) { uint32_t bpp, copybytes; int pitch; uint8_t *source, *destination; off_t off; if (SourceY + Height > gfx_state.tg_fb.fb_height) return (EINVAL); if (SourceX + Width > gfx_state.tg_fb.fb_width) return (EINVAL); if (DestinationY + Height > gfx_state.tg_fb.fb_height) return (EINVAL); if (DestinationX + Width > gfx_state.tg_fb.fb_width) return (EINVAL); if (Width == 0 || Height == 0) return (EINVAL); bpp = roundup2(gfx_state.tg_fb.fb_bpp, 8) >> 3; pitch = gfx_state.tg_fb.fb_stride * bpp; copybytes = Width * bpp; off = SourceY * pitch + SourceX * bpp; source = gfx_get_fb_address() + off; off = DestinationY * pitch + DestinationX * bpp; destination = gfx_get_fb_address() + off; if ((uintptr_t)destination > (uintptr_t)source) { source += Height * pitch; destination += Height * pitch; pitch = -pitch; } while (Height-- > 0) { bcopy(source, destination, copybytes); source += pitch; destination += pitch; } return (0); } int gfxfb_blt(void *BltBuffer, GFXFB_BLT_OPERATION BltOperation, uint32_t SourceX, uint32_t SourceY, uint32_t DestinationX, uint32_t DestinationY, uint32_t Width, uint32_t Height, uint32_t Delta) { int rv; #if defined(EFI) EFI_STATUS status; EFI_GRAPHICS_OUTPUT *gop = gfx_state.tg_private; extern int boot_services_gone; EFI_TPL tpl; /* * We assume Blt() does work, if not, we will need to build * exception list case by case. */ if (gop != NULL && boot_services_gone == 0) { tpl = BS->RaiseTPL(TPL_NOTIFY); switch (BltOperation) { case GfxFbBltVideoFill: status = gop->Blt(gop, BltBuffer, EfiBltVideoFill, SourceX, SourceY, DestinationX, DestinationY, Width, Height, Delta); break; case GfxFbBltVideoToBltBuffer: status = gop->Blt(gop, BltBuffer, EfiBltVideoToBltBuffer, SourceX, SourceY, DestinationX, DestinationY, Width, Height, Delta); break; case GfxFbBltBufferToVideo: status = gop->Blt(gop, BltBuffer, EfiBltBufferToVideo, SourceX, SourceY, DestinationX, DestinationY, Width, Height, Delta); break; case GfxFbBltVideoToVideo: status = gop->Blt(gop, BltBuffer, EfiBltVideoToVideo, SourceX, SourceY, DestinationX, DestinationY, Width, Height, Delta); break; default: status = EFI_INVALID_PARAMETER; break; } switch (status) { case EFI_SUCCESS: rv = 0; break; case EFI_INVALID_PARAMETER: rv = EINVAL; break; case EFI_DEVICE_ERROR: default: rv = EIO; break; } BS->RestoreTPL(tpl); return (rv); } #endif switch (BltOperation) { case GfxFbBltVideoFill: rv = gfxfb_blt_fill(BltBuffer, DestinationX, DestinationY, Width, Height); break; case GfxFbBltVideoToBltBuffer: rv = gfxfb_blt_video_to_buffer(BltBuffer, SourceX, SourceY, DestinationX, DestinationY, Width, Height, Delta); break; case GfxFbBltBufferToVideo: rv = gfxfb_blt_buffer_to_video(BltBuffer, SourceX, SourceY, DestinationX, DestinationY, Width, Height, Delta); break; case GfxFbBltVideoToVideo: rv = gfxfb_blt_video_to_video(SourceX, SourceY, DestinationX, DestinationY, Width, Height); break; default: rv = EINVAL; break; } return (rv); } void gfx_bitblt_bitmap(teken_gfx_t *state, const uint8_t *glyph, const teken_attr_t *a, uint32_t alpha, bool cursor) { uint32_t width, height; uint32_t fgc, bgc, bpl, cc, o; int bpp, bit, byte; bool invert = false; bpp = 4; /* We only generate BGRA */ width = state->tg_font.vf_width; height = state->tg_font.vf_height; bpl = (width + 7) / 8; /* Bytes per source line. */ fgc = a->ta_fgcolor; bgc = a->ta_bgcolor; if (a->ta_format & TF_BOLD) fgc |= TC_LIGHT; if (a->ta_format & TF_BLINK) bgc |= TC_LIGHT; fgc = gfx_fb_color_map(fgc); bgc = gfx_fb_color_map(bgc); if (a->ta_format & TF_REVERSE) invert = !invert; if (cursor) invert = !invert; if (invert) { uint32_t tmp; tmp = fgc; fgc = bgc; bgc = tmp; } alpha = alpha << 24; fgc |= alpha; bgc |= alpha; for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) { byte = y * bpl + x / 8; bit = 0x80 >> (x % 8); o = y * width * bpp + x * bpp; cc = glyph[byte] & bit ? fgc : bgc; gfx_mem_wr4(state->tg_glyph, state->tg_glyph_size, o, cc); } } } /* * Draw prepared glyph on terminal point p. */ static void gfx_fb_printchar(teken_gfx_t *state, const teken_pos_t *p) { unsigned x, y, width, height; width = state->tg_font.vf_width; height = state->tg_font.vf_height; x = state->tg_origin.tp_col + p->tp_col * width; y = state->tg_origin.tp_row + p->tp_row * height; gfx_fb_cons_display(x, y, width, height, state->tg_glyph); } /* * Store char with its attribute to buffer and put it on screen. */ void gfx_fb_putchar(void *arg, const teken_pos_t *p, teken_char_t c, const teken_attr_t *a) { teken_gfx_t *state = arg; const uint8_t *glyph; int idx; idx = p->tp_col + p->tp_row * state->tg_tp.tp_col; if (idx >= state->tg_tp.tp_col * state->tg_tp.tp_row) return; /* remove the cursor */ if (state->tg_cursor_visible) gfx_fb_cursor_draw(state, &state->tg_cursor, false); screen_buffer[idx].c = c; screen_buffer[idx].a = *a; glyph = font_lookup(&state->tg_font, c, a); gfx_bitblt_bitmap(state, glyph, a, 0xff, false); gfx_fb_printchar(state, p); /* display the cursor */ if (state->tg_cursor_visible) { const teken_pos_t *c; c = teken_get_cursor(&state->tg_teken); gfx_fb_cursor_draw(state, c, true); } } void gfx_fb_fill(void *arg, const teken_rect_t *r, teken_char_t c, const teken_attr_t *a) { teken_gfx_t *state = arg; const uint8_t *glyph; teken_pos_t p; struct text_pixel *row; /* remove the cursor */ if (state->tg_cursor_visible) gfx_fb_cursor_draw(state, &state->tg_cursor, false); glyph = font_lookup(&state->tg_font, c, a); gfx_bitblt_bitmap(state, glyph, a, 0xff, false); for (p.tp_row = r->tr_begin.tp_row; p.tp_row < r->tr_end.tp_row; p.tp_row++) { row = &screen_buffer[p.tp_row * state->tg_tp.tp_col]; for (p.tp_col = r->tr_begin.tp_col; p.tp_col < r->tr_end.tp_col; p.tp_col++) { row[p.tp_col].c = c; row[p.tp_col].a = *a; gfx_fb_printchar(state, &p); } } /* display the cursor */ if (state->tg_cursor_visible) { const teken_pos_t *c; c = teken_get_cursor(&state->tg_teken); gfx_fb_cursor_draw(state, c, true); } } static void gfx_fb_cursor_draw(teken_gfx_t *state, const teken_pos_t *pos, bool on) { unsigned x, y, width, height; const uint8_t *glyph; teken_pos_t p; int idx; p = *pos; if (p.tp_col >= state->tg_tp.tp_col) p.tp_col = state->tg_tp.tp_col - 1; if (p.tp_row >= state->tg_tp.tp_row) p.tp_row = state->tg_tp.tp_row - 1; idx = p.tp_col + p.tp_row * state->tg_tp.tp_col; if (idx >= state->tg_tp.tp_col * state->tg_tp.tp_row) return; width = state->tg_font.vf_width; height = state->tg_font.vf_height; x = state->tg_origin.tp_col + p.tp_col * width; y = state->tg_origin.tp_row + p.tp_row * height; /* * Save original display content to preserve image data. */ if (on) { if (state->tg_cursor_image == NULL || state->tg_cursor_size != width * height * 4) { free(state->tg_cursor_image); state->tg_cursor_size = width * height * 4; state->tg_cursor_image = malloc(state->tg_cursor_size); } if (state->tg_cursor_image != NULL) { if (gfxfb_blt(state->tg_cursor_image, GfxFbBltVideoToBltBuffer, x, y, 0, 0, width, height, 0) != 0) { free(state->tg_cursor_image); state->tg_cursor_image = NULL; } } } else { /* * Restore display from tg_cursor_image. * If there is no image, restore char from screen_buffer. */ if (state->tg_cursor_image != NULL && gfxfb_blt(state->tg_cursor_image, GfxFbBltBufferToVideo, 0, 0, x, y, width, height, 0) == 0) { state->tg_cursor = p; return; } } glyph = font_lookup(&state->tg_font, screen_buffer[idx].c, &screen_buffer[idx].a); gfx_bitblt_bitmap(state, glyph, &screen_buffer[idx].a, 0xff, on); gfx_fb_printchar(state, &p); state->tg_cursor = p; } void gfx_fb_cursor(void *arg, const teken_pos_t *p) { teken_gfx_t *state = arg; /* Switch cursor off in old location and back on in new. */ if (state->tg_cursor_visible) { gfx_fb_cursor_draw(state, &state->tg_cursor, false); gfx_fb_cursor_draw(state, p, true); } } void gfx_fb_param(void *arg, int cmd, unsigned int value) { teken_gfx_t *state = arg; const teken_pos_t *c; switch (cmd) { case TP_SETLOCALCURSOR: /* * 0 means normal (usually block), 1 means hidden, and * 2 means blinking (always block) for compatibility with * syscons. We don't support any changes except hiding, * so must map 2 to 0. */ value = (value == 1) ? 0 : 1; /* FALLTHROUGH */ case TP_SHOWCURSOR: c = teken_get_cursor(&state->tg_teken); gfx_fb_cursor_draw(state, c, true); if (value != 0) state->tg_cursor_visible = true; else state->tg_cursor_visible = false; break; default: /* Not yet implemented */ break; } } bool is_same_pixel(struct text_pixel *px1, struct text_pixel *px2) { if (px1->c != px2->c) return (false); /* Is there image stored? */ if ((px1->a.ta_format & TF_IMAGE) || (px2->a.ta_format & TF_IMAGE)) return (false); if (px1->a.ta_format != px2->a.ta_format) return (false); if (px1->a.ta_fgcolor != px2->a.ta_fgcolor) return (false); if (px1->a.ta_bgcolor != px2->a.ta_bgcolor) return (false); return (true); } static void gfx_fb_copy_area(teken_gfx_t *state, const teken_rect_t *s, const teken_pos_t *d) { uint32_t sx, sy, dx, dy, width, height; width = state->tg_font.vf_width; height = state->tg_font.vf_height; sx = state->tg_origin.tp_col + s->tr_begin.tp_col * width; sy = state->tg_origin.tp_row + s->tr_begin.tp_row * height; dx = state->tg_origin.tp_col + d->tp_col * width; dy = state->tg_origin.tp_row + d->tp_row * height; width *= (s->tr_end.tp_col - s->tr_begin.tp_col + 1); (void) gfxfb_blt(NULL, GfxFbBltVideoToVideo, sx, sy, dx, dy, width, height, 0); } static void gfx_fb_copy_line(teken_gfx_t *state, int ncol, teken_pos_t *s, teken_pos_t *d) { teken_rect_t sr; teken_pos_t dp; unsigned soffset, doffset; bool mark = false; int x; soffset = s->tp_col + s->tp_row * state->tg_tp.tp_col; doffset = d->tp_col + d->tp_row * state->tg_tp.tp_col; for (x = 0; x < ncol; x++) { if (is_same_pixel(&screen_buffer[soffset + x], &screen_buffer[doffset + x])) { if (mark) { gfx_fb_copy_area(state, &sr, &dp); mark = false; } } else { screen_buffer[doffset + x] = screen_buffer[soffset + x]; if (mark) { /* update end point */ sr.tr_end.tp_col = s->tp_col + x;; } else { /* set up new rectangle */ mark = true; sr.tr_begin.tp_col = s->tp_col + x; sr.tr_begin.tp_row = s->tp_row; sr.tr_end.tp_col = s->tp_col + x; sr.tr_end.tp_row = s->tp_row; dp.tp_col = d->tp_col + x; dp.tp_row = d->tp_row; } } } if (mark) { gfx_fb_copy_area(state, &sr, &dp); } } void gfx_fb_copy(void *arg, const teken_rect_t *r, const teken_pos_t *p) { teken_gfx_t *state = arg; unsigned doffset, soffset; teken_pos_t d, s; int nrow, ncol, y; /* Has to be signed - >= 0 comparison */ /* * Copying is a little tricky. We must make sure we do it in * correct order, to make sure we don't overwrite our own data. */ nrow = r->tr_end.tp_row - r->tr_begin.tp_row; ncol = r->tr_end.tp_col - r->tr_begin.tp_col; if (p->tp_row + nrow > state->tg_tp.tp_row || p->tp_col + ncol > state->tg_tp.tp_col) return; soffset = r->tr_begin.tp_col + r->tr_begin.tp_row * state->tg_tp.tp_col; doffset = p->tp_col + p->tp_row * state->tg_tp.tp_col; /* remove the cursor */ if (state->tg_cursor_visible) gfx_fb_cursor_draw(state, &state->tg_cursor, false); /* * Copy line by line. */ if (doffset <= soffset) { s = r->tr_begin; d = *p; for (y = 0; y < nrow; y++) { s.tp_row = r->tr_begin.tp_row + y; d.tp_row = p->tp_row + y; gfx_fb_copy_line(state, ncol, &s, &d); } } else { for (y = nrow - 1; y >= 0; y--) { s.tp_row = r->tr_begin.tp_row + y; d.tp_row = p->tp_row + y; gfx_fb_copy_line(state, ncol, &s, &d); } } /* display the cursor */ if (state->tg_cursor_visible) { const teken_pos_t *c; c = teken_get_cursor(&state->tg_teken); gfx_fb_cursor_draw(state, c, true); } } /* * Implements alpha blending for RGBA data, could use pixels for arguments, * but byte stream seems more generic. * The generic alpha blending is: * blend = alpha * fg + (1.0 - alpha) * bg. * Since our alpha is not from range [0..1], we scale appropriately. */ static uint8_t alpha_blend(uint8_t fg, uint8_t bg, uint8_t alpha) { uint16_t blend, h, l; /* trivial corner cases */ if (alpha == 0) return (bg); if (alpha == 0xFF) return (fg); blend = (alpha * fg + (0xFF - alpha) * bg); /* Division by 0xFF */ h = blend >> 8; l = blend & 0xFF; if (h + l >= 0xFF) h++; return (h); } /* * Implements alpha blending for RGBA data, could use pixels for arguments, * but byte stream seems more generic. * The generic alpha blending is: * blend = alpha * fg + (1.0 - alpha) * bg. * Since our alpha is not from range [0..1], we scale appropriately. */ static void bitmap_cpy(void *dst, void *src, uint32_t size) { #if defined(EFI) EFI_GRAPHICS_OUTPUT_BLT_PIXEL *ps, *pd; #else struct paletteentry *ps, *pd; #endif uint32_t i; uint8_t a; ps = src; pd = dst; /* * we only implement alpha blending for depth 32. */ for (i = 0; i < size; i ++) { a = ps[i].Reserved; pd[i].Red = alpha_blend(ps[i].Red, pd[i].Red, a); pd[i].Green = alpha_blend(ps[i].Green, pd[i].Green, a); pd[i].Blue = alpha_blend(ps[i].Blue, pd[i].Blue, a); pd[i].Reserved = a; } } static void * allocate_glyphbuffer(uint32_t width, uint32_t height) { size_t size; size = sizeof (*GlyphBuffer) * width * height; if (size != GlyphBufferSize) { free(GlyphBuffer); GlyphBuffer = malloc(size); if (GlyphBuffer == NULL) return (NULL); GlyphBufferSize = size; } return (GlyphBuffer); } void gfx_fb_cons_display(uint32_t x, uint32_t y, uint32_t width, uint32_t height, void *data) { #if defined(EFI) EFI_GRAPHICS_OUTPUT_BLT_PIXEL *buf; #else struct paletteentry *buf; #endif size_t size; size = width * height * sizeof(*buf); /* * Common data to display is glyph, use preallocated * glyph buffer. */ if (gfx_state.tg_glyph_size != GlyphBufferSize) (void) allocate_glyphbuffer(width, height); if (size == GlyphBufferSize) buf = GlyphBuffer; else buf = malloc(size); if (buf == NULL) return; if (gfxfb_blt(buf, GfxFbBltVideoToBltBuffer, x, y, 0, 0, width, height, 0) == 0) { bitmap_cpy(buf, data, width * height); (void) gfxfb_blt(buf, GfxFbBltBufferToVideo, 0, 0, x, y, width, height, 0); } if (buf != GlyphBuffer) free(buf); } /* * Public graphics primitives. */ static int isqrt(int num) { int res = 0; int bit = 1 << 30; /* "bit" starts at the highest power of four <= the argument. */ while (bit > num) bit >>= 2; while (bit != 0) { if (num >= res + bit) { num -= res + bit; res = (res >> 1) + bit; } else { res >>= 1; } bit >>= 2; } return (res); } static uint32_t gfx_fb_getcolor(void) { uint32_t c; const teken_attr_t *ap; ap = teken_get_curattr(&gfx_state.tg_teken); if (ap->ta_format & TF_REVERSE) { c = ap->ta_bgcolor; if (ap->ta_format & TF_BLINK) c |= TC_LIGHT; } else { c = ap->ta_fgcolor; if (ap->ta_format & TF_BOLD) c |= TC_LIGHT; } return (gfx_fb_color_map(c)); } /* set pixel in framebuffer using gfx coordinates */ void gfx_fb_setpixel(uint32_t x, uint32_t y) { uint32_t c; if (gfx_state.tg_fb_type == FB_TEXT) return; c = gfx_fb_getcolor(); if (x >= gfx_state.tg_fb.fb_width || y >= gfx_state.tg_fb.fb_height) return; gfxfb_blt(&c, GfxFbBltVideoFill, 0, 0, x, y, 1, 1, 0); } /* * draw rectangle in framebuffer using gfx coordinates. */ void gfx_fb_drawrect(uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2, uint32_t fill) { uint32_t c; if (gfx_state.tg_fb_type == FB_TEXT) return; c = gfx_fb_getcolor(); if (fill != 0) { gfxfb_blt(&c, GfxFbBltVideoFill, 0, 0, x1, y1, x2 - x1, y2 - y1, 0); } else { gfxfb_blt(&c, GfxFbBltVideoFill, 0, 0, x1, y1, x2 - x1, 1, 0); gfxfb_blt(&c, GfxFbBltVideoFill, 0, 0, x1, y2, x2 - x1, 1, 0); gfxfb_blt(&c, GfxFbBltVideoFill, 0, 0, x1, y1, 1, y2 - y1, 0); gfxfb_blt(&c, GfxFbBltVideoFill, 0, 0, x2, y1, 1, y2 - y1, 0); } } void gfx_fb_line(uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, uint32_t wd) { int dx, sx, dy, sy; int err, e2, x2, y2, ed, width; if (gfx_state.tg_fb_type == FB_TEXT) return; width = wd; sx = x0 < x1? 1 : -1; sy = y0 < y1? 1 : -1; dx = x1 > x0? x1 - x0 : x0 - x1; dy = y1 > y0? y1 - y0 : y0 - y1; err = dx + dy; ed = dx + dy == 0 ? 1: isqrt(dx * dx + dy * dy); for (;;) { gfx_fb_setpixel(x0, y0); e2 = err; x2 = x0; if ((e2 << 1) >= -dx) { /* x step */ e2 += dy; y2 = y0; while (e2 < ed * width && (y1 != (uint32_t)y2 || dx > dy)) { y2 += sy; gfx_fb_setpixel(x0, y2); e2 += dx; } if (x0 == x1) break; e2 = err; err -= dy; x0 += sx; } if ((e2 << 1) <= dy) { /* y step */ e2 = dx-e2; while (e2 < ed * width && (x1 != (uint32_t)x2 || dx < dy)) { x2 += sx; gfx_fb_setpixel(x2, y0); e2 += dy; } if (y0 == y1) break; err += dx; y0 += sy; } } } /* * quadratic Bézier curve limited to gradients without sign change. */ void gfx_fb_bezier(uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2, uint32_t wd) { int sx, sy, xx, yy, xy, width; int dx, dy, err, curvature; int i; if (gfx_state.tg_fb_type == FB_TEXT) return; width = wd; sx = x2 - x1; sy = y2 - y1; xx = x0 - x1; yy = y0 - y1; curvature = xx*sy - yy*sx; if (sx*sx + sy*sy > xx*xx+yy*yy) { x2 = x0; x0 = sx + x1; y2 = y0; y0 = sy + y1; curvature = -curvature; } if (curvature != 0) { xx += sx; sx = x0 < x2? 1 : -1; xx *= sx; yy += sy; sy = y0 < y2? 1 : -1; yy *= sy; xy = (xx*yy) << 1; xx *= xx; yy *= yy; if (curvature * sx * sy < 0) { xx = -xx; yy = -yy; xy = -xy; curvature = -curvature; } dx = 4 * sy * curvature * (x1 - x0) + xx - xy; dy = 4 * sx * curvature * (y0 - y1) + yy - xy; xx += xx; yy += yy; err = dx + dy + xy; do { for (i = 0; i <= width; i++) gfx_fb_setpixel(x0 + i, y0); if (x0 == x2 && y0 == y2) return; /* last pixel -> curve finished */ y1 = 2 * err < dx; if (2 * err > dy) { x0 += sx; dx -= xy; dy += yy; err += dy; } if (y1 != 0) { y0 += sy; dy -= xy; dx += xx; err += dx; } } while (dy < dx); /* gradient negates -> algorithm fails */ } gfx_fb_line(x0, y0, x2, y2, width); } /* * draw rectangle using terminal coordinates and current foreground color. */ void gfx_term_drawrect(uint32_t ux1, uint32_t uy1, uint32_t ux2, uint32_t uy2) { int x1, y1, x2, y2; int xshift, yshift; int width, i; uint32_t vf_width, vf_height; teken_rect_t r; if (gfx_state.tg_fb_type == FB_TEXT) return; vf_width = gfx_state.tg_font.vf_width; vf_height = gfx_state.tg_font.vf_height; width = vf_width / 4; /* line width */ xshift = (vf_width - width) / 2; yshift = (vf_height - width) / 2; /* Shift coordinates */ if (ux1 != 0) ux1--; if (uy1 != 0) uy1--; ux2--; uy2--; /* mark area used in terminal */ r.tr_begin.tp_col = ux1; r.tr_begin.tp_row = uy1; r.tr_end.tp_col = ux2 + 1; r.tr_end.tp_row = uy2 + 1; term_image_display(&gfx_state, &r); /* * Draw horizontal lines width points thick, shifted from outer edge. */ x1 = (ux1 + 1) * vf_width + gfx_state.tg_origin.tp_col; y1 = uy1 * vf_height + gfx_state.tg_origin.tp_row + yshift; x2 = ux2 * vf_width + gfx_state.tg_origin.tp_col; gfx_fb_drawrect(x1, y1, x2, y1 + width, 1); y2 = uy2 * vf_height + gfx_state.tg_origin.tp_row; y2 += vf_height - yshift - width; gfx_fb_drawrect(x1, y2, x2, y2 + width, 1); /* * Draw vertical lines width points thick, shifted from outer edge. */ x1 = ux1 * vf_width + gfx_state.tg_origin.tp_col + xshift; y1 = uy1 * vf_height + gfx_state.tg_origin.tp_row; y1 += vf_height; y2 = uy2 * vf_height + gfx_state.tg_origin.tp_row; gfx_fb_drawrect(x1, y1, x1 + width, y2, 1); x1 = ux2 * vf_width + gfx_state.tg_origin.tp_col; x1 += vf_width - xshift - width; gfx_fb_drawrect(x1, y1, x1 + width, y2, 1); /* Draw upper left corner. */ x1 = ux1 * vf_width + gfx_state.tg_origin.tp_col + xshift; y1 = uy1 * vf_height + gfx_state.tg_origin.tp_row; y1 += vf_height; x2 = ux1 * vf_width + gfx_state.tg_origin.tp_col; x2 += vf_width; y2 = uy1 * vf_height + gfx_state.tg_origin.tp_row + yshift; for (i = 0; i <= width; i++) gfx_fb_bezier(x1 + i, y1, x1 + i, y2 + i, x2, y2 + i, width-i); /* Draw lower left corner. */ x1 = ux1 * vf_width + gfx_state.tg_origin.tp_col; x1 += vf_width; y1 = uy2 * vf_height + gfx_state.tg_origin.tp_row; y1 += vf_height - yshift; x2 = ux1 * vf_width + gfx_state.tg_origin.tp_col + xshift; y2 = uy2 * vf_height + gfx_state.tg_origin.tp_row; for (i = 0; i <= width; i++) gfx_fb_bezier(x1, y1 - i, x2 + i, y1 - i, x2 + i, y2, width-i); /* Draw upper right corner. */ x1 = ux2 * vf_width + gfx_state.tg_origin.tp_col; y1 = uy1 * vf_height + gfx_state.tg_origin.tp_row + yshift; x2 = ux2 * vf_width + gfx_state.tg_origin.tp_col; x2 += vf_width - xshift - width; y2 = uy1 * vf_height + gfx_state.tg_origin.tp_row; y2 += vf_height; for (i = 0; i <= width; i++) gfx_fb_bezier(x1, y1 + i, x2 + i, y1 + i, x2 + i, y2, width-i); /* Draw lower right corner. */ x1 = ux2 * vf_width + gfx_state.tg_origin.tp_col; y1 = uy2 * vf_height + gfx_state.tg_origin.tp_row; y1 += vf_height - yshift; x2 = ux2 * vf_width + gfx_state.tg_origin.tp_col; x2 += vf_width - xshift - width; y2 = uy2 * vf_height + gfx_state.tg_origin.tp_row; for (i = 0; i <= width; i++) gfx_fb_bezier(x1, y1 - i, x2 + i, y1 - i, x2 + i, y2, width-i); } int gfx_fb_putimage(png_t *png, uint32_t ux1, uint32_t uy1, uint32_t ux2, uint32_t uy2, uint32_t flags) { #if defined(EFI) EFI_GRAPHICS_OUTPUT_BLT_PIXEL *p; #else struct paletteentry *p; #endif uint8_t *data; uint32_t i, j, x, y, fheight, fwidth; int rs, gs, bs; uint8_t r, g, b, a; bool scale = false; bool trace = false; teken_rect_t rect; trace = (flags & FL_PUTIMAGE_DEBUG) != 0; if (gfx_state.tg_fb_type == FB_TEXT) { if (trace) printf("Framebuffer not active.\n"); return (1); } if (png->color_type != PNG_TRUECOLOR_ALPHA) { if (trace) printf("Not truecolor image.\n"); return (1); } if (ux1 > gfx_state.tg_fb.fb_width || uy1 > gfx_state.tg_fb.fb_height) { if (trace) printf("Top left coordinate off screen.\n"); return (1); } if (png->width > UINT16_MAX || png->height > UINT16_MAX) { if (trace) printf("Image too large.\n"); return (1); } if (png->width < 1 || png->height < 1) { if (trace) printf("Image too small.\n"); return (1); } /* * If 0 was passed for either ux2 or uy2, then calculate the missing * part of the bottom right coordinate. */ scale = true; if (ux2 == 0 && uy2 == 0) { /* Both 0, use the native resolution of the image */ ux2 = ux1 + png->width; uy2 = uy1 + png->height; scale = false; } else if (ux2 == 0) { /* Set ux2 from uy2/uy1 to maintain aspect ratio */ ux2 = ux1 + (png->width * (uy2 - uy1)) / png->height; } else if (uy2 == 0) { /* Set uy2 from ux2/ux1 to maintain aspect ratio */ uy2 = uy1 + (png->height * (ux2 - ux1)) / png->width; } if (ux2 > gfx_state.tg_fb.fb_width || uy2 > gfx_state.tg_fb.fb_height) { if (trace) printf("Bottom right coordinate off screen.\n"); return (1); } fwidth = ux2 - ux1; fheight = uy2 - uy1; /* * If the original image dimensions have been passed explicitly, * disable scaling. */ if (fwidth == png->width && fheight == png->height) scale = false; if (ux1 == 0) { /* * No top left X co-ordinate (real coordinates start at 1), * place as far right as it will fit. */ ux2 = gfx_state.tg_fb.fb_width - gfx_state.tg_origin.tp_col; ux1 = ux2 - fwidth; } if (uy1 == 0) { /* * No top left Y co-ordinate (real coordinates start at 1), * place as far down as it will fit. */ uy2 = gfx_state.tg_fb.fb_height - gfx_state.tg_origin.tp_row; uy1 = uy2 - fheight; } if (ux1 >= ux2 || uy1 >= uy2) { if (trace) printf("Image dimensions reversed.\n"); return (1); } if (fwidth < 2 || fheight < 2) { if (trace) printf("Target area too small\n"); return (1); } if (trace) printf("Image %ux%u -> %ux%u @%ux%u\n", png->width, png->height, fwidth, fheight, ux1, uy1); rect.tr_begin.tp_col = ux1 / gfx_state.tg_font.vf_width; rect.tr_begin.tp_row = uy1 / gfx_state.tg_font.vf_height; rect.tr_end.tp_col = (ux1 + fwidth) / gfx_state.tg_font.vf_width; rect.tr_end.tp_row = (uy1 + fheight) / gfx_state.tg_font.vf_height; /* * mark area used in terminal */ if (!(flags & FL_PUTIMAGE_NOSCROLL)) term_image_display(&gfx_state, &rect); if ((flags & FL_PUTIMAGE_BORDER)) gfx_fb_drawrect(ux1, uy1, ux2, uy2, 0); data = malloc(fwidth * fheight * sizeof(*p)); p = (void *)data; if (data == NULL) { if (trace) printf("Out of memory.\n"); return (1); } /* * Build image for our framebuffer. */ /* Helper to calculate the pixel index from the source png */ #define GETPIXEL(xx, yy) (((yy) * png->width + (xx)) * png->bpp) /* * For each of the x and y directions, calculate the number of pixels * in the source image that correspond to a single pixel in the target. * Use fixed-point arithmetic with 16-bits for each of the integer and * fractional parts. */ const uint32_t wcstep = ((png->width - 1) << 16) / (fwidth - 1); const uint32_t hcstep = ((png->height - 1) << 16) / (fheight - 1); rs = 8 - (fls(gfx_state.tg_fb.fb_mask_red) - ffs(gfx_state.tg_fb.fb_mask_red) + 1); gs = 8 - (fls(gfx_state.tg_fb.fb_mask_green) - ffs(gfx_state.tg_fb.fb_mask_green) + 1); bs = 8 - (fls(gfx_state.tg_fb.fb_mask_blue) - ffs(gfx_state.tg_fb.fb_mask_blue) + 1); uint32_t hc = 0; for (y = 0; y < fheight; y++) { uint32_t hc2 = (hc >> 9) & 0x7f; uint32_t hc1 = 0x80 - hc2; uint32_t offset_y = hc >> 16; uint32_t offset_y1 = offset_y + 1; uint32_t wc = 0; for (x = 0; x < fwidth; x++) { uint32_t wc2 = (wc >> 9) & 0x7f; uint32_t wc1 = 0x80 - wc2; uint32_t offset_x = wc >> 16; uint32_t offset_x1 = offset_x + 1; /* Target pixel index */ j = y * fwidth + x; if (!scale) { i = GETPIXEL(x, y); r = png->image[i]; g = png->image[i + 1]; b = png->image[i + 2]; a = png->image[i + 3]; } else { uint8_t pixel[4]; uint32_t p00 = GETPIXEL(offset_x, offset_y); uint32_t p01 = GETPIXEL(offset_x, offset_y1); uint32_t p10 = GETPIXEL(offset_x1, offset_y); uint32_t p11 = GETPIXEL(offset_x1, offset_y1); /* * Given a 2x2 array of pixels in the source * image, combine them to produce a single * value for the pixel in the target image. * Each column of pixels is combined using * a weighted average where the top and bottom * pixels contribute hc1 and hc2 respectively. * The calculation for bottom pixel pB and * top pixel pT is: * (pT * hc1 + pB * hc2) / (hc1 + hc2) * Once the values are determined for the two * columns of pixels, then the columns are * averaged together in the same way but using * wc1 and wc2 for the weightings. * * Since hc1 and hc2 are chosen so that * hc1 + hc2 == 128 (and same for wc1 + wc2), * the >> 14 below is a quick way to divide by * (hc1 + hc2) * (wc1 + wc2) */ for (i = 0; i < 4; i++) pixel[i] = ( (png->image[p00 + i] * hc1 + png->image[p01 + i] * hc2) * wc1 + (png->image[p10 + i] * hc1 + png->image[p11 + i] * hc2) * wc2) >> 14; r = pixel[0]; g = pixel[1]; b = pixel[2]; a = pixel[3]; } if (trace) printf("r/g/b: %x/%x/%x\n", r, g, b); /* * Rough colorspace reduction for 15/16 bit colors. */ p[j].Red = r >> rs; p[j].Green = g >> gs; p[j].Blue = b >> bs; p[j].Reserved = a; wc += wcstep; } hc += hcstep; } gfx_fb_cons_display(ux1, uy1, fwidth, fheight, data); free(data); return (0); } /* * Reset font flags to FONT_AUTO. */ void reset_font_flags(void) { struct fontlist *fl; STAILQ_FOREACH(fl, &fonts, font_next) { fl->font_flags = FONT_AUTO; } } /* Return w^2 + h^2 or 0, if the dimensions are unknown */ static unsigned edid_diagonal_squared(void) { unsigned w, h; if (edid_info == NULL) return (0); w = edid_info->display.max_horizontal_image_size; h = edid_info->display.max_vertical_image_size; /* If either one is 0, we have aspect ratio, not size */ if (w == 0 || h == 0) return (0); /* * some monitors encode the aspect ratio instead of the physical size. */ if ((w == 16 && h == 9) || (w == 16 && h == 10) || (w == 4 && h == 3) || (w == 5 && h == 4)) return (0); /* * translate cm to inch, note we scale by 100 here. */ w = w * 100 / 254; h = h * 100 / 254; /* Return w^2 + h^2 */ return (w * w + h * h); } /* * calculate pixels per inch. */ static unsigned gfx_get_ppi(void) { unsigned dp, di; di = edid_diagonal_squared(); if (di == 0) return (0); dp = gfx_state.tg_fb.fb_width * gfx_state.tg_fb.fb_width + gfx_state.tg_fb.fb_height * gfx_state.tg_fb.fb_height; return (isqrt(dp / di)); } /* * Calculate font size from density independent pixels (dp): * ((16dp * ppi) / 160) * display_factor. * Here we are using fixed constants: 1dp == 160 ppi and * display_factor 2. * * We are rounding font size up and are searching for font which is * not smaller than calculated size value. */ static vt_font_bitmap_data_t * gfx_get_font(void) { unsigned ppi, size; vt_font_bitmap_data_t *font = NULL; struct fontlist *fl, *next; /* Text mode is not supported here. */ if (gfx_state.tg_fb_type == FB_TEXT) return (NULL); ppi = gfx_get_ppi(); if (ppi == 0) return (NULL); /* * We will search for 16dp font. * We are using scale up by 10 for roundup. */ size = (16 * ppi * 10) / 160; /* Apply display factor 2. */ size = roundup(size * 2, 10) / 10; STAILQ_FOREACH(fl, &fonts, font_next) { next = STAILQ_NEXT(fl, font_next); /* * If this is last font or, if next font is smaller, * we have our font. Make sure, it actually is loaded. */ if (next == NULL || next->font_data->vfbd_height < size) { font = fl->font_data; if (font->vfbd_font == NULL || fl->font_flags == FONT_RELOAD) { if (fl->font_load != NULL && fl->font_name != NULL) font = fl->font_load(fl->font_name); } break; } } return (font); } static vt_font_bitmap_data_t * set_font(teken_unit_t *rows, teken_unit_t *cols, teken_unit_t h, teken_unit_t w) { vt_font_bitmap_data_t *font = NULL; struct fontlist *fl; unsigned height = h; unsigned width = w; /* * First check for manually loaded font. */ STAILQ_FOREACH(fl, &fonts, font_next) { if (fl->font_flags == FONT_MANUAL) { font = fl->font_data; if (font->vfbd_font == NULL && fl->font_load != NULL && fl->font_name != NULL) { font = fl->font_load(fl->font_name); } if (font == NULL || font->vfbd_font == NULL) font = NULL; break; } } if (font == NULL) font = gfx_get_font(); if (font != NULL) { *rows = height / font->vfbd_height; *cols = width / font->vfbd_width; return (font); } /* * Find best font for these dimensions, or use default. * If height >= VT_FB_MAX_HEIGHT and width >= VT_FB_MAX_WIDTH, * do not use smaller font than our DEFAULT_FONT_DATA. */ STAILQ_FOREACH(fl, &fonts, font_next) { font = fl->font_data; if ((*rows * font->vfbd_height <= height && *cols * font->vfbd_width <= width) || (height >= VT_FB_MAX_HEIGHT && width >= VT_FB_MAX_WIDTH && font->vfbd_height == DEFAULT_FONT_DATA.vfbd_height && font->vfbd_width == DEFAULT_FONT_DATA.vfbd_width)) { if (font->vfbd_font == NULL || fl->font_flags == FONT_RELOAD) { if (fl->font_load != NULL && fl->font_name != NULL) { font = fl->font_load(fl->font_name); } if (font == NULL) continue; } *rows = height / font->vfbd_height; *cols = width / font->vfbd_width; break; } font = NULL; } if (font == NULL) { /* * We have fonts sorted smallest last, try it before * falling back to builtin. */ fl = STAILQ_LAST(&fonts, fontlist, font_next); if (fl != NULL && fl->font_load != NULL && fl->font_name != NULL) { font = fl->font_load(fl->font_name); } if (font == NULL) font = &DEFAULT_FONT_DATA; *rows = height / font->vfbd_height; *cols = width / font->vfbd_width; } return (font); } static void cons_clear(void) { char clear[] = { '\033', 'c' }; /* Reset terminal */ teken_input(&gfx_state.tg_teken, clear, sizeof(clear)); gfx_state.tg_functions->tf_param(&gfx_state, TP_SHOWCURSOR, 0); } void setup_font(teken_gfx_t *state, teken_unit_t height, teken_unit_t width) { vt_font_bitmap_data_t *font_data; teken_pos_t *tp = &state->tg_tp; char env[8]; int i; /* * set_font() will select a appropriate sized font for * the number of rows and columns selected. If we don't * have a font that will fit, then it will use the * default builtin font and adjust the rows and columns * to fit on the screen. */ font_data = set_font(&tp->tp_row, &tp->tp_col, height, width); if (font_data == NULL) panic("out of memory"); for (i = 0; i < VFNT_MAPS; i++) { state->tg_font.vf_map[i] = font_data->vfbd_font->vf_map[i]; state->tg_font.vf_map_count[i] = font_data->vfbd_font->vf_map_count[i]; } state->tg_font.vf_bytes = font_data->vfbd_font->vf_bytes; state->tg_font.vf_height = font_data->vfbd_font->vf_height; state->tg_font.vf_width = font_data->vfbd_font->vf_width; snprintf(env, sizeof (env), "%ux%u", state->tg_font.vf_width, state->tg_font.vf_height); env_setenv("screen.font", EV_VOLATILE | EV_NOHOOK, env, font_set, env_nounset); } /* Binary search for the glyph. Return 0 if not found. */ static uint16_t font_bisearch(const vfnt_map_t *map, uint32_t len, teken_char_t src) { unsigned min, mid, max; min = 0; max = len - 1; /* Empty font map. */ if (len == 0) return (0); /* Character below minimal entry. */ if (src < map[0].vfm_src) return (0); /* Optimization: ASCII characters occur very often. */ if (src <= map[0].vfm_src + map[0].vfm_len) return (src - map[0].vfm_src + map[0].vfm_dst); /* Character above maximum entry. */ if (src > map[max].vfm_src + map[max].vfm_len) return (0); /* Binary search. */ while (max >= min) { mid = (min + max) / 2; if (src < map[mid].vfm_src) max = mid - 1; else if (src > map[mid].vfm_src + map[mid].vfm_len) min = mid + 1; else return (src - map[mid].vfm_src + map[mid].vfm_dst); } return (0); } /* * Return glyph bitmap. If glyph is not found, we will return bitmap * for the first (offset 0) glyph. */ uint8_t * font_lookup(const struct vt_font *vf, teken_char_t c, const teken_attr_t *a) { uint16_t dst; size_t stride; /* Substitute bold with normal if not found. */ if (a->ta_format & TF_BOLD) { dst = font_bisearch(vf->vf_map[VFNT_MAP_BOLD], vf->vf_map_count[VFNT_MAP_BOLD], c); if (dst != 0) goto found; } dst = font_bisearch(vf->vf_map[VFNT_MAP_NORMAL], vf->vf_map_count[VFNT_MAP_NORMAL], c); found: stride = howmany(vf->vf_width, 8) * vf->vf_height; return (&vf->vf_bytes[dst * stride]); } static int load_mapping(int fd, struct vt_font *fp, int n) { size_t i, size; ssize_t rv; vfnt_map_t *mp; if (fp->vf_map_count[n] == 0) return (0); size = fp->vf_map_count[n] * sizeof(*mp); mp = malloc(size); if (mp == NULL) return (ENOMEM); fp->vf_map[n] = mp; rv = read(fd, mp, size); if (rv < 0 || (size_t)rv != size) { free(fp->vf_map[n]); fp->vf_map[n] = NULL; return (EIO); } for (i = 0; i < fp->vf_map_count[n]; i++) { mp[i].vfm_src = be32toh(mp[i].vfm_src); mp[i].vfm_dst = be16toh(mp[i].vfm_dst); mp[i].vfm_len = be16toh(mp[i].vfm_len); } return (0); } static int builtin_mapping(struct vt_font *fp, int n) { size_t size; struct vfnt_map *mp; if (n >= VFNT_MAPS) return (EINVAL); if (fp->vf_map_count[n] == 0) return (0); size = fp->vf_map_count[n] * sizeof(*mp); mp = malloc(size); if (mp == NULL) return (ENOMEM); fp->vf_map[n] = mp; memcpy(mp, DEFAULT_FONT_DATA.vfbd_font->vf_map[n], size); return (0); } /* * Load font from builtin or from file. * We do need special case for builtin because the builtin font glyphs * are compressed and we do need to uncompress them. * Having single load_font() for both cases will help us to simplify * font switch handling. */ static vt_font_bitmap_data_t * load_font(char *path) { int fd, i; uint32_t glyphs; struct font_header fh; struct fontlist *fl; vt_font_bitmap_data_t *bp; struct vt_font *fp; size_t size; ssize_t rv; /* Get our entry from the font list. */ STAILQ_FOREACH(fl, &fonts, font_next) { if (strcmp(fl->font_name, path) == 0) break; } if (fl == NULL) return (NULL); /* Should not happen. */ bp = fl->font_data; if (bp->vfbd_font != NULL && fl->font_flags != FONT_RELOAD) return (bp); fd = -1; /* * Special case for builtin font. * Builtin font is the very first font we load, we do not have * previous loads to be released. */ if (fl->font_flags == FONT_BUILTIN) { if ((fp = calloc(1, sizeof(struct vt_font))) == NULL) return (NULL); fp->vf_width = DEFAULT_FONT_DATA.vfbd_width; fp->vf_height = DEFAULT_FONT_DATA.vfbd_height; fp->vf_bytes = malloc(DEFAULT_FONT_DATA.vfbd_uncompressed_size); if (fp->vf_bytes == NULL) { free(fp); return (NULL); } bp->vfbd_uncompressed_size = DEFAULT_FONT_DATA.vfbd_uncompressed_size; bp->vfbd_compressed_size = DEFAULT_FONT_DATA.vfbd_compressed_size; if (lz4_decompress(DEFAULT_FONT_DATA.vfbd_compressed_data, fp->vf_bytes, DEFAULT_FONT_DATA.vfbd_compressed_size, DEFAULT_FONT_DATA.vfbd_uncompressed_size, 0) != 0) { free(fp->vf_bytes); free(fp); return (NULL); } for (i = 0; i < VFNT_MAPS; i++) { fp->vf_map_count[i] = DEFAULT_FONT_DATA.vfbd_font->vf_map_count[i]; if (builtin_mapping(fp, i) != 0) goto free_done; } bp->vfbd_font = fp; return (bp); } fd = open(path, O_RDONLY); if (fd < 0) return (NULL); size = sizeof(fh); rv = read(fd, &fh, size); if (rv < 0 || (size_t)rv != size) { bp = NULL; goto done; } if (memcmp(fh.fh_magic, FONT_HEADER_MAGIC, sizeof(fh.fh_magic)) != 0) { bp = NULL; goto done; } if ((fp = calloc(1, sizeof(struct vt_font))) == NULL) { bp = NULL; goto done; } for (i = 0; i < VFNT_MAPS; i++) fp->vf_map_count[i] = be32toh(fh.fh_map_count[i]); glyphs = be32toh(fh.fh_glyph_count); fp->vf_width = fh.fh_width; fp->vf_height = fh.fh_height; size = howmany(fp->vf_width, 8) * fp->vf_height * glyphs; bp->vfbd_uncompressed_size = size; if ((fp->vf_bytes = malloc(size)) == NULL) goto free_done; rv = read(fd, fp->vf_bytes, size); if (rv < 0 || (size_t)rv != size) goto free_done; for (i = 0; i < VFNT_MAPS; i++) { if (load_mapping(fd, fp, i) != 0) goto free_done; } /* * Reset builtin flag now as we have full font loaded. */ if (fl->font_flags == FONT_BUILTIN) fl->font_flags = FONT_AUTO; /* * Release previously loaded entries. We can do this now, as * the new font is loaded. Note, there can be no console * output till the new font is in place and teken is notified. * We do need to keep fl->font_data for glyph dimensions. */ STAILQ_FOREACH(fl, &fonts, font_next) { if (fl->font_data->vfbd_font == NULL) continue; for (i = 0; i < VFNT_MAPS; i++) free(fl->font_data->vfbd_font->vf_map[i]); free(fl->font_data->vfbd_font->vf_bytes); free(fl->font_data->vfbd_font); fl->font_data->vfbd_font = NULL; } bp->vfbd_font = fp; bp->vfbd_compressed_size = 0; done: if (fd != -1) close(fd); return (bp); free_done: for (i = 0; i < VFNT_MAPS; i++) free(fp->vf_map[i]); free(fp->vf_bytes); free(fp); bp = NULL; goto done; } struct name_entry { char *n_name; SLIST_ENTRY(name_entry) n_entry; }; SLIST_HEAD(name_list, name_entry); /* Read font names from index file. */ static struct name_list * read_list(char *fonts) { struct name_list *nl; struct name_entry *np; char *dir, *ptr; char buf[PATH_MAX]; int fd, len; + TSENTER(); + dir = strdup(fonts); if (dir == NULL) return (NULL); ptr = strrchr(dir, '/'); *ptr = '\0'; fd = open(fonts, O_RDONLY); if (fd < 0) return (NULL); nl = malloc(sizeof(*nl)); if (nl == NULL) { close(fd); return (nl); } SLIST_INIT(nl); while ((len = fgetstr(buf, sizeof (buf), fd)) >= 0) { if (*buf == '#' || *buf == '\0') continue; if (bcmp(buf, "MENU", 4) == 0) continue; if (bcmp(buf, "FONT", 4) == 0) continue; ptr = strchr(buf, ':'); if (ptr == NULL) continue; else *ptr = '\0'; np = malloc(sizeof(*np)); if (np == NULL) { close(fd); return (nl); /* return what we have */ } if (asprintf(&np->n_name, "%s/%s", dir, buf) < 0) { free(np); close(fd); return (nl); /* return what we have */ } SLIST_INSERT_HEAD(nl, np, n_entry); } close(fd); + TSEXIT(); return (nl); } /* * Read the font properties and insert new entry into the list. * The font list is built in descending order. */ static bool insert_font(char *name, FONT_FLAGS flags) { struct font_header fh; struct fontlist *fp, *previous, *entry, *next; size_t size; ssize_t rv; int fd; char *font_name; + TSENTER(); + font_name = NULL; if (flags == FONT_BUILTIN) { /* * We only install builtin font once, while setting up * initial console. Since this will happen very early, * we assume asprintf will not fail. Once we have access to * files, the builtin font will be replaced by font loaded * from file. */ if (!STAILQ_EMPTY(&fonts)) return (false); fh.fh_width = DEFAULT_FONT_DATA.vfbd_width; fh.fh_height = DEFAULT_FONT_DATA.vfbd_height; (void) asprintf(&font_name, "%dx%d", DEFAULT_FONT_DATA.vfbd_width, DEFAULT_FONT_DATA.vfbd_height); } else { fd = open(name, O_RDONLY); if (fd < 0) return (false); rv = read(fd, &fh, sizeof(fh)); close(fd); if (rv < 0 || (size_t)rv != sizeof(fh)) return (false); if (memcmp(fh.fh_magic, FONT_HEADER_MAGIC, sizeof(fh.fh_magic)) != 0) return (false); font_name = strdup(name); } if (font_name == NULL) return (false); /* * If we have an entry with the same glyph dimensions, replace * the file name and mark us. We only support unique dimensions. */ STAILQ_FOREACH(entry, &fonts, font_next) { if (fh.fh_width == entry->font_data->vfbd_width && fh.fh_height == entry->font_data->vfbd_height) { free(entry->font_name); entry->font_name = font_name; entry->font_flags = FONT_RELOAD; + TSEXIT(); return (true); } } fp = calloc(sizeof(*fp), 1); if (fp == NULL) { free(font_name); return (false); } fp->font_data = calloc(sizeof(*fp->font_data), 1); if (fp->font_data == NULL) { free(font_name); free(fp); return (false); } fp->font_name = font_name; fp->font_flags = flags; fp->font_load = load_font; fp->font_data->vfbd_width = fh.fh_width; fp->font_data->vfbd_height = fh.fh_height; if (STAILQ_EMPTY(&fonts)) { STAILQ_INSERT_HEAD(&fonts, fp, font_next); + TSEXIT(); return (true); } previous = NULL; size = fp->font_data->vfbd_width * fp->font_data->vfbd_height; STAILQ_FOREACH(entry, &fonts, font_next) { vt_font_bitmap_data_t *bd; bd = entry->font_data; /* Should fp be inserted before the entry? */ if (size > bd->vfbd_width * bd->vfbd_height) { if (previous == NULL) { STAILQ_INSERT_HEAD(&fonts, fp, font_next); } else { STAILQ_INSERT_AFTER(&fonts, previous, fp, font_next); } + TSEXIT(); return (true); } next = STAILQ_NEXT(entry, font_next); if (next == NULL || size > next->font_data->vfbd_width * next->font_data->vfbd_height) { STAILQ_INSERT_AFTER(&fonts, entry, fp, font_next); + TSEXIT(); return (true); } previous = entry; } + TSEXIT(); return (true); } static int font_set(struct env_var *ev __unused, int flags __unused, const void *value) { struct fontlist *fl; char *eptr; unsigned long x = 0, y = 0; /* * Attempt to extract values from "XxY" string. In case of error, * we have unmaching glyph dimensions and will just output the * available values. */ if (value != NULL) { x = strtoul(value, &eptr, 10); if (*eptr == 'x') y = strtoul(eptr + 1, &eptr, 10); } STAILQ_FOREACH(fl, &fonts, font_next) { if (fl->font_data->vfbd_width == x && fl->font_data->vfbd_height == y) break; } if (fl != NULL) { /* Reset any FONT_MANUAL flag. */ reset_font_flags(); /* Mark this font manually loaded */ fl->font_flags = FONT_MANUAL; cons_update_mode(gfx_state.tg_fb_type != FB_TEXT); return (CMD_OK); } printf("Available fonts:\n"); STAILQ_FOREACH(fl, &fonts, font_next) { printf(" %dx%d\n", fl->font_data->vfbd_width, fl->font_data->vfbd_height); } return (CMD_OK); } void bios_text_font(bool use_vga_font) { if (use_vga_font) (void) insert_font(VGA_8X16_FONT, FONT_MANUAL); else (void) insert_font(DEFAULT_8X16_FONT, FONT_MANUAL); } void autoload_font(bool bios) { struct name_list *nl; struct name_entry *np; + TSENTER(); + nl = read_list("/boot/fonts/INDEX.fonts"); if (nl == NULL) return; while (!SLIST_EMPTY(nl)) { np = SLIST_FIRST(nl); SLIST_REMOVE_HEAD(nl, n_entry); if (insert_font(np->n_name, FONT_AUTO) == false) printf("failed to add font: %s\n", np->n_name); free(np->n_name); free(np); } /* * If vga text mode was requested, load vga.font (8x16 bold) font. */ if (bios) { bios_text_font(true); } (void) cons_update_mode(gfx_state.tg_fb_type != FB_TEXT); + + TSEXIT(); } COMMAND_SET(load_font, "loadfont", "load console font from file", command_font); static int command_font(int argc, char *argv[]) { int i, c, rc; struct fontlist *fl; vt_font_bitmap_data_t *bd; bool list; list = false; optind = 1; optreset = 1; rc = CMD_OK; while ((c = getopt(argc, argv, "l")) != -1) { switch (c) { case 'l': list = true; break; case '?': default: return (CMD_ERROR); } } argc -= optind; argv += optind; if (argc > 1 || (list && argc != 0)) { printf("Usage: loadfont [-l] | [file.fnt]\n"); return (CMD_ERROR); } if (list) { STAILQ_FOREACH(fl, &fonts, font_next) { printf("font %s: %dx%d%s\n", fl->font_name, fl->font_data->vfbd_width, fl->font_data->vfbd_height, fl->font_data->vfbd_font == NULL? "" : " loaded"); } return (CMD_OK); } /* Clear scren */ cons_clear(); if (argc == 1) { char *name = argv[0]; if (insert_font(name, FONT_MANUAL) == false) { printf("loadfont error: failed to load: %s\n", name); return (CMD_ERROR); } (void) cons_update_mode(gfx_state.tg_fb_type != FB_TEXT); return (CMD_OK); } if (argc == 0) { /* * Walk entire font list, release any loaded font, and set * autoload flag. The font list does have at least the builtin * default font. */ STAILQ_FOREACH(fl, &fonts, font_next) { if (fl->font_data->vfbd_font != NULL) { bd = fl->font_data; /* * Note the setup_font() is releasing * font bytes. */ for (i = 0; i < VFNT_MAPS; i++) free(bd->vfbd_font->vf_map[i]); free(fl->font_data->vfbd_font); fl->font_data->vfbd_font = NULL; fl->font_data->vfbd_uncompressed_size = 0; fl->font_flags = FONT_AUTO; } } (void) cons_update_mode(gfx_state.tg_fb_type != FB_TEXT); } return (rc); } bool gfx_get_edid_resolution(struct vesa_edid_info *edid, edid_res_list_t *res) { struct resolution *rp, *p; /* * Walk detailed timings tables (4). */ if ((edid->display.supported_features & EDID_FEATURE_PREFERRED_TIMING_MODE) != 0) { /* Walk detailed timing descriptors (4) */ for (int i = 0; i < DET_TIMINGS; i++) { /* * Reserved value 0 is not used for display descriptor. */ if (edid->detailed_timings[i].pixel_clock == 0) continue; if ((rp = malloc(sizeof(*rp))) == NULL) continue; rp->width = GET_EDID_INFO_WIDTH(edid, i); rp->height = GET_EDID_INFO_HEIGHT(edid, i); if (rp->width > 0 && rp->width <= EDID_MAX_PIXELS && rp->height > 0 && rp->height <= EDID_MAX_LINES) TAILQ_INSERT_TAIL(res, rp, next); else free(rp); } } /* * Walk standard timings list (8). */ for (int i = 0; i < STD_TIMINGS; i++) { /* Is this field unused? */ if (edid->standard_timings[i] == 0x0101) continue; if ((rp = malloc(sizeof(*rp))) == NULL) continue; rp->width = HSIZE(edid->standard_timings[i]); switch (RATIO(edid->standard_timings[i])) { case RATIO1_1: rp->height = HSIZE(edid->standard_timings[i]); if (edid->header.version > 1 || edid->header.revision > 2) { rp->height = rp->height * 10 / 16; } break; case RATIO4_3: rp->height = HSIZE(edid->standard_timings[i]) * 3 / 4; break; case RATIO5_4: rp->height = HSIZE(edid->standard_timings[i]) * 4 / 5; break; case RATIO16_9: rp->height = HSIZE(edid->standard_timings[i]) * 9 / 16; break; } /* * Create resolution list in decreasing order, except keep * first entry (preferred timing mode). */ TAILQ_FOREACH(p, res, next) { if (p->width * p->height < rp->width * rp->height) { /* Keep preferred mode first */ if (TAILQ_FIRST(res) == p) TAILQ_INSERT_AFTER(res, p, rp, next); else TAILQ_INSERT_BEFORE(p, rp, next); break; } if (TAILQ_NEXT(p, next) == NULL) { TAILQ_INSERT_TAIL(res, rp, next); break; } } } return (!TAILQ_EMPTY(res)); } diff --git a/stand/common/interp.c b/stand/common/interp.c index a0a6bd486614..ab68694d61b7 100644 --- a/stand/common/interp.c +++ b/stand/common/interp.c @@ -1,182 +1,184 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Simple commandline interpreter, toplevel and misc. * * XXX may be obsoleted by BootFORTH or some other, better, interpreter. */ #include #include #include "bootstrap.h" #define MAXARGS 20 /* maximum number of arguments allowed */ /* * Interactive mode */ void interact(void) { static char input[256]; /* big enough? */ const char * volatile interp_identifier; + TSENTER(); + /* * Because interp_identifier is volatile, it cannot be optimized out by * the compiler as it's considered an externally observable event. This * prevents the compiler from optimizing out our carefully placed * $Interpreter:4th string that userboot may use to determine that * we need to switch interpreters. */ interp_identifier = bootprog_interp; interp_init(); printf("\n"); /* * Before interacting, we might want to autoboot. */ autoboot_maybe(); /* * Not autobooting, go manual */ printf("\nType '?' for a list of commands, 'help' for more detailed help.\n"); if (getenv("prompt") == NULL) setenv("prompt", "${interpret}", 1); if (getenv("interpret") == NULL) setenv("interpret", "OK", 1); for (;;) { input[0] = '\0'; interp_emit_prompt(); ngets(input, sizeof(input)); interp_run(input); } } /* * Read commands from a file, then execute them. * * We store the commands in memory and close the source file so that the media * holding it can safely go away while we are executing. * * Commands may be prefixed with '@' (so they aren't displayed) or '-' (so * that the script won't stop if they fail). */ COMMAND_SET(include, "include", "read commands from a file", command_include); static int command_include(int argc, char *argv[]) { int i; int res; char **argvbuf; /* * Since argv is static, we need to save it here. */ argvbuf = (char**) calloc((u_int)argc, sizeof(char*)); for (i = 0; i < argc; i++) argvbuf[i] = strdup(argv[i]); res=CMD_OK; for (i = 1; (i < argc) && (res == CMD_OK); i++) res = interp_include(argvbuf[i]); for (i = 0; i < argc; i++) free(argvbuf[i]); free(argvbuf); return(res); } /* * Emit the current prompt; use the same syntax as the parser * for embedding environment variables. Does not accept input. */ void interp_emit_prompt(void) { char *pr, *p, *cp, *ev; if ((cp = getenv("prompt")) == NULL) cp = ">"; pr = p = strdup(cp); while (*p != 0) { if ((*p == '$') && (*(p+1) == '{')) { for (cp = p + 2; (*cp != 0) && (*cp != '}'); cp++) ; *cp = 0; ev = getenv(p + 2); if (ev != NULL) printf("%s", ev); p = cp + 1; continue; } putchar(*p++); } putchar(' '); free(pr); } /* * Perform a builtin command */ int interp_builtin_cmd(int argc, char *argv[]) { int result; struct bootblk_command **cmdp; bootblk_cmd_t *cmd; if (argc < 1) return(CMD_OK); /* set return defaults; a successful command will override these */ command_errmsg = command_errbuf; strcpy(command_errbuf, "no error message"); cmd = NULL; result = CMD_ERROR; /* search the command set for the command */ SET_FOREACH(cmdp, Xcommand_set) { if (((*cmdp)->c_name != NULL) && !strcmp(argv[0], (*cmdp)->c_name)) cmd = (*cmdp)->c_fn; } if (cmd != NULL) { result = (cmd)(argc, argv); } else { command_errmsg = "unknown command"; } return(result); } diff --git a/stand/common/interp_lua.c b/stand/common/interp_lua.c index 94001918e151..8a420660683e 100644 --- a/stand/common/interp_lua.c +++ b/stand/common/interp_lua.c @@ -1,200 +1,206 @@ /*- * Copyright (c) 2011 Wojciech A. Koszek * Copyright (c) 2014 Pedro Souza * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include "bootstrap.h" #define lua_c #include "lstd.h" #include #include #include #include #include #include #include struct interp_lua_softc { lua_State *luap; }; static struct interp_lua_softc lua_softc; #ifdef LUA_DEBUG #define LDBG(...) do { \ printf("%s(%d): ", __func__, __LINE__); \ printf(__VA_ARGS__); \ printf("\n"); \ } while (0) #else #define LDBG(...) #endif #define LOADER_LUA LUA_PATH "/loader.lua" INTERP_DEFINE("lua"); static void * interp_lua_realloc(void *ud __unused, void *ptr, size_t osize __unused, size_t nsize) { if (nsize == 0) { free(ptr); return NULL; } return realloc(ptr, nsize); } /* * The libraries commented out below either lack the proper * support from libsa, or they are unlikely to be useful * in the bootloader, so have been commented out. */ static const luaL_Reg loadedlibs[] = { {"_G", luaopen_base}, {LUA_LOADLIBNAME, luaopen_package}, // {LUA_COLIBNAME, luaopen_coroutine}, // {LUA_TABLIBNAME, luaopen_table}, {LUA_STRLIBNAME, luaopen_string}, // {LUA_IOLIBNAME, luaopen_io}, // {LUA_OSLIBNAME, luaopen_os}, // {LUA_MATHLIBNAME, luaopen_math}, // {LUA_UTF8LIBNAME, luaopen_utf8}, // {LUA_DBLIBNAME, luaopen_debug}, {"errno", luaopen_errno}, {"io", luaopen_io}, {"lfs", luaopen_lfs}, {"loader", luaopen_loader}, {"pager", luaopen_pager}, {NULL, NULL} }; void interp_init(void) { lua_State *luap; struct interp_lua_softc *softc = &lua_softc; const char *filename; const luaL_Reg *lib; + TSENTER(); + setenv("script.lang", "lua", 1); LDBG("creating context"); luap = lua_newstate(interp_lua_realloc, NULL); if (luap == NULL) { printf("problem initializing the Lua interpreter\n"); abort(); } softc->luap = luap; /* "require" functions from 'loadedlibs' and set results to global table */ for (lib = loadedlibs; lib->func; lib++) { luaL_requiref(luap, lib->name, lib->func, 1); lua_pop(luap, 1); /* remove lib */ } filename = LOADER_LUA; if (interp_include(filename) != 0) { const char *errstr = lua_tostring(luap, -1); errstr = errstr == NULL ? "unknown" : errstr; printf("ERROR: %s.\n", errstr); lua_pop(luap, 1); setenv("autoboot_delay", "NO", 1); } + + TSEXIT(); } int interp_run(const char *line) { int argc, nargc; char **argv; lua_State *luap; struct interp_lua_softc *softc = &lua_softc; int status, ret; + TSENTER(); luap = softc->luap; LDBG("executing line..."); if ((status = luaL_dostring(luap, line)) != 0) { lua_pop(luap, 1); /* * The line wasn't executable as lua; run it through parse to * to get consistent parsing of command line arguments, then * run it through cli_execute. If that fails, then we'll try it * as a builtin. */ command_errmsg = NULL; if (parse(&argc, &argv, line) == 0) { lua_getglobal(luap, "cli_execute"); for (nargc = 0; nargc < argc; ++nargc) { lua_pushstring(luap, argv[nargc]); } status = lua_pcall(luap, argc, 1, 0); ret = lua_tointeger(luap, 1); lua_pop(luap, 1); if (status != 0 || ret != 0) { /* * Lua cli_execute will pass the function back * through loader.command, which is a proxy to * interp_builtin_cmd. If we failed to interpret * the command, though, then there's a chance * that didn't happen. Call interp_builtin_cmd * directly if our lua_pcall was not successful. */ status = interp_builtin_cmd(argc, argv); } if (status != 0) { if (command_errmsg != NULL) printf("%s\n", command_errmsg); else printf("Command failed\n"); status = CMD_ERROR; } free(argv); } else { printf("Failed to parse \'%s\'\n", line); status = CMD_ERROR; } } + TSEXIT(); return (status == 0 ? CMD_OK : CMD_ERROR); } int interp_include(const char *filename) { struct interp_lua_softc *softc = &lua_softc; LDBG("loading file %s", filename); return (luaL_dofile(softc->luap, filename)); } diff --git a/stand/common/module.c b/stand/common/module.c index 34ffc10cded3..8bddd9f56f52 100644 --- a/stand/common/module.c +++ b/stand/common/module.c @@ -1,1772 +1,1823 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * file/module function dispatcher, support, etc. */ #include #include #include #include #include #include #include #include #include #if defined(LOADER_FDT_SUPPORT) #include #endif #include "bootstrap.h" #define MDIR_REMOVED 0x0001 #define MDIR_NOHINTS 0x0002 struct moduledir { char *d_path; /* path of modules directory */ u_char *d_hints; /* content of linker.hints file */ int d_hintsz; /* size of hints data */ int d_flags; STAILQ_ENTRY(moduledir) d_link; }; static int file_load(char *filename, vm_offset_t dest, struct preloaded_file **result); static int file_load_dependencies(struct preloaded_file *base_mod); static char * file_search(const char *name, char **extlist); static struct kernel_module * file_findmodule(struct preloaded_file *fp, char *modname, struct mod_depend *verinfo); static int file_havepath(const char *name); static char *mod_searchmodule(char *name, struct mod_depend *verinfo); static char * mod_searchmodule_pnpinfo(const char *bus, const char *pnpinfo); static void file_insert_tail(struct preloaded_file *mp); static void file_remove(struct preloaded_file *fp); struct file_metadata* metadata_next(struct file_metadata *base_mp, int type); static void moduledir_readhints(struct moduledir *mdp); static void moduledir_rebuild(void); /* load address should be tweaked by first module loaded (kernel) */ static vm_offset_t loadaddr = 0; #if defined(LOADER_FDT_SUPPORT) static const char *default_searchpath = "/boot/kernel;/boot/modules;/boot/dtb"; #else static const char *default_searchpath = "/boot/kernel;/boot/modules"; #endif static STAILQ_HEAD(, moduledir) moduledir_list = STAILQ_HEAD_INITIALIZER(moduledir_list); struct preloaded_file *preloaded_files = NULL; static char *kld_ext_list[] = { ".ko", "", ".debug", NULL }; /* * load an object, either a disk file or code module. * * To load a file, the syntax is: * * load -t * * code modules are loaded as: * * load */ COMMAND_SET(load, "load", "load a kernel or module", command_load); static int command_load(int argc, char *argv[]) { struct preloaded_file *fp; char *typestr; char *prefix; char *skip; int dflag, dofile, dokld, ch, error; dflag = dokld = dofile = 0; optind = 1; optreset = 1; typestr = NULL; if (argc == 1) { command_errmsg = "no filename specified"; return (CMD_CRIT); } prefix = skip = NULL; while ((ch = getopt(argc, argv, "dkp:s:t:")) != -1) { switch(ch) { case 'd': dflag++; break; case 'k': dokld = 1; break; case 'p': prefix = optarg; break; case 's': skip = optarg; break; case 't': typestr = optarg; dofile = 1; break; case '?': default: /* getopt has already reported an error */ return (CMD_OK); } } argv += (optind - 1); argc -= (optind - 1); /* * Request to load a raw file? */ if (dofile) { if ((argc != 2) || (typestr == NULL) || (*typestr == 0)) { command_errmsg = "invalid load type"; return (CMD_CRIT); } #ifdef LOADER_VERIEXEC if (strncmp(typestr, "manifest", 8) == 0) { if (dflag > 0) ve_debug_set(dflag); return (load_manifest(argv[1], prefix, skip, NULL)); } #ifdef LOADER_VERIEXEC_PASS_MANIFEST if (strncmp(typestr, "pass_manifest", 13) == 0) { if (dflag > 0) ve_debug_set(dflag); return (pass_manifest(argv[1], prefix)); } #endif #endif fp = file_findfile(argv[1], typestr); if (fp) { snprintf(command_errbuf, sizeof(command_errbuf), "warning: file '%s' already loaded", argv[1]); return (CMD_WARN); } if (file_loadraw(argv[1], typestr, 1) != NULL) return (CMD_OK); /* Failing to load mfs_root is never going to end well! */ if (strcmp("mfs_root", typestr) == 0) return (CMD_FATAL); return (CMD_ERROR); } /* * Do we have explicit KLD load ? */ if (dokld || file_havepath(argv[1])) { error = mod_loadkld(argv[1], argc - 2, argv + 2); if (error == EEXIST) { snprintf(command_errbuf, sizeof(command_errbuf), "warning: KLD '%s' already loaded", argv[1]); return (CMD_WARN); } return (error == 0 ? CMD_OK : CMD_CRIT); } /* * Looks like a request for a module. */ error = mod_load(argv[1], NULL, argc - 2, argv + 2); if (error == EEXIST) { snprintf(command_errbuf, sizeof(command_errbuf), "warning: module '%s' already loaded", argv[1]); return (CMD_WARN); } return (error == 0 ? CMD_OK : CMD_CRIT); } #ifdef LOADER_GELI_SUPPORT COMMAND_SET(load_geli, "load_geli", "load a geli key", command_load_geli); static int command_load_geli(int argc, char *argv[]) { char typestr[80]; char *cp; int ch, num; if (argc < 3) { command_errmsg = "usage is [-n key#] "; return(CMD_ERROR); } num = 0; optind = 1; optreset = 1; while ((ch = getopt(argc, argv, "n:")) != -1) { switch(ch) { case 'n': num = strtol(optarg, &cp, 0); if (cp == optarg) { snprintf(command_errbuf, sizeof(command_errbuf), "bad key index '%s'", optarg); return(CMD_ERROR); } break; case '?': default: /* getopt has already reported an error */ return(CMD_OK); } } argv += (optind - 1); argc -= (optind - 1); sprintf(typestr, "%s:geli_keyfile%d", argv[1], num); return (file_loadraw(argv[2], typestr, 1) ? CMD_OK : CMD_ERROR); } #endif void unload(void) { struct preloaded_file *fp; while (preloaded_files != NULL) { fp = preloaded_files; preloaded_files = preloaded_files->f_next; file_discard(fp); } loadaddr = 0; unsetenv("kernelname"); /* Reset tg_kernel_supported to allow next load to check it again. */ gfx_state.tg_kernel_supported = false; } COMMAND_SET(unload, "unload", "unload all modules", command_unload); static int command_unload(int argc, char *argv[]) { unload(); return(CMD_OK); } COMMAND_SET(lsmod, "lsmod", "list loaded modules", command_lsmod); static int command_lsmod(int argc, char *argv[]) { struct preloaded_file *fp; struct kernel_module *mp; struct file_metadata *md; char lbuf[80]; int ch, verbose, ret = 0; verbose = 0; optind = 1; optreset = 1; while ((ch = getopt(argc, argv, "v")) != -1) { switch(ch) { case 'v': verbose = 1; break; case '?': default: /* getopt has already reported an error */ return(CMD_OK); } } pager_open(); for (fp = preloaded_files; fp; fp = fp->f_next) { snprintf(lbuf, sizeof(lbuf), " %p: ", (void *) fp->f_addr); pager_output(lbuf); pager_output(fp->f_name); snprintf(lbuf, sizeof(lbuf), " (%s, 0x%lx)\n", fp->f_type, (long)fp->f_size); if (pager_output(lbuf)) break; if (fp->f_args != NULL) { pager_output(" args: "); pager_output(fp->f_args); if (pager_output("\n")) break; } if (fp->f_modules) { pager_output(" modules: "); for (mp = fp->f_modules; mp; mp = mp->m_next) { snprintf(lbuf, sizeof(lbuf), "%s.%d ", mp->m_name, mp->m_version); pager_output(lbuf); } if (pager_output("\n")) break; } if (verbose) { /* XXX could add some formatting smarts here to display some better */ for (md = fp->f_metadata; md != NULL; md = md->md_next) { snprintf(lbuf, sizeof(lbuf), " 0x%04x, 0x%lx\n", md->md_type, (long) md->md_size); if (pager_output(lbuf)) break; } } if (ret) break; } pager_close(); return(CMD_OK); } COMMAND_SET(pnpmatch, "pnpmatch", "list matched modules based on pnpinfo", command_pnpmatch); static int pnp_dump_flag = 0; static int pnp_unbound_flag = 0; static int pnp_verbose_flag = 0; static int command_pnpmatch(int argc, char *argv[]) { char *module; int ch; pnp_verbose_flag = 0; pnp_dump_flag = 0; optind = 1; optreset = 1; while ((ch = getopt(argc, argv, "vd")) != -1) { switch(ch) { case 'v': pnp_verbose_flag = 1; break; case 'd': pnp_dump_flag = 1; break; case '?': default: /* getopt has already reported an error */ return(CMD_OK); } } argv += (optind - 1); argc -= (optind - 1); module = mod_searchmodule_pnpinfo(argv[1], argv[2]); if (module) printf("Matched module: %s\n", module); else if(argv[1]) printf("No module matches %s\n", argv[1]); return (CMD_OK); } COMMAND_SET(pnpload, "pnpload", "load matched modules based on pnpinfo", command_pnpload); static int command_pnpload(int argc, char *argv[]) { char *module; int ch, error; pnp_verbose_flag = 0; pnp_dump_flag = 0; optind = 1; optreset = 1; while ((ch = getopt(argc, argv, "vd")) != -1) { switch(ch) { case 'v': pnp_verbose_flag = 1; break; case 'd': pnp_dump_flag = 1; break; case '?': default: /* getopt has already reported an error */ return(CMD_OK); } } argv += (optind - 1); argc -= (optind - 1); if (argc != 2) return (CMD_ERROR); module = mod_searchmodule_pnpinfo(argv[1], argv[2]); error = mod_load(module, NULL, 0, NULL); if (error == EEXIST) { snprintf(command_errbuf, sizeof(command_errbuf), "warning: module '%s' already loaded", argv[1]); return (CMD_WARN); } return (error == 0 ? CMD_OK : CMD_CRIT); } #if defined(LOADER_FDT_SUPPORT) static void pnpautoload_simplebus(void) { const char *pnpstring; const char *compatstr; char *pnpinfo = NULL; char *module = NULL; int tag = 0, len, pnplen; int error; while (1) { pnpstring = fdt_devmatch_next(&tag, &len); if (pnpstring == NULL) return; compatstr = pnpstring; for (pnplen = 0; pnplen != len; compatstr = pnpstring + pnplen) { pnplen += strlen(compatstr) + 1; asprintf(&pnpinfo, "compat=%s", compatstr); module = mod_searchmodule_pnpinfo("simplebus", pnpinfo); if (module) { error = mod_loadkld(module, 0, NULL); if (error) printf("Cannot load module %s\n", module); break; } } free(pnpinfo); free(module); } } #endif struct pnp_bus { const char *name; void (*load)(void); }; struct pnp_bus pnp_buses[] = { #if defined(LOADER_FDT_SUPPORT) {"simplebus", pnpautoload_simplebus}, #endif }; COMMAND_SET(pnpautoload, "pnpautoload", "auto load modules based on pnpinfo", command_pnpautoload); static int command_pnpautoload(int argc, char *argv[]) { int i; int verbose; int ch, match; pnp_verbose_flag = 0; pnp_dump_flag = 0; verbose = 0; optind = 1; optreset = 1; match = 0; while ((ch = getopt(argc, argv, "v")) != -1) { switch(ch) { case 'v': verbose = 1; break; case '?': default: /* getopt has already reported an error */ return(CMD_OK); } } argv += (optind - 1); argc -= (optind - 1); if (argc > 2) return (CMD_ERROR); for (i = 0; i < nitems(pnp_buses); i++) { if (argc == 2 && strcmp(argv[1], pnp_buses[i].name) != 0) { if (verbose) printf("Skipping bus %s\n", pnp_buses[i].name); continue; } if (verbose) printf("Autoloading modules for simplebus\n"); pnp_buses[i].load(); match = 1; } if (match == 0) printf("Unsupported bus %s\n", argv[1]); return (CMD_OK); } /* * File level interface, functions file_* */ int file_load(char *filename, vm_offset_t dest, struct preloaded_file **result) { static int last_file_format = 0; struct preloaded_file *fp; int error; int i; if (archsw.arch_loadaddr != NULL) dest = archsw.arch_loadaddr(LOAD_RAW, filename, dest); error = EFTYPE; for (i = last_file_format, fp = NULL; file_formats[i] && fp == NULL; i++) { error = (file_formats[i]->l_load)(filename, dest, &fp); if (error == 0) { fp->f_loader = last_file_format = i; /* remember the loader */ *result = fp; break; } else if (last_file_format == i && i != 0) { /* Restart from the beginning */ i = -1; last_file_format = 0; fp = NULL; continue; } if (error == EFTYPE) continue; /* Unknown to this handler? */ if (error) { snprintf(command_errbuf, sizeof(command_errbuf), "can't load file '%s': %s", filename, strerror(error)); break; } } return (error); } static int file_load_dependencies(struct preloaded_file *base_file) { struct file_metadata *md; struct preloaded_file *fp; struct mod_depend *verinfo; struct kernel_module *mp; char *dmodname; int error; md = file_findmetadata(base_file, MODINFOMD_DEPLIST); if (md == NULL) return (0); error = 0; do { verinfo = (struct mod_depend*)md->md_data; dmodname = (char *)(verinfo + 1); if (file_findmodule(NULL, dmodname, verinfo) == NULL) { printf("loading required module '%s'\n", dmodname); error = mod_load(dmodname, verinfo, 0, NULL); if (error) break; /* * If module loaded via kld name which isn't listed * in the linker.hints file, we should check if it have * required version. */ mp = file_findmodule(NULL, dmodname, verinfo); if (mp == NULL) { snprintf(command_errbuf, sizeof(command_errbuf), "module '%s' exists but with wrong version", dmodname); error = ENOENT; break; } } md = metadata_next(md, MODINFOMD_DEPLIST); } while (md); if (!error) return (0); /* Load failed; discard everything */ while (base_file != NULL) { fp = base_file; base_file = base_file->f_next; file_discard(fp); } return (error); } vm_offset_t build_font_module(vm_offset_t addr) { vt_font_bitmap_data_t *bd; struct vt_font *fd; struct preloaded_file *fp; size_t size; uint32_t checksum; int i; struct font_info fi; struct fontlist *fl; uint64_t fontp; if (STAILQ_EMPTY(&fonts)) return (addr); /* We can't load first */ if ((file_findfile(NULL, NULL)) == NULL) { printf("Can not load font module: %s\n", "the kernel is not loaded"); return (addr); } /* helper pointers */ bd = NULL; STAILQ_FOREACH(fl, &fonts, font_next) { if (gfx_state.tg_font.vf_width == fl->font_data->vfbd_width && gfx_state.tg_font.vf_height == fl->font_data->vfbd_height) { /* * Kernel does have better built in font. */ if (fl->font_flags == FONT_BUILTIN) return (addr); bd = fl->font_data; break; } } if (bd == NULL) return (addr); fd = bd->vfbd_font; fi.fi_width = fd->vf_width; checksum = fi.fi_width; fi.fi_height = fd->vf_height; checksum += fi.fi_height; fi.fi_bitmap_size = bd->vfbd_uncompressed_size; checksum += fi.fi_bitmap_size; size = roundup2(sizeof (struct font_info), 8); for (i = 0; i < VFNT_MAPS; i++) { fi.fi_map_count[i] = fd->vf_map_count[i]; checksum += fi.fi_map_count[i]; size += fd->vf_map_count[i] * sizeof (struct vfnt_map); size += roundup2(size, 8); } size += bd->vfbd_uncompressed_size; fi.fi_checksum = -checksum; fp = file_findfile(NULL, "elf kernel"); if (fp == NULL) fp = file_findfile(NULL, "elf64 kernel"); if (fp == NULL) panic("can't find kernel file"); fontp = addr; addr += archsw.arch_copyin(&fi, addr, sizeof (struct font_info)); addr = roundup2(addr, 8); /* Copy maps. */ for (i = 0; i < VFNT_MAPS; i++) { if (fd->vf_map_count[i] != 0) { addr += archsw.arch_copyin(fd->vf_map[i], addr, fd->vf_map_count[i] * sizeof (struct vfnt_map)); addr = roundup2(addr, 8); } } /* Copy the bitmap. */ addr += archsw.arch_copyin(fd->vf_bytes, addr, fi.fi_bitmap_size); /* Looks OK so far; populate control structure */ file_addmetadata(fp, MODINFOMD_FONT, sizeof(fontp), &fontp); return (addr); } #ifdef LOADER_VERIEXEC_VECTX #define VECTX_HANDLE(fd) vctx #else #define VECTX_HANDLE(fd) fd #endif /* * We've been asked to load (fname) as (type), so just suck it in, * no arguments or anything. */ struct preloaded_file * file_loadraw(const char *fname, char *type, int insert) { struct preloaded_file *fp; char *name; int fd, got; vm_offset_t laddr; #ifdef LOADER_VERIEXEC_VECTX struct vectx *vctx; int verror; #endif /* We can't load first */ if ((file_findfile(NULL, NULL)) == NULL) { command_errmsg = "can't load file before kernel"; return(NULL); } /* locate the file on the load path */ name = file_search(fname, NULL); if (name == NULL) { snprintf(command_errbuf, sizeof(command_errbuf), "can't find '%s'", fname); return(NULL); } if ((fd = open(name, O_RDONLY)) < 0) { snprintf(command_errbuf, sizeof(command_errbuf), "can't open '%s': %s", name, strerror(errno)); free(name); return(NULL); } #ifdef LOADER_VERIEXEC_VECTX vctx = vectx_open(fd, name, 0L, NULL, &verror, __func__); if (verror) { sprintf(command_errbuf, "can't verify '%s': %s", name, ve_error_get()); free(name); free(vctx); close(fd); return(NULL); } #else #ifdef LOADER_VERIEXEC if (verify_file(fd, name, 0, VE_MUST, __func__) < 0) { sprintf(command_errbuf, "can't verify '%s': %s", name, ve_error_get()); free(name); close(fd); return(NULL); } #endif #endif if (archsw.arch_loadaddr != NULL) loadaddr = archsw.arch_loadaddr(LOAD_RAW, name, loadaddr); printf("%s ", name); laddr = loadaddr; for (;;) { /* read in 4k chunks; size is not really important */ got = archsw.arch_readin(VECTX_HANDLE(fd), laddr, 4096); if (got == 0) /* end of file */ break; if (got < 0) { /* error */ snprintf(command_errbuf, sizeof(command_errbuf), "error reading '%s': %s", name, strerror(errno)); free(name); close(fd); #ifdef LOADER_VERIEXEC_VECTX free(vctx); #endif return(NULL); } laddr += got; } printf("size=%#jx\n", (uintmax_t)(laddr - loadaddr)); #ifdef LOADER_VERIEXEC_VECTX verror = vectx_close(vctx, VE_MUST, __func__); if (verror) { free(name); close(fd); free(vctx); return(NULL); } #endif /* Looks OK so far; create & populate control structure */ fp = file_alloc(); if (fp == NULL) { snprintf(command_errbuf, sizeof (command_errbuf), "no memory to load %s", name); free(name); close(fd); return (NULL); } fp->f_name = name; fp->f_type = strdup(type); fp->f_args = NULL; fp->f_metadata = NULL; fp->f_loader = -1; fp->f_addr = loadaddr; fp->f_size = laddr - loadaddr; if (fp->f_type == NULL) { snprintf(command_errbuf, sizeof (command_errbuf), "no memory to load %s", name); free(name); close(fd); return (NULL); } /* recognise space consumption */ loadaddr = laddr; /* Add to the list of loaded files */ if (insert != 0) file_insert_tail(fp); close(fd); return(fp); } /* * Load the module (name), pass it (argc),(argv), add container file * to the list of loaded files. * If module is already loaded just assign new argc/argv. */ int mod_load(char *modname, struct mod_depend *verinfo, int argc, char *argv[]) { struct kernel_module *mp; int err; char *filename; if (file_havepath(modname)) { printf("Warning: mod_load() called instead of mod_loadkld() for module '%s'\n", modname); return (mod_loadkld(modname, argc, argv)); } /* see if module is already loaded */ mp = file_findmodule(NULL, modname, verinfo); if (mp) { #ifdef moduleargs free(mp->m_args); mp->m_args = unargv(argc, argv); #endif snprintf(command_errbuf, sizeof(command_errbuf), "warning: module '%s' already loaded", mp->m_name); return (0); } /* locate file with the module on the search path */ filename = mod_searchmodule(modname, verinfo); if (filename == NULL) { snprintf(command_errbuf, sizeof(command_errbuf), "can't find '%s'", modname); return (ENOENT); } err = mod_loadkld(filename, argc, argv); free(filename); return (err); } /* * Load specified KLD. If path is omitted, then try to locate it via * search path. */ int mod_loadkld(const char *kldname, int argc, char *argv[]) { struct preloaded_file *fp; int err; char *filename; vm_offset_t loadaddr_saved; /* * Get fully qualified KLD name */ filename = file_search(kldname, kld_ext_list); if (filename == NULL) { snprintf(command_errbuf, sizeof(command_errbuf), "can't find '%s'", kldname); return (ENOENT); } /* * Check if KLD already loaded */ fp = file_findfile(filename, NULL); if (fp) { snprintf(command_errbuf, sizeof(command_errbuf), "warning: KLD '%s' already loaded", filename); free(filename); return (0); } do { err = file_load(filename, loadaddr, &fp); if (err) break; fp->f_args = unargv(argc, argv); loadaddr_saved = loadaddr; loadaddr = fp->f_addr + fp->f_size; file_insert_tail(fp); /* Add to the list of loaded files */ if (file_load_dependencies(fp) != 0) { err = ENOENT; file_remove(fp); loadaddr = loadaddr_saved; fp = NULL; break; } } while(0); if (err == EFTYPE) { snprintf(command_errbuf, sizeof(command_errbuf), "don't know how to load module '%s'", filename); } if (err) file_discard(fp); free(filename); return (err); } /* * Find a file matching (name) and (type). * NULL may be passed as a wildcard to either. */ struct preloaded_file * file_findfile(const char *name, const char *type) { struct preloaded_file *fp; for (fp = preloaded_files; fp != NULL; fp = fp->f_next) { if (((name == NULL) || !strcmp(name, fp->f_name)) && ((type == NULL) || !strcmp(type, fp->f_type))) break; } return (fp); } /* * Find a module matching (name) inside of given file. * NULL may be passed as a wildcard. */ struct kernel_module * file_findmodule(struct preloaded_file *fp, char *modname, struct mod_depend *verinfo) { struct kernel_module *mp, *best; int bestver, mver; if (fp == NULL) { for (fp = preloaded_files; fp; fp = fp->f_next) { mp = file_findmodule(fp, modname, verinfo); if (mp) return (mp); } return (NULL); } best = NULL; bestver = 0; for (mp = fp->f_modules; mp; mp = mp->m_next) { if (strcmp(modname, mp->m_name) == 0) { if (verinfo == NULL) return (mp); mver = mp->m_version; if (mver == verinfo->md_ver_preferred) return (mp); if (mver >= verinfo->md_ver_minimum && mver <= verinfo->md_ver_maximum && mver > bestver) { best = mp; bestver = mver; } } } return (best); } /* * Make a copy of (size) bytes of data from (p), and associate them as * metadata of (type) to the module (mp). */ void file_addmetadata(struct preloaded_file *fp, int type, size_t size, void *p) { struct file_metadata *md; md = malloc(sizeof(struct file_metadata) - sizeof(md->md_data) + size); if (md != NULL) { md->md_size = size; md->md_type = type; bcopy(p, md->md_data, size); md->md_next = fp->f_metadata; } fp->f_metadata = md; } /* * Find a metadata object of (type) associated with the file (fp) */ struct file_metadata * file_findmetadata(struct preloaded_file *fp, int type) { struct file_metadata *md; for (md = fp->f_metadata; md != NULL; md = md->md_next) if (md->md_type == type) break; return(md); } /* * Remove all metadata from the file. */ void file_removemetadata(struct preloaded_file *fp) { struct file_metadata *md, *next; for (md = fp->f_metadata; md != NULL; md = next) { next = md->md_next; free(md); } fp->f_metadata = NULL; } +/* + * Add a buffer to the list of preloaded "files". + */ +int +file_addbuf(const char *name, const char *type, size_t len, void *buf) +{ + struct preloaded_file *fp; + vm_offset_t dest; + + /* We can't load first */ + if ((file_findfile(NULL, NULL)) == NULL) { + command_errmsg = "can't load file before kernel"; + return (-1); + } + + /* Figure out where to load the data. */ + dest = loadaddr; + if (archsw.arch_loadaddr != NULL) + dest = archsw.arch_loadaddr(LOAD_RAW, (void *)name, dest); + + /* Create & populate control structure */ + fp = file_alloc(); + if (fp == NULL) { + snprintf(command_errbuf, sizeof (command_errbuf), + "no memory to load %s", name); + return (-1); + } + fp->f_name = strdup(name); + fp->f_type = strdup(type); + fp->f_args = NULL; + fp->f_metadata = NULL; + fp->f_loader = -1; + fp->f_addr = dest; + fp->f_size = len; + if ((fp->f_name == NULL) || (fp->f_type == NULL)) { + snprintf(command_errbuf, sizeof (command_errbuf), + "no memory to load %s", name); + free(fp->f_name); + free(fp->f_type); + return (-1); + } + + /* Copy the data in. */ + archsw.arch_copyin(buf, fp->f_addr, len); + loadaddr = fp->f_addr + len; + + /* Add to the list of loaded files */ + file_insert_tail(fp); + return(0); +} + struct file_metadata * metadata_next(struct file_metadata *md, int type) { if (md == NULL) return (NULL); while((md = md->md_next) != NULL) if (md->md_type == type) break; return (md); } static char *emptyextlist[] = { "", NULL }; /* * Check if the given file is in place and return full path to it. */ static char * file_lookup(const char *path, const char *name, int namelen, char **extlist) { struct stat st; char *result, *cp, **cpp; int pathlen, extlen, len; pathlen = strlen(path); extlen = 0; if (extlist == NULL) extlist = emptyextlist; for (cpp = extlist; *cpp; cpp++) { len = strlen(*cpp); if (len > extlen) extlen = len; } result = malloc(pathlen + namelen + extlen + 2); if (result == NULL) return (NULL); bcopy(path, result, pathlen); if (pathlen > 0 && result[pathlen - 1] != '/') result[pathlen++] = '/'; cp = result + pathlen; bcopy(name, cp, namelen); cp += namelen; for (cpp = extlist; *cpp; cpp++) { strcpy(cp, *cpp); if (stat(result, &st) == 0 && S_ISREG(st.st_mode)) return result; } free(result); return NULL; } /* * Check if file name have any qualifiers */ static int file_havepath(const char *name) { const char *cp; archsw.arch_getdev(NULL, name, &cp); return (cp != name || strchr(name, '/') != NULL); } /* * Attempt to find the file (name) on the module searchpath. * If (name) is qualified in any way, we simply check it and * return it or NULL. If it is not qualified, then we attempt * to construct a path using entries in the environment variable * module_path. * * The path we return a pointer to need never be freed, as we manage * it internally. */ static char * file_search(const char *name, char **extlist) { struct moduledir *mdp; struct stat sb; char *result; int namelen; /* Don't look for nothing */ if (name == NULL) return(NULL); if (*name == 0) return(strdup(name)); if (file_havepath(name)) { /* Qualified, so just see if it exists */ if (stat(name, &sb) == 0) return(strdup(name)); return(NULL); } moduledir_rebuild(); result = NULL; namelen = strlen(name); STAILQ_FOREACH(mdp, &moduledir_list, d_link) { result = file_lookup(mdp->d_path, name, namelen, extlist); if (result) break; } return(result); } #define INT_ALIGN(base, ptr) ptr = \ (base) + roundup2((ptr) - (base), sizeof(int)) static char * mod_search_hints(struct moduledir *mdp, const char *modname, struct mod_depend *verinfo) { u_char *cp, *recptr, *bufend, *best; char *result; int *intp, bestver, blen, clen, found, ival, modnamelen, reclen; moduledir_readhints(mdp); modnamelen = strlen(modname); found = 0; result = NULL; bestver = 0; if (mdp->d_hints == NULL) goto bad; recptr = mdp->d_hints; bufend = recptr + mdp->d_hintsz; clen = blen = 0; best = cp = NULL; while (recptr < bufend && !found) { intp = (int*)recptr; reclen = *intp++; ival = *intp++; cp = (u_char*)intp; switch (ival) { case MDT_VERSION: clen = *cp++; if (clen != modnamelen || bcmp(cp, modname, clen) != 0) break; cp += clen; INT_ALIGN(mdp->d_hints, cp); ival = *(int*)cp; cp += sizeof(int); clen = *cp++; if (verinfo == NULL || ival == verinfo->md_ver_preferred) { found = 1; break; } if (ival >= verinfo->md_ver_minimum && ival <= verinfo->md_ver_maximum && ival > bestver) { bestver = ival; best = cp; blen = clen; } break; default: break; } recptr += reclen + sizeof(int); } /* * Finally check if KLD is in the place */ if (found) result = file_lookup(mdp->d_path, (const char *)cp, clen, NULL); else if (best) result = file_lookup(mdp->d_path, (const char *)best, blen, NULL); bad: /* * If nothing found or hints is absent - fallback to the old way * by using "kldname[.ko]" as module name. */ if (!found && !bestver && result == NULL) result = file_lookup(mdp->d_path, modname, modnamelen, kld_ext_list); return result; } static int getint(void **ptr) { int *p = *ptr; int rv; p = (int *)roundup2((intptr_t)p, sizeof(int)); rv = *p++; *ptr = p; return rv; } static void getstr(void **ptr, char *val) { int *p = *ptr; char *c = (char *)p; int len = *(uint8_t *)c; memcpy(val, c + 1, len); val[len] = 0; c += len + 1; *ptr = (void *)c; } static int pnpval_as_int(const char *val, const char *pnpinfo) { int rv; char key[256]; char *cp; if (pnpinfo == NULL) return -1; cp = strchr(val, ';'); key[0] = ' '; if (cp == NULL) strlcpy(key + 1, val, sizeof(key) - 1); else { memcpy(key + 1, val, cp - val); key[cp - val + 1] = '\0'; } strlcat(key, "=", sizeof(key)); if (strncmp(key + 1, pnpinfo, strlen(key + 1)) == 0) rv = strtol(pnpinfo + strlen(key + 1), NULL, 0); else { cp = strstr(pnpinfo, key); if (cp == NULL) rv = -1; else rv = strtol(cp + strlen(key), NULL, 0); } return rv; } static void quoted_strcpy(char *dst, const char *src) { char q = ' '; if (*src == '\'' || *src == '"') q = *src++; while (*src && *src != q) *dst++ = *src++; // XXX backtick quoting *dst++ = '\0'; // XXX overflow } static char * pnpval_as_str(const char *val, const char *pnpinfo) { static char retval[256]; char key[256]; char *cp; if (pnpinfo == NULL) { *retval = '\0'; return retval; } cp = strchr(val, ';'); key[0] = ' '; if (cp == NULL) strlcpy(key + 1, val, sizeof(key) - 1); else { memcpy(key + 1, val, cp - val); key[cp - val + 1] = '\0'; } strlcat(key, "=", sizeof(key)); if (strncmp(key + 1, pnpinfo, strlen(key + 1)) == 0) quoted_strcpy(retval, pnpinfo + strlen(key + 1)); else { cp = strstr(pnpinfo, key); if (cp == NULL) strcpy(retval, "MISSING"); else quoted_strcpy(retval, cp + strlen(key)); } return retval; } static char * devmatch_search_hints(struct moduledir *mdp, const char *bus, const char *dev, const char *pnpinfo) { char val1[256], val2[256]; int ival, len, ents, i, notme, mask, bit, v, found; void *ptr, *walker, *hints_end; char *lastmod = NULL, *cp, *s; moduledir_readhints(mdp); found = 0; if (mdp->d_hints == NULL) goto bad; walker = mdp->d_hints; hints_end = walker + mdp->d_hintsz; while (walker < hints_end && !found) { len = getint(&walker); ival = getint(&walker); ptr = walker; switch (ival) { case MDT_VERSION: getstr(&ptr, val1); ival = getint(&ptr); getstr(&ptr, val2); if (pnp_dump_flag || pnp_verbose_flag) printf("Version: if %s.%d kmod %s\n", val1, ival, val2); break; case MDT_MODULE: getstr(&ptr, val1); getstr(&ptr, val2); if (lastmod) free(lastmod); lastmod = strdup(val2); if (pnp_dump_flag || pnp_verbose_flag) printf("module %s in %s\n", val1, val1); break; case MDT_PNP_INFO: if (!pnp_dump_flag && !pnp_unbound_flag && lastmod && strcmp(lastmod, "kernel") == 0) break; getstr(&ptr, val1); getstr(&ptr, val2); ents = getint(&ptr); if (pnp_dump_flag || pnp_verbose_flag) printf("PNP info for bus %s format %s %d entries (%s)\n", val1, val2, ents, lastmod); if (strcmp(val1, "usb") == 0) { if (pnp_verbose_flag) printf("Treating usb as uhub -- bug in source table still?\n"); strcpy(val1, "uhub"); } if (bus && strcmp(val1, bus) != 0) { if (pnp_verbose_flag) printf("Skipped because table for bus %s, looking for %s\n", val1, bus); break; } for (i = 0; i < ents; i++) { if (pnp_verbose_flag) printf("---------- Entry %d ----------\n", i); if (pnp_dump_flag) printf(" "); cp = val2; notme = 0; mask = -1; bit = -1; do { switch (*cp) { /* All integer fields */ case 'I': case 'J': case 'G': case 'L': case 'M': ival = getint(&ptr); if (pnp_dump_flag) { printf("%#x:", ival); break; } if (bit >= 0 && ((1 << bit) & mask) == 0) break; v = pnpval_as_int(cp + 2, pnpinfo); if (pnp_verbose_flag) printf("Matching %s (%c) table=%#x tomatch=%#x\n", cp + 2, *cp, v, ival); switch (*cp) { case 'J': if (ival == -1) break; /*FALLTHROUGH*/ case 'I': if (v != ival) notme++; break; case 'G': if (v < ival) notme++; break; case 'L': if (v > ival) notme++; break; case 'M': mask = ival; break; } break; /* String fields */ case 'D': case 'Z': getstr(&ptr, val1); if (pnp_dump_flag) { printf("'%s':", val1); break; } if (*cp == 'D') break; s = pnpval_as_str(cp + 2, pnpinfo); if (strcmp(s, val1) != 0) notme++; break; /* Key override fields, required to be last in the string */ case 'T': /* * This is imperfect and only does one key and will be redone * to be more general for multiple keys. Currently, nothing * does that. */ if (pnp_dump_flag) /* No per-row data stored */ break; if (cp[strlen(cp) - 1] == ';') /* Skip required ; at end */ cp[strlen(cp) - 1] = '\0'; /* in case it's not there */ if ((s = strstr(pnpinfo, cp + 2)) == NULL) notme++; else if (s > pnpinfo && s[-1] != ' ') notme++; break; default: printf("Unknown field type %c\n:", *cp); break; } bit++; cp = strchr(cp, ';'); if (cp) cp++; } while (cp && *cp); if (pnp_dump_flag) printf("\n"); else if (!notme) { if (!pnp_unbound_flag) { if (pnp_verbose_flag) printf("Matches --- %s ---\n", lastmod); } found++; } } break; default: break; } walker = (void *)(len - sizeof(int) + (intptr_t)walker); } if (pnp_unbound_flag && found == 0 && *pnpinfo) { if (pnp_verbose_flag) printf("------------------------- "); printf("%s on %s pnpinfo %s", *dev ? dev : "unattached", bus, pnpinfo); if (pnp_verbose_flag) printf(" -------------------------"); printf("\n"); } if (found != 0) return (lastmod); free(lastmod); bad: return (NULL); } /* * Attempt to locate the file containing the module (name) */ static char * mod_searchmodule(char *name, struct mod_depend *verinfo) { struct moduledir *mdp; char *result; moduledir_rebuild(); /* * Now we ready to lookup module in the given directories */ result = NULL; STAILQ_FOREACH(mdp, &moduledir_list, d_link) { result = mod_search_hints(mdp, name, verinfo); if (result) break; } return(result); } static char * mod_searchmodule_pnpinfo(const char *bus, const char *pnpinfo) { struct moduledir *mdp; char *result; moduledir_rebuild(); /* * Now we ready to lookup module in the given directories */ result = NULL; STAILQ_FOREACH(mdp, &moduledir_list, d_link) { result = devmatch_search_hints(mdp, bus, NULL, pnpinfo); if (result) break; } return(result); } int file_addmodule(struct preloaded_file *fp, char *modname, int version, struct kernel_module **newmp) { struct kernel_module *mp; struct mod_depend mdepend; bzero(&mdepend, sizeof(mdepend)); mdepend.md_ver_preferred = version; mp = file_findmodule(fp, modname, &mdepend); if (mp) return (EEXIST); mp = calloc(1, sizeof(struct kernel_module)); if (mp == NULL) return (ENOMEM); mp->m_name = strdup(modname); if (mp->m_name == NULL) { free(mp); return (ENOMEM); } mp->m_version = version; mp->m_fp = fp; mp->m_next = fp->f_modules; fp->f_modules = mp; if (newmp) *newmp = mp; return (0); } /* * Throw a file away */ void file_discard(struct preloaded_file *fp) { struct file_metadata *md, *md1; struct kernel_module *mp, *mp1; if (fp == NULL) return; md = fp->f_metadata; while (md) { md1 = md; md = md->md_next; free(md1); } mp = fp->f_modules; while (mp) { free(mp->m_name); mp1 = mp; mp = mp->m_next; free(mp1); } free(fp->f_name); free(fp->f_type); free(fp->f_args); free(fp); } /* * Allocate a new file; must be used instead of malloc() * to ensure safe initialisation. */ struct preloaded_file * file_alloc(void) { return (calloc(1, sizeof(struct preloaded_file))); } /* * Add a module to the chain */ static void file_insert_tail(struct preloaded_file *fp) { struct preloaded_file *cm; /* Append to list of loaded file */ fp->f_next = NULL; if (preloaded_files == NULL) { preloaded_files = fp; } else { for (cm = preloaded_files; cm->f_next != NULL; cm = cm->f_next) ; cm->f_next = fp; } } /* * Remove module from the chain */ static void file_remove(struct preloaded_file *fp) { struct preloaded_file *cm; if (preloaded_files == NULL) return; if (preloaded_files == fp) { preloaded_files = fp->f_next; return; } for (cm = preloaded_files; cm->f_next != NULL; cm = cm->f_next) { if (cm->f_next == fp) { cm->f_next = fp->f_next; return; } } } static char * moduledir_fullpath(struct moduledir *mdp, const char *fname) { char *cp; cp = malloc(strlen(mdp->d_path) + strlen(fname) + 2); if (cp == NULL) return NULL; strcpy(cp, mdp->d_path); strcat(cp, "/"); strcat(cp, fname); return (cp); } /* * Read linker.hints file into memory performing some sanity checks. */ static void moduledir_readhints(struct moduledir *mdp) { struct stat st; char *path; int fd, size, version; if (mdp->d_hints != NULL || (mdp->d_flags & MDIR_NOHINTS)) return; path = moduledir_fullpath(mdp, "linker.hints"); if (stat(path, &st) != 0 || st.st_size < (ssize_t)(sizeof(version) + sizeof(int)) || st.st_size > LINKER_HINTS_MAX || (fd = open(path, O_RDONLY)) < 0) { free(path); mdp->d_flags |= MDIR_NOHINTS; return; } free(path); size = read(fd, &version, sizeof(version)); if (size != sizeof(version) || version != LINKER_HINTS_VERSION) goto bad; size = st.st_size - size; mdp->d_hints = malloc(size); if (mdp->d_hints == NULL) goto bad; if (read(fd, mdp->d_hints, size) != size) goto bad; mdp->d_hintsz = size; close(fd); return; bad: close(fd); free(mdp->d_hints); mdp->d_hints = NULL; mdp->d_flags |= MDIR_NOHINTS; return; } /* * Extract directories from the ';' separated list, remove duplicates. */ static void moduledir_rebuild(void) { struct moduledir *mdp, *mtmp; const char *path, *cp, *ep; size_t cplen; path = getenv("module_path"); if (path == NULL) path = default_searchpath; /* * Rebuild list of module directories if it changed */ STAILQ_FOREACH(mdp, &moduledir_list, d_link) mdp->d_flags |= MDIR_REMOVED; for (ep = path; *ep != 0; ep++) { cp = ep; for (; *ep != 0 && *ep != ';'; ep++) ; /* * Ignore trailing slashes */ for (cplen = ep - cp; cplen > 1 && cp[cplen - 1] == '/'; cplen--) ; STAILQ_FOREACH(mdp, &moduledir_list, d_link) { if (strlen(mdp->d_path) != cplen || bcmp(cp, mdp->d_path, cplen) != 0) continue; mdp->d_flags &= ~MDIR_REMOVED; break; } if (mdp == NULL) { mdp = malloc(sizeof(*mdp) + cplen + 1); if (mdp == NULL) return; mdp->d_path = (char*)(mdp + 1); bcopy(cp, mdp->d_path, cplen); mdp->d_path[cplen] = 0; mdp->d_hints = NULL; mdp->d_flags = 0; STAILQ_INSERT_TAIL(&moduledir_list, mdp, d_link); } if (*ep == 0) break; } /* * Delete unused directories if any */ mdp = STAILQ_FIRST(&moduledir_list); while (mdp) { if ((mdp->d_flags & MDIR_REMOVED) == 0) { mdp = STAILQ_NEXT(mdp, d_link); } else { free(mdp->d_hints); mtmp = mdp; mdp = STAILQ_NEXT(mdp, d_link); STAILQ_REMOVE(&moduledir_list, mtmp, moduledir, d_link); free(mtmp); } } return; } diff --git a/stand/common/tslog.c b/stand/common/tslog.c new file mode 100644 index 000000000000..3851e760daf1 --- /dev/null +++ b/stand/common/tslog.c @@ -0,0 +1,81 @@ +/*- + * Copyright (c) 2021 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include + +#include "bootstrap.h" + +/* Buffer for holding tslog data in string format. */ +#ifndef LOADER_TSLOGSIZE +#define LOADER_TSLOGSIZE (2 * 1024 * 1024) +#endif + +int +tslog_init(void) +{ + void * tslog_buf; + + /* Allocate buffer and pass to libsa tslog code. */ + if ((tslog_buf = malloc(LOADER_TSLOGSIZE)) == NULL) + return (-1); + tslog_setbuf(tslog_buf, LOADER_TSLOGSIZE); + + /* Record this as when we entered the loader. */ + TSRAW("ENTER", "loader", NULL); + + return (0); +} + +/* + * Pass our tslog buffer as a preloaded "module" to the kernel. This should + * be called as late as possible prior to the kernel being executed, since + * any timestamps logged after this is called will not be visible to the + * kernel. + */ +int +tslog_publish(void) +{ + void * tslog_buf; + size_t tslog_bufpos; + + /* Record a log entry for ending logging. */ + TSRAW("EXIT", "loader", NULL); + + /* Get the buffer and its current length. */ + tslog_getbuf(&tslog_buf, &tslog_bufpos); + + /* If we never allocated a buffer, return an error. */ + if (tslog_buf == NULL) + return (-1); + + /* Store the buffer where the kernel can read it. */ + return (file_addbuf("TSLOG", "TSLOG data", tslog_bufpos, tslog_buf)); +} diff --git a/stand/efi/libefi/efipart.c b/stand/efi/libefi/efipart.c index 424df7d2e423..aede28ef40c3 100644 --- a/stand/efi/libefi/efipart.c +++ b/stand/efi/libefi/efipart.c @@ -1,1247 +1,1250 @@ /*- * Copyright (c) 2010 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include static EFI_GUID blkio_guid = BLOCK_IO_PROTOCOL; typedef bool (*pd_test_cb_t)(pdinfo_t *, pdinfo_t *); static int efipart_initfd(void); static int efipart_initcd(void); static int efipart_inithd(void); static void efipart_cdinfo_add(pdinfo_t *); static int efipart_strategy(void *, int, daddr_t, size_t, char *, size_t *); static int efipart_realstrategy(void *, int, daddr_t, size_t, char *, size_t *); static int efipart_open(struct open_file *, ...); static int efipart_close(struct open_file *); static int efipart_ioctl(struct open_file *, u_long, void *); static int efipart_printfd(int); static int efipart_printcd(int); static int efipart_printhd(int); /* EISA PNP ID's for floppy controllers */ #define PNP0604 0x604 #define PNP0700 0x700 #define PNP0701 0x701 /* Bounce buffer max size */ #define BIO_BUFFER_SIZE 0x4000 struct devsw efipart_fddev = { .dv_name = "fd", .dv_type = DEVT_FD, .dv_init = efipart_initfd, .dv_strategy = efipart_strategy, .dv_open = efipart_open, .dv_close = efipart_close, .dv_ioctl = efipart_ioctl, .dv_print = efipart_printfd, .dv_cleanup = NULL }; struct devsw efipart_cddev = { .dv_name = "cd", .dv_type = DEVT_CD, .dv_init = efipart_initcd, .dv_strategy = efipart_strategy, .dv_open = efipart_open, .dv_close = efipart_close, .dv_ioctl = efipart_ioctl, .dv_print = efipart_printcd, .dv_cleanup = NULL }; struct devsw efipart_hddev = { .dv_name = "disk", .dv_type = DEVT_DISK, .dv_init = efipart_inithd, .dv_strategy = efipart_strategy, .dv_open = efipart_open, .dv_close = efipart_close, .dv_ioctl = efipart_ioctl, .dv_print = efipart_printhd, .dv_cleanup = NULL }; static pdinfo_list_t fdinfo = STAILQ_HEAD_INITIALIZER(fdinfo); static pdinfo_list_t cdinfo = STAILQ_HEAD_INITIALIZER(cdinfo); static pdinfo_list_t hdinfo = STAILQ_HEAD_INITIALIZER(hdinfo); /* * efipart_inithandles() is used to build up the pdinfo list from * block device handles. Then each devsw init callback is used to * pick items from pdinfo and move to proper device list. * In ideal world, we should end up with empty pdinfo once all * devsw initializers are called. */ static pdinfo_list_t pdinfo = STAILQ_HEAD_INITIALIZER(pdinfo); pdinfo_list_t * efiblk_get_pdinfo_list(struct devsw *dev) { if (dev->dv_type == DEVT_DISK) return (&hdinfo); if (dev->dv_type == DEVT_CD) return (&cdinfo); if (dev->dv_type == DEVT_FD) return (&fdinfo); return (NULL); } /* XXX this gets called way way too often, investigate */ pdinfo_t * efiblk_get_pdinfo(struct devdesc *dev) { pdinfo_list_t *pdi; pdinfo_t *pd = NULL; pdi = efiblk_get_pdinfo_list(dev->d_dev); if (pdi == NULL) return (pd); STAILQ_FOREACH(pd, pdi, pd_link) { if (pd->pd_unit == dev->d_unit) return (pd); } return (pd); } pdinfo_t * efiblk_get_pdinfo_by_device_path(EFI_DEVICE_PATH *path) { EFI_HANDLE h; EFI_STATUS status; EFI_DEVICE_PATH *devp = path; status = BS->LocateDevicePath(&blkio_guid, &devp, &h); if (EFI_ERROR(status)) return (NULL); return (efiblk_get_pdinfo_by_handle(h)); } static bool same_handle(pdinfo_t *pd, EFI_HANDLE h) { return (pd->pd_handle == h || pd->pd_alias == h); } pdinfo_t * efiblk_get_pdinfo_by_handle(EFI_HANDLE h) { pdinfo_t *dp, *pp; /* * Check hard disks, then cd, then floppy */ STAILQ_FOREACH(dp, &hdinfo, pd_link) { if (same_handle(dp, h)) return (dp); STAILQ_FOREACH(pp, &dp->pd_part, pd_link) { if (same_handle(pp, h)) return (pp); } } STAILQ_FOREACH(dp, &cdinfo, pd_link) { if (same_handle(dp, h)) return (dp); STAILQ_FOREACH(pp, &dp->pd_part, pd_link) { if (same_handle(pp, h)) return (pp); } } STAILQ_FOREACH(dp, &fdinfo, pd_link) { if (same_handle(dp, h)) return (dp); } return (NULL); } static int efiblk_pdinfo_count(pdinfo_list_t *pdi) { pdinfo_t *pd; int i = 0; STAILQ_FOREACH(pd, pdi, pd_link) { i++; } return (i); } static pdinfo_t * efipart_find_parent(pdinfo_list_t *pdi, EFI_DEVICE_PATH *devpath) { pdinfo_t *pd; EFI_DEVICE_PATH *parent; /* We want to find direct parent */ parent = efi_devpath_trim(devpath); /* We should not get out of memory here but be careful. */ if (parent == NULL) return (NULL); STAILQ_FOREACH(pd, pdi, pd_link) { /* We must have exact match. */ if (efi_devpath_match(pd->pd_devpath, parent)) break; } free(parent); return (pd); } /* * Return true when we should ignore this device. */ static bool efipart_ignore_device(EFI_HANDLE h, EFI_BLOCK_IO *blkio, EFI_DEVICE_PATH *devpath) { EFI_DEVICE_PATH *node, *parent; /* * We assume the block size 512 or greater power of 2. * Also skip devices with block size > 64k (16 is max * ashift supported by zfs). * iPXE is known to insert stub BLOCK IO device with * BlockSize 1. */ if (blkio->Media->BlockSize < 512 || blkio->Media->BlockSize > (1 << 16) || !powerof2(blkio->Media->BlockSize)) { efi_close_devpath(h); return (true); } /* Allowed values are 0, 1 and power of 2. */ if (blkio->Media->IoAlign > 1 && !powerof2(blkio->Media->IoAlign)) { efi_close_devpath(h); return (true); } /* * With device tree setup: * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0) * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x1) * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x2) * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x3) * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x3)/CDROM.. * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x3)/CDROM.. * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x4) * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x5) * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x6) * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x7) * * In above exmple only Unit(0x3) has media, all other nodes are * missing media and should not be used. * * No media does not always mean there is no device, but in above * case, we can not really assume there is any device. * Therefore, if this node is USB, or this node is Unit (LUN) and * direct parent is USB and we have no media, we will ignore this * device. * * Variation of the same situation, but with SCSI devices: * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x1) * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x2) * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x3) * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x3)/CD.. * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x3)/CD.. * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x4) * * Here above the SCSI luns 1,2 and 4 have no media. */ /* Do not ignore device with media. */ if (blkio->Media->MediaPresent) return (false); node = efi_devpath_last_node(devpath); if (node == NULL) return (false); /* USB without media present */ if (DevicePathType(node) == MESSAGING_DEVICE_PATH && DevicePathSubType(node) == MSG_USB_DP) { efi_close_devpath(h); return (true); } parent = efi_devpath_trim(devpath); if (parent != NULL) { bool parent_is_usb = false; node = efi_devpath_last_node(parent); if (node == NULL) { free(parent); return (false); } if (DevicePathType(node) == MESSAGING_DEVICE_PATH && DevicePathSubType(node) == MSG_USB_DP) parent_is_usb = true; free(parent); node = efi_devpath_last_node(devpath); if (node == NULL) return (false); if (parent_is_usb && DevicePathType(node) == MESSAGING_DEVICE_PATH) { /* * no media, parent is USB and devicepath is * LUN or SCSI. */ if (DevicePathSubType(node) == MSG_DEVICE_LOGICAL_UNIT_DP || DevicePathSubType(node) == MSG_SCSI_DP) { efi_close_devpath(h); return (true); } } } return (false); } int efipart_inithandles(void) { unsigned i, nin; UINTN sz; EFI_HANDLE *hin; EFI_DEVICE_PATH *devpath; EFI_BLOCK_IO *blkio; EFI_STATUS status; pdinfo_t *pd; if (!STAILQ_EMPTY(&pdinfo)) return (0); sz = 0; hin = NULL; status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, hin); if (status == EFI_BUFFER_TOO_SMALL) { hin = malloc(sz); if (hin == NULL) return (ENOMEM); status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, hin); if (EFI_ERROR(status)) free(hin); } if (EFI_ERROR(status)) return (efi_status_to_errno(status)); nin = sz / sizeof(*hin); #ifdef EFIPART_DEBUG printf("%s: Got %d BLOCK IO MEDIA handle(s)\n", __func__, nin); #endif for (i = 0; i < nin; i++) { /* * Get devpath and open protocol. * We should not get errors here */ if ((devpath = efi_lookup_devpath(hin[i])) == NULL) continue; status = OpenProtocolByHandle(hin[i], &blkio_guid, (void **)&blkio); if (EFI_ERROR(status)) { printf("error %lu\n", EFI_ERROR_CODE(status)); continue; } if (efipart_ignore_device(hin[i], blkio, devpath)) continue; /* This is bad. */ if ((pd = calloc(1, sizeof(*pd))) == NULL) { printf("efipart_inithandles: Out of memory.\n"); free(hin); return (ENOMEM); } STAILQ_INIT(&pd->pd_part); pd->pd_handle = hin[i]; pd->pd_devpath = devpath; pd->pd_blkio = blkio; STAILQ_INSERT_TAIL(&pdinfo, pd, pd_link); } /* * Walk pdinfo and set parents based on device path. */ STAILQ_FOREACH(pd, &pdinfo, pd_link) { pd->pd_parent = efipart_find_parent(&pdinfo, pd->pd_devpath); } free(hin); return (0); } /* * Get node identified by pd_test() from plist. */ static pdinfo_t * efipart_get_pd(pdinfo_list_t *plist, pd_test_cb_t pd_test, pdinfo_t *data) { pdinfo_t *pd; STAILQ_FOREACH(pd, plist, pd_link) { if (pd_test(pd, data)) break; } return (pd); } static ACPI_HID_DEVICE_PATH * efipart_floppy(EFI_DEVICE_PATH *node) { ACPI_HID_DEVICE_PATH *acpi; if (DevicePathType(node) == ACPI_DEVICE_PATH && DevicePathSubType(node) == ACPI_DP) { acpi = (ACPI_HID_DEVICE_PATH *) node; if (acpi->HID == EISA_PNP_ID(PNP0604) || acpi->HID == EISA_PNP_ID(PNP0700) || acpi->HID == EISA_PNP_ID(PNP0701)) { return (acpi); } } return (NULL); } static bool efipart_testfd(pdinfo_t *fd, pdinfo_t *data __unused) { EFI_DEVICE_PATH *node; node = efi_devpath_last_node(fd->pd_devpath); if (node == NULL) return (false); if (efipart_floppy(node) != NULL) return (true); return (false); } static int efipart_initfd(void) { EFI_DEVICE_PATH *node; ACPI_HID_DEVICE_PATH *acpi; pdinfo_t *parent, *fd; while ((fd = efipart_get_pd(&pdinfo, efipart_testfd, NULL)) != NULL) { if ((node = efi_devpath_last_node(fd->pd_devpath)) == NULL) continue; if ((acpi = efipart_floppy(node)) == NULL) continue; STAILQ_REMOVE(&pdinfo, fd, pdinfo, pd_link); parent = fd->pd_parent; if (parent != NULL) { STAILQ_REMOVE(&pdinfo, parent, pdinfo, pd_link); parent->pd_alias = fd->pd_handle; parent->pd_unit = acpi->UID; free(fd); fd = parent; } else { fd->pd_unit = acpi->UID; } fd->pd_devsw = &efipart_fddev; STAILQ_INSERT_TAIL(&fdinfo, fd, pd_link); } bcache_add_dev(efiblk_pdinfo_count(&fdinfo)); return (0); } /* * Add or update entries with new handle data. */ static void efipart_cdinfo_add(pdinfo_t *cd) { pdinfo_t *parent, *pd, *last; if (cd == NULL) return; parent = cd->pd_parent; /* Make sure we have parent added */ efipart_cdinfo_add(parent); STAILQ_FOREACH(pd, &pdinfo, pd_link) { if (efi_devpath_match(pd->pd_devpath, cd->pd_devpath)) { STAILQ_REMOVE(&pdinfo, cd, pdinfo, pd_link); break; } } if (pd == NULL) { /* This device is already added. */ return; } if (parent != NULL) { last = STAILQ_LAST(&parent->pd_part, pdinfo, pd_link); if (last != NULL) cd->pd_unit = last->pd_unit + 1; else cd->pd_unit = 0; cd->pd_devsw = &efipart_cddev; STAILQ_INSERT_TAIL(&parent->pd_part, cd, pd_link); return; } last = STAILQ_LAST(&cdinfo, pdinfo, pd_link); if (last != NULL) cd->pd_unit = last->pd_unit + 1; else cd->pd_unit = 0; cd->pd_devsw = &efipart_cddev; STAILQ_INSERT_TAIL(&cdinfo, cd, pd_link); } static bool efipart_testcd(pdinfo_t *cd, pdinfo_t *data __unused) { EFI_DEVICE_PATH *node; node = efi_devpath_last_node(cd->pd_devpath); if (node == NULL) return (false); if (efipart_floppy(node) != NULL) return (false); if (DevicePathType(node) == MEDIA_DEVICE_PATH && DevicePathSubType(node) == MEDIA_CDROM_DP) { return (true); } /* cd drive without the media. */ if (cd->pd_blkio->Media->RemovableMedia && !cd->pd_blkio->Media->MediaPresent) { return (true); } return (false); } /* * Test if pd is parent for device. */ static bool efipart_testchild(pdinfo_t *dev, pdinfo_t *pd) { /* device with no parent. */ if (dev->pd_parent == NULL) return (false); if (efi_devpath_match(dev->pd_parent->pd_devpath, pd->pd_devpath)) { return (true); } return (false); } static int efipart_initcd(void) { pdinfo_t *cd; while ((cd = efipart_get_pd(&pdinfo, efipart_testcd, NULL)) != NULL) efipart_cdinfo_add(cd); /* Find all children of CD devices we did add above. */ STAILQ_FOREACH(cd, &cdinfo, pd_link) { pdinfo_t *child; for (child = efipart_get_pd(&pdinfo, efipart_testchild, cd); child != NULL; child = efipart_get_pd(&pdinfo, efipart_testchild, cd)) efipart_cdinfo_add(child); } bcache_add_dev(efiblk_pdinfo_count(&cdinfo)); return (0); } static void efipart_hdinfo_add_node(pdinfo_t *hd, EFI_DEVICE_PATH *node) { pdinfo_t *parent, *ptr; if (node == NULL) return; parent = hd->pd_parent; /* * If the node is not MEDIA_HARDDRIVE_DP, it is sub-partition. * This can happen with Vendor nodes, and since we do not know * the more about those nodes, we just count them. */ if (DevicePathSubType(node) != MEDIA_HARDDRIVE_DP) { ptr = STAILQ_LAST(&parent->pd_part, pdinfo, pd_link); if (ptr != NULL) hd->pd_unit = ptr->pd_unit + 1; else hd->pd_unit = 0; } else { hd->pd_unit = ((HARDDRIVE_DEVICE_PATH *)node)->PartitionNumber; } hd->pd_devsw = &efipart_hddev; STAILQ_INSERT_TAIL(&parent->pd_part, hd, pd_link); } /* * The MEDIA_FILEPATH_DP has device name. * From U-Boot sources it looks like names are in the form * of typeN:M, where type is interface type, N is disk id * and M is partition id. */ static void efipart_hdinfo_add_filepath(pdinfo_t *hd, FILEPATH_DEVICE_PATH *node) { char *pathname, *p; int len; pdinfo_t *last; last = STAILQ_LAST(&hdinfo, pdinfo, pd_link); if (last != NULL) hd->pd_unit = last->pd_unit + 1; else hd->pd_unit = 0; /* FILEPATH_DEVICE_PATH has 0 terminated string */ len = ucs2len(node->PathName); if ((pathname = malloc(len + 1)) == NULL) { printf("Failed to add disk, out of memory\n"); free(hd); return; } cpy16to8(node->PathName, pathname, len + 1); p = strchr(pathname, ':'); /* * Assume we are receiving handles in order, first disk handle, * then partitions for this disk. If this assumption proves * false, this code would need update. */ if (p == NULL) { /* no colon, add the disk */ hd->pd_devsw = &efipart_hddev; STAILQ_INSERT_TAIL(&hdinfo, hd, pd_link); free(pathname); return; } p++; /* skip the colon */ errno = 0; hd->pd_unit = (int)strtol(p, NULL, 0); if (errno != 0) { printf("Bad unit number for partition \"%s\"\n", pathname); free(pathname); free(hd); return; } /* * We should have disk registered, if not, we are receiving * handles out of order, and this code should be reworked * to create "blank" disk for partition, and to find the * disk based on PathName compares. */ if (last == NULL) { printf("BUG: No disk for partition \"%s\"\n", pathname); free(pathname); free(hd); return; } /* Add the partition. */ hd->pd_parent = last; hd->pd_devsw = &efipart_hddev; STAILQ_INSERT_TAIL(&last->pd_part, hd, pd_link); free(pathname); } static void efipart_hdinfo_add(pdinfo_t *hd) { pdinfo_t *parent, *pd, *last; EFI_DEVICE_PATH *node; if (hd == NULL) return; parent = hd->pd_parent; /* Make sure we have parent added */ efipart_hdinfo_add(parent); STAILQ_FOREACH(pd, &pdinfo, pd_link) { if (efi_devpath_match(pd->pd_devpath, hd->pd_devpath)) { STAILQ_REMOVE(&pdinfo, hd, pdinfo, pd_link); break; } } if (pd == NULL) { /* This device is already added. */ return; } if ((node = efi_devpath_last_node(hd->pd_devpath)) == NULL) return; if (DevicePathType(node) == MEDIA_DEVICE_PATH && DevicePathSubType(node) == MEDIA_FILEPATH_DP) { efipart_hdinfo_add_filepath(hd, (FILEPATH_DEVICE_PATH *)node); return; } if (parent != NULL) { efipart_hdinfo_add_node(hd, node); return; } last = STAILQ_LAST(&hdinfo, pdinfo, pd_link); if (last != NULL) hd->pd_unit = last->pd_unit + 1; else hd->pd_unit = 0; /* Add the disk. */ hd->pd_devsw = &efipart_hddev; STAILQ_INSERT_TAIL(&hdinfo, hd, pd_link); } static bool efipart_testhd(pdinfo_t *hd, pdinfo_t *data __unused) { if (efipart_testfd(hd, NULL)) return (false); if (efipart_testcd(hd, NULL)) return (false); /* Anything else must be HD. */ return (true); } static int efipart_inithd(void) { pdinfo_t *hd; while ((hd = efipart_get_pd(&pdinfo, efipart_testhd, NULL)) != NULL) efipart_hdinfo_add(hd); bcache_add_dev(efiblk_pdinfo_count(&hdinfo)); return (0); } static int efipart_print_common(struct devsw *dev, pdinfo_list_t *pdlist, int verbose) { int ret = 0; EFI_BLOCK_IO *blkio; EFI_STATUS status; EFI_HANDLE h; pdinfo_t *pd; CHAR16 *text; struct disk_devdesc pd_dev; char line[80]; if (STAILQ_EMPTY(pdlist)) return (0); printf("%s devices:", dev->dv_name); if ((ret = pager_output("\n")) != 0) return (ret); STAILQ_FOREACH(pd, pdlist, pd_link) { h = pd->pd_handle; if (verbose) { /* Output the device path. */ text = efi_devpath_name(efi_lookup_devpath(h)); if (text != NULL) { printf(" %S", text); efi_free_devpath_name(text); if ((ret = pager_output("\n")) != 0) break; } } snprintf(line, sizeof(line), " %s%d", dev->dv_name, pd->pd_unit); printf("%s:", line); status = OpenProtocolByHandle(h, &blkio_guid, (void **)&blkio); if (!EFI_ERROR(status)) { printf(" %llu", blkio->Media->LastBlock == 0? 0: (unsigned long long) (blkio->Media->LastBlock + 1)); if (blkio->Media->LastBlock != 0) { printf(" X %u", blkio->Media->BlockSize); } printf(" blocks"); if (blkio->Media->MediaPresent) { if (blkio->Media->RemovableMedia) printf(" (removable)"); } else { printf(" (no media)"); } if ((ret = pager_output("\n")) != 0) break; if (!blkio->Media->MediaPresent) continue; pd->pd_blkio = blkio; pd_dev.dd.d_dev = dev; pd_dev.dd.d_unit = pd->pd_unit; pd_dev.d_slice = D_SLICENONE; pd_dev.d_partition = D_PARTNONE; ret = disk_open(&pd_dev, blkio->Media->BlockSize * (blkio->Media->LastBlock + 1), blkio->Media->BlockSize); if (ret == 0) { ret = disk_print(&pd_dev, line, verbose); disk_close(&pd_dev); if (ret != 0) return (ret); } else { /* Do not fail from disk_open() */ ret = 0; } } else { if ((ret = pager_output("\n")) != 0) break; } } return (ret); } static int efipart_printfd(int verbose) { return (efipart_print_common(&efipart_fddev, &fdinfo, verbose)); } static int efipart_printcd(int verbose) { return (efipart_print_common(&efipart_cddev, &cdinfo, verbose)); } static int efipart_printhd(int verbose) { return (efipart_print_common(&efipart_hddev, &hdinfo, verbose)); } static int efipart_open(struct open_file *f, ...) { va_list args; struct disk_devdesc *dev; pdinfo_t *pd; EFI_BLOCK_IO *blkio; EFI_STATUS status; va_start(args, f); dev = va_arg(args, struct disk_devdesc *); va_end(args); if (dev == NULL) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EIO); if (pd->pd_blkio == NULL) { status = OpenProtocolByHandle(pd->pd_handle, &blkio_guid, (void **)&pd->pd_blkio); if (EFI_ERROR(status)) return (efi_status_to_errno(status)); } blkio = pd->pd_blkio; if (!blkio->Media->MediaPresent) return (EAGAIN); pd->pd_open++; if (pd->pd_bcache == NULL) pd->pd_bcache = bcache_allocate(); if (dev->dd.d_dev->dv_type == DEVT_DISK) { int rc; rc = disk_open(dev, blkio->Media->BlockSize * (blkio->Media->LastBlock + 1), blkio->Media->BlockSize); if (rc != 0) { pd->pd_open--; if (pd->pd_open == 0) { pd->pd_blkio = NULL; bcache_free(pd->pd_bcache); pd->pd_bcache = NULL; } } return (rc); } return (0); } static int efipart_close(struct open_file *f) { struct disk_devdesc *dev; pdinfo_t *pd; dev = (struct disk_devdesc *)(f->f_devdata); if (dev == NULL) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EINVAL); pd->pd_open--; if (pd->pd_open == 0) { pd->pd_blkio = NULL; bcache_free(pd->pd_bcache); pd->pd_bcache = NULL; } if (dev->dd.d_dev->dv_type == DEVT_DISK) return (disk_close(dev)); return (0); } static int efipart_ioctl(struct open_file *f, u_long cmd, void *data) { struct disk_devdesc *dev; pdinfo_t *pd; int rc; dev = (struct disk_devdesc *)(f->f_devdata); if (dev == NULL) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EINVAL); if (dev->dd.d_dev->dv_type == DEVT_DISK) { rc = disk_ioctl(dev, cmd, data); if (rc != ENOTTY) return (rc); } switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = pd->pd_blkio->Media->BlockSize; break; case DIOCGMEDIASIZE: *(uint64_t *)data = pd->pd_blkio->Media->BlockSize * (pd->pd_blkio->Media->LastBlock + 1); break; default: return (ENOTTY); } return (0); } /* * efipart_readwrite() * Internal equivalent of efipart_strategy(), which operates on the * media-native block size. This function expects all I/O requests * to be within the media size and returns an error if such is not * the case. */ static int efipart_readwrite(EFI_BLOCK_IO *blkio, int rw, daddr_t blk, daddr_t nblks, char *buf) { EFI_STATUS status; + TSENTER(); + if (blkio == NULL) return (ENXIO); if (blk < 0 || blk > blkio->Media->LastBlock) return (EIO); if ((blk + nblks - 1) > blkio->Media->LastBlock) return (EIO); switch (rw & F_MASK) { case F_READ: status = blkio->ReadBlocks(blkio, blkio->Media->MediaId, blk, nblks * blkio->Media->BlockSize, buf); break; case F_WRITE: if (blkio->Media->ReadOnly) return (EROFS); status = blkio->WriteBlocks(blkio, blkio->Media->MediaId, blk, nblks * blkio->Media->BlockSize, buf); break; default: return (ENOSYS); } if (EFI_ERROR(status)) { printf("%s: rw=%d, blk=%ju size=%ju status=%lu\n", __func__, rw, blk, nblks, EFI_ERROR_CODE(status)); } + TSEXIT(); return (efi_status_to_errno(status)); } static int efipart_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize) { struct bcache_devdata bcd; struct disk_devdesc *dev; pdinfo_t *pd; dev = (struct disk_devdesc *)devdata; if (dev == NULL) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EINVAL); if (pd->pd_blkio->Media->RemovableMedia && !pd->pd_blkio->Media->MediaPresent) return (ENXIO); bcd.dv_strategy = efipart_realstrategy; bcd.dv_devdata = devdata; bcd.dv_cache = pd->pd_bcache; if (dev->dd.d_dev->dv_type == DEVT_DISK) { daddr_t offset; offset = dev->d_offset * pd->pd_blkio->Media->BlockSize; offset /= 512; return (bcache_strategy(&bcd, rw, blk + offset, size, buf, rsize)); } return (bcache_strategy(&bcd, rw, blk, size, buf, rsize)); } static int efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize) { struct disk_devdesc *dev = (struct disk_devdesc *)devdata; pdinfo_t *pd; EFI_BLOCK_IO *blkio; uint64_t off, disk_blocks, d_offset = 0; char *blkbuf; size_t blkoff, blksz, bio_size; unsigned ioalign; bool need_buf; int rc; uint64_t diskend, readstart; if (dev == NULL || blk < 0) return (EINVAL); pd = efiblk_get_pdinfo((struct devdesc *)dev); if (pd == NULL) return (EINVAL); blkio = pd->pd_blkio; if (blkio == NULL) return (ENXIO); if (size == 0 || (size % 512) != 0) return (EIO); off = blk * 512; /* * Get disk blocks, this value is either for whole disk or for * partition. */ disk_blocks = 0; if (dev->dd.d_dev->dv_type == DEVT_DISK) { if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) { /* DIOCGMEDIASIZE does return bytes. */ disk_blocks /= blkio->Media->BlockSize; } d_offset = dev->d_offset; } if (disk_blocks == 0) disk_blocks = blkio->Media->LastBlock + 1 - d_offset; /* make sure we don't read past disk end */ if ((off + size) / blkio->Media->BlockSize > d_offset + disk_blocks) { diskend = d_offset + disk_blocks; readstart = off / blkio->Media->BlockSize; if (diskend <= readstart) { if (rsize != NULL) *rsize = 0; return (EIO); } size = diskend - readstart; size = size * blkio->Media->BlockSize; } need_buf = true; /* Do we need bounce buffer? */ if ((size % blkio->Media->BlockSize == 0) && (off % blkio->Media->BlockSize == 0)) need_buf = false; /* Do we have IO alignment requirement? */ ioalign = blkio->Media->IoAlign; if (ioalign == 0) ioalign++; if (ioalign > 1 && (uintptr_t)buf != roundup2((uintptr_t)buf, ioalign)) need_buf = true; if (need_buf) { for (bio_size = BIO_BUFFER_SIZE; bio_size > 0; bio_size -= blkio->Media->BlockSize) { blkbuf = memalign(ioalign, bio_size); if (blkbuf != NULL) break; } } else { blkbuf = buf; bio_size = size; } if (blkbuf == NULL) return (ENOMEM); if (rsize != NULL) *rsize = size; rc = 0; blk = off / blkio->Media->BlockSize; blkoff = off % blkio->Media->BlockSize; while (size > 0) { size_t x = min(size, bio_size); if (x < blkio->Media->BlockSize) x = 1; else x /= blkio->Media->BlockSize; switch (rw & F_MASK) { case F_READ: blksz = blkio->Media->BlockSize * x - blkoff; if (size < blksz) blksz = size; rc = efipart_readwrite(blkio, rw, blk, x, blkbuf); if (rc != 0) goto error; if (need_buf) bcopy(blkbuf + blkoff, buf, blksz); break; case F_WRITE: rc = 0; if (blkoff != 0) { /* * We got offset to sector, read 1 sector to * blkbuf. */ x = 1; blksz = blkio->Media->BlockSize - blkoff; blksz = min(blksz, size); rc = efipart_readwrite(blkio, F_READ, blk, x, blkbuf); } else if (size < blkio->Media->BlockSize) { /* * The remaining block is not full * sector. Read 1 sector to blkbuf. */ x = 1; blksz = size; rc = efipart_readwrite(blkio, F_READ, blk, x, blkbuf); } else { /* We can write full sector(s). */ blksz = blkio->Media->BlockSize * x; } if (rc != 0) goto error; /* * Put your Data In, Put your Data out, * Put your Data In, and shake it all about */ if (need_buf) bcopy(buf, blkbuf + blkoff, blksz); rc = efipart_readwrite(blkio, F_WRITE, blk, x, blkbuf); if (rc != 0) goto error; break; default: /* DO NOTHING */ rc = EROFS; goto error; } blkoff = 0; buf += blksz; size -= blksz; blk += x; } error: if (rsize != NULL) *rsize -= size; if (need_buf) free(blkbuf); return (rc); } diff --git a/stand/efi/loader/efi_main.c b/stand/efi/loader/efi_main.c index fa3c765c36ab..736c1aa56c99 100644 --- a/stand/efi/loader/efi_main.c +++ b/stand/efi/loader/efi_main.c @@ -1,190 +1,194 @@ /*- * Copyright (c) 2000 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); +#include #include #include #include #include static EFI_PHYSICAL_ADDRESS heap; static UINTN heapsize; void efi_exit(EFI_STATUS exit_code) { BS->FreePages(heap, EFI_SIZE_TO_PAGES(heapsize)); BS->Exit(IH, exit_code, 0, NULL); } void exit(int status) { efi_exit(EFI_LOAD_ERROR); } static CHAR16 * arg_skipsep(CHAR16 *argp) { while (*argp == ' ' || *argp == '\t' || *argp == '\n') argp++; return (argp); } static CHAR16 * arg_skipword(CHAR16 *argp) { while (*argp && *argp != ' ' && *argp != '\t' && *argp != '\n') argp++; return (argp); } EFI_STATUS efi_main(EFI_HANDLE image_handle, EFI_SYSTEM_TABLE *system_table) { static EFI_GUID image_protocol = LOADED_IMAGE_PROTOCOL; static EFI_GUID console_control_protocol = EFI_CONSOLE_CONTROL_PROTOCOL_GUID; EFI_CONSOLE_CONTROL_PROTOCOL *console_control = NULL; EFI_LOADED_IMAGE *img; CHAR16 *argp, *args, **argv; EFI_STATUS status; int argc, addprog; IH = image_handle; ST = system_table; BS = ST->BootServices; RS = ST->RuntimeServices; status = BS->LocateProtocol(&console_control_protocol, NULL, (VOID **)&console_control); if (status == EFI_SUCCESS) (void)console_control->SetMode(console_control, EfiConsoleControlScreenText); heapsize = 64 * 1024 * 1024; status = BS->AllocatePages(AllocateAnyPages, EfiLoaderData, EFI_SIZE_TO_PAGES(heapsize), &heap); if (status != EFI_SUCCESS) { ST->ConOut->OutputString(ST->ConOut, (CHAR16 *)L"Failed to allocate memory for heap.\r\n"); BS->Exit(IH, status, 0, NULL); } setheap((void *)(uintptr_t)heap, (void *)(uintptr_t)(heap + heapsize)); + /* Start tslog now that we have a heap.*/ + tslog_init(); + /* Use efi_exit() from here on... */ status = OpenProtocolByHandle(IH, &image_protocol, (void**)&img); if (status != EFI_SUCCESS) efi_exit(status); /* * Pre-process the (optional) load options. If the option string * is given as an ASCII string, we use a poor man's ASCII to * Unicode-16 translation. The size of the option string as given * to us includes the terminating null character. We assume the * string is an ASCII string if strlen() plus the terminating * '\0' is less than LoadOptionsSize. Even if all Unicode-16 * characters have the upper 8 bits non-zero, the terminating * null character will cause a one-off. * If the string is already in Unicode-16, we make a copy so that * we know we can always modify the string. */ if (img->LoadOptionsSize > 0 && img->LoadOptions != NULL) { if (img->LoadOptionsSize == strlen(img->LoadOptions) + 1) { args = malloc(img->LoadOptionsSize << 1); for (argc = 0; argc < (int)img->LoadOptionsSize; argc++) args[argc] = ((char*)img->LoadOptions)[argc]; } else { args = malloc(img->LoadOptionsSize); memcpy(args, img->LoadOptions, img->LoadOptionsSize); } } else args = NULL; /* * Use a quick and dirty algorithm to build the argv vector. We * first count the number of words. Then, after allocating the * vector, we split the string up. We don't deal with quotes or * other more advanced shell features. * The EFI shell will pass the name of the image as the first * word in the argument list. This does not happen if we're * loaded by the boot manager. This is not so easy to figure * out though. The ParentHandle is not always NULL, because * there can be a function (=image) that will perform the task * for the boot manager. */ /* Part 1: Figure out if we need to add our program name. */ addprog = (args == NULL || img->ParentHandle == NULL || img->FilePath == NULL) ? 1 : 0; if (!addprog) { addprog = (DevicePathType(img->FilePath) != MEDIA_DEVICE_PATH || DevicePathSubType(img->FilePath) != MEDIA_FILEPATH_DP || DevicePathNodeLength(img->FilePath) <= sizeof(FILEPATH_DEVICE_PATH)) ? 1 : 0; if (!addprog) { /* XXX todo. */ } } /* Part 2: count words. */ argc = (addprog) ? 1 : 0; argp = args; while (argp != NULL && *argp != 0) { argp = arg_skipsep(argp); if (*argp == 0) break; argc++; argp = arg_skipword(argp); } /* Part 3: build vector. */ argv = malloc((argc + 1) * sizeof(CHAR16*)); argc = 0; if (addprog) argv[argc++] = (CHAR16 *)L"loader.efi"; argp = args; while (argp != NULL && *argp != 0) { argp = arg_skipsep(argp); if (*argp == 0) break; argv[argc++] = argp; argp = arg_skipword(argp); /* Terminate the words. */ if (*argp != 0) *argp++ = 0; } argv[argc] = NULL; status = main(argc, argv); efi_exit(status); return (status); } diff --git a/stand/i386/libi386/biosdisk.c b/stand/i386/libi386/biosdisk.c index 00d2de5de418..2c52617f255d 100644 --- a/stand/i386/libi386/biosdisk.c +++ b/stand/i386/libi386/biosdisk.c @@ -1,1381 +1,1398 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2012 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * BIOS disk device handling. * * Ideas and algorithms from: * * - NetBSD libi386/biosdisk.c * - FreeBSD biosboot/disk.c * */ #include #include #include #include #include #include #include #include #include #include #include "disk.h" #include "libi386.h" #define BIOS_NUMDRIVES 0x475 #define BIOSDISK_SECSIZE 512 #define BUFSIZE (1 * BIOSDISK_SECSIZE) #define DT_ATAPI 0x10 /* disk type for ATAPI floppies */ #define WDMAJOR 0 /* major numbers for devices we frontend for */ #define WFDMAJOR 1 #define FDMAJOR 2 #define DAMAJOR 4 #define ACDMAJOR 117 #define CDMAJOR 15 /* * INT13 commands */ #define CMD_RESET 0x0000 #define CMD_READ_CHS 0x0200 #define CMD_WRITE_CHS 0x0300 #define CMD_READ_PARAM 0x0800 #define CMD_DRIVE_TYPE 0x1500 #define CMD_CHECK_EDD 0x4100 #define CMD_READ_LBA 0x4200 #define CMD_WRITE_LBA 0x4300 #define CMD_EXT_PARAM 0x4800 #define CMD_CD_GET_STATUS 0x4b01 #define DISK_BIOS 0x13 #ifdef DISK_DEBUG #define DPRINTF(fmt, args...) printf("%s: " fmt "\n", __func__, ## args) #else #define DPRINTF(fmt, args...) ((void)0) #endif struct specification_packet { uint8_t sp_size; uint8_t sp_bootmedia; uint8_t sp_drive; uint8_t sp_controller; uint32_t sp_lba; uint16_t sp_devicespec; uint16_t sp_buffersegment; uint16_t sp_loadsegment; uint16_t sp_sectorcount; uint16_t sp_cylsec; uint8_t sp_head; uint8_t sp_dummy[16]; /* Avoid memory corruption */ }; /* * List of BIOS devices, translation from disk unit number to * BIOS unit number. */ typedef struct bdinfo { STAILQ_ENTRY(bdinfo) bd_link; /* link in device list */ int bd_unit; /* BIOS unit number */ int bd_cyl; /* BIOS geometry */ int bd_hds; int bd_sec; int bd_flags; #define BD_MODEINT13 0x0000 #define BD_MODEEDD1 0x0001 #define BD_MODEEDD3 0x0002 #define BD_MODEEDD (BD_MODEEDD1 | BD_MODEEDD3) #define BD_MODEMASK 0x0003 #define BD_FLOPPY 0x0004 #define BD_CDROM 0x0008 #define BD_NO_MEDIA 0x0010 int bd_type; /* BIOS 'drive type' (floppy only) */ uint16_t bd_sectorsize; /* Sector size */ uint64_t bd_sectors; /* Disk size */ int bd_open; /* reference counter */ void *bd_bcache; /* buffer cache data */ } bdinfo_t; #define BD_RD 0 #define BD_WR 1 typedef STAILQ_HEAD(bdinfo_list, bdinfo) bdinfo_list_t; static bdinfo_list_t fdinfo = STAILQ_HEAD_INITIALIZER(fdinfo); static bdinfo_list_t cdinfo = STAILQ_HEAD_INITIALIZER(cdinfo); static bdinfo_list_t hdinfo = STAILQ_HEAD_INITIALIZER(hdinfo); static void bd_io_workaround(bdinfo_t *); static int bd_io(struct disk_devdesc *, bdinfo_t *, daddr_t, int, caddr_t, int); static bool bd_int13probe(bdinfo_t *); static int bd_init(void); static int cd_init(void); static int fd_init(void); static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size, char *buf, size_t *rsize); static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t size, char *buf, size_t *rsize); static int bd_open(struct open_file *f, ...); static int bd_close(struct open_file *f); static int bd_ioctl(struct open_file *f, u_long cmd, void *data); static int bd_print(int verbose); static int cd_print(int verbose); static int fd_print(int verbose); static void bd_reset_disk(int); static int bd_get_diskinfo_std(struct bdinfo *); struct devsw biosfd = { .dv_name = "fd", .dv_type = DEVT_FD, .dv_init = fd_init, .dv_strategy = bd_strategy, .dv_open = bd_open, .dv_close = bd_close, .dv_ioctl = bd_ioctl, .dv_print = fd_print, .dv_cleanup = NULL }; struct devsw bioscd = { .dv_name = "cd", .dv_type = DEVT_CD, .dv_init = cd_init, .dv_strategy = bd_strategy, .dv_open = bd_open, .dv_close = bd_close, .dv_ioctl = bd_ioctl, .dv_print = cd_print, .dv_cleanup = NULL }; struct devsw bioshd = { .dv_name = "disk", .dv_type = DEVT_DISK, .dv_init = bd_init, .dv_strategy = bd_strategy, .dv_open = bd_open, .dv_close = bd_close, .dv_ioctl = bd_ioctl, .dv_print = bd_print, .dv_cleanup = NULL }; static bdinfo_list_t * bd_get_bdinfo_list(struct devsw *dev) { if (dev->dv_type == DEVT_DISK) return (&hdinfo); if (dev->dv_type == DEVT_CD) return (&cdinfo); if (dev->dv_type == DEVT_FD) return (&fdinfo); return (NULL); } /* XXX this gets called way way too often, investigate */ static bdinfo_t * bd_get_bdinfo(struct devdesc *dev) { bdinfo_list_t *bdi; bdinfo_t *bd = NULL; int unit; bdi = bd_get_bdinfo_list(dev->d_dev); if (bdi == NULL) return (bd); unit = 0; STAILQ_FOREACH(bd, bdi, bd_link) { if (unit == dev->d_unit) return (bd); unit++; } return (bd); } /* * Translate between BIOS device numbers and our private unit numbers. */ int bd_bios2unit(int biosdev) { bdinfo_list_t *bdi[] = { &fdinfo, &cdinfo, &hdinfo, NULL }; bdinfo_t *bd; int i, unit; DPRINTF("looking for bios device 0x%x", biosdev); for (i = 0; bdi[i] != NULL; i++) { unit = 0; STAILQ_FOREACH(bd, bdi[i], bd_link) { if (bd->bd_unit == biosdev) { DPRINTF("bd unit %d is BIOS device 0x%x", unit, bd->bd_unit); return (unit); } unit++; } } return (-1); } int bd_unit2bios(struct i386_devdesc *dev) { bdinfo_list_t *bdi; bdinfo_t *bd; int unit; bdi = bd_get_bdinfo_list(dev->dd.d_dev); if (bdi == NULL) return (-1); unit = 0; STAILQ_FOREACH(bd, bdi, bd_link) { if (unit == dev->dd.d_unit) return (bd->bd_unit); unit++; } return (-1); } /* * Use INT13 AH=15 - Read Drive Type. */ static int fd_count(void) { int drive; for (drive = 0; drive < MAXBDDEV; drive++) { bd_reset_disk(drive); v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; v86.eax = CMD_DRIVE_TYPE; v86.edx = drive; v86int(); if (V86_CY(v86.efl)) break; if ((v86.eax & 0x300) == 0) break; } return (drive); } /* * Quiz the BIOS for disk devices, save a little info about them. */ static int fd_init(void) { int unit, numfd; bdinfo_t *bd; numfd = fd_count(); for (unit = 0; unit < numfd; unit++) { if ((bd = calloc(1, sizeof(*bd))) == NULL) break; bd->bd_sectorsize = BIOSDISK_SECSIZE; bd->bd_flags = BD_FLOPPY; bd->bd_unit = unit; /* Use std diskinfo for floppy drive */ if (bd_get_diskinfo_std(bd) != 0) { free(bd); break; } if (bd->bd_sectors == 0) bd->bd_flags |= BD_NO_MEDIA; printf("BIOS drive %c: is %s%d\n", ('A' + unit), biosfd.dv_name, unit); STAILQ_INSERT_TAIL(&fdinfo, bd, bd_link); } bcache_add_dev(unit); return (0); } static int bd_init(void) { int base, unit; bdinfo_t *bd; + TSENTER(); + base = 0x80; for (unit = 0; unit < *(unsigned char *)PTOV(BIOS_NUMDRIVES); unit++) { /* * Check the BIOS equipment list for number of fixed disks. */ if ((bd = calloc(1, sizeof(*bd))) == NULL) break; bd->bd_unit = base + unit; if (!bd_int13probe(bd)) { free(bd); break; } printf("BIOS drive %c: is %s%d\n", ('C' + unit), bioshd.dv_name, unit); STAILQ_INSERT_TAIL(&hdinfo, bd, bd_link); } bcache_add_dev(unit); + TSEXIT(); return (0); } /* * We can't quiz, we have to be told what device to use, so this function * doesn't do anything. Instead, the loader calls bc_add() with the BIOS * device number to add. */ static int cd_init(void) { return (0); } /* * Information from bootable CD-ROM. */ static int bd_get_diskinfo_cd(struct bdinfo *bd) { struct specification_packet bc_sp; int ret = -1; (void) memset(&bc_sp, 0, sizeof (bc_sp)); /* Set sp_size as per specification. */ bc_sp.sp_size = sizeof (bc_sp) - sizeof (bc_sp.sp_dummy); v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; v86.eax = CMD_CD_GET_STATUS; v86.edx = bd->bd_unit; v86.ds = VTOPSEG(&bc_sp); v86.esi = VTOPOFF(&bc_sp); v86int(); if ((v86.eax & 0xff00) == 0 && bc_sp.sp_drive == bd->bd_unit) { bd->bd_cyl = ((bc_sp.sp_cylsec & 0xc0) << 2) + ((bc_sp.sp_cylsec & 0xff00) >> 8) + 1; bd->bd_sec = bc_sp.sp_cylsec & 0x3f; bd->bd_hds = bc_sp.sp_head + 1; bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec; if (bc_sp.sp_bootmedia & 0x0F) { /* Floppy or hard-disk emulation */ bd->bd_sectorsize = BIOSDISK_SECSIZE; return (-1); } else { bd->bd_sectorsize = 2048; bd->bd_flags = BD_MODEEDD | BD_CDROM; ret = 0; } } /* * If this is the boot_drive, default to non-emulation bootable CD-ROM. */ if (ret != 0 && bd->bd_unit >= 0x88) { bd->bd_cyl = 0; bd->bd_hds = 1; bd->bd_sec = 15; bd->bd_sectorsize = 2048; bd->bd_flags = BD_MODEEDD | BD_CDROM; bd->bd_sectors = 0; ret = 0; } /* * Note we can not use bd_get_diskinfo_ext() nor bd_get_diskinfo_std() * here - some systems do get hung with those. */ /* * Still no size? use 7.961GB. The size does not really matter * as long as it is reasonably large to make our reads to pass * the sector count check. */ if (bd->bd_sectors == 0) bd->bd_sectors = 4173824; return (ret); } int bc_add(int biosdev) { bdinfo_t *bd; int nbcinfo = 0; if (!STAILQ_EMPTY(&cdinfo)) return (-1); if ((bd = calloc(1, sizeof(*bd))) == NULL) return (-1); bd->bd_unit = biosdev; if (bd_get_diskinfo_cd(bd) < 0) { free(bd); return (-1); } STAILQ_INSERT_TAIL(&cdinfo, bd, bd_link); printf("BIOS CD is cd%d\n", nbcinfo); nbcinfo++; bcache_add_dev(nbcinfo); /* register cd device in bcache */ return(0); } /* * Return EDD version or 0 if EDD is not supported on this drive. */ static int bd_check_extensions(int unit) { /* do not use ext calls for floppy devices */ if (unit < 0x80) return (0); /* Determine if we can use EDD with this device. */ v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; v86.eax = CMD_CHECK_EDD; v86.edx = unit; v86.ebx = EDD_QUERY_MAGIC; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ebx & 0xffff) != EDD_INSTALLED) /* signature */ return (0); /* extended disk access functions (AH=42h-44h,47h,48h) supported */ if ((v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) return (0); return ((v86.eax >> 8) & 0xff); } static void bd_reset_disk(int unit) { /* reset disk */ v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; v86.eax = CMD_RESET; v86.edx = unit; v86int(); } /* * Read CHS info. Return 0 on success, error otherwise. */ static int bd_get_diskinfo_std(struct bdinfo *bd) { bzero(&v86, sizeof(v86)); v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; v86.eax = CMD_READ_PARAM; v86.edx = bd->bd_unit; v86int(); if (V86_CY(v86.efl) && ((v86.eax & 0xff00) != 0)) return ((v86.eax & 0xff00) >> 8); /* return custom error on absurd sector number */ if ((v86.ecx & 0x3f) == 0) return (0x60); bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; /* Convert max head # -> # of heads */ bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1; bd->bd_sec = v86.ecx & 0x3f; bd->bd_type = v86.ebx; bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec; return (0); } /* * Read EDD info. Return 0 on success, error otherwise. * * Avoid stack corruption on some systems by adding extra bytes to * params block. */ static int bd_get_diskinfo_ext(struct bdinfo *bd) { struct disk_params { struct edd_params head; struct edd_device_path_v3 device_path; uint8_t dummy[16]; } __packed dparams; struct edd_params *params; uint64_t total; params = &dparams.head; /* Get disk params */ bzero(&dparams, sizeof(dparams)); params->len = sizeof(struct edd_params_v3); v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; v86.eax = CMD_EXT_PARAM; v86.edx = bd->bd_unit; v86.ds = VTOPSEG(&dparams); v86.esi = VTOPOFF(&dparams); v86int(); if (V86_CY(v86.efl) && ((v86.eax & 0xff00) != 0)) return ((v86.eax & 0xff00) >> 8); /* * Sector size must be a multiple of 512 bytes. * An alternate test would be to check power of 2, * powerof2(params.sector_size). * 16K is largest read buffer we can use at this time. */ if (params->sector_size >= 512 && params->sector_size <= 16384 && (params->sector_size % BIOSDISK_SECSIZE) == 0) bd->bd_sectorsize = params->sector_size; bd->bd_cyl = params->cylinders; bd->bd_hds = params->heads; bd->bd_sec = params->sectors_per_track; if (params->sectors != 0) { total = params->sectors; } else { total = (uint64_t)params->cylinders * params->heads * params->sectors_per_track; } bd->bd_sectors = total; return (0); } /* * Try to detect a device supported by the legacy int13 BIOS */ static bool bd_int13probe(bdinfo_t *bd) { int edd, ret; bd->bd_flags &= ~BD_NO_MEDIA; if ((bd->bd_flags & BD_CDROM) != 0) { return (bd_get_diskinfo_cd(bd) == 0); } edd = bd_check_extensions(bd->bd_unit); if (edd == 0) bd->bd_flags |= BD_MODEINT13; else if (edd < 0x30) bd->bd_flags |= BD_MODEEDD1; else bd->bd_flags |= BD_MODEEDD3; /* Default sector size */ if (bd->bd_sectorsize == 0) bd->bd_sectorsize = BIOSDISK_SECSIZE; /* * Test if the floppy device is present, so we can avoid receiving * bogus information from bd_get_diskinfo_std(). */ if (bd->bd_unit < 0x80) { /* reset disk */ bd_reset_disk(bd->bd_unit); /* Get disk type */ v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; v86.eax = CMD_DRIVE_TYPE; v86.edx = bd->bd_unit; v86int(); if (V86_CY(v86.efl) || (v86.eax & 0x300) == 0) return (false); } ret = 1; if (edd != 0) ret = bd_get_diskinfo_ext(bd); if (ret != 0 || bd->bd_sectors == 0) ret = bd_get_diskinfo_std(bd); if (ret != 0 && bd->bd_unit < 0x80) { /* Set defaults for 1.44 floppy */ bd->bd_cyl = 80; bd->bd_hds = 2; bd->bd_sec = 18; bd->bd_sectors = 2880; /* Since we are there, there most likely is no media */ bd->bd_flags |= BD_NO_MEDIA; ret = 0; } if (ret != 0) { if (bd->bd_sectors != 0 && edd != 0) { bd->bd_sec = 63; bd->bd_hds = 255; bd->bd_cyl = (bd->bd_sectors + bd->bd_sec * bd->bd_hds - 1) / bd->bd_sec * bd->bd_hds; } else { const char *dv_name; if ((bd->bd_flags & BD_FLOPPY) != 0) dv_name = biosfd.dv_name; else dv_name = bioshd.dv_name; printf("Can not get information about %s unit %#x\n", dv_name, bd->bd_unit); return (false); } } if (bd->bd_sec == 0) bd->bd_sec = 63; if (bd->bd_hds == 0) bd->bd_hds = 255; if (bd->bd_sectors == 0) bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec; DPRINTF("unit 0x%x geometry %d/%d/%d\n", bd->bd_unit, bd->bd_cyl, bd->bd_hds, bd->bd_sec); return (true); } static int bd_count(bdinfo_list_t *bdi) { bdinfo_t *bd; int i; i = 0; STAILQ_FOREACH(bd, bdi, bd_link) i++; return (i); } /* * Print information about disks */ static int bd_print_common(struct devsw *dev, bdinfo_list_t *bdi, int verbose) { char line[80]; struct disk_devdesc devd; bdinfo_t *bd; int i, ret = 0; char drive; if (STAILQ_EMPTY(bdi)) return (0); printf("%s devices:", dev->dv_name); if ((ret = pager_output("\n")) != 0) return (ret); i = -1; STAILQ_FOREACH(bd, bdi, bd_link) { i++; switch (dev->dv_type) { case DEVT_FD: drive = 'A'; break; case DEVT_CD: drive = 'C' + bd_count(&hdinfo); break; default: drive = 'C'; break; } snprintf(line, sizeof(line), " %s%d: BIOS drive %c (%s%ju X %u):\n", dev->dv_name, i, drive + i, (bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA ? "no media, " : "", (uintmax_t)bd->bd_sectors, bd->bd_sectorsize); if ((ret = pager_output(line)) != 0) break; if ((bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA) continue; if (dev->dv_type != DEVT_DISK) continue; devd.dd.d_dev = dev; devd.dd.d_unit = i; devd.d_slice = D_SLICENONE; devd.d_partition = D_PARTNONE; if (disk_open(&devd, bd->bd_sectorsize * bd->bd_sectors, bd->bd_sectorsize) == 0) { snprintf(line, sizeof(line), " %s%d", dev->dv_name, i); ret = disk_print(&devd, line, verbose); disk_close(&devd); if (ret != 0) break; } } return (ret); } static int fd_print(int verbose) { return (bd_print_common(&biosfd, &fdinfo, verbose)); } static int bd_print(int verbose) { return (bd_print_common(&bioshd, &hdinfo, verbose)); } static int cd_print(int verbose) { return (bd_print_common(&bioscd, &cdinfo, verbose)); } /* * Read disk size from partition. * This is needed to work around buggy BIOS systems returning * wrong (truncated) disk media size. * During bd_probe() we tested if the multiplication of bd_sectors * would overflow so it should be safe to perform here. */ static uint64_t bd_disk_get_sectors(struct disk_devdesc *dev) { bdinfo_t *bd; struct disk_devdesc disk; uint64_t size; bd = bd_get_bdinfo(&dev->dd); if (bd == NULL) return (0); disk.dd.d_dev = dev->dd.d_dev; disk.dd.d_unit = dev->dd.d_unit; disk.d_slice = D_SLICENONE; disk.d_partition = D_PARTNONE; disk.d_offset = 0; size = bd->bd_sectors * bd->bd_sectorsize; if (disk_open(&disk, size, bd->bd_sectorsize) == 0) { (void) disk_ioctl(&disk, DIOCGMEDIASIZE, &size); disk_close(&disk); } return (size / bd->bd_sectorsize); } /* * Attempt to open the disk described by (dev) for use by (f). * * Note that the philosophy here is "give them exactly what * they ask for". This is necessary because being too "smart" * about what the user might want leads to complications. * (eg. given no slice or partition value, with a disk that is * sliced - are they after the first BSD slice, or the DOS * slice before it?) */ static int bd_open(struct open_file *f, ...) { bdinfo_t *bd; struct disk_devdesc *dev; va_list ap; int rc; + TSENTER(); + va_start(ap, f); dev = va_arg(ap, struct disk_devdesc *); va_end(ap); bd = bd_get_bdinfo(&dev->dd); if (bd == NULL) return (EIO); if ((bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA) { if (!bd_int13probe(bd)) return (EIO); if ((bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA) return (EIO); } if (bd->bd_bcache == NULL) bd->bd_bcache = bcache_allocate(); if (bd->bd_open == 0) bd->bd_sectors = bd_disk_get_sectors(dev); bd->bd_open++; rc = 0; if (dev->dd.d_dev->dv_type == DEVT_DISK) { rc = disk_open(dev, bd->bd_sectors * bd->bd_sectorsize, bd->bd_sectorsize); if (rc != 0) { bd->bd_open--; if (bd->bd_open == 0) { bcache_free(bd->bd_bcache); bd->bd_bcache = NULL; } } } + TSEXIT(); return (rc); } static int bd_close(struct open_file *f) { struct disk_devdesc *dev; bdinfo_t *bd; int rc = 0; dev = (struct disk_devdesc *)f->f_devdata; bd = bd_get_bdinfo(&dev->dd); if (bd == NULL) return (EIO); bd->bd_open--; if (bd->bd_open == 0) { bcache_free(bd->bd_bcache); bd->bd_bcache = NULL; } if (dev->dd.d_dev->dv_type == DEVT_DISK) rc = disk_close(dev); return (rc); } static int bd_ioctl(struct open_file *f, u_long cmd, void *data) { bdinfo_t *bd; struct disk_devdesc *dev; int rc; dev = (struct disk_devdesc *)f->f_devdata; bd = bd_get_bdinfo(&dev->dd); if (bd == NULL) return (EIO); if (dev->dd.d_dev->dv_type == DEVT_DISK) { rc = disk_ioctl(dev, cmd, data); if (rc != ENOTTY) return (rc); } switch (cmd) { case DIOCGSECTORSIZE: *(uint32_t *)data = bd->bd_sectorsize; break; case DIOCGMEDIASIZE: *(uint64_t *)data = bd->bd_sectors * bd->bd_sectorsize; break; default: return (ENOTTY); } return (0); } static int bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) { bdinfo_t *bd; struct bcache_devdata bcd; struct disk_devdesc *dev; daddr_t offset; dev = (struct disk_devdesc *)devdata; bd = bd_get_bdinfo(&dev->dd); if (bd == NULL) return (EINVAL); bcd.dv_strategy = bd_realstrategy; bcd.dv_devdata = devdata; bcd.dv_cache = bd->bd_bcache; offset = 0; if (dev->dd.d_dev->dv_type == DEVT_DISK) { offset = dev->d_offset * bd->bd_sectorsize; offset /= BIOSDISK_SECSIZE; } return (bcache_strategy(&bcd, rw, dblk + offset, size, buf, rsize)); } static int bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) { struct disk_devdesc *dev = (struct disk_devdesc *)devdata; bdinfo_t *bd; uint64_t disk_blocks, offset, d_offset; size_t blks, blkoff, bsize, bio_size, rest; caddr_t bbuf = NULL; int rc; bd = bd_get_bdinfo(&dev->dd); if (bd == NULL || (bd->bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA) return (EIO); /* * First make sure the IO size is a multiple of 512 bytes. While we do * process partial reads below, the strategy mechanism is built * assuming IO is a multiple of 512B blocks. If the request is not * a multiple of 512B blocks, it has to be some sort of bug. */ if (size == 0 || (size % BIOSDISK_SECSIZE) != 0) { printf("bd_strategy: %d bytes I/O not multiple of %d\n", size, BIOSDISK_SECSIZE); return (EIO); } DPRINTF("open_disk %p", dev); offset = dblk * BIOSDISK_SECSIZE; dblk = offset / bd->bd_sectorsize; blkoff = offset % bd->bd_sectorsize; /* * Check the value of the size argument. We do have quite small * heap (64MB), but we do not know good upper limit, so we check against * INT_MAX here. This will also protect us against possible overflows * while translating block count to bytes. */ if (size > INT_MAX) { DPRINTF("too large I/O: %zu bytes", size); return (EIO); } blks = size / bd->bd_sectorsize; if (blks == 0 || (size % bd->bd_sectorsize) != 0) blks++; if (dblk > dblk + blks) return (EIO); if (rsize) *rsize = 0; /* * Get disk blocks, this value is either for whole disk or for * partition. */ d_offset = 0; disk_blocks = 0; if (dev->dd.d_dev->dv_type == DEVT_DISK) { if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) { /* DIOCGMEDIASIZE does return bytes. */ disk_blocks /= bd->bd_sectorsize; } d_offset = dev->d_offset; } if (disk_blocks == 0) disk_blocks = bd->bd_sectors - d_offset; /* Validate source block address. */ if (dblk < d_offset || dblk >= d_offset + disk_blocks) return (EIO); /* * Truncate if we are crossing disk or partition end. */ if (dblk + blks >= d_offset + disk_blocks) { blks = d_offset + disk_blocks - dblk; size = blks * bd->bd_sectorsize; DPRINTF("short I/O %d", blks); } bio_size = min(BIO_BUFFER_SIZE, size); while (bio_size > bd->bd_sectorsize) { bbuf = bio_alloc(bio_size); if (bbuf != NULL) break; bio_size -= bd->bd_sectorsize; } if (bbuf == NULL) { bio_size = V86_IO_BUFFER_SIZE; if (bio_size / bd->bd_sectorsize == 0) panic("BUG: Real mode buffer is too small"); /* Use alternate 4k buffer */ bbuf = PTOV(V86_IO_BUFFER); } rest = size; rc = 0; while (blks > 0) { int x = min(blks, bio_size / bd->bd_sectorsize); switch (rw & F_MASK) { case F_READ: DPRINTF("read %d from %lld to %p", x, dblk, buf); bsize = bd->bd_sectorsize * x - blkoff; if (rest < bsize) bsize = rest; if ((rc = bd_io(dev, bd, dblk, x, bbuf, BD_RD)) != 0) { rc = EIO; goto error; } bcopy(bbuf + blkoff, buf, bsize); break; case F_WRITE : DPRINTF("write %d from %lld to %p", x, dblk, buf); if (blkoff != 0) { /* * We got offset to sector, read 1 sector to * bbuf. */ x = 1; bsize = bd->bd_sectorsize - blkoff; bsize = min(bsize, rest); rc = bd_io(dev, bd, dblk, x, bbuf, BD_RD); } else if (rest < bd->bd_sectorsize) { /* * The remaining block is not full * sector. Read 1 sector to bbuf. */ x = 1; bsize = rest; rc = bd_io(dev, bd, dblk, x, bbuf, BD_RD); } else { /* We can write full sector(s). */ bsize = bd->bd_sectorsize * x; } /* * Put your Data In, Put your Data out, * Put your Data In, and shake it all about */ bcopy(buf, bbuf + blkoff, bsize); if ((rc = bd_io(dev, bd, dblk, x, bbuf, BD_WR)) != 0) { rc = EIO; goto error; } break; default: /* DO NOTHING */ rc = EROFS; goto error; } blkoff = 0; buf += bsize; rest -= bsize; blks -= x; dblk += x; } if (rsize != NULL) *rsize = size; error: if (bbuf != PTOV(V86_IO_BUFFER)) bio_free(bbuf, bio_size); return (rc); } static int bd_edd_io(bdinfo_t *bd, daddr_t dblk, int blks, caddr_t dest, int dowrite) { static struct edd_packet packet; + TSENTER(); + packet.len = sizeof(struct edd_packet); packet.count = blks; packet.off = VTOPOFF(dest); packet.seg = VTOPSEG(dest); packet.lba = dblk; v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; if (dowrite == BD_WR) v86.eax = CMD_WRITE_LBA; /* maybe Write with verify 0x4302? */ else v86.eax = CMD_READ_LBA; v86.edx = bd->bd_unit; v86.ds = VTOPSEG(&packet); v86.esi = VTOPOFF(&packet); v86int(); if (V86_CY(v86.efl)) return (v86.eax >> 8); + + TSEXIT(); return (0); } static int bd_chs_io(bdinfo_t *bd, daddr_t dblk, int blks, caddr_t dest, int dowrite) { uint32_t x, bpc, cyl, hd, sec; + TSENTER(); + bpc = bd->bd_sec * bd->bd_hds; /* blocks per cylinder */ x = dblk; cyl = x / bpc; /* block # / blocks per cylinder */ x %= bpc; /* block offset into cylinder */ hd = x / bd->bd_sec; /* offset / blocks per track */ sec = x % bd->bd_sec; /* offset into track */ /* correct sector number for 1-based BIOS numbering */ sec++; if (cyl > 1023) { /* CHS doesn't support cylinders > 1023. */ return (1); } v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; if (dowrite == BD_WR) v86.eax = CMD_WRITE_CHS | blks; else v86.eax = CMD_READ_CHS | blks; v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec; v86.edx = (hd << 8) | bd->bd_unit; v86.es = VTOPSEG(dest); v86.ebx = VTOPOFF(dest); v86int(); if (V86_CY(v86.efl)) return (v86.eax >> 8); + TSEXIT(); return (0); } static void bd_io_workaround(bdinfo_t *bd) { uint8_t buf[8 * 1024]; bd_edd_io(bd, 0xffffffff, 1, (caddr_t)buf, BD_RD); } static int bd_io(struct disk_devdesc *dev, bdinfo_t *bd, daddr_t dblk, int blks, caddr_t dest, int dowrite) { int result, retry; + TSENTER(); + /* Just in case some idiot actually tries to read/write -1 blocks... */ if (blks < 0) return (-1); /* * Workaround for a problem with some HP ProLiant BIOS failing to work * out the boot disk after installation. hrs and kuriyama discovered * this problem with an HP ProLiant DL320e Gen 8 with a 3TB HDD, and * discovered that an int13h call seems to cause a buffer overrun in * the bios. The problem is alleviated by doing an extra read before * the buggy read. It is not immediately known whether other models * are similarly affected. * Loop retrying the operation a couple of times. The BIOS * may also retry. */ if (dowrite == BD_RD && dblk >= 0x100000000) bd_io_workaround(bd); for (retry = 0; retry < 3; retry++) { if (bd->bd_flags & BD_MODEEDD) result = bd_edd_io(bd, dblk, blks, dest, dowrite); else result = bd_chs_io(bd, dblk, blks, dest, dowrite); if (result == 0) { if (bd->bd_flags & BD_NO_MEDIA) bd->bd_flags &= ~BD_NO_MEDIA; break; } bd_reset_disk(bd->bd_unit); /* * Error codes: * 20h controller failure * 31h no media in drive (IBM/MS INT 13 extensions) * 80h no media in drive, VMWare (Fusion) * There is no reason to repeat the IO with errors above. */ if (result == 0x20 || result == 0x31 || result == 0x80) { bd->bd_flags |= BD_NO_MEDIA; break; } } if (result != 0 && (bd->bd_flags & BD_NO_MEDIA) == 0) { if (dowrite == BD_WR) { printf("%s%d: Write %d sector(s) from %p (0x%x) " "to %lld: 0x%x\n", dev->dd.d_dev->dv_name, dev->dd.d_unit, blks, dest, VTOP(dest), dblk, result); } else { printf("%s%d: Read %d sector(s) from %lld to %p " "(0x%x): 0x%x\n", dev->dd.d_dev->dv_name, dev->dd.d_unit, blks, dblk, dest, VTOP(dest), result); } } + TSEXIT(); + return (result); } /* * Return the BIOS geometry of a given "fixed drive" in a format * suitable for the legacy bootinfo structure. Since the kernel is * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we * prefer to get the information directly, rather than rely on being * able to put it together from information already maintained for * different purposes and for a probably different number of drives. * * For valid drives, the geometry is expected in the format (31..0) * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are * indicated by returning the geometry of a "1.2M" PC-format floppy * disk. And, incidentally, what is returned is not the geometry as * such but the highest valid cylinder, head, and sector numbers. */ uint32_t bd_getbigeom(int bunit) { v86.ctl = V86_FLAGS; v86.addr = DISK_BIOS; v86.eax = CMD_READ_PARAM; v86.edx = 0x80 + bunit; v86int(); if (V86_CY(v86.efl)) return (0x4f010f); return (((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) | (v86.edx & 0xff00) | (v86.ecx & 0x3f)); } /* * Return a suitable dev_t value for (dev). * * In the case where it looks like (dev) is a SCSI disk, we allow the number of * IDE disks to be specified in $num_ide_disks. There should be a Better Way. */ int bd_getdev(struct i386_devdesc *d) { struct disk_devdesc *dev; bdinfo_t *bd; int biosdev; int major; int rootdev; char *nip, *cp; int i, unit, slice, partition; /* XXX: Assume partition 'a'. */ slice = 0; partition = 0; dev = (struct disk_devdesc *)d; bd = bd_get_bdinfo(&dev->dd); if (bd == NULL) return (-1); biosdev = bd_unit2bios(d); DPRINTF("unit %d BIOS device %d", dev->dd.d_unit, biosdev); if (biosdev == -1) /* not a BIOS device */ return (-1); if (dev->dd.d_dev->dv_type == DEVT_DISK) { if (disk_open(dev, bd->bd_sectors * bd->bd_sectorsize, bd->bd_sectorsize) != 0) /* oops, not a viable device */ return (-1); else disk_close(dev); slice = dev->d_slice + 1; partition = dev->d_partition; } if (biosdev < 0x80) { /* floppy (or emulated floppy) or ATAPI device */ if (bd->bd_type == DT_ATAPI) { /* is an ATAPI disk */ major = WFDMAJOR; } else { /* is a floppy disk */ major = FDMAJOR; } } else { /* assume an IDE disk */ major = WDMAJOR; } /* default root disk unit number */ unit = biosdev & 0x7f; if (dev->dd.d_dev->dv_type == DEVT_CD) { /* * XXX: Need to examine device spec here to figure out if * SCSI or ATAPI. No idea on how to figure out device number. * All we can really pass to the kernel is what bus and device * on which bus we were booted from, which dev_t isn't well * suited to since those number don't match to unit numbers * very well. We may just need to engage in a hack where * we pass -C to the boot args if we are the boot device. */ major = ACDMAJOR; unit = 0; /* XXX */ } /* XXX a better kludge to set the root disk unit number */ if ((nip = getenv("root_disk_unit")) != NULL) { i = strtol(nip, &cp, 0); /* check for parse error */ if ((cp != nip) && (*cp == 0)) unit = i; } rootdev = MAKEBOOTDEV(major, slice, unit, partition); DPRINTF("dev is 0x%x\n", rootdev); return (rootdev); } diff --git a/stand/i386/loader/main.c b/stand/i386/loader/main.c index a595340d3a13..6b81ef411f7f 100644 --- a/stand/i386/loader/main.c +++ b/stand/i386/loader/main.c @@ -1,458 +1,464 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * MD bootstrap main() and assorted miscellaneous * commands. */ #include #include #include #include #include #include #include #include #include #include "bootstrap.h" #include "common/bootargs.h" #include "libi386/libi386.h" #include #include "btxv86.h" #ifdef LOADER_ZFS_SUPPORT #include #include "libzfs.h" #endif CTASSERT(sizeof(struct bootargs) == BOOTARGS_SIZE); CTASSERT(offsetof(struct bootargs, bootinfo) == BA_BOOTINFO); CTASSERT(offsetof(struct bootargs, bootflags) == BA_BOOTFLAGS); CTASSERT(offsetof(struct bootinfo, bi_size) == BI_SIZE); /* Arguments passed in from the boot1/boot2 loader */ static struct bootargs *kargs; static uint32_t initial_howto; static uint32_t initial_bootdev; static struct bootinfo *initial_bootinfo; struct arch_switch archsw; /* MI/MD interface boundary */ static void extract_currdev(void); static int isa_inb(int port); static void isa_outb(int port, int value); void exit(int code); #ifdef LOADER_GELI_SUPPORT #include "geliboot.h" struct geli_boot_args *gargs; struct geli_boot_data *gbdata; #endif #ifdef LOADER_ZFS_SUPPORT struct zfs_boot_args *zargs; static void i386_zfs_probe(void); #endif /* XXX debugging */ extern char end[]; static void *heap_top; static void *heap_bottom; caddr_t ptov(uintptr_t x) { return (PTOV(x)); } int main(void) { int i; /* Pick up arguments */ kargs = (void *)__args; initial_howto = kargs->howto; initial_bootdev = kargs->bootdev; initial_bootinfo = kargs->bootinfo ? (struct bootinfo *)PTOV(kargs->bootinfo) : NULL; /* Initialize the v86 register set to a known-good state. */ bzero(&v86, sizeof(v86)); v86.efl = PSL_RESERVED_DEFAULT | PSL_I; /* * Initialise the heap as early as possible. * Once this is done, malloc() is usable. */ bios_getmem(); #if defined(LOADER_BZIP2_SUPPORT) || defined(LOADER_FIREWIRE_SUPPORT) || \ defined(LOADER_GPT_SUPPORT) || defined(LOADER_ZFS_SUPPORT) if (high_heap_size > 0) { heap_top = PTOV(high_heap_base + high_heap_size); heap_bottom = PTOV(high_heap_base); if (high_heap_base < memtop_copyin) memtop_copyin = high_heap_base; } else #endif { heap_top = (void *)PTOV(bios_basemem); heap_bottom = (void *)end; } setheap(heap_bottom, heap_top); + /* + * Now that malloc is usable, allocate a buffer for tslog and start + * logging timestamps during the boot process. + */ + tslog_init(); + /* * detect ACPI for future reference. This may set console to comconsole * if we do have ACPI SPCR table. */ biosacpi_detect(); /* * XXX Chicken-and-egg problem; we want to have console output early, * but some console attributes may depend on reading from eg. the boot * device, which we can't do yet. * * We can use printf() etc. once this is done. * If the previous boot stage has requested a serial console, * prefer that. */ bi_setboothowto(initial_howto); if (initial_howto & RB_MULTIPLE) { if (initial_howto & RB_SERIAL) setenv("console", "comconsole vidconsole", 1); else setenv("console", "vidconsole comconsole", 1); } else if (initial_howto & RB_SERIAL) { setenv("console", "comconsole", 1); } else if (initial_howto & RB_MUTE) { setenv("console", "nullconsole", 1); } cons_probe(); /* * Initialise the block cache. Set the upper limit. */ bcache_init(32768, 512); /* * Special handling for PXE and CD booting. */ if (kargs->bootinfo == 0) { /* * We only want the PXE disk to try to init itself in the below * walk through devsw if we actually booted off of PXE. */ if (kargs->bootflags & KARGS_FLAGS_PXE) pxe_enable(kargs->pxeinfo ? PTOV(kargs->pxeinfo) : NULL); else if (kargs->bootflags & KARGS_FLAGS_CD) bc_add(initial_bootdev); } archsw.arch_autoload = i386_autoload; archsw.arch_getdev = i386_getdev; archsw.arch_copyin = i386_copyin; archsw.arch_copyout = i386_copyout; archsw.arch_readin = i386_readin; archsw.arch_isainb = isa_inb; archsw.arch_isaoutb = isa_outb; archsw.arch_hypervisor = x86_hypervisor; #ifdef LOADER_ZFS_SUPPORT archsw.arch_zfs_probe = i386_zfs_probe; /* * zfsboot and gptzfsboot have always passed KARGS_FLAGS_ZFS, * so if that is set along with KARGS_FLAGS_EXTARG we know we * can interpret the extarg data as a struct zfs_boot_args. */ #define KARGS_EXTARGS_ZFS (KARGS_FLAGS_EXTARG | KARGS_FLAGS_ZFS) if ((kargs->bootflags & KARGS_EXTARGS_ZFS) == KARGS_EXTARGS_ZFS) { zargs = (struct zfs_boot_args *)(kargs + 1); } #endif /* LOADER_ZFS_SUPPORT */ #ifdef LOADER_GELI_SUPPORT /* * If we decided earlier that we have zfs_boot_args extarg data, * and it is big enough to contain the embedded geli data * (the early zfs_boot_args structs weren't), then init the gbdata * pointer accordingly. If there is extarg data which isn't * zfs_boot_args data, determine whether it is geli_boot_args data. * Recent versions of gptboot set KARGS_FLAGS_GELI to indicate that. * Earlier versions didn't, but we presume that's what we * have if the extarg size exactly matches the size of the * geli_boot_args struct during that pre-flag era. */ #define LEGACY_GELI_ARGS_SIZE 260 /* This can never change */ #ifdef LOADER_ZFS_SUPPORT if (zargs != NULL) { if (zargs->size > offsetof(struct zfs_boot_args, gelidata)) { gbdata = &zargs->gelidata; } } else #endif /* LOADER_ZFS_SUPPORT */ if ((kargs->bootflags & KARGS_FLAGS_EXTARG) != 0) { gargs = (struct geli_boot_args *)(kargs + 1); if ((kargs->bootflags & KARGS_FLAGS_GELI) || gargs->size == LEGACY_GELI_ARGS_SIZE) { gbdata = &gargs->gelidata; } } if (gbdata != NULL) import_geli_boot_data(gbdata); #endif /* LOADER_GELI_SUPPORT */ /* * March through the device switch probing for things. */ for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); printf("BIOS %dkB/%dkB available memory\n", bios_basemem / 1024, bios_extmem / 1024); if (initial_bootinfo != NULL) { initial_bootinfo->bi_basemem = bios_basemem / 1024; initial_bootinfo->bi_extmem = bios_extmem / 1024; } /* detect SMBIOS for future reference */ smbios_detect(NULL); /* detect PCI BIOS for future reference */ biospci_detect(); printf("\n%s", bootprog_info); extract_currdev(); /* set $currdev and $loaddev */ autoload_font(true); bios_getsmap(); interact(); /* if we ever get here, it is an error */ return (1); } /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. * * XXX should be extended for netbooting. */ static void extract_currdev(void) { struct i386_devdesc new_currdev; #ifdef LOADER_ZFS_SUPPORT char buf[20]; char *bootonce; #endif int biosdev = -1; /* Assume we are booting from a BIOS disk by default */ new_currdev.dd.d_dev = &bioshd; /* new-style boot loaders such as pxeldr and cdldr */ if (kargs->bootinfo == 0) { if ((kargs->bootflags & KARGS_FLAGS_CD) != 0) { /* we are booting from a CD with cdboot */ new_currdev.dd.d_dev = &bioscd; new_currdev.dd.d_unit = bd_bios2unit(initial_bootdev); } else if ((kargs->bootflags & KARGS_FLAGS_PXE) != 0) { /* we are booting from pxeldr */ new_currdev.dd.d_dev = &pxedisk; new_currdev.dd.d_unit = 0; } else { /* we don't know what our boot device is */ new_currdev.d_kind.biosdisk.slice = -1; new_currdev.d_kind.biosdisk.partition = 0; biosdev = -1; } #ifdef LOADER_ZFS_SUPPORT } else if ((kargs->bootflags & KARGS_FLAGS_ZFS) != 0) { /* * zargs was set in main() if we have new style extended * argument */ if (zargs != NULL && zargs->size >= offsetof(struct zfs_boot_args, primary_pool)) { /* sufficient data is provided */ new_currdev.d_kind.zfs.pool_guid = zargs->pool; new_currdev.d_kind.zfs.root_guid = zargs->root; if (zargs->size >= sizeof(*zargs) && zargs->primary_vdev != 0) { sprintf(buf, "%llu", zargs->primary_pool); setenv("vfs.zfs.boot.primary_pool", buf, 1); sprintf(buf, "%llu", zargs->primary_vdev); setenv("vfs.zfs.boot.primary_vdev", buf, 1); } } else { /* old style zfsboot block */ new_currdev.d_kind.zfs.pool_guid = kargs->zfspool; new_currdev.d_kind.zfs.root_guid = 0; } new_currdev.dd.d_dev = &zfs_dev; if ((bootonce = malloc(VDEV_PAD_SIZE)) != NULL) { if (zfs_get_bootonce(&new_currdev, OS_BOOTONCE_USED, bootonce, VDEV_PAD_SIZE) == 0) { setenv("zfs-bootonce", bootonce, 1); } free(bootonce); (void) zfs_attach_nvstore(&new_currdev); } #endif } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) { /* The passed-in boot device is bad */ new_currdev.d_kind.biosdisk.slice = -1; new_currdev.d_kind.biosdisk.partition = 0; biosdev = -1; } else { new_currdev.d_kind.biosdisk.slice = B_SLICE(initial_bootdev) - 1; new_currdev.d_kind.biosdisk.partition = B_PARTITION(initial_bootdev); biosdev = initial_bootinfo->bi_bios_dev; /* * If we are booted by an old bootstrap, we have to guess at * the BIOS unit number. We will lose if there is more than * one disk type and we are not booting from the * lowest-numbered disk type (ie. SCSI when IDE also exists). */ if ((biosdev == 0) && (B_TYPE(initial_bootdev) != 2)) { /* * biosdev doesn't match major, assume harddisk */ biosdev = 0x80 + B_UNIT(initial_bootdev); } } /* * If we are booting off of a BIOS disk and we didn't succeed * in determining which one we booted off of, just use disk0: * as a reasonable default. */ if ((new_currdev.dd.d_dev->dv_type == bioshd.dv_type) && ((new_currdev.dd.d_unit = bd_bios2unit(biosdev)) == -1)) { printf("Can't work out which disk we are booting " "from.\nGuessed BIOS device 0x%x not found by " "probes, defaulting to disk0:\n", biosdev); new_currdev.dd.d_unit = 0; } #ifdef LOADER_ZFS_SUPPORT if (new_currdev.dd.d_dev->dv_type == DEVT_ZFS) init_zfs_boot_options(zfs_fmtdev(&new_currdev)); #endif env_setenv("currdev", EV_VOLATILE, i386_fmtdev(&new_currdev), i386_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, i386_fmtdev(&new_currdev), env_noset, env_nounset); } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); printf("Rebooting...\n"); delay(1000000); __exit(0); } /* provide this for panic, as it's not in the startup code */ void exit(int code) { __exit(code); } COMMAND_SET(heap, "heap", "show heap usage", command_heap); static int command_heap(int argc, char *argv[]) { mallocstats(); printf("heap base at %p, top at %p, upper limit at %p\n", heap_bottom, sbrk(0), heap_top); return (CMD_OK); } /* ISA bus access functions for PnP. */ static int isa_inb(int port) { return (inb(port)); } static void isa_outb(int port, int value) { outb(port, value); } #ifdef LOADER_ZFS_SUPPORT static void i386_zfs_probe(void) { char devname[32]; struct i386_devdesc dev; /* * Open all the disks we can find and see if we can reconstruct * ZFS pools from them. */ dev.dd.d_dev = &bioshd; for (dev.dd.d_unit = 0; bd_unit2bios(&dev) >= 0; dev.dd.d_unit++) { snprintf(devname, sizeof(devname), "%s%d:", bioshd.dv_name, dev.dd.d_unit); zfs_probe_dev(devname, NULL); } } #endif diff --git a/stand/libsa/Makefile b/stand/libsa/Makefile index 838fefb260a8..815f479b5a0a 100644 --- a/stand/libsa/Makefile +++ b/stand/libsa/Makefile @@ -1,191 +1,191 @@ # $FreeBSD$ # Originally from $NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $ # # Notes: # - We don't use the libc strerror/sys_errlist because the string table is # quite large. # .include LIBSA_CPUARCH?=${MACHINE_CPUARCH} LIB?= sa # standalone components and stuff we have modified locally SRCS+= gzguts.h zutil.h __main.c abort.c assert.c bcd.c environment.c \ getopt.c gets.c globals.c \ hexdump.c pager.c panic.c printf.c strdup.c strerror.c \ - random.c sbrk.c twiddle.c zalloc.c zalloc_malloc.c + random.c sbrk.c tslog.c twiddle.c zalloc.c zalloc_malloc.c # private (pruned) versions of libc string functions SRCS+= strcasecmp.c .PATH: ${LIBCSRC}/net SRCS+= ntoh.c # string functions from libc .PATH: ${LIBCSRC}/string SRCS+= bcmp.c bcopy.c bzero.c ffs.c fls.c \ memccpy.c memchr.c memcmp.c memcpy.c memmove.c memset.c \ strcat.c strchr.c strchrnul.c strcmp.c strcpy.c stpcpy.c stpncpy.c \ strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \ strnlen.c strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c # stdlib functions from libc .PATH: ${LIBCSRC}/stdlib SRCS+= abs.c strtol.c strtoll.c strtoul.c strtoull.c # common boot code .PATH: ${SYSDIR}/kern SRCS+= subr_boot.c .if ${MACHINE_CPUARCH} == "arm" .PATH: ${LIBCSRC}/arm/gen # Do not generate movt/movw, because the relocation fixup for them does not # translate to the -Bsymbolic -pie format required by self_reloc() in loader(8). # Also, the fpu is not available in a standalone environment. CFLAGS.clang+= -mno-movt CFLAGS.clang+= -mfpu=none .PATH: ${SRCTOP}/contrib/llvm-project/compiler-rt/lib/builtins/arm/ SRCS+= aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S SRCS+= aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv" .PATH: ${LIBCSRC}/${MACHINE_CPUARCH}/gen .endif # Compiler support functions .PATH: ${SRCTOP}/contrib/llvm-project/compiler-rt/lib/builtins/ # __clzsi2 and ctzsi2 for various builtin functions SRCS+= clzsi2.c ctzsi2.c # Divide and modulus functions called by the compiler SRCS+= divmoddi4.c divmodsi4.c divdi3.c divsi3.c moddi3.c modsi3.c SRCS+= udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c SRCS+= ashldi3.c ashrdi3.c lshrdi3.c .if ${MACHINE_CPUARCH:Namd64:Ni386} == "" .PATH: ${SASRC}/x86 SRCS+= hypervisor.c .endif .if ${MACHINE_CPUARCH} == "powerpc" SRCS+= syncicache.c .endif # uuid functions from libc .PATH: ${LIBCSRC}/uuid SRCS+= uuid_create_nil.c uuid_equal.c uuid_from_string.c uuid_is_nil.c uuid_to_string.c # _setjmp/_longjmp .PATH: ${SASRC}/${LIBSA_CPUARCH} SRCS+= _setjmp.S # decompression functionality from libbz2 # NOTE: to actually test this functionality after libbz2 upgrade compile # loader(8) with LOADER_BZIP2_SUPPORT defined .PATH: ${SRCTOP}/contrib/bzip2 CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS SRCS+=bzlib.c crctable.c decompress.c huffman.c randtable.c # decompression functionality from zlib .PATH: ${SRCTOP}/sys/contrib/zlib CFLAGS+=-DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib SRCS+= adler32.c crc32.c SRCS+= infback.c inffast.c inflate.c inftrees.c zutil.c # lz4 decompression functionality .PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 SRCS+= lz4.c CFLAGS.lz4.c+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 # Create a subset of includes that are safe, as well as adjusting those that aren't # The lists may drive people nuts, but they are explicitly opt-in FAKE_DIRS=xlocale arpa SAFE_INCS=a.out.h assert.h elf.h limits.h nlist.h setjmp.h stddef.h stdbool.h string.h strings.h time.h unistd.h uuid.h STAND_H_INC=ctype.h fcntl.h signal.h stdio.h stdlib.h OTHER_INC=stdarg.h errno.h stdint.h beforedepend: mkdir -p ${FAKE_DIRS}; \ for i in ${SAFE_INCS}; do \ ln -sf ${SRCTOP}/include/$$i $$i; \ done; \ ln -sf ${SYSDIR}/${MACHINE}/include/stdarg.h stdarg.h; \ ln -sf ${SYSDIR}/sys/errno.h errno.h; \ ln -sf ${SYSDIR}/sys/stdint.h stdint.h; \ ln -sf ${SRCTOP}/include/arpa/inet.h arpa/inet.h; \ ln -sf ${SRCTOP}/include/arpa/tftp.h arpa/tftp.h; \ for i in _time.h _strings.h _string.h; do \ [ -f xlocale/$$i ] || :> xlocale/$$i; \ done; \ for i in ${STAND_H_INC}; do \ ln -sf ${SASRC}/stand.h $$i; \ done CLEANDIRS+=${FAKE_DIRS} CLEANFILES+= ${SAFE_INCS} ${STAND_H_INC} ${OTHER_INC} # io routines SRCS+= closeall.c dev.c ioctl.c nullfs.c stat.c \ fstat.c close.c lseek.c open.c read.c write.c readdir.c # SMBios routines SRCS+= smbios.c .if !defined(BOOT_HIDE_SERIAL_NUMBERS) # Export serial numbers, UUID, and asset tag from loader. CFLAGS.smbios.c+= -DSMBIOS_SERIAL_NUMBERS .if defined(BOOT_LITTLE_ENDIAN_UUID) # Use little-endian UUID format as defined in SMBIOS 2.6. CFLAGS.smbios.c+= -DSMBIOS_LITTLE_ENDIAN_UUID .elif defined(BOOT_NETWORK_ENDIAN_UUID) # Use network-endian UUID format for backward compatibility. CFLAGS.smbios.c+= -DSMBIOS_NETWORK_ENDIAN_UUID .endif .endif # network routines SRCS+= arp.c ether.c ip.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c # network info services: SRCS+= bootp.c rarp.c bootparam.c # boot filesystems SRCS+= ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c SRCS+= dosfs.c ext2fs.c SRCS+= splitfs.c SRCS+= pkgfs.c # Time support SRCS+= time.c # kernel ufs support .PATH: ${SRCTOP}/sys/ufs/ffs SRCS+=ffs_subr.c ffs_tables.c CFLAGS.bzipfs.c+= -I${SRCTOP}/contrib/bzip2 # explicit_bzero and calculate_crc32c .PATH: ${SYSDIR}/libkern SRCS+= explicit_bzero.c crc32_libkern.c # Maybe GELI .if ${MK_LOADER_GELI} == "yes" .include "${SASRC}/geli/Makefile.inc" .endif .if ${MK_LOADER_VERIEXEC} == "yes" && ${MK_BEARSSL} == "yes" .include "${SRCTOP}/lib/libbearssl/Makefile.libsa.inc" .include "${SRCTOP}/lib/libsecureboot/Makefile.libsa.inc" .endif # Maybe ZFS .if ${MK_LOADER_ZFS} == "yes" .include "${SASRC}/zfs/Makefile.inc" .endif .include diff --git a/stand/libsa/open.c b/stand/libsa/open.c index e2591b680319..c62e48f0e1d8 100644 --- a/stand/libsa/open.c +++ b/stand/libsa/open.c @@ -1,206 +1,211 @@ /* $NetBSD: open.c,v 1.16 1997/01/28 09:41:03 pk Exp $ */ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)open.c 8.1 (Berkeley) 6/11/93 * * * Copyright (c) 1989, 1990, 1991 Carnegie Mellon University * All Rights Reserved. * * Author: Alessandro Forin * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include "stand.h" struct fs_ops *exclusive_file_system; /* * Open file list. The current implementation and assumption is, * we only remove entries from tail and we only add new entries to tail. * This decision is to keep file id management simple - we get list * entries ordered by continiously growing f_id field. * If we do have multiple files open and we do close file not from tail, * this entry will be marked unused. open() will reuse unused entry, or * close will free all unused tail entries. * * Only case we expect open file list to grow long, is with zfs pools with * many disks. */ file_list_t files = TAILQ_HEAD_INITIALIZER(files); /* * Walk file list and return pointer to open_file structure. * if fd is < 0, return first unused open_file. */ struct open_file * fd2open_file(int fd) { struct open_file *f; TAILQ_FOREACH(f, &files, f_link) { if (fd >= 0) { if (f->f_id == fd) break; continue; } if (f->f_flags == 0) break; } return (f); } static int o_gethandle(struct open_file **ptr) { struct open_file *f, *last; /* Pick up unused entry */ f = fd2open_file(-1); if (f != NULL) { *ptr = f; return (f->f_id); } /* Add new entry */ f = calloc(1, sizeof (*f)); if (f == NULL) return (-1); last = TAILQ_LAST(&files, file_list); if (last != NULL) f->f_id = last->f_id + 1; TAILQ_INSERT_TAIL(&files, f, f_link); *ptr = f; return (f->f_id); } static void o_rainit(struct open_file *f) { f->f_rabuf = malloc(SOPEN_RASIZE); f->f_ralen = 0; f->f_raoffset = 0; } int open(const char *fname, int mode) { struct fs_ops *fs; struct open_file *f; int fd, i, error, besterror; const char *file; + TSENTER(); + if ((fd = o_gethandle(&f)) == -1) { errno = EMFILE; return (-1); } f->f_flags = mode + 1; f->f_dev = NULL; f->f_ops = NULL; f->f_offset = 0; f->f_devdata = NULL; file = NULL; if (exclusive_file_system != NULL) { fs = exclusive_file_system; error = (fs->fo_open)(fname, f); if (error == 0) goto ok; goto err; } error = devopen(f, fname, &file); if (error || (((f->f_flags & F_NODEV) == 0) && f->f_dev == NULL)) goto err; /* see if we opened a raw device; otherwise, 'file' is the file name. */ if (file == NULL || *file == '\0') { f->f_flags |= F_RAW; f->f_rabuf = NULL; + TSEXIT(); return (fd); } /* pass file name to the different filesystem open routines */ besterror = ENOENT; for (i = 0; file_system[i] != NULL; i++) { fs = file_system[i]; error = (fs->fo_open)(file, f); if (error == 0) goto ok; if (error != EINVAL) besterror = error; } error = besterror; if ((f->f_flags & F_NODEV) == 0 && f->f_dev != NULL) f->f_dev->dv_close(f); if (error) devclose(f); err: f->f_flags = 0; errno = error; + TSEXIT(); return (-1); ok: f->f_ops = fs; o_rainit(f); + TSEXIT(); return (fd); } diff --git a/stand/libsa/read.c b/stand/libsa/read.c index 7d19dae0670d..084bca2522f3 100644 --- a/stand/libsa/read.c +++ b/stand/libsa/read.c @@ -1,134 +1,141 @@ /* $NetBSD: read.c,v 1.8 1997/01/22 00:38:12 cgd Exp $ */ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)read.c 8.1 (Berkeley) 6/11/93 * * * Copyright (c) 1989, 1990, 1991 Carnegie Mellon University * All Rights Reserved. * * Author: Alessandro Forin * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include #include "stand.h" ssize_t read(int fd, void *dest, size_t bcount) { struct open_file *f; size_t resid; + TSENTER(); f = fd2open_file(fd); if (f == NULL || !(f->f_flags & F_READ)) { errno = EBADF; return (-1); } if (f->f_flags & F_RAW) { twiddle(4); errno = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, btodb(f->f_offset), bcount, dest, &resid); if (errno) return (-1); f->f_offset += resid; + TSEXIT(); return (resid); } /* * Optimise reads from regular files using a readahead buffer. * If the request can't be satisfied from the current buffer contents, * check to see if it should be bypassed, or refill the buffer and * complete the request. */ resid = bcount; for (;;) { size_t ccount, cresid; /* how much can we supply? */ ccount = imin(f->f_ralen, resid); if (ccount > 0) { bcopy(f->f_rabuf + f->f_raoffset, dest, ccount); f->f_raoffset += ccount; f->f_ralen -= ccount; resid -= ccount; - if (resid == 0) + if (resid == 0) { + TSEXIT(); return (bcount); + } dest = (char *)dest + ccount; } /* will filling the readahead buffer again not help? */ if (f->f_rabuf == NULL || resid >= SOPEN_RASIZE) { /* * bypass the rest of the request and leave the * buffer empty */ errno = (f->f_ops->fo_read)(f, dest, resid, &cresid); if (errno != 0) return (-1); + TSEXIT(); return (bcount - cresid); } /* fetch more data */ errno = (f->f_ops->fo_read)(f, f->f_rabuf, SOPEN_RASIZE, &cresid); if (errno != 0) return (-1); f->f_raoffset = 0; f->f_ralen = SOPEN_RASIZE - cresid; /* no more data, return what we had */ - if (f->f_ralen == 0) + if (f->f_ralen == 0) { + TSEXIT(); return (bcount - resid); + } } } diff --git a/stand/libsa/stand.h b/stand/libsa/stand.h index e49b8c50619d..535fee31d586 100644 --- a/stand/libsa/stand.h +++ b/stand/libsa/stand.h @@ -1,480 +1,489 @@ /* * Copyright (c) 1998 Michael Smith. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * From $NetBSD: stand.h,v 1.22 1997/06/26 19:17:40 drochner Exp $ */ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)stand.h 8.1 (Berkeley) 6/11/93 */ #ifndef STAND_H #define STAND_H #include #include #include #include #include /* this header intentionally exports NULL from */ #include #define strcoll(a, b) strcmp((a), (b)) #define CHK(fmt, args...) printf("%s(%d): " fmt "\n", __func__, __LINE__ , ##args) #define PCHK(fmt, args...) {printf("%s(%d): " fmt "\n", __func__, __LINE__ , ##args); getchar();} #include /* special stand error codes */ #define EADAPT (ELAST+1) /* bad adaptor */ #define ECTLR (ELAST+2) /* bad controller */ #define EUNIT (ELAST+3) /* bad unit */ #define ESLICE (ELAST+4) /* bad slice */ #define EPART (ELAST+5) /* bad partition */ #define ERDLAB (ELAST+6) /* can't read disk label */ #define EUNLAB (ELAST+7) /* unlabeled disk */ #define EOFFSET (ELAST+8) /* relative seek not supported */ #define ESALAST (ELAST+8) /* */ /* Partial signal emulation for sig_atomic_t */ #include struct open_file; /* * This structure is used to define file system operations in a file system * independent way. * * XXX note that filesystem providers should export a pointer to their fs_ops * struct, so that consumers can reference this and thus include the * filesystems that they require. */ struct fs_ops { const char *fs_name; int (*fo_open)(const char *path, struct open_file *f); int (*fo_close)(struct open_file *f); int (*fo_read)(struct open_file *f, void *buf, size_t size, size_t *resid); int (*fo_write)(struct open_file *f, const void *buf, size_t size, size_t *resid); off_t (*fo_seek)(struct open_file *f, off_t offset, int where); int (*fo_stat)(struct open_file *f, struct stat *sb); int (*fo_readdir)(struct open_file *f, struct dirent *d); }; /* * libstand-supplied filesystems */ extern struct fs_ops ufs_fsops; extern struct fs_ops tftp_fsops; extern struct fs_ops nfs_fsops; extern struct fs_ops cd9660_fsops; extern struct fs_ops gzipfs_fsops; extern struct fs_ops bzipfs_fsops; extern struct fs_ops dosfs_fsops; extern struct fs_ops ext2fs_fsops; extern struct fs_ops splitfs_fsops; extern struct fs_ops pkgfs_fsops; extern struct fs_ops efihttp_fsops; /* where values for lseek(2) */ #define SEEK_SET 0 /* set file offset to offset */ #define SEEK_CUR 1 /* set file offset to current plus offset */ #define SEEK_END 2 /* set file offset to EOF plus offset */ /* * Device switch */ struct devsw { const char dv_name[8]; int dv_type; /* opaque type constant, arch-dependant */ #define DEVT_NONE 0 #define DEVT_DISK 1 #define DEVT_NET 2 #define DEVT_CD 3 #define DEVT_ZFS 4 #define DEVT_FD 5 int (*dv_init)(void); /* early probe call */ int (*dv_strategy)(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize); int (*dv_open)(struct open_file *f, ...); int (*dv_close)(struct open_file *f); int (*dv_ioctl)(struct open_file *f, u_long cmd, void *data); int (*dv_print)(int verbose); /* print device information */ void (*dv_cleanup)(void); }; /* * libstand-supplied device switch */ extern struct devsw netdev; extern int errno; /* * Generic device specifier; architecture-dependent * versions may be larger, but should be allowed to * overlap. */ struct devdesc { struct devsw *d_dev; int d_unit; void *d_opendata; }; struct open_file { int f_flags; /* see F_* below */ struct devsw *f_dev; /* pointer to device operations */ void *f_devdata; /* device specific data */ struct fs_ops *f_ops; /* pointer to file system operations */ void *f_fsdata; /* file system specific data */ off_t f_offset; /* current file offset */ char *f_rabuf; /* readahead buffer pointer */ size_t f_ralen; /* valid data in readahead buffer */ off_t f_raoffset; /* consumer offset in readahead buffer */ int f_id; /* file number */ TAILQ_ENTRY(open_file) f_link; /* next entry */ #define SOPEN_RASIZE 512 }; typedef TAILQ_HEAD(file_list, open_file) file_list_t; extern file_list_t files; extern struct open_file *fd2open_file(int); /* f_flags values */ #define F_READ 0x0001 /* file opened for reading */ #define F_WRITE 0x0002 /* file opened for writing */ #define F_RAW 0x0004 /* raw device open - no file system */ #define F_NODEV 0x0008 /* network open - no device */ #define F_MASK 0xFFFF /* Mode modifier for strategy() */ #define F_NORA (0x01 << 16) /* Disable Read-Ahead */ #define isascii(c) (((c) & ~0x7F) == 0) static __inline int isupper(int c) { return c >= 'A' && c <= 'Z'; } static __inline int islower(int c) { return c >= 'a' && c <= 'z'; } static __inline int isspace(int c) { return c == ' ' || (c >= 0x9 && c <= 0xd); } static __inline int isdigit(int c) { return c >= '0' && c <= '9'; } static __inline int isxdigit(int c) { return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } static __inline int isalpha(int c) { return isupper(c) || islower(c); } static __inline int isalnum(int c) { return isalpha(c) || isdigit(c); } static __inline int iscntrl(int c) { return (c >= 0 && c < ' ') || c == 127; } static __inline int isgraph(int c) { return c >= '!' && c <= '~'; } static __inline int ispunct(int c) { return (c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~'); } static __inline int toupper(int c) { return islower(c) ? c - 'a' + 'A' : c; } static __inline int tolower(int c) { return isupper(c) ? c - 'A' + 'a' : c; } /* sbrk emulation */ extern void setheap(void *base, void *top); extern char *sbrk(int incr); extern int printf(const char *fmt, ...) __printflike(1, 2); extern int asprintf(char **buf, const char *cfmt, ...) __printflike(2, 3); extern int sprintf(char *buf, const char *cfmt, ...) __printflike(2, 3); extern int snprintf(char *buf, size_t size, const char *cfmt, ...) __printflike(3, 4); extern int vprintf(const char *fmt, __va_list); extern int vsprintf(char *buf, const char *cfmt, __va_list); extern int vsnprintf(char *buf, size_t size, const char *cfmt, __va_list); extern void twiddle(u_int callerdiv); extern void twiddle_divisor(u_int globaldiv); extern void ngets(char *, int); #define gets(x) ngets((x), 0) extern int fgetstr(char *buf, int size, int fd); extern int open(const char *, int); #define O_RDONLY 0x0 #define O_WRONLY 0x1 #define O_RDWR 0x2 #define O_ACCMODE 0x3 /* NOT IMPLEMENTED */ #define O_CREAT 0x0200 /* create if nonexistent */ #define O_TRUNC 0x0400 /* truncate to zero length */ extern int close(int); extern void closeall(void); extern ssize_t read(int, void *, size_t); extern ssize_t write(int, const void *, size_t); extern struct dirent *readdirfd(int); extern void srandom(unsigned int); extern long random(void); /* imports from stdlib, locally modified */ extern char *optarg; /* getopt(3) external variables */ extern int optind, opterr, optopt, optreset; extern int getopt(int, char * const [], const char *); /* pager.c */ extern void pager_open(void); extern void pager_close(void); extern int pager_output(const char *lines); extern int pager_file(const char *fname); /* No signal state to preserve */ #define setjmp _setjmp #define longjmp _longjmp /* environment.c */ #define EV_DYNAMIC (1<<0) /* value was dynamically allocated, free if changed/unset */ #define EV_VOLATILE (1<<1) /* value is volatile, make a copy of it */ #define EV_NOHOOK (1<<2) /* don't call hook when setting */ struct env_var; typedef char *(ev_format_t)(struct env_var *ev); typedef int (ev_sethook_t)(struct env_var *ev, int flags, const void *value); typedef int (ev_unsethook_t)(struct env_var *ev); struct env_var { char *ev_name; int ev_flags; void *ev_value; ev_sethook_t *ev_sethook; ev_unsethook_t *ev_unsethook; struct env_var *ev_next, *ev_prev; }; extern struct env_var *environ; extern struct env_var *env_getenv(const char *name); extern int env_setenv(const char *name, int flags, const void *value, ev_sethook_t sethook, ev_unsethook_t unsethook); extern void env_discard(struct env_var *); extern char *getenv(const char *name); extern int setenv(const char *name, const char *value, int overwrite); extern int putenv(char *string); extern int unsetenv(const char *name); extern ev_sethook_t env_noset; /* refuse set operation */ extern ev_unsethook_t env_nounset; /* refuse unset operation */ /* stdlib.h routines */ extern int abs(int a); extern void abort(void) __dead2; extern long strtol(const char * __restrict, char ** __restrict, int); extern long long strtoll(const char * __restrict, char ** __restrict, int); extern unsigned long strtoul(const char * __restrict, char ** __restrict, int); extern unsigned long long strtoull(const char * __restrict, char ** __restrict, int); /* BCD conversions (undocumented) */ extern u_char const bcd2bin_data[]; extern u_char const bin2bcd_data[]; extern char const hex2ascii_data[]; #define bcd2bin(bcd) (bcd2bin_data[bcd]) #define bin2bcd(bin) (bin2bcd_data[bin]) #define hex2ascii(hex) (hex2ascii_data[hex]) #define validbcd(bcd) (bcd == 0 || (bcd > 0 && bcd <= 0x99 && bcd2bin_data[bcd] != 0)) /* min/max (undocumented) */ static __inline int imax(int a, int b) { return (a > b ? a : b); } static __inline int imin(int a, int b) { return (a < b ? a : b); } static __inline long lmax(long a, long b) { return (a > b ? a : b); } static __inline long lmin(long a, long b) { return (a < b ? a : b); } static __inline u_int max(u_int a, u_int b) { return (a > b ? a : b); } static __inline u_int min(u_int a, u_int b) { return (a < b ? a : b); } static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b ? a : b); } static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b ? a : b); } static __inline u_long ulmax(u_long a, u_long b) { return (a > b ? a : b); } static __inline u_long ulmin(u_long a, u_long b) { return (a < b ? a : b); } /* null functions for device/filesystem switches (undocumented) */ extern int nodev(void); extern int noioctl(struct open_file *, u_long, void *); extern void nullsys(void); extern int null_open(const char *path, struct open_file *f); extern int null_close(struct open_file *f); extern int null_read(struct open_file *f, void *buf, size_t size, size_t *resid); extern int null_write(struct open_file *f, const void *buf, size_t size, size_t *resid); extern off_t null_seek(struct open_file *f, off_t offset, int where); extern int null_stat(struct open_file *f, struct stat *sb); extern int null_readdir(struct open_file *f, struct dirent *d); /* * Machine dependent functions and data, must be provided or stubbed by * the consumer */ extern void exit(int) __dead2; extern int getchar(void); extern int ischar(void); extern void putchar(int); extern int devopen(struct open_file *, const char *, const char **); extern int devclose(struct open_file *f); extern void panic(const char *, ...) __dead2 __printflike(1, 2); extern void panic_action(void) __weak_symbol __dead2; extern time_t getsecs(void); extern struct fs_ops *file_system[]; extern struct fs_ops *exclusive_file_system; extern struct devsw *devsw[]; /* * Time routines */ time_t time(time_t *); /* * Expose byteorder(3) functions. */ #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED extern uint32_t htonl(uint32_t); extern uint16_t htons(uint16_t); extern uint32_t ntohl(uint32_t); extern uint16_t ntohs(uint16_t); #endif #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif void *Malloc(size_t, const char *, int); void *Memalign(size_t, size_t, const char *, int); void *Calloc(size_t, size_t, const char *, int); void *Realloc(void *, size_t, const char *, int); void *Reallocf(void *, size_t, const char *, int); void Free(void *, const char *, int); extern void mallocstats(void); const char *x86_hypervisor(void); #ifdef USER_MALLOC extern void *malloc(size_t); extern void *memalign(size_t, size_t); extern void *calloc(size_t, size_t); extern void free(void *); extern void *realloc(void *, size_t); extern void *reallocf(void *, size_t); #elif defined(DEBUG_MALLOC) #define malloc(x) Malloc(x, __FILE__, __LINE__) #define memalign(x, y) Memalign(x, y, __FILE__, __LINE__) #define calloc(x, y) Calloc(x, y, __FILE__, __LINE__) #define free(x) Free(x, __FILE__, __LINE__) #define realloc(x, y) Realloc(x, y, __FILE__, __LINE__) #define reallocf(x, y) Reallocf(x, y, __FILE__, __LINE__) #else #define malloc(x) Malloc(x, NULL, 0) #define memalign(x, y) Memalign(x, y, NULL, 0) #define calloc(x, y) Calloc(x, y, NULL, 0) #define free(x) Free(x, NULL, 0) #define realloc(x, y) Realloc(x, y, NULL, 0) #define reallocf(x, y) Reallocf(x, y, NULL, 0) #endif /* * va <-> pa routines. MD code must supply. */ caddr_t ptov(uintptr_t); /* hexdump.c */ void hexdump(caddr_t region, size_t len); +/* tslog.c */ +#define TSRAW(a, b, c) tslog(a, b, c) +#define TSENTER() TSRAW("ENTER", __func__, NULL) +#define TSEXIT() TSRAW("EXIT", __func__, NULL) +#define TSLINE() TSRAW("EVENT", __FILE__, __XSTRING(__LINE__)) +void tslog(const char *, const char *, const char *); +void tslog_setbuf(void * buf, size_t len); +void tslog_getbuf(void ** buf, size_t * len); + #endif /* STAND_H */ diff --git a/stand/libsa/twiddle.c b/stand/libsa/tslog.c similarity index 50% copy from stand/libsa/twiddle.c copy to stand/libsa/tslog.c index 31828542f7f3..7ea18e0bb8f8 100644 --- a/stand/libsa/twiddle.c +++ b/stand/libsa/tslog.c @@ -1,69 +1,78 @@ /*- - * Copyright (c) 1986, 1988, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. + * Copyright (c) 2021 Colin Percival + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)subr_prf.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include -#include "stand.h" -/* Extra functions from NetBSD standalone printf.c */ +#include + +#include -static u_int globaldiv; +/* Buffer for holding tslog data in string format. */ +static char * tslog_buf = NULL; +static size_t tslog_buflen = 0; +static size_t tslog_bufpos = 0; void -twiddle(u_int callerdiv) +tslog_setbuf(void * buf, size_t len) { - static u_int callercnt, globalcnt, pos; - callercnt++; - if (callerdiv > 1 && (callercnt % callerdiv) != 0) - return; + tslog_buf = (char *)buf; + tslog_buflen = len; + tslog_bufpos = 0; +} - globalcnt++; - if (globaldiv > 1 && (globalcnt % globaldiv) != 0) - return; +void +tslog_getbuf(void ** buf, size_t * len) +{ - putchar("|/-\\"[pos++ & 3]); - putchar('\b'); + *buf = (void *)tslog_buf; + *len = tslog_bufpos; } void -twiddle_divisor(u_int gdiv) +tslog(const char * type, const char * f, const char * s) { +#if defined(__amd64__) || defined(__i386__) + uint64_t tsc = rdtsc(); +#else + uint64_t tsc = 0; +#endif + int len; + + /* If we have no buffer, do nothing. */ + if (tslog_buf == NULL) + return; - globaldiv = gdiv; + /* Append to existing buffer, if we have enough space. */ + len = snprintf(&tslog_buf[tslog_bufpos], + tslog_buflen - tslog_bufpos, "0x0 %llu %s %s%s%s\n", + (unsigned long long)tsc, type, f, s ? " " : "", s ? s : ""); + if ((len > 0) && (tslog_bufpos + len <= tslog_buflen)) + tslog_bufpos += len; } diff --git a/stand/libsa/twiddle.c b/stand/libsa/twiddle.c index 31828542f7f3..7565295fa1a3 100644 --- a/stand/libsa/twiddle.c +++ b/stand/libsa/twiddle.c @@ -1,69 +1,76 @@ /*- * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)subr_prf.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include #include "stand.h" /* Extra functions from NetBSD standalone printf.c */ static u_int globaldiv; void twiddle(u_int callerdiv) { static u_int callercnt, globalcnt, pos; + TSENTER(); + callercnt++; - if (callerdiv > 1 && (callercnt % callerdiv) != 0) + if (callerdiv > 1 && (callercnt % callerdiv) != 0) { + TSEXIT(); return; + } globalcnt++; - if (globaldiv > 1 && (globalcnt % globaldiv) != 0) + if (globaldiv > 1 && (globalcnt % globaldiv) != 0) { + TSEXIT(); return; + } putchar("|/-\\"[pos++ & 3]); putchar('\b'); + TSEXIT(); } void twiddle_divisor(u_int gdiv) { globaldiv = gdiv; } diff --git a/stand/loader.mk b/stand/loader.mk index 3a38a9bc9e63..85d3aad326de 100644 --- a/stand/loader.mk +++ b/stand/loader.mk @@ -1,196 +1,196 @@ # $FreeBSD$ .PATH: ${LDRSRC} ${BOOTSRC}/libsa CFLAGS+=-I${LDRSRC} SRCS+= boot.c commands.c console.c devopen.c interp.c SRCS+= interp_backslash.c interp_parse.c ls.c misc.c -SRCS+= module.c nvstore.c pnglite.c +SRCS+= module.c nvstore.c pnglite.c tslog.c CFLAGS.module.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite .PATH: ${SRCTOP}/contrib/pnglite CFLAGS.pnglite.c+= -I${SRCTOP}/contrib/pnglite CFLAGS.pnglite.c+= -DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib .if ${MACHINE} == "i386" || ${MACHINE_CPUARCH} == "amd64" SRCS+= load_elf32.c load_elf32_obj.c reloc_elf32.c SRCS+= load_elf64.c load_elf64_obj.c reloc_elf64.c CFLAGS.load_elf32.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite CFLAGS.load_elf64.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite .elif ${MACHINE_CPUARCH} == "aarch64" SRCS+= load_elf64.c reloc_elf64.c CFLAGS.load_elf64.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite .elif ${MACHINE_CPUARCH} == "arm" SRCS+= load_elf32.c reloc_elf32.c CFLAGS.load_elf32.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite .elif ${MACHINE_CPUARCH} == "powerpc" SRCS+= load_elf32.c reloc_elf32.c SRCS+= load_elf64.c reloc_elf64.c SRCS+= metadata.c CFLAGS.load_elf32.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite CFLAGS.load_elf64.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite .elif ${MACHINE_ARCH:Mmips64*} != "" SRCS+= load_elf64.c reloc_elf64.c SRCS+= metadata.c CFLAGS.load_elf64.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite .elif ${MACHINE} == "mips" SRCS+= load_elf32.c reloc_elf32.c SRCS+= metadata.c CFLAGS.load_elf32.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite .elif ${MACHINE_CPUARCH} == "riscv" SRCS+= load_elf64.c reloc_elf64.c SRCS+= metadata.c CFLAGS.load_elf64.c += -I$(SRCTOP)/sys/teken -I${SRCTOP}/contrib/pnglite .endif .if ${LOADER_DISK_SUPPORT:Uyes} == "yes" CFLAGS.part.c+= -DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib SRCS+= disk.c part.c vdisk.c .endif .if ${LOADER_NET_SUPPORT:Uno} == "yes" SRCS+= dev_net.c .endif .if defined(HAVE_BCACHE) SRCS+= bcache.c .endif .if defined(MD_IMAGE_SIZE) CFLAGS+= -DMD_IMAGE_SIZE=${MD_IMAGE_SIZE} SRCS+= md.c .else CLEANFILES+= md.o .endif # Machine-independant ISA PnP .if defined(HAVE_ISABUS) SRCS+= isapnp.c .endif .if defined(HAVE_PNP) SRCS+= pnp.c .endif .if ${LOADER_INTERP} == "lua" SRCS+= interp_lua.c .include "${BOOTSRC}/lua.mk" LDR_INTERP= ${LIBLUA} LDR_INTERP32= ${LIBLUA32} CFLAGS.interp_lua.c= -DLUA_PATH=\"${LUAPATH}\" -I${FLUASRC}/modules .elif ${LOADER_INTERP} == "4th" SRCS+= interp_forth.c .include "${BOOTSRC}/ficl.mk" LDR_INTERP= ${LIBFICL} LDR_INTERP32= ${LIBFICL32} .elif ${LOADER_INTERP} == "simp" SRCS+= interp_simple.c .else .error Unknown interpreter ${LOADER_INTERP} .endif .if ${MK_LOADER_VERIEXEC} != "no" CFLAGS+= -DLOADER_VERIEXEC -I${SRCTOP}/lib/libsecureboot/h .if ${MK_LOADER_VERIEXEC_VECTX} != "no" CFLAGS+= -DLOADER_VERIEXEC_VECTX .endif .endif .if ${MK_LOADER_VERIEXEC_PASS_MANIFEST} != "no" CFLAGS+= -DLOADER_VERIEXEC_PASS_MANIFEST -I${SRCTOP}/lib/libsecureboot/h .endif .if defined(BOOT_PROMPT_123) CFLAGS+= -DBOOT_PROMPT_123 .endif .if defined(LOADER_INSTALL_SUPPORT) SRCS+= install.c .endif # Filesystem support .if ${LOADER_CD9660_SUPPORT:Uno} == "yes" CFLAGS+= -DLOADER_CD9660_SUPPORT .endif .if ${LOADER_EXT2FS_SUPPORT:Uno} == "yes" CFLAGS+= -DLOADER_EXT2FS_SUPPORT .endif .if ${LOADER_MSDOS_SUPPORT:Uno} == "yes" CFLAGS+= -DLOADER_MSDOS_SUPPORT .endif .if ${LOADER_UFS_SUPPORT:Uyes} == "yes" CFLAGS+= -DLOADER_UFS_SUPPORT .endif # Compression .if ${LOADER_GZIP_SUPPORT:Uno} == "yes" CFLAGS+= -DLOADER_GZIP_SUPPORT .endif .if ${LOADER_BZIP2_SUPPORT:Uno} == "yes" CFLAGS+= -DLOADER_BZIP2_SUPPORT .endif # Network related things .if ${LOADER_NET_SUPPORT:Uno} == "yes" CFLAGS+= -DLOADER_NET_SUPPORT .endif .if ${LOADER_NFS_SUPPORT:Uno} == "yes" CFLAGS+= -DLOADER_NFS_SUPPORT .endif .if ${LOADER_TFTP_SUPPORT:Uno} == "yes" CFLAGS+= -DLOADER_TFTP_SUPPORT .endif # Partition support .if ${LOADER_GPT_SUPPORT:Uyes} == "yes" CFLAGS+= -DLOADER_GPT_SUPPORT .endif .if ${LOADER_MBR_SUPPORT:Uyes} == "yes" CFLAGS+= -DLOADER_MBR_SUPPORT .endif .if ${HAVE_ZFS:Uno} == "yes" CFLAGS+= -DLOADER_ZFS_SUPPORT CFLAGS+= -I${ZFSSRC} CFLAGS+= -I${SYSDIR}/cddl/boot/zfs CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common SRCS+= zfs_cmd.c .endif LIBFICL= ${BOOTOBJ}/ficl/libficl.a .if ${MACHINE} == "i386" LIBFICL32= ${LIBFICL} .else LIBFICL32= ${BOOTOBJ}/ficl32/libficl.a .endif LIBLUA= ${BOOTOBJ}/liblua/liblua.a .if ${MACHINE} == "i386" LIBLUA32= ${LIBLUA} .else LIBLUA32= ${BOOTOBJ}/liblua32/liblua.a .endif CLEANFILES+= vers.c VERSION_FILE?= ${.CURDIR}/version .if ${MK_REPRODUCIBLE_BUILD} != no REPRO_FLAG= -r .endif vers.c: ${LDRSRC}/newvers.sh ${VERSION_FILE} sh ${LDRSRC}/newvers.sh ${REPRO_FLAG} ${VERSION_FILE} \ ${NEWVERSWHAT} .if ${MK_LOADER_VERBOSE} != "no" CFLAGS+= -DELF_VERBOSE .endif .if !empty(HELP_FILES) HELP_FILES+= ${LDRSRC}/help.common CLEANFILES+= loader.help FILES+= loader.help loader.help: ${HELP_FILES} cat ${HELP_FILES} | awk -f ${LDRSRC}/merge_help.awk > ${.TARGET} .endif diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 7b27e5b8a885..246d72234a29 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,1954 +1,1955 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exec, "char *"); SDT_PROBE_DEFINE1(proc, , , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, , , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); int coredump_pack_fileinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, &coredump_pack_fileinfo, 0, "Enable file path packing in 'procstat -f' coredump notes"); int coredump_pack_vmmapinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, &coredump_pack_vmmapinfo, 0, "Enable file path packing in 'procstat -v' coredump notes"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_ps_strings, "LU", "Location of process' ps_strings structure"); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_usrstack, "LU", "Top of process stack"); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_stackprot, "I", "Stack memory permissions"); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, "Process' command line characters cache limit"); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); int core_dump_can_intr = 1; SYSCTL_INT(_kern, OID_AUTO, core_dump_can_intr, CTLFLAG_RWTUN, &core_dump_can_intr, 0, "Core dumping interruptible with SIGKILL"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_psstrings; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, sizeof(p->p_sysent->sv_psstrings)); return error; } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_usrstack; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, sizeof(p->p_sysent->sv_usrstack)); return error; } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. */ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(struct thread *td, struct execve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; }; #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL, oldvmspace); } post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); #else return (ENOSYS); #endif } int pre_execve(struct thread *td, struct vmspace **oldvmspace) { struct proc *p; int error; KASSERT(td == curthread, ("non-current thread %p", td)); error = 0; p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY) != 0) error = ERESTART; PROC_UNLOCK(p); } KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); *oldvmspace = p->p_vmspace; return (error); } void post_execve(struct thread *td, int error, struct vmspace *oldvmspace) { struct proc *p; KASSERT(td == curthread, ("non-current thread %p", td)); p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == EJUSTRETURN) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } exec_cleanup(td, oldvmspace); } /* * kern_execve() has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace) { + TSEXEC(td->td_proc->p_pid, args->begin_argv); AUDIT_ARG_ARGV(args->begin_argv, args->argc, exec_args_get_begin_envv(args) - args->begin_argv); AUDIT_ARG_ENVV(exec_args_get_begin_envv(args), args->envc, args->endp - exec_args_get_begin_envv(args)); return (do_execve(td, args, mac_p, oldvmspace)); } static void execve_nosetid(struct image_params *imgp) { imgp->credential_setid = false; if (imgp->newcred != NULL) { crfree(imgp->newcred); imgp->newcred = NULL; } } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. */ static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace) { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *oldcred; struct uidinfo *euip = NULL; uintptr_t stack_base; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct ktr_io_params *kiop; #endif struct vnode *oldtextvp, *newtextvp; struct vnode *oldtextdvp, *newtextdvp; char *oldbinname, *newbinname; bool credential_changing; #ifdef MAC struct label *interpvplabel = NULL; bool will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif int error, i, orig_osrel; uint32_t orig_fctl0; size_t freepath_size; static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; oldtextvp = oldtextdvp = NULL; newtextvp = newtextdvp = NULL; newbinname = oldbinname = NULL; #ifdef KTRACE kiop = NULL; #endif /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; orig_osrel = p->p_osrel; orig_fctl0 = p->p_fctl0; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif /* * Translate the file name. namei() returns a vnode * pointer in ni_vp among other things. */ NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW | SAVENAME | AUDITVNODE1 | WANTPARENT, UIO_SYSSPACE, args->fname, td); error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; newtextdvp = nd.ni_dvp; nd.ni_dvp = NULL; newbinname = malloc(nd.ni_cnd.cn_namelen + 1, M_PARGS, M_WAITOK); memcpy(newbinname, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen); newbinname[nd.ni_cnd.cn_namelen] = '\0'; imgp->vp = newtextvp; /* * Do the best to calculate the full path to the image file. */ if (args->fname[0] == '/') { imgp->execpath = args->fname; } else { VOP_UNLOCK(imgp->vp); freepath_size = MAXPATHLEN; if (vn_fullpath_hardlink(newtextvp, newtextdvp, newbinname, nd.ni_cnd.cn_namelen, &imgp->execpath, &imgp->freepath, &freepath_size) != 0) imgp->execpath = args->fname; vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } } else { AUDIT_ARG_FD(args->fd); /* * If the descriptors was not opened with O_PATH, then * we require that it was opened with O_EXEC or * O_RDONLY. In either case, exec_check_permissions() * below checks _current_ file access mode regardless * of the permissions additionally checked at the * open(2). */ error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp); if (error != 0) goto exec_fail; if (vn_fullpath(newtextvp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(newtextvp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions. Also 'opens' file and sets its vnode to * text mode. */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; imgp->proc->p_fctl0 = 0; /* * Implement image setuid/setgid. * * Determine new credentials before attempting image activators * so that it can be used by process_exec handlers to determine * credential/setid changes. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = false; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp) != 0; credential_changing |= will_transition; #endif /* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */ if (credential_changing) imgp->proc->p_pdeathsig = 0; if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { imgp->credential_setid = true; VOP_UNLOCK(imgp->vp); imgp->newcred = crdup(oldcred); if (attr.va_mode & S_ISUID) { euip = uifind(attr.va_uid); change_euid(imgp->newcred, euip); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (attr.va_mode & S_ISGID) change_egid(imgp->newcred, attr.va_gid); /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } else { /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { VOP_UNLOCK(imgp->vp); imgp->newcred = crdup(oldcred); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } } /* The new credentials are installed into the process later. */ /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) error = ENOEXEC; goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * The text reference needs to be removed for scripts. * There is a short period before we determine that * something is a script where text reference is active. * The vnode lock is held over this entire period * so nothing should illegitimately be blocked. */ MPASS(imgp->textset); VOP_UNSET_TEXT_CHECKED(newtextvp); imgp->textset = false; /* free name buffer and old vnode */ #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = false; } vput(newtextvp); imgp->vp = newtextvp = NULL; if (args->fname != NULL) { if (newtextdvp != NULL) { vrele(newtextdvp); newtextdvp = NULL; } NDFREE(&nd, NDF_ONLY_PNBUF); free(newbinname, M_PARGS); newbinname = NULL; } vm_object_deallocate(imgp->object); imgp->object = NULL; execve_nosetid(imgp); imgp->execpath = NULL; free(imgp->freepath, M_TEMP); imgp->freepath = NULL; /* set new name to that of the interpreter */ args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp); if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* ABI enforces the use of Capsicum. Switch into capabilities mode. */ if (SV_PROC_FLAG(p, SV_CAPSICUM)) sys_cap_enter(td, NULL); /* * Copy out strings (args and env) and initialize stack base. */ error = (*p->p_sysent->sv_copyout_strings)(imgp, &stack_base); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * Stack setup. */ error = (*p->p_sysent->sv_fixup)(&stack_base, imgp); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } if (args->fdp != NULL) { /* Install a brand new file descriptor table. */ fdinstall_remapped(td, args->fdp); args->fdp = NULL; } else { /* * Keep on using the existing file descriptor table. For * security and other reasons, the file descriptor table * cannot be shared after an exec. */ fdunshare(td); pdunshare(td); /* close files on exec */ fdcloseexec(td); } /* * Malloc things before we need locks. */ i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. */ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if ((p->p_flag2 & P2_STKGAP_DISABLE_EXEC) == 0) p->p_flag2 &= ~P2_STKGAP_DISABLE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); /* STOPs are no longer ignored, arrange for AST */ signotify(td); } if ((imgp->sysent->sv_setid_allowed != NULL && !(*imgp->sysent->sv_setid_allowed)(td, imgp)) || (p->p_flag2 & P2_NO_NEW_PRIVS) != 0) execve_nosetid(imgp); /* * Implement image setuid/setgid installation. */ if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE kiop = ktrprocexec(p); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp); fdsetugidsafety(td); error = fdcheckstd(td); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto exec_fail_dealloc; PROC_LOCK(p); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, imgp->newcred, imgp->vp, interpvplabel, imgp); } #endif } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; } /* * Set the new credentials. */ if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); crfree(oldcred); oldcred = NULL; } /* * Store the vp for use in kern.proc.pathname. This vnode was * referenced by namei() or by fexecve variant of fname handling. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; oldtextdvp = p->p_textdvp; p->p_textdvp = newtextdvp; newtextdvp = NULL; oldbinname = p->p_binname; p->p_binname = newbinname; newbinname = NULL; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; PROC_UNLOCK(p); #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. */ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { VOP_UNLOCK(imgp->vp); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } #endif /* Set values passed into the program in registers. */ (*p->p_sysent->sv_setregs)(td, imgp, stack_base); VOP_MMAPPED(imgp->vp); SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: if (error != 0) { p->p_osrel = orig_osrel; p->p_fctl0 = orig_fctl0; } if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (imgp->textset) VOP_UNSET_TEXT_CHECKED(imgp->vp); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp); if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); if (newtextdvp != NULL) vrele(newtextdvp); free(newbinname, M_PARGS); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { if (p->p_ptevents & PTRACE_EXEC) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_EXEC) td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); } } else { exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); } if (imgp->newcred != NULL && oldcred != NULL) crfree(imgp->newcred); #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); /* * Handle deferred decrement of ref counts. */ if (oldtextvp != NULL) vrele(oldtextvp); if (oldtextdvp != NULL) vrele(oldtextdvp); free(oldbinname, M_PARGS); #ifdef KTRACE ktr_io_params_free(kiop); #endif pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); if (euip != NULL) uifree(euip); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exec_cleanup(td, oldvmspace); exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif /* * We don't want cpu_set_syscall_retval() to overwrite any of * the register values put in place by exec_setregs(). * Implementations of cpu_set_syscall_retval() will leave * registers unmodified when returning EJUSTRETURN. */ return (error == 0 ? EJUSTRETURN : error); } void exec_cleanup(struct thread *td, struct vmspace *oldvmspace) { if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(td->td_proc->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } } int exec_map_first_page(struct image_params *imgp) { vm_object_t object; vm_page_t m; int error; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); #if VM_NRESERVLEVEL > 0 if ((object->flags & OBJ_COLORED) == 0) { VM_OBJECT_WLOCK(object); vm_object_color(object, 0); VM_OBJECT_WUNLOCK(object); } #endif error = vm_page_grab_valid_unlocked(&m, object, 0, VM_ALLOC_COUNT(VM_INITIAL_PAGEIN) | VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); if (error != VM_PAGER_OK) return (EIO); imgp->firstpage = sf_buf_alloc(m, 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(struct image_params *imgp) { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_unwire(m, PQ_ACTIVE); } } void exec_onexec_old(struct thread *td) { sigfastblock_clear(td); umtx_exec(td->td_proc); } /* * This is an optimization which removes the unmanaged shared page * mapping. In combination with pmap_remove_pages(), which cleans all * managed mappings in the process' vmspace pmap, no work will be left * for pmap_remove(min, max). */ void exec_free_abi_mappings(struct proc *p) { struct vmspace *vmspace; struct sysentvec *sv; vmspace = p->p_vmspace; if (refcount_load(&vmspace->vm_refcnt) != 1) return; sv = p->p_sysent; if (sv->sv_shared_page_obj == NULL) return; pmap_remove(vmspace_pmap(vmspace), sv->sv_shared_page_base, sv->sv_shared_page_base + sv->sv_shared_page_len); } /* * Destroy old address space, and allocate a new stack. * The new stack is only sgrowsiz large because it is grown * automatically on a page fault. */ int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; struct thread *td = curthread; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; vm_prot_t stack_prot; u_long ssiz; imgp->vmspace_destroyed = true; imgp->sysent = sv; if (p->p_sysent->sv_onexec_old != NULL) p->p_sysent->sv_onexec_old(td); itimers_exec(p); if (sv->sv_onexec != NULL) sv->sv_onexec(p, imgp); EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (refcount_load(&vmspace->vm_refcnt) == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser && cpu_exec_vmspace_reuse(p, map)) { exec_free_abi_mappings(p); shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); /* * An exec terminates mlockall(MCL_FUTURE). * ASLR and W^X states must be re-evaluated. */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | MAP_ASLR_IGNSTART | MAP_WXORX); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } map->flags |= imgp->map_flags; /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error != KERN_SUCCESS) { vm_object_deallocate(obj); return (vm_mmap_to_errno(error)); } } /* Allocate a new stack */ if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } imgp->eff_stack_sz = lim_cur(curthread, RLIMIT_STACK); if (ssiz < imgp->eff_stack_sz) imgp->eff_stack_sz = ssiz; stack_addr = sv->sv_usrstack - ssiz; stack_prot = obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, stack_prot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error != KERN_SUCCESS) { uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x " "failed mach error %d errno %d\n", (uintmax_t)ssiz, stack_prot, error, vm_mmap_to_errno(error)); return (vm_mmap_to_errno(error)); } /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, const char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long arg, env; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ error = exec_args_add_fname(args, fname, segflg); if (error != 0) goto err_exit; /* * extract arguments first */ for (;;) { error = fueword(argv++, &arg); if (error == -1) { error = EFAULT; goto err_exit; } if (arg == 0) break; error = exec_args_add_arg(args, (char *)(uintptr_t)arg, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &env); if (error == -1) { error = EFAULT; goto err_exit; } if (env == 0) break; error = exec_args_add_env(args, (char *)(uintptr_t)env, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } int exec_copyin_data_fds(struct thread *td, struct image_args *args, const void *data, size_t datalen, const int *fds, size_t fdslen) { struct filedesc *ofdp; const char *p; int *kfds; int error; memset(args, '\0', sizeof(*args)); ofdp = td->td_proc->p_fd; if (datalen >= ARG_MAX || fdslen >= ofdp->fd_nfiles) return (E2BIG); error = exec_alloc_args(args); if (error != 0) return (error); args->begin_argv = args->buf; args->stringspace = ARG_MAX; if (datalen > 0) { /* * Argument buffer has been provided. Copy it into the * kernel as a single string and add a terminating null * byte. */ error = copyin(data, args->begin_argv, datalen); if (error != 0) goto err_exit; args->begin_argv[datalen] = '\0'; args->endp = args->begin_argv + datalen + 1; args->stringspace -= datalen + 1; /* * Traditional argument counting. Count the number of * null bytes. */ for (p = args->begin_argv; p < args->endp; ++p) if (*p == '\0') ++args->argc; } else { /* No argument buffer provided. */ args->endp = args->begin_argv; } /* Create new file descriptor table. */ kfds = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK); error = copyin(fds, kfds, fdslen * sizeof(int)); if (error != 0) { free(kfds, M_TEMP); goto err_exit; } error = fdcopy_remapped(ofdp, kfds, fdslen, &args->fdp); free(kfds, M_TEMP); if (error != 0) goto err_exit; return (0); err_exit: exec_free_args(args); return (error); } struct exec_args_kva { vm_offset_t addr; u_int gen; SLIST_ENTRY(exec_args_kva) next; }; DPCPU_DEFINE_STATIC(struct exec_args_kva *, exec_args_kva); static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist; static struct mtx exec_args_kva_mtx; static u_int exec_args_gen; static void exec_prealloc_args_kva(void *arg __unused) { struct exec_args_kva *argkva; u_int i; SLIST_INIT(&exec_args_kva_freelist); mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF); for (i = 0; i < exec_map_entries; i++) { argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK); argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size); argkva->gen = exec_args_gen; SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); } } SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL); static vm_offset_t exec_alloc_args_kva(void **cookie) { struct exec_args_kva *argkva; argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_PTR(exec_args_kva)); if (argkva == NULL) { mtx_lock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL) (void)mtx_sleep(&exec_args_kva_freelist, &exec_args_kva_mtx, 0, "execkva", 0); SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next); mtx_unlock(&exec_args_kva_mtx); } kasan_mark((void *)argkva->addr, exec_map_entry_size, exec_map_entry_size, 0); *(struct exec_args_kva **)cookie = argkva; return (argkva->addr); } static void exec_release_args_kva(struct exec_args_kva *argkva, u_int gen) { vm_offset_t base; base = argkva->addr; kasan_mark((void *)argkva->addr, 0, exec_map_entry_size, KASAN_EXEC_ARGS_FREED); if (argkva->gen != gen) { (void)vm_map_madvise(exec_map, base, base + exec_map_entry_size, MADV_FREE); argkva->gen = gen; } if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva), (uintptr_t)NULL, (uintptr_t)argkva)) { mtx_lock(&exec_args_kva_mtx); SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); wakeup_one(&exec_args_kva_freelist); mtx_unlock(&exec_args_kva_mtx); } } static void exec_free_args_kva(void *cookie) { exec_release_args_kva(cookie, exec_args_gen); } static void exec_args_kva_lowmem(void *arg __unused) { SLIST_HEAD(, exec_args_kva) head; struct exec_args_kva *argkva; u_int gen; int i; gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1; /* * Force an madvise of each KVA range. Any currently allocated ranges * will have MADV_FREE applied once they are freed. */ SLIST_INIT(&head); mtx_lock(&exec_args_kva_mtx); SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva); mtx_unlock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&head)) != NULL) { SLIST_REMOVE_HEAD(&head, next); exec_release_args_kva(argkva, gen); } CPU_FOREACH(i) { argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva)); if (argkva != NULL) exec_release_args_kva(argkva, gen); } } EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL, EVENTHANDLER_PRI_ANY); /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. */ int exec_alloc_args(struct image_args *args) { args->buf = (char *)exec_alloc_args_kva(&args->bufkva); return (0); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { exec_free_args_kva(args->bufkva); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } if (args->fdp != NULL) fdescfree_remapped(args->fdp); } /* * A set to functions to fill struct image args. * * NOTE: exec_args_add_fname() must be called (possibly with a NULL * fname) before the other functions. All exec_args_add_arg() calls must * be made before any exec_args_add_env() calls. exec_args_adjust_args() * may be called any time after exec_args_add_fname(). * * exec_args_add_fname() - install path to be executed * exec_args_add_arg() - append an argument string * exec_args_add_env() - append an env string * exec_args_adjust_args() - adjust location of the argument list to * allow new arguments to be prepended */ int exec_args_add_fname(struct image_args *args, const char *fname, enum uio_seg segflg) { int error; size_t length; KASSERT(args->fname == NULL, ("fname already appended")); KASSERT(args->endp == NULL, ("already appending to args")); if (fname != NULL) { args->fname = args->buf; error = segflg == UIO_SYSSPACE ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) return (error == ENAMETOOLONG ? E2BIG : error); } else length = 0; /* Set up for _arg_*()/_env_*() */ args->endp = args->buf + length; /* begin_argv must be set and kept updated */ args->begin_argv = args->endp; KASSERT(exec_map_entry_size - length >= ARG_MAX, ("too little space remaining for arguments %zu < %zu", exec_map_entry_size - length, (size_t)ARG_MAX)); args->stringspace = ARG_MAX; return (0); } static int exec_args_add_str(struct image_args *args, const char *str, enum uio_seg segflg, int *countp) { int error; size_t length; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); error = (segflg == UIO_SYSSPACE) ? copystr(str, args->endp, args->stringspace, &length) : copyinstr(str, args->endp, args->stringspace, &length); if (error != 0) return (error == ENAMETOOLONG ? E2BIG : error); args->stringspace -= length; args->endp += length; (*countp)++; return (0); } int exec_args_add_arg(struct image_args *args, const char *argp, enum uio_seg segflg) { KASSERT(args->envc == 0, ("appending args after env")); return (exec_args_add_str(args, argp, segflg, &args->argc)); } int exec_args_add_env(struct image_args *args, const char *envp, enum uio_seg segflg) { if (args->envc == 0) args->begin_envv = args->endp; return (exec_args_add_str(args, envp, segflg, &args->envc)); } int exec_args_adjust_args(struct image_args *args, size_t consume, ssize_t extend) { ssize_t offset; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); offset = extend - consume; if (args->stringspace < offset) return (E2BIG); memmove(args->begin_argv + extend, args->begin_argv + consume, args->endp - args->begin_argv + consume); if (args->envc > 0) args->begin_envv += offset; args->endp += offset; args->stringspace -= offset; return (0); } char * exec_args_get_begin_envv(struct image_args *args) { KASSERT(args->endp != NULL, ("endp not initialized")); if (args->envc > 0) return (args->begin_envv); return (args->endp); } void exec_stackgap(struct image_params *imgp, uintptr_t *dp) { if (imgp->sysent->sv_stackgap == NULL || (imgp->proc->p_fctl0 & (NT_FREEBSD_FCTL_ASLR_DISABLE | NT_FREEBSD_FCTL_ASG_DISABLE)) != 0 || (imgp->map_flags & MAP_ASLR) == 0) return; imgp->sysent->sv_stackgap(imgp, dp); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ int exec_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc; char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; struct proc *p; size_t execpath_len; int error, szsigcode, szps; char canary[sizeof(long) * 8]; szps = sizeof(pagesizes[0]) * MAXPAGESIZES; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; szsigcode = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; imgp->ps_strings = arginfo; if (p->p_sysent->sv_sigcode_base == 0) { if (p->p_sysent->sv_szsigcode != NULL) szsigcode = *(p->p_sysent->sv_szsigcode); } destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); error = copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode); if (error != 0) return (error); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ destp -= szps; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = (void *)destp; error = copyout(pagesizes, imgp->pagesizes, szps); if (error != 0) return (error); imgp->pagesizeslen = szps; /* * Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; exec_stackgap(imgp, &destp); if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. */ destp -= AT_COUNT * sizeof(Elf_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* * Fill in "ps_strings" struct for ps, w, etc. */ imgp->argv = vectp; if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* a null vector table pointer separates the argp's from the envp's */ if (suword(vectp++, 0) != 0) return (EFAULT); imgp->envv = vectp; if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* end of vector table is a null pointer */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int exec_check_permissions(struct image_params *imgp) { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. * * Add a text reference now so no one can write to the * executable while we're activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ error = VOP_SET_TEXT(vp); if (error != 0) return (error); imgp->textset = true; /* * Call filesystem specific open routine (which does nothing in the * general case). */ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = true; return (error); } /* * Exec handler registration */ int exec_register(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; u_int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 0b3265cac6f1..226cd5991261 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,1439 +1,1440 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for acct_process() function prototype */ #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exit; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exit, "int"); static int kern_kill_on_dbg_exit = 1; SYSCTL_INT(_kern, OID_AUTO, kill_on_debugger_exit, CTLFLAG_RWTUN, &kern_kill_on_dbg_exit, 0, "Kill ptraced processes when debugger exits"); static bool kern_wait_dequeue_sigchld = 1; SYSCTL_BOOL(_kern, OID_AUTO, wait_dequeue_sigchld, CTLFLAG_RWTUN, &kern_wait_dequeue_sigchld, 0, "Dequeue SIGCHLD on wait(2) for live process"); struct proc * proc_realparent(struct proc *child) { struct proc *p, *parent; sx_assert(&proctree_lock, SX_LOCKED); if ((child->p_treeflag & P_TREE_ORPHANED) == 0) return (child->p_pptr->p_pid == child->p_oppid ? child->p_pptr : child->p_reaper); for (p = child; (p->p_treeflag & P_TREE_FIRST_ORPHAN) == 0;) { /* Cannot use LIST_PREV(), since the list head is not known. */ p = __containerof(p->p_orphan.le_prev, struct proc, p_orphan.le_next); KASSERT((p->p_treeflag & P_TREE_ORPHANED) != 0, ("missing P_ORPHAN %p", p)); } parent = __containerof(p->p_orphan.le_prev, struct proc, p_orphans.lh_first); return (parent); } void reaper_abandon_children(struct proc *p, bool exiting) { struct proc *p1, *p2, *ptmp; sx_assert(&proctree_lock, SX_LOCKED); KASSERT(p != initproc, ("reaper_abandon_children for initproc")); if ((p->p_treeflag & P_TREE_REAPER) == 0) return; p1 = p->p_reaper; LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) { LIST_REMOVE(p2, p_reapsibling); p2->p_reaper = p1; p2->p_reapsubtree = p->p_reapsubtree; LIST_INSERT_HEAD(&p1->p_reaplist, p2, p_reapsibling); if (exiting && p2->p_pptr == p) { PROC_LOCK(p2); proc_reparent(p2, p1, true); PROC_UNLOCK(p2); } } KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty")); p->p_treeflag &= ~P_TREE_REAPER; } static void reaper_clear(struct proc *p) { struct proc *p1; bool clear; sx_assert(&proctree_lock, SX_LOCKED); LIST_REMOVE(p, p_reapsibling); if (p->p_reapsubtree == 1) return; clear = true; LIST_FOREACH(p1, &p->p_reaper->p_reaplist, p_reapsibling) { if (p1->p_reapsubtree == p->p_reapsubtree) { clear = false; break; } } if (clear) proc_id_clear(PROC_ID_REAP, p->p_reapsubtree); } void proc_clear_orphan(struct proc *p) { struct proc *p1; sx_assert(&proctree_lock, SA_XLOCKED); if ((p->p_treeflag & P_TREE_ORPHANED) == 0) return; if ((p->p_treeflag & P_TREE_FIRST_ORPHAN) != 0) { p1 = LIST_NEXT(p, p_orphan); if (p1 != NULL) p1->p_treeflag |= P_TREE_FIRST_ORPHAN; p->p_treeflag &= ~P_TREE_FIRST_ORPHAN; } LIST_REMOVE(p, p_orphan); p->p_treeflag &= ~P_TREE_ORPHANED; } void exit_onexit(struct proc *p) { MPASS(p->p_numthreads == 1); umtx_thread_exit(FIRST_THREAD_IN_PROC(p)); } /* * exit -- death of process. */ void sys_sys_exit(struct thread *td, struct sys_exit_args *uap) { exit1(td, uap->rval, 0); /* NOTREACHED */ } /* * Exit: deallocate address space and other resources, change proc state to * zombie, and unlink proc from allproc and parent's lists. Save exit status * and rusage for wait(). Check for child processes and orphan them. */ void exit1(struct thread *td, int rval, int signo) { struct proc *p, *nq, *q, *t; struct thread *tdt; ksiginfo_t *ksi, *ksi1; int signal_parent; mtx_assert(&Giant, MA_NOTOWNED); KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo)); + TSPROCEXIT(td->td_proc->p_pid); p = td->td_proc; /* * XXX in case we're rebooting we just let init die in order to * work around an unsolved stack overflow seen very late during * shutdown on sparc64 when the gmirror worker process exists. * XXX what to do now that sparc64 is gone... remove if? */ if (p == initproc && rebooting == 0) { printf("init died (signal %d, exit %d)\n", signo, rval); panic("Going nowhere without my init!"); } /* * Deref SU mp, since the thread does not return to userspace. */ td_softdep_cleanup(td); /* * MUST abort all other threads before proceeding past here. */ PROC_LOCK(p); /* * First check if some other thread or external request got * here before us. If so, act appropriately: exit or suspend. * We must ensure that stop requests are handled before we set * P_WEXIT. */ thread_suspend_check(0); while (p->p_flag & P_HADTHREADS) { /* * Kill off the other threads. This requires * some co-operation from other parts of the kernel * so it may not be instantaneous. With this state set * any thread entering the kernel from userspace will * thread_exit() in trap(). Any thread attempting to * sleep will return immediately with EINTR or EWOULDBLOCK * which will hopefully force them to back out to userland * freeing resources as they go. Any thread attempting * to return to userland will thread_exit() from userret(). * thread_exit() will unsuspend us when the last of the * other threads exits. * If there is already a thread singler after resumption, * calling thread_single will fail; in that case, we just * re-check all suspension request, the thread should * either be suspended there or exit. */ if (!thread_single(p, SINGLE_EXIT)) /* * All other activity in this process is now * stopped. Threading support has been turned * off. */ break; /* * Recheck for new stop or suspend requests which * might appear while process lock was dropped in * thread_single(). */ thread_suspend_check(0); } KASSERT(p->p_numthreads == 1, ("exit1: proc %p exiting with %d threads", p, p->p_numthreads)); racct_sub(p, RACCT_NTHR, 1); /* Let event handler change exit status */ p->p_xexit = rval; p->p_xsig = signo; /* * Ignore any pending request to stop due to a stop signal. * Once P_WEXIT is set, future requests will be ignored as * well. */ p->p_flag &= ~P_STOPPED_SIG; KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped")); /* Note that we are exiting. */ p->p_flag |= P_WEXIT; /* * Wait for any processes that have a hold on our vmspace to * release their reference. */ while (p->p_lock > 0) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); PROC_UNLOCK(p); /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); #ifdef AUDIT /* * The Sun BSM exit token contains two components: an exit status as * passed to exit(), and a return value to indicate what sort of exit * it was. The exit status is WEXITSTATUS(rv), but it's not clear * what the return value is. */ AUDIT_ARG_EXIT(rval, 0); AUDIT_SYSCALL_EXIT(0, td); #endif /* Are we a task leader with peers? */ if (p->p_peers != NULL && p == p->p_leader) { mtx_lock(&ppeers_lock); q = p->p_peers; while (q != NULL) { PROC_LOCK(q); kern_psignal(q, SIGKILL); PROC_UNLOCK(q); q = q->p_peers; } while (p->p_peers != NULL) msleep(p, &ppeers_lock, PWAIT, "exit1", 0); mtx_unlock(&ppeers_lock); } itimers_exit(p); /* * Check if any loadable modules need anything done at process exit. * E.g. SYSV IPC stuff. * Event handler could change exit status. * XXX what if one of these generates an error? */ EVENTHANDLER_DIRECT_INVOKE(process_exit, p); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ PROC_LOCK(p); stopprofclock(p); p->p_ptevents = 0; /* * Stop the real interval timer. If the handler is currently * executing, prevent it from rearming itself and let it finish. */ if (timevalisset(&p->p_realtimer.it_value) && _callout_stop_safe(&p->p_itcallout, CS_EXECUTING, NULL) == 0) { timevalclear(&p->p_realtimer.it_interval); msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0); KASSERT(!timevalisset(&p->p_realtimer.it_value), ("realtime timer is still armed")); } PROC_UNLOCK(p); if (p->p_sysent->sv_onexit != NULL) p->p_sysent->sv_onexit(p); seltdfini(td); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pid. The P_WEXIT flag interlocks with fsetown(). */ funsetownlst(&p->p_sigiolst); /* * Close open files and release open-file table. * This may block! */ pdescfree(td); fdescfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Remove ourself from our leader's peer list and wake our leader. */ if (p->p_leader->p_peers != NULL) { mtx_lock(&ppeers_lock); if (p->p_leader->p_peers != NULL) { q = p->p_leader; while (q->p_peers != p) q = q->p_peers; q->p_peers = p->p_peers; wakeup(p->p_leader); } mtx_unlock(&ppeers_lock); } exec_free_abi_mappings(p); vmspace_exit(td); (void)acct_process(td); #ifdef KTRACE ktrprocexit(td); #endif /* * Release reference to text vnode etc */ if (p->p_textvp != NULL) { vrele(p->p_textvp); p->p_textvp = NULL; } if (p->p_textdvp != NULL) { vrele(p->p_textdvp); p->p_textdvp = NULL; } if (p->p_binname != NULL) { free(p->p_binname, M_PARGS); p->p_binname = NULL; } /* * Release our limits structure. */ lim_free(p->p_limit); p->p_limit = NULL; tidhash_remove(td); /* * Call machine-dependent code to release any * machine-dependent resources other than the address space. * The address space is released by "vmspace_exitfree(p)" in * vm_waitproc(). */ cpu_exit(td); WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid); /* * Remove from allproc. It still sits in the hash. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); #ifdef DDB /* * Used by ddb's 'ps' command to find this process via the * pidhash. */ p->p_list.le_prev = NULL; #endif sx_xunlock(&allproc_lock); sx_xlock(&proctree_lock); PROC_LOCK(p); p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE); PROC_UNLOCK(p); /* * killjobc() might drop and re-acquire proctree_lock to * revoke control tty if exiting process was a session leader. */ killjobc(); /* * Reparent all children processes: * - traced ones to the original parent (or init if we are that parent) * - the rest to init */ q = LIST_FIRST(&p->p_children); if (q != NULL) /* only need this if any child is S_ZOMB */ wakeup(q->p_reaper); for (; q != NULL; q = nq) { nq = LIST_NEXT(q, p_sibling); ksi = ksiginfo_alloc(TRUE); PROC_LOCK(q); q->p_sigparent = SIGCHLD; if ((q->p_flag & P_TRACED) == 0) { proc_reparent(q, q->p_reaper, true); if (q->p_state == PRS_ZOMBIE) { /* * Inform reaper about the reparented * zombie, since wait(2) has something * new to report. Guarantee queueing * of the SIGCHLD signal, similar to * the _exit() behaviour, by providing * our ksiginfo. Ksi is freed by the * signal delivery. */ if (q->p_ksi == NULL) { ksi1 = NULL; } else { ksiginfo_copy(q->p_ksi, ksi); ksi->ksi_flags |= KSI_INS; ksi1 = ksi; ksi = NULL; } PROC_LOCK(q->p_reaper); pksignal(q->p_reaper, SIGCHLD, ksi1); PROC_UNLOCK(q->p_reaper); } else if (q->p_pdeathsig > 0) { /* * The child asked to received a signal * when we exit. */ kern_psignal(q, q->p_pdeathsig); } } else { /* * Traced processes are killed by default * since their existence means someone is * screwing up. */ t = proc_realparent(q); if (t == p) { proc_reparent(q, q->p_reaper, true); } else { PROC_LOCK(t); proc_reparent(q, t, true); PROC_UNLOCK(t); } /* * Since q was found on our children list, the * proc_reparent() call moved q to the orphan * list due to present P_TRACED flag. Clear * orphan link for q now while q is locked. */ proc_clear_orphan(q); q->p_flag &= ~P_TRACED; q->p_flag2 &= ~P2_PTRACE_FSTP; q->p_ptevents = 0; p->p_xthread = NULL; FOREACH_THREAD_IN_PROC(q, tdt) { tdt->td_dbgflags &= ~(TDB_SUSPEND | TDB_XSIG | TDB_FSTP); tdt->td_xsig = 0; } if (kern_kill_on_dbg_exit) { q->p_flag &= ~P_STOPPED_TRACE; kern_psignal(q, SIGKILL); } else if ((q->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) != 0) { sigqueue_delete_proc(q, SIGTRAP); ptrace_unsuspend(q); } } PROC_UNLOCK(q); if (ksi != NULL) ksiginfo_free(ksi); } /* * Also get rid of our orphans. */ while ((q = LIST_FIRST(&p->p_orphans)) != NULL) { PROC_LOCK(q); KASSERT(q->p_oppid == p->p_pid, ("orphan %p of %p has unexpected oppid %d", q, p, q->p_oppid)); q->p_oppid = q->p_reaper->p_pid; /* * If we are the real parent of this process * but it has been reparented to a debugger, then * check if it asked for a signal when we exit. */ if (q->p_pdeathsig > 0) kern_psignal(q, q->p_pdeathsig); CTR2(KTR_PTRACE, "exit: pid %d, clearing orphan %d", p->p_pid, q->p_pid); proc_clear_orphan(q); PROC_UNLOCK(q); } #ifdef KDTRACE_HOOKS if (SDT_PROBES_ENABLED()) { int reason = CLD_EXITED; if (WCOREDUMP(signo)) reason = CLD_DUMPED; else if (WIFSIGNALED(signo)) reason = CLD_KILLED; SDT_PROBE1(proc, , , exit, reason); } #endif /* Save exit status. */ PROC_LOCK(p); p->p_xthread = td; if (p->p_sysent->sv_ontdexit != NULL) p->p_sysent->sv_ontdexit(td); #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exit if it * has declared an interest. */ if (dtrace_fasttrap_exit) dtrace_fasttrap_exit(p); #endif /* * Notify interested parties of our demise. */ KNOTE_LOCKED(p->p_klist, NOTE_EXIT); /* * If this is a process with a descriptor, we may not need to deliver * a signal to the parent. proctree_lock is held over * procdesc_exit() to serialize concurrent calls to close() and * exit(). */ signal_parent = 0; if (p->p_procdesc == NULL || procdesc_exit(p)) { /* * Notify parent that we're gone. If parent has the * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN, * notify process 1 instead (and hope it will handle this * situation). */ PROC_LOCK(p->p_pptr); mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { struct proc *pp; mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); pp = p->p_pptr; PROC_UNLOCK(pp); proc_reparent(p, p->p_reaper, true); p->p_sigparent = SIGCHLD; PROC_LOCK(p->p_pptr); /* * Notify parent, so in case he was wait(2)ing or * executing waitpid(2) with our pid, he will * continue. */ wakeup(pp); } else mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr == p->p_reaper || p->p_pptr == initproc) { signal_parent = 1; } else if (p->p_sigparent != 0) { if (p->p_sigparent == SIGCHLD) { signal_parent = 1; } else { /* LINUX thread */ signal_parent = 2; } } } else PROC_LOCK(p->p_pptr); sx_xunlock(&proctree_lock); if (signal_parent == 1) { childproc_exited(p); } else if (signal_parent == 2) { kern_psignal(p->p_pptr, p->p_sigparent); } /* Tell the prison that we are gone. */ prison_proc_free(p->p_ucred->cr_prison); /* * The state PRS_ZOMBIE prevents other proesses from sending * signal to the process, to avoid memory leak, we free memory * for signal queue at the time when the state is set. */ sigqueue_flush(&p->p_sigqueue); sigqueue_flush(&td->td_sigqueue); /* * We have to wait until after acquiring all locks before * changing p_state. We need to avoid all possible context * switches (including ones from blocking on a mutex) while * marked as a zombie. We also have to set the zombie state * before we release the parent process' proc lock to avoid * a lost wakeup. So, we first call wakeup, then we grab the * sched lock, update the state, and release the parent process' * proc lock. */ wakeup(p->p_pptr); cv_broadcast(&p->p_pwait); sched_exit(p->p_pptr, td); PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); /* * Save our children's rusage information in our exit rusage. */ PROC_STATLOCK(p); ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux); PROC_STATUNLOCK(p); /* * Make sure the scheduler takes this thread out of its tables etc. * This will also release this thread's reference to the ucred. * Other thread parts to release include pcb bits and such. */ thread_exit(); } #ifndef _SYS_SYSPROTO_H_ struct abort2_args { char *why; int nargs; void **args; }; #endif int sys_abort2(struct thread *td, struct abort2_args *uap) { struct proc *p = td->td_proc; struct sbuf *sb; void *uargs[16]; int error, i, sig; /* * Do it right now so we can log either proper call of abort2(), or * note, that invalid argument was passed. 512 is big enough to * handle 16 arguments' descriptions with additional comments. */ sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN); sbuf_clear(sb); sbuf_printf(sb, "%s(pid %d uid %d) aborted: ", p->p_comm, p->p_pid, td->td_ucred->cr_uid); /* * Since we can't return from abort2(), send SIGKILL in cases, where * abort2() was called improperly */ sig = SIGKILL; /* Prevent from DoSes from user-space. */ if (uap->nargs < 0 || uap->nargs > 16) goto out; if (uap->nargs > 0) { if (uap->args == NULL) goto out; error = copyin(uap->args, uargs, uap->nargs * sizeof(void *)); if (error != 0) goto out; } /* * Limit size of 'reason' string to 128. Will fit even when * maximal number of arguments was chosen to be logged. */ if (uap->why != NULL) { error = sbuf_copyin(sb, uap->why, 128); if (error < 0) goto out; } else { sbuf_printf(sb, "(null)"); } if (uap->nargs > 0) { sbuf_printf(sb, "("); for (i = 0;i < uap->nargs; i++) sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]); sbuf_printf(sb, ")"); } /* * Final stage: arguments were proper, string has been * successfully copied from userspace, and copying pointers * from user-space succeed. */ sig = SIGABRT; out: if (sig == SIGKILL) { sbuf_trim(sb); sbuf_printf(sb, " (Reason text inaccessible)"); } sbuf_cat(sb, "\n"); sbuf_finish(sb); log(LOG_INFO, "%s", sbuf_data(sb)); sbuf_delete(sb); exit1(td, 0, sig); return (0); } #ifdef COMPAT_43 /* * The dirty work is handled by kern_wait(). */ int owait(struct thread *td, struct owait_args *uap __unused) { int error, status; error = kern_wait(td, WAIT_ANY, &status, 0, NULL); if (error == 0) td->td_retval[1] = status; return (error); } #endif /* COMPAT_43 */ /* * The dirty work is handled by kern_wait(). */ int sys_wait4(struct thread *td, struct wait4_args *uap) { struct rusage ru, *rup; int error, status; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (uap->status != NULL && error == 0 && td->td_retval[0] != 0) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0 && td->td_retval[0] != 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int sys_wait6(struct thread *td, struct wait6_args *uap) { struct __wrusage wru, *wrup; siginfo_t si, *sip; idtype_t idtype; id_t id; int error, status; idtype = uap->idtype; id = uap->id; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; /* * We expect all callers of wait6() to know about WEXITED and * WTRAPPED. */ error = kern_wait6(td, idtype, id, &status, uap->options, wrup, sip); if (uap->status != NULL && error == 0 && td->td_retval[0] != 0) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0 && td->td_retval[0] != 0) error = copyout(&wru, uap->wrusage, sizeof(wru)); if (uap->info != NULL && error == 0) error = copyout(&si, uap->info, sizeof(si)); return (error); } /* * Reap the remains of a zombie process and optionally return status and * rusage. Asserts and will release both the proctree_lock and the process * lock as part of its work. */ void proc_reap(struct thread *td, struct proc *p, int *status, int options) { struct proc *q, *t; sx_assert(&proctree_lock, SA_XLOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE")); mtx_spin_wait_unlocked(&p->p_slock); q = td->td_proc; if (status) *status = KW_EXITCODE(p->p_xexit, p->p_xsig); if (options & WNOWAIT) { /* * Only poll, returning the status. Caller does not wish to * release the proc struct just yet. */ PROC_UNLOCK(p); sx_xunlock(&proctree_lock); return; } PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); /* * If we got the child via a ptrace 'attach', we need to give it back * to the old parent. */ if (p->p_oppid != p->p_pptr->p_pid) { PROC_UNLOCK(p); t = proc_realparent(p); PROC_LOCK(t); PROC_LOCK(p); CTR2(KTR_PTRACE, "wait: traced child %d moved back to parent %d", p->p_pid, t->p_pid); proc_reparent(p, t, false); PROC_UNLOCK(p); pksignal(t, SIGCHLD, p->p_ksi); wakeup(t); cv_broadcast(&p->p_pwait); PROC_UNLOCK(t); sx_xunlock(&proctree_lock); return; } PROC_UNLOCK(p); /* * Remove other references to this process to ensure we have an * exclusive reference. */ sx_xlock(PIDHASHLOCK(p->p_pid)); LIST_REMOVE(p, p_hash); sx_xunlock(PIDHASHLOCK(p->p_pid)); LIST_REMOVE(p, p_sibling); reaper_abandon_children(p, true); reaper_clear(p); PROC_LOCK(p); proc_clear_orphan(p); PROC_UNLOCK(p); leavepgrp(p); if (p->p_procdesc != NULL) procdesc_reap(p); sx_xunlock(&proctree_lock); proc_id_clear(PROC_ID_PID, p->p_pid); PROC_LOCK(p); knlist_detach(p->p_klist); p->p_klist = NULL; PROC_UNLOCK(p); /* * Removal from allproc list and process group list paired with * PROC_LOCK which was executed during that time should guarantee * nothing can reach this process anymore. As such further locking * is unnecessary. */ p->p_xexit = p->p_xsig = 0; /* XXX: why? */ PROC_LOCK(q); ruadd(&q->p_stats->p_cru, &q->p_crux, &p->p_ru, &p->p_rux); PROC_UNLOCK(q); /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0); /* * Destroy resource accounting information associated with the process. */ #ifdef RACCT if (racct_enable) { PROC_LOCK(p); racct_sub(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } #endif racct_proc_exit(p); /* * Free credentials, arguments, and sigacts. */ proc_unset_cred(p); pargs_drop(p->p_args); p->p_args = NULL; sigacts_free(p->p_sigacts); p->p_sigacts = NULL; /* * Do any thread-system specific cleanups. */ thread_wait(p); /* * Give vm and machine-dependent layer a chance to free anything that * cpu_exit couldn't release while still running in process context. */ vm_waitproc(p); #ifdef MAC mac_proc_destroy(p); #endif KASSERT(FIRST_THREAD_IN_PROC(p), ("proc_reap: no residual thread!")); uma_zfree(proc_zone, p); atomic_add_int(&nprocs, -1); } static int proc_to_reap(struct thread *td, struct proc *p, idtype_t idtype, id_t id, int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo, int check_only) { struct rusage *rup; sx_assert(&proctree_lock, SA_XLOCKED); PROC_LOCK(p); switch (idtype) { case P_ALL: if (p->p_procdesc == NULL || (p->p_pptr == td->td_proc && (p->p_flag & P_TRACED) != 0)) { break; } PROC_UNLOCK(p); return (0); case P_PID: if (p->p_pid != (pid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_PGID: if (p->p_pgid != (pid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_SID: if (p->p_session->s_sid != (pid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_UID: if (p->p_ucred->cr_uid != (uid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_GID: if (p->p_ucred->cr_gid != (gid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_JAILID: if (p->p_ucred->cr_prison->pr_id != (int)id) { PROC_UNLOCK(p); return (0); } break; /* * It seems that the thread structures get zeroed out * at process exit. This makes it impossible to * support P_SETID, P_CID or P_CPUID. */ default: PROC_UNLOCK(p); return (0); } if (p_canwait(td, p)) { PROC_UNLOCK(p); return (0); } if (((options & WEXITED) == 0) && (p->p_state == PRS_ZOMBIE)) { PROC_UNLOCK(p); return (0); } /* * This special case handles a kthread spawned by linux_clone * (see linux_misc.c). The linux_wait4 and linux_waitpid * functions need to be able to distinguish between waiting * on a process and waiting on a thread. It is a thread if * p_sigparent is not SIGCHLD, and the WLINUXCLONE option * signifies we want to wait for threads and not processes. */ if ((p->p_sigparent != SIGCHLD) ^ ((options & WLINUXCLONE) != 0)) { PROC_UNLOCK(p); return (0); } if (siginfo != NULL) { bzero(siginfo, sizeof(*siginfo)); siginfo->si_errno = 0; /* * SUSv4 requires that the si_signo value is always * SIGCHLD. Obey it despite the rfork(2) interface * allows to request other signal for child exit * notification. */ siginfo->si_signo = SIGCHLD; /* * This is still a rough estimate. We will fix the * cases TRAPPED, STOPPED, and CONTINUED later. */ if (WCOREDUMP(p->p_xsig)) { siginfo->si_code = CLD_DUMPED; siginfo->si_status = WTERMSIG(p->p_xsig); } else if (WIFSIGNALED(p->p_xsig)) { siginfo->si_code = CLD_KILLED; siginfo->si_status = WTERMSIG(p->p_xsig); } else { siginfo->si_code = CLD_EXITED; siginfo->si_status = p->p_xexit; } siginfo->si_pid = p->p_pid; siginfo->si_uid = p->p_ucred->cr_uid; /* * The si_addr field would be useful additional * detail, but apparently the PC value may be lost * when we reach this point. bzero() above sets * siginfo->si_addr to NULL. */ } /* * There should be no reason to limit resources usage info to * exited processes only. A snapshot about any resources used * by a stopped process may be exactly what is needed. */ if (wrusage != NULL) { rup = &wrusage->wru_self; *rup = p->p_ru; PROC_STATLOCK(p); calcru(p, &rup->ru_utime, &rup->ru_stime); PROC_STATUNLOCK(p); rup = &wrusage->wru_children; *rup = p->p_stats->p_cru; calccru(p, &rup->ru_utime, &rup->ru_stime); } if (p->p_state == PRS_ZOMBIE && !check_only) { proc_reap(td, p, status, options); return (-1); } return (1); } int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rusage) { struct __wrusage wru, *wrup; idtype_t idtype; id_t id; int ret; /* * Translate the special pid values into the (idtype, pid) * pair for kern_wait6. The WAIT_MYPGRP case is handled by * kern_wait6() on its own. */ if (pid == WAIT_ANY) { idtype = P_ALL; id = 0; } else if (pid < 0) { idtype = P_PGID; id = (id_t)-pid; } else { idtype = P_PID; id = (id_t)pid; } if (rusage != NULL) wrup = &wru; else wrup = NULL; /* * For backward compatibility we implicitly add flags WEXITED * and WTRAPPED here. */ options |= WEXITED | WTRAPPED; ret = kern_wait6(td, idtype, id, status, options, wrup, NULL); if (rusage != NULL) *rusage = wru.wru_self; return (ret); } static void report_alive_proc(struct thread *td, struct proc *p, siginfo_t *siginfo, int *status, int options, int si_code) { bool cont; PROC_LOCK_ASSERT(p, MA_OWNED); sx_assert(&proctree_lock, SA_XLOCKED); MPASS(si_code == CLD_TRAPPED || si_code == CLD_STOPPED || si_code == CLD_CONTINUED); cont = si_code == CLD_CONTINUED; if ((options & WNOWAIT) == 0) { if (cont) p->p_flag &= ~P_CONTINUED; else p->p_flag |= P_WAITED; if (kern_wait_dequeue_sigchld && (td->td_proc->p_sysent->sv_flags & SV_SIG_WAITNDQ) == 0) { PROC_LOCK(td->td_proc); sigqueue_take(p->p_ksi); PROC_UNLOCK(td->td_proc); } } sx_xunlock(&proctree_lock); if (siginfo != NULL) { siginfo->si_code = si_code; siginfo->si_status = cont ? SIGCONT : p->p_xsig; } if (status != NULL) *status = cont ? SIGCONT : W_STOPCODE(p->p_xsig); PROC_UNLOCK(p); td->td_retval[0] = p->p_pid; } int kern_wait6(struct thread *td, idtype_t idtype, id_t id, int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo) { struct proc *p, *q; pid_t pid; int error, nfound, ret; bool report; AUDIT_ARG_VALUE((int)idtype); /* XXX - This is likely wrong! */ AUDIT_ARG_PID((pid_t)id); /* XXX - This may be wrong! */ AUDIT_ARG_VALUE(options); q = td->td_proc; if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) { PROC_LOCK(q); id = (id_t)q->p_pgid; PROC_UNLOCK(q); idtype = P_PGID; } /* If we don't know the option, just return. */ if ((options & ~(WUNTRACED | WNOHANG | WCONTINUED | WNOWAIT | WEXITED | WTRAPPED | WLINUXCLONE)) != 0) return (EINVAL); if ((options & (WEXITED | WUNTRACED | WCONTINUED | WTRAPPED)) == 0) { /* * We will be unable to find any matching processes, * because there are no known events to look for. * Prefer to return error instead of blocking * indefinitely. */ return (EINVAL); } loop: if (q->p_flag & P_STATCHILD) { PROC_LOCK(q); q->p_flag &= ~P_STATCHILD; PROC_UNLOCK(q); } sx_xlock(&proctree_lock); loop_locked: nfound = 0; LIST_FOREACH(p, &q->p_children, p_sibling) { pid = p->p_pid; ret = proc_to_reap(td, p, idtype, id, status, options, wrusage, siginfo, 0); if (ret == 0) continue; else if (ret != 1) { td->td_retval[0] = pid; return (0); } nfound++; PROC_LOCK_ASSERT(p, MA_OWNED); if ((options & WTRAPPED) != 0 && (p->p_flag & P_TRACED) != 0) { PROC_SLOCK(p); report = ((p->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) && p->p_suspcount == p->p_numthreads && (p->p_flag & P_WAITED) == 0); PROC_SUNLOCK(p); if (report) { CTR4(KTR_PTRACE, "wait: returning trapped pid %d status %#x " "(xstat %d) xthread %d", p->p_pid, W_STOPCODE(p->p_xsig), p->p_xsig, p->p_xthread != NULL ? p->p_xthread->td_tid : -1); report_alive_proc(td, p, siginfo, status, options, CLD_TRAPPED); return (0); } } if ((options & WUNTRACED) != 0 && (p->p_flag & P_STOPPED_SIG) != 0) { PROC_SLOCK(p); report = (p->p_suspcount == p->p_numthreads && ((p->p_flag & P_WAITED) == 0)); PROC_SUNLOCK(p); if (report) { report_alive_proc(td, p, siginfo, status, options, CLD_STOPPED); return (0); } } if ((options & WCONTINUED) != 0 && (p->p_flag & P_CONTINUED) != 0) { report_alive_proc(td, p, siginfo, status, options, CLD_CONTINUED); return (0); } PROC_UNLOCK(p); } /* * Look in the orphans list too, to allow the parent to * collect it's child exit status even if child is being * debugged. * * Debugger detaches from the parent upon successful * switch-over from parent to child. At this point due to * re-parenting the parent loses the child to debugger and a * wait4(2) call would report that it has no children to wait * for. By maintaining a list of orphans we allow the parent * to successfully wait until the child becomes a zombie. */ if (nfound == 0) { LIST_FOREACH(p, &q->p_orphans, p_orphan) { ret = proc_to_reap(td, p, idtype, id, NULL, options, NULL, NULL, 1); if (ret != 0) { KASSERT(ret != -1, ("reaped an orphan (pid %d)", (int)td->td_retval[0])); PROC_UNLOCK(p); nfound++; break; } } } if (nfound == 0) { sx_xunlock(&proctree_lock); return (ECHILD); } if (options & WNOHANG) { sx_xunlock(&proctree_lock); td->td_retval[0] = 0; return (0); } PROC_LOCK(q); if (q->p_flag & P_STATCHILD) { q->p_flag &= ~P_STATCHILD; PROC_UNLOCK(q); goto loop_locked; } sx_xunlock(&proctree_lock); error = msleep(q, &q->p_mtx, PWAIT | PCATCH | PDROP, "wait", 0); if (error) return (error); goto loop; } void proc_add_orphan(struct proc *child, struct proc *parent) { sx_assert(&proctree_lock, SX_XLOCKED); KASSERT((child->p_flag & P_TRACED) != 0, ("proc_add_orphan: not traced")); if (LIST_EMPTY(&parent->p_orphans)) { child->p_treeflag |= P_TREE_FIRST_ORPHAN; LIST_INSERT_HEAD(&parent->p_orphans, child, p_orphan); } else { LIST_INSERT_AFTER(LIST_FIRST(&parent->p_orphans), child, p_orphan); } child->p_treeflag |= P_TREE_ORPHANED; } /* * Make process 'parent' the new parent of process 'child'. * Must be called with an exclusive hold of proctree lock. */ void proc_reparent(struct proc *child, struct proc *parent, bool set_oppid) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(child, MA_OWNED); if (child->p_pptr == parent) return; PROC_LOCK(child->p_pptr); sigqueue_take(child->p_ksi); PROC_UNLOCK(child->p_pptr); LIST_REMOVE(child, p_sibling); LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); proc_clear_orphan(child); if ((child->p_flag & P_TRACED) != 0) { proc_add_orphan(child, child->p_pptr); } child->p_pptr = parent; if (set_oppid) child->p_oppid = parent->p_pid; } diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 74b415a838ee..16af36635db2 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -1,1164 +1,1165 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_fork_func_t dtrace_fasttrap_fork; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int"); #ifndef _SYS_SYSPROTO_H_ struct fork_args { int dummy; }; #endif /* ARGSUSED */ int sys_fork(struct thread *td, struct fork_args *uap) { struct fork_req fr; int error, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC; fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } /* ARGUSED */ int sys_pdfork(struct thread *td, struct pdfork_args *uap) { struct fork_req fr; int error, fd, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFPROCDESC; fr.fr_pidp = &pid; fr.fr_pd_fd = &fd; fr.fr_pd_flags = uap->flags; AUDIT_ARG_FFLAGS(uap->flags); /* * It is necessary to return fd by reference because 0 is a valid file * descriptor number, and the child needs to be able to distinguish * itself from the parent using the return value. */ error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; error = copyout(&fd, uap->fdp, sizeof(fd)); } return (error); } /* ARGSUSED */ int sys_vfork(struct thread *td, struct vfork_args *uap) { struct fork_req fr; int error, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } int sys_rfork(struct thread *td, struct rfork_args *uap) { struct fork_req fr; int error, pid; /* Don't allow kernel-only flags. */ if ((uap->flags & RFKERNELONLY) != 0) return (EINVAL); /* RFSPAWN must not appear with others */ if ((uap->flags & RFSPAWN) != 0 && uap->flags != RFSPAWN) return (EINVAL); AUDIT_ARG_FFLAGS(uap->flags); bzero(&fr, sizeof(fr)); if ((uap->flags & RFSPAWN) != 0) { fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; fr.fr_flags2 = FR2_DROPSIG_CAUGHT; } else { fr.fr_flags = uap->flags; } fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } int __exclusive_cache_line nprocs = 1; /* process 0 */ int lastpid = 0; SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, "Last used PID"); /* * Random component to lastpid generation. We mix in a random factor to make * it a little harder to predict. We sanity check the modulus value to avoid * doing it in critical paths. Don't let it be too small or we pointlessly * waste randomness entropy, and don't let it be impossibly large. Using a * modulus that is too big causes a LOT more process table scans and slows * down fork processing as the pidchecked caching is defeated. */ static int randompid = 0; static int sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) { int error, pid; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error != 0) return(error); sx_xlock(&allproc_lock); pid = randompid; error = sysctl_handle_int(oidp, &pid, 0, req); if (error == 0 && req->newptr != NULL) { if (pid == 0) randompid = 0; else if (pid == 1) /* generate a random PID modulus between 100 and 1123 */ randompid = 100 + arc4random() % 1024; else if (pid < 0 || pid > pid_max - 100) /* out of range */ randompid = pid_max - 100; else if (pid < 100) /* Make it reasonable */ randompid = 100; else randompid = pid; } sx_xunlock(&allproc_lock); return (error); } SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_randompid, "I", "Random PID modulus. Special values: 0: disable, 1: choose random value"); extern bitstr_t proc_id_pidmap; extern bitstr_t proc_id_grpidmap; extern bitstr_t proc_id_sessidmap; extern bitstr_t proc_id_reapmap; /* * Find an unused process ID * * If RFHIGHPID is set (used during system boot), do not allocate * low-numbered pids. */ static int fork_findpid(int flags) { pid_t result; int trypid, random; /* * Avoid calling arc4random with procid_lock held. */ random = 0; if (__predict_false(randompid)) random = arc4random() % randompid; mtx_lock(&procid_lock); trypid = lastpid + 1; if (flags & RFHIGHPID) { if (trypid < 10) trypid = 10; } else { trypid += random; } retry: if (trypid >= pid_max) trypid = 2; bit_ffc_at(&proc_id_pidmap, trypid, pid_max, &result); if (result == -1) { KASSERT(trypid != 2, ("unexpectedly ran out of IDs")); trypid = 2; goto retry; } if (bit_test(&proc_id_grpidmap, result) || bit_test(&proc_id_sessidmap, result) || bit_test(&proc_id_reapmap, result)) { trypid = result + 1; goto retry; } /* * RFHIGHPID does not mess with the lastpid counter during boot. */ if ((flags & RFHIGHPID) == 0) lastpid = result; bit_set(&proc_id_pidmap, result); mtx_unlock(&procid_lock); return (result); } static int fork_norfproc(struct thread *td, int flags) { int error; struct proc *p1; KASSERT((flags & RFPROC) == 0, ("fork_norfproc called with RFPROC set")); p1 = td->td_proc; /* * Quiesce other threads if necessary. If RFMEM is not specified we * must ensure that other threads do not concurrently create a second * process sharing the vmspace, see vmspace_unshare(). */ if ((p1->p_flag & (P_HADTHREADS | P_SYSTEM)) == P_HADTHREADS && ((flags & (RFCFDG | RFFDG)) != 0 || (flags & RFMEM) == 0)) { PROC_LOCK(p1); if (thread_single(p1, SINGLE_BOUNDARY)) { PROC_UNLOCK(p1); return (ERESTART); } PROC_UNLOCK(p1); } error = vm_forkproc(td, NULL, NULL, NULL, flags); if (error) goto fail; /* * Close all file descriptors. */ if (flags & RFCFDG) { struct filedesc *fdtmp; struct pwddesc *pdtmp; pdtmp = pdinit(td->td_proc->p_pd, false); fdtmp = fdinit(td->td_proc->p_fd, false, NULL); pdescfree(td); fdescfree(td); p1->p_fd = fdtmp; p1->p_pd = pdtmp; } /* * Unshare file descriptors (from parent). */ if (flags & RFFDG) { fdunshare(td); pdunshare(td); } fail: if ((p1->p_flag & (P_HADTHREADS | P_SYSTEM)) == P_HADTHREADS && ((flags & (RFCFDG | RFFDG)) != 0 || (flags & RFMEM) == 0)) { PROC_LOCK(p1); thread_single_end(p1, SINGLE_BOUNDARY); PROC_UNLOCK(p1); } return (error); } static void do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2, struct vmspace *vm2, struct file *fp_procdesc) { struct proc *p1, *pptr; struct filedesc *fd; struct filedesc_to_leader *fdtol; struct pwddesc *pd; struct sigacts *newsigacts; p1 = td->td_proc; PROC_LOCK(p1); bcopy(&p1->p_startcopy, &p2->p_startcopy, __rangeof(struct proc, p_startcopy, p_endcopy)); pargs_hold(p2->p_args); PROC_UNLOCK(p1); bzero(&p2->p_startzero, __rangeof(struct proc, p_startzero, p_endzero)); /* Tell the prison that we exist. */ prison_proc_hold(p2->p_ucred->cr_prison); p2->p_state = PRS_NEW; /* protect against others */ p2->p_pid = fork_findpid(fr->fr_flags); AUDIT_ARG_PID(p2->p_pid); + TSFORK(p2->p_pid, p1->p_pid); sx_xlock(&allproc_lock); LIST_INSERT_HEAD(&allproc, p2, p_list); allproc_gen++; sx_xunlock(&allproc_lock); sx_xlock(PIDHASHLOCK(p2->p_pid)); LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); sx_xunlock(PIDHASHLOCK(p2->p_pid)); tidhash_add(td2); /* * Malloc things while we don't hold any locks. */ if (fr->fr_flags & RFSIGSHARE) newsigacts = NULL; else newsigacts = sigacts_alloc(); /* * Copy filedesc. */ if (fr->fr_flags & RFCFDG) { pd = pdinit(p1->p_pd, false); fd = fdinit(p1->p_fd, false, NULL); fdtol = NULL; } else if (fr->fr_flags & RFFDG) { if (fr->fr_flags2 & FR2_SHARE_PATHS) pd = pdshare(p1->p_pd); else pd = pdcopy(p1->p_pd); fd = fdcopy(p1->p_fd); fdtol = NULL; } else { if (fr->fr_flags2 & FR2_SHARE_PATHS) pd = pdcopy(p1->p_pd); else pd = pdshare(p1->p_pd); fd = fdshare(p1->p_fd); if (p1->p_fdtol == NULL) p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL, p1->p_leader); if ((fr->fr_flags & RFTHREAD) != 0) { /* * Shared file descriptor table, and shared * process leaders. */ fdtol = p1->p_fdtol; FILEDESC_XLOCK(p1->p_fd); fdtol->fdl_refcount++; FILEDESC_XUNLOCK(p1->p_fd); } else { /* * Shared file descriptor table, and different * process leaders. */ fdtol = filedesc_to_leader_alloc(p1->p_fdtol, p1->p_fd, p2); } } /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ PROC_LOCK(p2); PROC_LOCK(p1); bzero(&td2->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); bcopy(&td->td_startcopy, &td2->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name)); td2->td_sigstk = td->td_sigstk; td2->td_flags = TDF_INMEM; td2->td_lend_user_pri = PRI_MAX; #ifdef VIMAGE td2->td_vnet = NULL; td2->td_vnet_lpush = NULL; #endif /* * Allow the scheduler to initialize the child. */ thread_lock(td); sched_fork(td, td2); thread_unlock(td); /* * Duplicate sub-structures as needed. * Increase reference counts on shared objects. */ p2->p_flag = P_INMEM; p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE | P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC | P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE | P2_TRAPCAP | P2_STKGAP_DISABLE | P2_STKGAP_DISABLE_EXEC | P2_NO_NEW_PRIVS | P2_WXORX_DISABLE | P2_WXORX_ENABLE_EXEC); p2->p_swtick = ticks; if (p1->p_flag & P_PROFIL) startprofclock(p2); if (fr->fr_flags & RFSIGSHARE) { p2->p_sigacts = sigacts_hold(p1->p_sigacts); } else { sigacts_copy(newsigacts, p1->p_sigacts); p2->p_sigacts = newsigacts; if ((fr->fr_flags2 & (FR2_DROPSIG_CAUGHT | FR2_KPROC)) != 0) { mtx_lock(&p2->p_sigacts->ps_mtx); if ((fr->fr_flags2 & FR2_DROPSIG_CAUGHT) != 0) sig_drop_caught(p2); if ((fr->fr_flags2 & FR2_KPROC) != 0) p2->p_sigacts->ps_flag |= PS_NOCLDWAIT; mtx_unlock(&p2->p_sigacts->ps_mtx); } } if (fr->fr_flags & RFTSIGZMB) p2->p_sigparent = RFTSIGNUM(fr->fr_flags); else if (fr->fr_flags & RFLINUXTHPN) p2->p_sigparent = SIGUSR1; else p2->p_sigparent = SIGCHLD; if ((fr->fr_flags2 & FR2_KPROC) != 0) { p2->p_flag |= P_SYSTEM | P_KPROC; td2->td_pflags |= TDP_KTHREAD; } p2->p_textvp = p1->p_textvp; p2->p_textdvp = p1->p_textdvp; p2->p_fd = fd; p2->p_fdtol = fdtol; p2->p_pd = pd; if (p1->p_flag2 & P2_INHERIT_PROTECTED) { p2->p_flag |= P_PROTECTED; p2->p_flag2 |= P2_INHERIT_PROTECTED; } /* * p_limit is copy-on-write. Bump its refcount. */ lim_fork(p1, p2); thread_cow_get_proc(td2, p2); pstats_fork(p1->p_stats, p2->p_stats); PROC_UNLOCK(p1); PROC_UNLOCK(p2); /* * Bump references to the text vnode and directory, and copy * the hardlink name. */ if (p2->p_textvp != NULL) vrefact(p2->p_textvp); if (p2->p_textdvp != NULL) vrefact(p2->p_textdvp); p2->p_binname = p1->p_binname == NULL ? NULL : strdup(p1->p_binname, M_PARGS); /* * Set up linkage for kernel based threading. */ if ((fr->fr_flags & RFTHREAD) != 0) { mtx_lock(&ppeers_lock); p2->p_peers = p1->p_peers; p1->p_peers = p2; p2->p_leader = p1->p_leader; mtx_unlock(&ppeers_lock); PROC_LOCK(p1->p_leader); if ((p1->p_leader->p_flag & P_WEXIT) != 0) { PROC_UNLOCK(p1->p_leader); /* * The task leader is exiting, so process p1 is * going to be killed shortly. Since p1 obviously * isn't dead yet, we know that the leader is either * sending SIGKILL's to all the processes in this * task or is sleeping waiting for all the peers to * exit. We let p1 complete the fork, but we need * to go ahead and kill the new process p2 since * the task leader may not get a chance to send * SIGKILL to it. We leave it on the list so that * the task leader will wait for this new process * to commit suicide. */ PROC_LOCK(p2); kern_psignal(p2, SIGKILL); PROC_UNLOCK(p2); } else PROC_UNLOCK(p1->p_leader); } else { p2->p_peers = NULL; p2->p_leader = p2; } sx_xlock(&proctree_lock); PGRP_LOCK(p1->p_pgrp); PROC_LOCK(p2); PROC_LOCK(p1); /* * Preserve some more flags in subprocess. P_PROFIL has already * been preserved. */ p2->p_flag |= p1->p_flag & P_SUGID; td2->td_pflags |= (td->td_pflags & (TDP_ALTSTACK | TDP_SIGFASTBLOCK)) | TDP_FORKING; SESS_LOCK(p1->p_session); if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) p2->p_flag |= P_CONTROLT; SESS_UNLOCK(p1->p_session); if (fr->fr_flags & RFPPWAIT) p2->p_flag |= P_PPWAIT; p2->p_pgrp = p1->p_pgrp; LIST_INSERT_AFTER(p1, p2, p_pglist); PGRP_UNLOCK(p1->p_pgrp); LIST_INIT(&p2->p_children); LIST_INIT(&p2->p_orphans); callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0); TAILQ_INIT(&p2->p_kqtim_stop); /* * This begins the section where we must prevent the parent * from being swapped. */ _PHOLD(p1); PROC_UNLOCK(p1); /* * Attach the new process to its parent. * * If RFNOWAIT is set, the newly created process becomes a child * of init. This effectively disassociates the child from the * parent. */ if ((fr->fr_flags & RFNOWAIT) != 0) { pptr = p1->p_reaper; p2->p_reaper = pptr; } else { p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ? p1 : p1->p_reaper; pptr = p1; } p2->p_pptr = pptr; p2->p_oppid = pptr->p_pid; LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); LIST_INIT(&p2->p_reaplist); LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling); if (p2->p_reaper == p1 && p1 != initproc) { p2->p_reapsubtree = p2->p_pid; proc_id_set_cond(PROC_ID_REAP, p2->p_pid); } sx_xunlock(&proctree_lock); /* Inform accounting that we have forked. */ p2->p_acflag = AFORK; PROC_UNLOCK(p2); #ifdef KTRACE ktrprocfork(p1, p2); #endif /* * Finish creating the child process. It will return via a different * execution path later. (ie: directly into user mode) */ vm_forkproc(td, p2, td2, vm2, fr->fr_flags); if (fr->fr_flags == (RFFDG | RFPROC)) { VM_CNT_INC(v_forks); VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { VM_CNT_INC(v_vforks); VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else if (p1 == &proc0) { VM_CNT_INC(v_kthreads); VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else { VM_CNT_INC(v_rforks); VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } /* * Associate the process descriptor with the process before anything * can happen that might cause that process to need the descriptor. * However, don't do this until after fork(2) can no longer fail. */ if (fr->fr_flags & RFPROCDESC) procdesc_new(p2, fr->fr_pd_flags); /* * Both processes are set up, now check if any loadable modules want * to adjust anything. */ EVENTHANDLER_DIRECT_INVOKE(process_fork, p1, p2, fr->fr_flags); /* * Set the child start time and mark the process as being complete. */ PROC_LOCK(p2); PROC_LOCK(p1); microuptime(&p2->p_stats->p_start); PROC_SLOCK(p2); p2->p_state = PRS_NORMAL; PROC_SUNLOCK(p2); #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the new process so that any * tracepoints inherited from the parent can be removed. We have to do * this only after p_state is PRS_NORMAL since the fasttrap module will * use pfind() later on. */ if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork) dtrace_fasttrap_fork(p1, p2); #endif if (fr->fr_flags & RFPPWAIT) { td->td_pflags |= TDP_RFPPWAIT; td->td_rfppwait_p = p2; td->td_dbgflags |= TDB_VFORK; } PROC_UNLOCK(p2); /* * Tell any interested parties about the new process. */ knote_fork(p1->p_klist, p2->p_pid); /* * Now can be swapped. */ _PRELE(p1); PROC_UNLOCK(p1); SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags); if (fr->fr_flags & RFPROCDESC) { procdesc_finit(p2->p_procdesc, fp_procdesc); fdrop(fp_procdesc, td); } /* * Speculative check for PTRACE_FORK. PTRACE_FORK is not * synced with forks in progress so it is OK if we miss it * if being set atm. */ if ((p1->p_ptevents & PTRACE_FORK) != 0) { sx_xlock(&proctree_lock); PROC_LOCK(p2); /* * p1->p_ptevents & p1->p_pptr are protected by both * process and proctree locks for modifications, * so owning proctree_lock allows the race-free read. */ if ((p1->p_ptevents & PTRACE_FORK) != 0) { /* * Arrange for debugger to receive the fork event. * * We can report PL_FLAG_FORKED regardless of * P_FOLLOWFORK settings, but it does not make a sense * for runaway child. */ td->td_dbgflags |= TDB_FORK; td->td_dbg_forked = p2->p_pid; td2->td_dbgflags |= TDB_STOPATFORK; proc_set_traced(p2, true); CTR2(KTR_PTRACE, "do_fork: attaching to new child pid %d: oppid %d", p2->p_pid, p2->p_oppid); proc_reparent(p2, p1->p_pptr, false); } PROC_UNLOCK(p2); sx_xunlock(&proctree_lock); } racct_proc_fork_done(p2); if ((fr->fr_flags & RFSTOPPED) == 0) { if (fr->fr_pidp != NULL) *fr->fr_pidp = p2->p_pid; /* * If RFSTOPPED not requested, make child runnable and * add to run queue. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); } else { *fr->fr_procp = p2; } } void fork_rfppwait(struct thread *td) { struct proc *p, *p2; MPASS(td->td_pflags & TDP_RFPPWAIT); p = td->td_proc; /* * Preserve synchronization semantics of vfork. If * waiting for child to exec or exit, fork set * P_PPWAIT on child, and there we sleep on our proc * (in case of exit). * * Do it after the ptracestop() above is finished, to * not block our debugger until child execs or exits * to finish vfork wait. */ td->td_pflags &= ~TDP_RFPPWAIT; p2 = td->td_rfppwait_p; again: PROC_LOCK(p2); while (p2->p_flag & P_PPWAIT) { PROC_LOCK(p); if (thread_suspend_check_needed()) { PROC_UNLOCK(p2); thread_suspend_check(0); PROC_UNLOCK(p); goto again; } else { PROC_UNLOCK(p); } cv_timedwait(&p2->p_pwait, &p2->p_mtx, hz); } PROC_UNLOCK(p2); if (td->td_dbgflags & TDB_VFORK) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_VFORK) ptracestop(td, SIGTRAP, NULL); td->td_dbgflags &= ~TDB_VFORK; PROC_UNLOCK(p); } } int fork1(struct thread *td, struct fork_req *fr) { struct proc *p1, *newproc; struct thread *td2; struct vmspace *vm2; struct ucred *cred; struct file *fp_procdesc; vm_ooffset_t mem_charged; int error, nprocs_new; static int curfail; static struct timeval lastfail; int flags, pages; flags = fr->fr_flags; pages = fr->fr_pages; if ((flags & RFSTOPPED) != 0) MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL); else MPASS(fr->fr_procp == NULL); /* Check for the undefined or unimplemented flags. */ if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) return (EINVAL); /* Signal value requires RFTSIGZMB. */ if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0) return (EINVAL); /* Can't copy and clear. */ if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) return (EINVAL); /* Check the validity of the signal number. */ if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) return (EINVAL); if ((flags & RFPROCDESC) != 0) { /* Can't not create a process yet get a process descriptor. */ if ((flags & RFPROC) == 0) return (EINVAL); /* Must provide a place to put a procdesc if creating one. */ if (fr->fr_pd_fd == NULL) return (EINVAL); /* Check if we are using supported flags. */ if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0) return (EINVAL); } p1 = td->td_proc; /* * Here we don't create a new process, but we divorce * certain parts of a process from itself. */ if ((flags & RFPROC) == 0) { if (fr->fr_procp != NULL) *fr->fr_procp = NULL; else if (fr->fr_pidp != NULL) *fr->fr_pidp = 0; return (fork_norfproc(td, flags)); } fp_procdesc = NULL; newproc = NULL; vm2 = NULL; /* * Increment the nprocs resource before allocations occur. * Although process entries are dynamically created, we still * keep a global limit on the maximum number we will * create. There are hard-limits as to the number of processes * that can run, established by the KVA and memory usage for * the process data. * * Don't allow a nonprivileged user to use the last ten * processes; don't let root exceed the limit. */ nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1; if (nprocs_new >= maxproc - 10) { if (priv_check_cred(td->td_ucred, PRIV_MAXPROC) != 0 || nprocs_new >= maxproc) { error = EAGAIN; sx_xlock(&allproc_lock); if (ppsratecheck(&lastfail, &curfail, 1)) { printf("maxproc limit exceeded by uid %u " "(pid %d); see tuning(7) and " "login.conf(5)\n", td->td_ucred->cr_ruid, p1->p_pid); } sx_xunlock(&allproc_lock); goto fail2; } } /* * If required, create a process descriptor in the parent first; we * will abandon it if something goes wrong. We don't finit() until * later. */ if (flags & RFPROCDESC) { error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd, fr->fr_pd_flags, fr->fr_pd_fcaps); if (error != 0) goto fail2; AUDIT_ARG_FD(*fr->fr_pd_fd); } mem_charged = 0; if (pages == 0) pages = kstack_pages; /* Allocate new proc. */ newproc = uma_zalloc(proc_zone, M_WAITOK); td2 = FIRST_THREAD_IN_PROC(newproc); if (td2 == NULL) { td2 = thread_alloc(pages); if (td2 == NULL) { error = ENOMEM; goto fail2; } proc_linkup(newproc, td2); } else { if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) { if (td2->td_kstack != 0) vm_thread_dispose(td2); if (!thread_alloc_stack(td2, pages)) { error = ENOMEM; goto fail2; } } } if ((flags & RFMEM) == 0) { vm2 = vmspace_fork(p1->p_vmspace, &mem_charged); if (vm2 == NULL) { error = ENOMEM; goto fail2; } if (!swap_reserve(mem_charged)) { /* * The swap reservation failed. The accounting * from the entries of the copied vm2 will be * subtracted in vmspace_free(), so force the * reservation there. */ swap_reserve_force(mem_charged); error = ENOMEM; goto fail2; } } else vm2 = NULL; /* * XXX: This is ugly; when we copy resource usage, we need to bump * per-cred resource counters. */ proc_set_cred_init(newproc, td->td_ucred); /* * Initialize resource accounting for the child process. */ error = racct_proc_fork(p1, newproc); if (error != 0) { error = EAGAIN; goto fail1; } #ifdef MAC mac_proc_init(newproc); #endif newproc->p_klist = knlist_alloc(&newproc->p_mtx); STAILQ_INIT(&newproc->p_ktr); /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit. */ cred = td->td_ucred; if (!chgproccnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPROC))) { if (priv_check_cred(cred, PRIV_PROC_LIMIT) != 0) goto fail0; chgproccnt(cred->cr_ruidinfo, 1, 0); } do_fork(td, fr, newproc, td2, vm2, fp_procdesc); return (0); fail0: error = EAGAIN; #ifdef MAC mac_proc_destroy(newproc); #endif racct_proc_exit(newproc); fail1: proc_unset_cred(newproc); fail2: if (vm2 != NULL) vmspace_free(vm2); uma_zfree(proc_zone, newproc); if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) { fdclose(td, fp_procdesc, *fr->fr_pd_fd); fdrop(fp_procdesc, td); } atomic_add_int(&nprocs, -1); pause("fork", hz / 2); return (error); } /* * Handle the return of a child process from fork1(). This function * is called from the MD fork_trampoline() entry point. */ void fork_exit(void (*callout)(void *, struct trapframe *), void *arg, struct trapframe *frame) { struct proc *p; struct thread *td; struct thread *dtd; td = curthread; p = td->td_proc; KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)", td, td_get_sched(td), p->p_pid, td->td_name); sched_fork_exit(td); /* * Processes normally resume in mi_switch() after being * cpu_switch()'ed to, but when children start up they arrive here * instead, so we must do much the same things as mi_switch() would. */ if ((dtd = PCPU_GET(deadthread))) { PCPU_SET(deadthread, NULL); thread_stash(dtd); } thread_unlock(td); /* * cpu_fork_kthread_handler intercepts this function call to * have this call a non-return function to stay in kernel mode. * initproc has its own fork handler, but it does return. */ KASSERT(callout != NULL, ("NULL callout in fork_exit")); callout(arg, frame); /* * Check if a kernel thread misbehaved and returned from its main * function. */ if (p->p_flag & P_KPROC) { printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", td->td_name, p->p_pid); kthread_exit(); } mtx_assert(&Giant, MA_NOTOWNED); if (p->p_sysent->sv_schedtail != NULL) (p->p_sysent->sv_schedtail)(td); td->td_pflags &= ~TDP_FORKING; } /* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. This function is passed in to fork_exit() * as the first parameter and is called when returning to a new * userland process. */ void fork_return(struct thread *td, struct trapframe *frame) { struct proc *p; p = td->td_proc; if (td->td_dbgflags & TDB_STOPATFORK) { PROC_LOCK(p); if ((p->p_flag & P_TRACED) != 0) { /* * Inform the debugger if one is still present. */ td->td_dbgflags |= TDB_CHILD | TDB_SCX | TDB_FSTP; ptracestop(td, SIGSTOP, NULL); td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX); } else { /* * ... otherwise clear the request. */ td->td_dbgflags &= ~TDB_STOPATFORK; } PROC_UNLOCK(p); } else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) { /* * This is the start of a new thread in a traced * process. Report a system call exit event. */ PROC_LOCK(p); td->td_dbgflags |= TDB_SCX; if ((p->p_ptevents & PTRACE_SCX) != 0 || (td->td_dbgflags & TDB_BORN) != 0) ptracestop(td, SIGTRAP, NULL); td->td_dbgflags &= ~(TDB_SCX | TDB_BORN); PROC_UNLOCK(p); } /* * If the prison was killed mid-fork, die along with it. */ if (!prison_isalive(td->td_ucred->cr_prison)) exit1(td, 0, SIGKILL); userret(td, frame); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(SYS_fork, 0, 0); #endif } diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index a0cb230f191c..0e7ad3c9ff23 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -1,118 +1,219 @@ /*- * Copyright (c) 2017 Colin Percival * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include +#include +#include +#include +#include #include #include #include #include #include #include #ifndef TSLOGSIZE #define TSLOGSIZE 262144 #endif static volatile long nrecs = 0; static struct timestamp { void * td; int type; const char * f; const char * s; uint64_t tsc; } timestamps[TSLOGSIZE]; void tslog(void * td, int type, const char * f, const char * s) { uint64_t tsc = get_cyclecount(); long pos; /* Grab a slot. */ pos = atomic_fetchadd_long(&nrecs, 1); /* Store record. */ if (pos < nitems(timestamps)) { timestamps[pos].td = td; timestamps[pos].type = type; timestamps[pos].f = f; timestamps[pos].s = s; timestamps[pos].tsc = tsc; } } static int sysctl_debug_tslog(SYSCTL_HANDLER_ARGS) { int error; struct sbuf *sb; size_t i, limit; + caddr_t loader_tslog; + void * loader_tslog_buf; + size_t loader_tslog_len; /* * This code can race against the code in tslog() which stores * records: Theoretically we could end up reading a record after * its slots have been reserved but before it has been written. * Since this code takes orders of magnitude longer to run than * tslog() takes to write a record, it is highly unlikely that * anyone will ever experience this race. */ sb = sbuf_new_for_sysctl(NULL, NULL, 1024, req); + + /* Get data from the boot loader, if it provided any. */ + loader_tslog = preload_search_by_type("TSLOG data"); + if (loader_tslog != NULL) { + loader_tslog_buf = preload_fetch_addr(loader_tslog); + loader_tslog_len = preload_fetch_size(loader_tslog); + sbuf_bcat(sb, loader_tslog_buf, loader_tslog_len); + } + + /* Add data logged within the kernel. */ limit = MIN(nrecs, nitems(timestamps)); for (i = 0; i < limit; i++) { sbuf_printf(sb, "%p", timestamps[i].td); sbuf_printf(sb, " %llu", (unsigned long long)timestamps[i].tsc); switch (timestamps[i].type) { case TS_ENTER: sbuf_printf(sb, " ENTER"); break; case TS_EXIT: sbuf_printf(sb, " EXIT"); break; case TS_THREAD: sbuf_printf(sb, " THREAD"); break; case TS_EVENT: sbuf_printf(sb, " EVENT"); break; } sbuf_printf(sb, " %s", timestamps[i].f ? timestamps[i].f : "(null)"); if (timestamps[i].s) sbuf_printf(sb, " %s\n", timestamps[i].s); else sbuf_printf(sb, "\n"); } error = sbuf_finish(sb); sbuf_delete(sb); return (error); } SYSCTL_PROC(_debug, OID_AUTO, tslog, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_debug_tslog, "", "Dump recorded event timestamps"); + +MALLOC_DEFINE(M_TSLOGUSER, "tsloguser", "Strings used by userland tslog"); +static struct procdata { + pid_t ppid; + uint64_t tsc_forked; + uint64_t tsc_exited; + char * execname; + char * namei; + int reused; +} procs[PID_MAX + 1]; + +void +tslog_user(pid_t pid, pid_t ppid, const char * execname, const char * namei) +{ + uint64_t tsc = get_cyclecount(); + + /* If we wrapped, do nothing. */ + if (procs[pid].reused) + return; + + /* If we have a ppid, we're recording a fork. */ + if (ppid != (pid_t)(-1)) { + /* If we have a ppid already, we wrapped. */ + if (procs[pid].ppid) { + procs[pid].reused = 1; + return; + } + + /* Fill in some fields. */ + procs[pid].ppid = ppid; + procs[pid].tsc_forked = tsc; + return; + } + + /* If we have an execname, record it. */ + if (execname != NULL) { + if (procs[pid].execname != NULL) + free(procs[pid].execname, M_TSLOGUSER); + procs[pid].execname = strdup(execname, M_TSLOGUSER); + return; + } + + /* Record the first namei for the process. */ + if (namei != NULL) { + if (procs[pid].namei == NULL) + procs[pid].namei = strdup(namei, M_TSLOGUSER); + return; + } + + /* Otherwise we're recording an exit. */ + procs[pid].tsc_exited = tsc; +} + +static int +sysctl_debug_tslog_user(SYSCTL_HANDLER_ARGS) +{ + int error; + struct sbuf *sb; + pid_t pid; + + sb = sbuf_new_for_sysctl(NULL, NULL, 1024, req); + + /* Export the data we logged. */ + for (pid = 0; pid <= PID_MAX; pid++) { + sbuf_printf(sb, "%zu", (size_t)pid); + sbuf_printf(sb, " %zu", (size_t)procs[pid].ppid); + sbuf_printf(sb, " %llu", + (unsigned long long)procs[pid].tsc_forked); + sbuf_printf(sb, " %llu", + (unsigned long long)procs[pid].tsc_exited); + sbuf_printf(sb, " \"%s\"", procs[pid].execname ? + procs[pid].execname : ""); + sbuf_printf(sb, " \"%s\"", procs[pid].namei ? + procs[pid].namei : ""); + sbuf_printf(sb, "\n"); + } + error = sbuf_finish(sb); + sbuf_delete(sb); + return (error); +} + +SYSCTL_PROC(_debug, OID_AUTO, tslog_user, + CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_debug_tslog_user, + "", "Dump recorded userland event timestamps"); diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 454a10175e1b..f4faf0c77210 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -1,1781 +1,1782 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef INVARIANTS #include #endif #include #include #include #define NAMEI_DIAGNOSTIC 1 #undef NAMEI_DIAGNOSTIC SDT_PROVIDER_DEFINE(vfs); SDT_PROBE_DEFINE4(vfs, namei, lookup, entry, "struct vnode *", "char *", "unsigned long", "bool"); SDT_PROBE_DEFINE4(vfs, namei, lookup, return, "int", "struct vnode *", "bool", "struct nameidata"); /* Allocation zone for namei. */ uma_zone_t namei_zone; /* Placeholder vnode for mp traversal. */ static struct vnode *vp_crossmp; static int crossmp_vop_islocked(struct vop_islocked_args *ap) { return (LK_SHARED); } static int crossmp_vop_lock1(struct vop_lock1_args *ap) { struct vnode *vp; struct lock *lk __unused; const char *file __unused; int flags, line __unused; vp = ap->a_vp; lk = vp->v_vnlock; flags = ap->a_flags; file = ap->a_file; line = ap->a_line; if ((flags & LK_SHARED) == 0) panic("invalid lock request for crossmp"); WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, flags & LK_INTERLOCK ? &VI_MTX(vp)->lock_object : NULL); WITNESS_LOCK(&lk->lock_object, 0, file, line); if ((flags & LK_INTERLOCK) != 0) VI_UNLOCK(vp); LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, ap->a_file, line); return (0); } static int crossmp_vop_unlock(struct vop_unlock_args *ap) { struct vnode *vp; struct lock *lk __unused; vp = ap->a_vp; lk = vp->v_vnlock; WITNESS_UNLOCK(&lk->lock_object, 0, LOCK_FILE, LOCK_LINE); LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, LOCK_FILE, LOCK_LINE); return (0); } static struct vop_vector crossmp_vnodeops = { .vop_default = &default_vnodeops, .vop_islocked = crossmp_vop_islocked, .vop_lock1 = crossmp_vop_lock1, .vop_unlock = crossmp_vop_unlock, }; /* * VFS_VOP_VECTOR_REGISTER(crossmp_vnodeops) is not used here since the vnode * gets allocated early. See nameiinit for the direct call below. */ struct nameicap_tracker { struct vnode *dp; TAILQ_ENTRY(nameicap_tracker) nm_link; }; /* Zone for cap mode tracker elements used for dotdot capability checks. */ MALLOC_DEFINE(M_NAMEITRACKER, "namei_tracker", "namei tracking for dotdot"); static void nameiinit(void *dummy __unused) { namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); vfs_vector_op_register(&crossmp_vnodeops); getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); static int lookup_cap_dotdot = 1; SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN, &lookup_cap_dotdot, 0, "enables \"..\" components in path lookup in capability mode"); static int lookup_cap_dotdot_nonlocal = 1; SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN, &lookup_cap_dotdot_nonlocal, 0, "enables \"..\" components in path lookup in capability mode " "on non-local mount"); static void nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) { struct nameicap_tracker *nt; struct componentname *cnp; if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) return; cnp = &ndp->ni_cnd; nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head); if (nt != NULL && nt->dp == dp) return; nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK); vhold(dp); nt->dp = dp; TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); } static void nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first) { struct nameicap_tracker *nt, *nt1; nt = first; TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); vdrop(nt->dp); free(nt, M_NAMEITRACKER); } } static void nameicap_cleanup(struct nameidata *ndp) { KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); nameicap_cleanup_from(ndp, NULL); } /* * For dotdot lookups in capability mode, only allow the component * lookup to succeed if the resulting directory was already traversed * during the operation. This catches situations where already * traversed directory is moved to different parent, and then we walk * over it with dotdots. * * Also allow to force failure of dotdot lookups for non-local * filesystems, where external agents might assist local lookups to * escape the compartment. */ static int nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) { struct nameicap_tracker *nt; struct mount *mp; if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0) return (0); if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0) return (ENOTCAPABLE); mp = dp->v_mount; if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && (mp->mnt_flag & MNT_LOCAL) == 0) return (ENOTCAPABLE); TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, nm_link) { if (dp == nt->dp) { nt = TAILQ_NEXT(nt, nm_link); if (nt != NULL) nameicap_cleanup_from(ndp, nt); return (0); } } return (ENOTCAPABLE); } static void namei_cleanup_cnp(struct componentname *cnp) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif } static int namei_handle_root(struct nameidata *ndp, struct vnode **dpp) { struct componentname *cnp; cnp = &ndp->ni_cnd; if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif return (ENOTCAPABLE); } while (*(cnp->cn_nameptr) == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } *dpp = ndp->ni_rootdir; vrefact(*dpp); return (0); } static int namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) { struct componentname *cnp; struct file *dfp; struct thread *td; struct pwd *pwd; cap_rights_t rights; int error; bool startdir_used; cnp = &ndp->ni_cnd; td = cnp->cn_thread; startdir_used = false; *pwdp = NULL; *dpp = NULL; #ifdef CAPABILITY_MODE /* * In capability mode, lookups must be restricted to happen in * the subtree with the root specified by the file descriptor: * - The root must be real file descriptor, not the pseudo-descriptor * AT_FDCWD. * - The passed path must be relative and not absolute. * - If lookup_cap_dotdot is disabled, path must not contain the * '..' components. * - If lookup_cap_dotdot is enabled, we verify that all '..' * components lookups result in the directories which were * previously walked by us, which prevents an escape from * the relative root. */ if (IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) { ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; ndp->ni_resflags |= NIRES_STRICTREL; if (ndp->ni_dirfd == AT_FDCWD) { #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif return (ECAPMODE); } } #endif error = 0; /* * Get starting point for the translation. */ pwd = pwd_hold(td); /* * The reference on ni_rootdir is acquired in the block below to avoid * back-to-back atomics for absolute lookups. */ ndp->ni_rootdir = pwd->pwd_rdir; ndp->ni_topdir = pwd->pwd_jdir; if (cnp->cn_pnbuf[0] == '/') { ndp->ni_resflags |= NIRES_ABS; error = namei_handle_root(ndp, dpp); } else { if (ndp->ni_startdir != NULL) { *dpp = ndp->ni_startdir; startdir_used = true; } else if (ndp->ni_dirfd == AT_FDCWD) { *dpp = pwd->pwd_cdir; vrefact(*dpp); } else { rights = *ndp->ni_rightsneeded; cap_rights_set_one(&rights, CAP_LOOKUP); if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG_ATFD1(ndp->ni_dirfd); if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_ATFD2(ndp->ni_dirfd); /* * Effectively inlined fgetvp_rights, because * we need to inspect the file as well as * grabbing the vnode. No check for O_PATH, * files to implement its semantic. */ error = fget_cap(td, ndp->ni_dirfd, &rights, &dfp, &ndp->ni_filecaps); if (error != 0) { /* * Preserve the error; it should either be EBADF * or capability-related, both of which can be * safely returned to the caller. */ } else { if (dfp->f_ops == &badfileops) { error = EBADF; } else if (dfp->f_vnode == NULL) { error = ENOTDIR; } else { *dpp = dfp->f_vnode; vref(*dpp); if ((dfp->f_flag & FSEARCH) != 0) cnp->cn_flags |= NOEXECCHECK; } fdrop(dfp, td); } #ifdef CAPABILITIES /* * If file descriptor doesn't have all rights, * all lookups relative to it must also be * strictly relative. */ CAP_ALL(&rights); if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) || ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || ndp->ni_filecaps.fc_nioctls != -1) { ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; ndp->ni_resflags |= NIRES_STRICTREL; } #endif } if (error == 0 && (*dpp)->v_type != VDIR && (cnp->cn_pnbuf[0] != '\0' || (cnp->cn_flags & EMPTYPATH) == 0)) error = ENOTDIR; } if (error == 0 && (cnp->cn_flags & RBENEATH) != 0) { if (cnp->cn_pnbuf[0] == '/') { error = ENOTCAPABLE; } else if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0) { ndp->ni_lcf |= NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT; } } /* * If we are auditing the kernel pathname, save the user pathname. */ if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf); if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf); if (ndp->ni_startdir != NULL && !startdir_used) vrele(ndp->ni_startdir); if (error != 0) { if (*dpp != NULL) vrele(*dpp); pwd_drop(pwd); return (error); } if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 && lookup_cap_dotdot != 0) ndp->ni_lcf |= NI_LCF_CAP_DOTDOT; SDT_PROBE4(vfs, namei, lookup, entry, *dpp, cnp->cn_pnbuf, cnp->cn_flags, false); *pwdp = pwd; return (0); } static int namei_getpath(struct nameidata *ndp) { struct componentname *cnp; int error; cnp = &ndp->ni_cnd; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. */ cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (ndp->ni_segflg == UIO_SYSSPACE) { error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); } else { error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); } if (__predict_false(error != 0)) return (error); /* * Don't allow empty pathnames unless EMPTYPATH is specified. * Caller checks for ENOENT as an indication for the empty path. */ if (__predict_false(*cnp->cn_pnbuf == '\0')) return (ENOENT); cnp->cn_nameptr = cnp->cn_pnbuf; return (0); } static int namei_emptypath(struct nameidata *ndp) { struct componentname *cnp; struct pwd *pwd; struct vnode *dp; int error; cnp = &ndp->ni_cnd; MPASS(*cnp->cn_pnbuf == '\0'); MPASS((cnp->cn_flags & EMPTYPATH) != 0); MPASS((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) == 0); ndp->ni_resflags |= NIRES_EMPTYPATH; error = namei_setup(ndp, &dp, &pwd); if (error != 0) { namei_cleanup_cnp(cnp); goto errout; } /* * Usecount on dp already provided by namei_setup. */ ndp->ni_vp = dp; namei_cleanup_cnp(cnp); pwd_drop(pwd); NDVALIDATE(ndp); if ((cnp->cn_flags & LOCKLEAF) != 0) { VOP_LOCK(dp, (cnp->cn_flags & LOCKSHARED) != 0 ? LK_SHARED : LK_EXCLUSIVE); if (VN_IS_DOOMED(dp)) { vput(dp); error = ENOENT; goto errout; } } SDT_PROBE4(vfs, namei, lookup, return, 0, ndp->ni_vp, false, ndp); return (0); errout: SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); return (error); } /* * Convert a pathname into a pointer to a locked vnode. * * The FOLLOW flag is set when symbolic links are to be followed * when they occur at the end of the name translation process. * Symbolic links are always followed for all other pathname * components other than the last. * * The segflg defines whether the name is to be copied from user * space or kernel space. * * Overall outline of namei: * * copy in name * get starting directory * while (!done && !error) { * call lookup to search path. * if symbolic link, massage name in buffer and continue * } */ int namei(struct nameidata *ndp) { char *cp; /* pointer into pathname argument */ struct vnode *dp; /* the directory we are searching */ struct iovec aiov; /* uio for reading symbolic links */ struct componentname *cnp; struct thread *td; struct pwd *pwd; struct uio auio; int error, linklen; enum cache_fpl_status status; cnp = &ndp->ni_cnd; td = cnp->cn_thread; #ifdef INVARIANTS KASSERT(cnp->cn_thread == curthread, ("namei not using curthread")); KASSERT((ndp->ni_debugflags & NAMEI_DBG_CALLED) == 0, ("%s: repeated call to namei without NDREINIT", __func__)); KASSERT(ndp->ni_debugflags == NAMEI_DBG_INITED, ("%s: bad debugflags %d", __func__, ndp->ni_debugflags)); ndp->ni_debugflags |= NAMEI_DBG_CALLED; if (ndp->ni_startdir != NULL) ndp->ni_debugflags |= NAMEI_DBG_HADSTARTDIR; if (cnp->cn_flags & FAILIFEXISTS) { KASSERT(cnp->cn_nameiop == CREATE, ("%s: FAILIFEXISTS passed for op %d", __func__, cnp->cn_nameiop)); /* * The limitation below is to restrict hairy corner cases. */ KASSERT((cnp->cn_flags & (LOCKPARENT | LOCKLEAF)) == LOCKPARENT, ("%s: FAILIFEXISTS must be passed with LOCKPARENT and without LOCKLEAF", __func__)); } /* * For NDVALIDATE. * * While NDINIT may seem like a more natural place to do it, there are * callers which directly modify flags past invoking init. */ cnp->cn_origflags = cnp->cn_flags; #endif ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; KASSERT(ndp->ni_resflags == 0, ("%s: garbage in ni_resflags: %x\n", __func__, ndp->ni_resflags)); KASSERT(cnp->cn_cred && td->td_proc, ("namei: bad cred/proc")); KASSERT((cnp->cn_flags & NAMEI_INTERNAL_FLAGS) == 0, ("namei: unexpected flags: %" PRIx64 "\n", cnp->cn_flags & NAMEI_INTERNAL_FLAGS)); if (cnp->cn_flags & NOCACHE) KASSERT(cnp->cn_nameiop != LOOKUP, ("%s: NOCACHE passed with LOOKUP", __func__)); MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || ndp->ni_startdir->v_type == VBAD); ndp->ni_lcf = 0; ndp->ni_loopcnt = 0; ndp->ni_vp = NULL; error = namei_getpath(ndp); if (__predict_false(error != 0)) { if (error == ENOENT && (cnp->cn_flags & EMPTYPATH) != 0) return (namei_emptypath(ndp)); namei_cleanup_cnp(cnp); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); return (error); } #ifdef KTRACE if (KTRPOINT(td, KTR_NAMEI)) { ktrnamei(cnp->cn_pnbuf); } #endif + TSNAMEI(curthread->td_proc->p_pid, cnp->cn_pnbuf); /* * First try looking up the target without locking any vnodes. * * We may need to start from scratch or pick up where it left off. */ error = cache_fplookup(ndp, &status, &pwd); switch (status) { case CACHE_FPL_STATUS_UNSET: __assert_unreachable(); break; case CACHE_FPL_STATUS_HANDLED: if (error == 0) NDVALIDATE(ndp); return (error); case CACHE_FPL_STATUS_PARTIAL: TAILQ_INIT(&ndp->ni_cap_tracker); dp = ndp->ni_startdir; break; case CACHE_FPL_STATUS_DESTROYED: ndp->ni_loopcnt = 0; error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); return (error); } /* FALLTHROUGH */ case CACHE_FPL_STATUS_ABORTED: TAILQ_INIT(&ndp->ni_cap_tracker); MPASS(ndp->ni_lcf == 0); error = namei_setup(ndp, &dp, &pwd); if (error != 0) { namei_cleanup_cnp(cnp); return (error); } break; } /* * Locked lookup. */ for (;;) { ndp->ni_startdir = dp; error = lookup(ndp); if (error != 0) goto out; /* * If not a symbolic link, we're done. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { SDT_PROBE4(vfs, namei, lookup, return, error, (error == 0 ? ndp->ni_vp : NULL), false, ndp); if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { namei_cleanup_cnp(cnp); } else cnp->cn_flags |= HASBUF; nameicap_cleanup(ndp); pwd_drop(pwd); if (error == 0) NDVALIDATE(ndp); return (error); } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; } #ifdef MAC if ((cnp->cn_flags & NOMACCHECK) == 0) { error = mac_vnode_check_readlink(td->td_ucred, ndp->ni_vp); if (error != 0) break; } #endif if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error != 0) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); error = ENOENT; break; } if (linklen + ndp->ni_pathlen > MAXPATHLEN) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); error = ENAMETOOLONG; break; } if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; vput(ndp->ni_vp); dp = ndp->ni_dvp; /* * Check if root directory should replace current directory. */ cnp->cn_nameptr = cnp->cn_pnbuf; if (*(cnp->cn_nameptr) == '/') { vrele(dp); error = namei_handle_root(ndp, &dp); if (error != 0) goto out; } } vput(ndp->ni_vp); ndp->ni_vp = NULL; vrele(ndp->ni_dvp); out: MPASS(error != 0); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); namei_cleanup_cnp(cnp); nameicap_cleanup(ndp); pwd_drop(pwd); return (error); } static int compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags) { if (mp == NULL || ((lkflags & LK_SHARED) && (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) || ((cnflags & ISDOTDOT) && (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) { lkflags &= ~LK_SHARED; lkflags |= LK_EXCLUSIVE; } lkflags |= LK_NODDLKTREAT; return (lkflags); } static __inline int needs_exclusive_leaf(struct mount *mp, int flags) { /* * Intermediate nodes can use shared locks, we only need to * force an exclusive lock for leaf nodes. */ if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF)) return (0); /* Always use exclusive locks if LOCKSHARED isn't set. */ if (!(flags & LOCKSHARED)) return (1); /* * For lookups during open(), if the mount point supports * extended shared operations, then use a shared lock for the * leaf node, otherwise use an exclusive lock. */ if ((flags & ISOPEN) != 0) return (!MNT_EXTENDED_SHARED(mp)); /* * Lookup requests outside of open() that specify LOCKSHARED * only need a shared lock on the leaf vnode. */ return (0); } /* * Various filesystems expect to be able to copy a name component with length * bounded by NAME_MAX into a directory entry buffer of size MAXNAMLEN. Make * sure that these are the same size. */ _Static_assert(MAXNAMLEN == NAME_MAX, "MAXNAMLEN and NAME_MAX have different values"); /* * Search a pathname. * This is a very central and rather complicated routine. * * The pathname is pointed to by ni_ptr and is of length ni_pathlen. * The starting directory is taken from ni_startdir. The pathname is * descended until done, or a symbolic link is encountered. The variable * ni_more is clear if the path is completed; it is set to one if a * symbolic link needing interpretation is encountered. * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it, the parent directory is returned * locked. If flag has WANTPARENT or'ed into it, the parent directory is * returned unlocked. Otherwise the parent directory is not returned. If * the target of the pathname exists and LOCKLEAF is or'ed into the flag * the target is returned locked, otherwise it is returned unlocked. * When creating or renaming and LOCKPARENT is specified, the target may not * be ".". When deleting and LOCKPARENT is specified, the target may be ".". * * Overall outline of lookup: * * dirloop: * identify next component of name at ndp->ni_ptr * handle degenerate case where name is null string * if .. and crossing mount points and on mounted filesys, find parent * call VOP_LOOKUP routine for next component name * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set * component vnode returned in ni_vp (if it exists), locked. * if result vnode is mounted on and crossing mount points, * find mounted on vnode * if more components of name, do next level at dirloop * return the answer in ni_vp, locked if LOCKLEAF set * if LOCKPARENT set, return locked parent in ni_dvp * if WANTPARENT set, return unlocked parent in ni_dvp */ int lookup(struct nameidata *ndp) { char *cp; /* pointer into pathname argument */ char *prev_ni_next; /* saved ndp->ni_next */ char *nulchar; /* location of '\0' in cn_pnbuf */ struct vnode *dp = NULL; /* the directory we are searching */ struct vnode *tdp; /* saved dp */ struct mount *mp; /* mount table entry */ struct prison *pr; size_t prev_ni_pathlen; /* saved ndp->ni_pathlen */ int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int error = 0; int dpunlocked = 0; /* dp has already been unlocked */ int relookup = 0; /* do not consume the path component */ struct componentname *cnp = &ndp->ni_cnd; int lkflags_save; int ni_dvp_unlocked; /* * Setup: break out flag bits into variables. */ ni_dvp_unlocked = 0; wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); /* * When set to zero, docache causes the last component of the * pathname to be deleted from the cache and the full lookup * of the name to be done (via VOP_CACHEDLOOKUP()). Often * filesystems need some pre-computed values that are made * during the full lookup, for instance UFS sets dp->i_offset. * * The docache variable is set to zero when requested by the * NOCACHE flag and for all modifying operations except CREATE. */ docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; if (cnp->cn_nameiop == DELETE || (wantparent && cnp->cn_nameiop != CREATE && cnp->cn_nameiop != LOOKUP)) docache = 0; rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; ndp->ni_dvp = NULL; /* * We use shared locks until we hit the parent of the last cn then * we adjust based on the requesting flags. */ cnp->cn_lkflags = LK_SHARED; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, cnp->cn_flags)); dirloop: /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. * * Store / as a temporary sentinel so that we only have one character * to test for. Pathnames tend to be short so this should not be * resulting in cache misses. */ nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1]; KASSERT(*nulchar == '\0', ("%s: expected nul at %p; string [%s]\n", __func__, nulchar, cnp->cn_pnbuf)); *nulchar = '/'; for (cp = cnp->cn_nameptr; *cp != '/'; cp++) { KASSERT(*cp != '\0', ("%s: encountered unexpected nul; string [%s]\n", __func__, cnp->cn_nameptr)); continue; } *nulchar = '\0'; cnp->cn_namelen = cp - cnp->cn_nameptr; if (cnp->cn_namelen > NAME_MAX) { error = ENAMETOOLONG; goto bad; } #ifdef NAMEI_DIAGNOSTIC { char c = *cp; *cp = '\0'; printf("{%s}: ", cnp->cn_nameptr); *cp = c; } #endif prev_ni_pathlen = ndp->ni_pathlen; ndp->ni_pathlen -= cnp->cn_namelen; KASSERT(ndp->ni_pathlen <= PATH_MAX, ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen)); prev_ni_next = ndp->ni_next; ndp->ni_next = cp; /* * Replace multiple slashes by a single slash and trailing slashes * by a null. This must be done before VOP_LOOKUP() because some * fs's don't know about trailing slashes. Remember if there were * trailing slashes to handle symlinks, existing non-directories * and non-existing files that won't be directories specially later. */ while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { cp++; ndp->ni_pathlen--; if (*cp == '\0') { *ndp->ni_next = '\0'; cnp->cn_flags |= TRAILINGSLASH; } } ndp->ni_next = cp; cnp->cn_flags |= MAKEENTRY; if (*cp == '\0' && docache == 0) cnp->cn_flags &= ~MAKEENTRY; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') cnp->cn_flags |= ISDOTDOT; else cnp->cn_flags &= ~ISDOTDOT; if (*ndp->ni_next == 0) cnp->cn_flags |= ISLASTCN; else cnp->cn_flags &= ~ISLASTCN; if ((cnp->cn_flags & ISLASTCN) != 0 && cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EINVAL; goto bad; } nameicap_tracker_add(ndp, dp); /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, * e.g. like "/." or ".". */ if (cnp->cn_nameptr[0] == '\0') { if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } if (cnp->cn_nameiop != LOOKUP) { error = EISDIR; goto bad; } if (wantparent) { ndp->ni_dvp = dp; VREF(dp); } ndp->ni_vp = dp; if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG_VNODE1(dp); else if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_VNODE2(dp); if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) VOP_UNLOCK(dp); /* XXX This should probably move to the top of function. */ if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); goto success; } /* * Handle "..": five special cases. * 0. If doing a capability lookup and lookup_cap_dotdot is * disabled, return ENOTCAPABLE. * 1. Return an error if this is the last component of * the name and the operation is DELETE or RENAME. * 2. If at root directory (e.g. after chroot) * or at absolute root directory * then ignore it so can't get out. * 3. If this vnode is the root of a mounted * filesystem, then replace it with the * vnode which was mounted on so we take the * .. in the other filesystem. * 4. If the vnode is the top directory of * the jail or chroot, don't let them out. * 5. If doing a capability lookup and lookup_cap_dotdot is * enabled, return ENOTCAPABLE if the lookup would escape * from the initial file descriptor directory. Checks are * done by ensuring that namei() already traversed the * result of dotdot lookup. */ if (cnp->cn_flags & ISDOTDOT) { if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT)) == NI_LCF_STRICTRELATIVE) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif error = ENOTCAPABLE; goto bad; } if ((cnp->cn_flags & ISLASTCN) != 0 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EINVAL; goto bad; } for (;;) { for (pr = cnp->cn_cred->cr_prison; pr != NULL; pr = pr->pr_parent) if (dp == pr->pr_root) break; if (dp == ndp->ni_rootdir || dp == ndp->ni_topdir || dp == rootvnode || pr != NULL || ((dp->v_vflag & VV_ROOT) != 0 && (cnp->cn_flags & NOCROSSMOUNT) != 0)) { ndp->ni_dvp = dp; ndp->ni_vp = dp; VREF(dp); goto nextname; } if ((dp->v_vflag & VV_ROOT) == 0) break; if (VN_IS_DOOMED(dp)) { /* forced unmount */ error = ENOENT; goto bad; } tdp = dp; dp = dp->v_mount->mnt_vnodecovered; VREF(dp); vput(tdp); vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, ISDOTDOT)); error = nameicap_check_dotdot(ndp, dp); if (error != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif goto bad; } } } /* * We now have a segment name to search for, and a directory to search. */ unionlookup: #ifdef MAC error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp, cnp); if (error) goto bad; #endif ndp->ni_dvp = dp; ndp->ni_vp = NULL; ASSERT_VOP_LOCKED(dp, "lookup"); /* * If we have a shared lock we may need to upgrade the lock for the * last operation. */ if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) && dp != vp_crossmp && VOP_ISLOCKED(dp) == LK_SHARED) vn_lock(dp, LK_UPGRADE|LK_RETRY); if (VN_IS_DOOMED(dp)) { error = ENOENT; goto bad; } /* * If we're looking up the last component and we need an exclusive * lock, adjust our lkflags. */ if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) cnp->cn_lkflags = LK_EXCLUSIVE; #ifdef NAMEI_DIAGNOSTIC vn_printf(dp, "lookup in "); #endif lkflags_save = cnp->cn_lkflags; cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags, cnp->cn_flags); error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp); cnp->cn_lkflags = lkflags_save; if (error != 0) { KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); #ifdef NAMEI_DIAGNOSTIC printf("not found\n"); #endif if ((error == ENOENT) && (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { tdp = dp; dp = dp->v_mount->mnt_vnodecovered; VREF(dp); vput(tdp); vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, cnp->cn_flags)); nameicap_tracker_add(ndp, dp); goto unionlookup; } if (error == ERELOOKUP) { vref(dp); ndp->ni_vp = dp; error = 0; relookup = 1; goto good; } if (error != EJUSTRETURN) goto bad; /* * At this point, we know we're at the end of the * pathname. If creating / renaming, we can consider * allowing the file or directory to be created / renamed, * provided we're not on a read-only filesystem. */ if (rdonly) { error = EROFS; goto bad; } /* trailing slash only allowed for directories */ if ((cnp->cn_flags & TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) { error = ENOENT; goto bad; } if ((cnp->cn_flags & LOCKPARENT) == 0) VOP_UNLOCK(dp); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory vnode in ndp->ni_dvp. */ if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } goto success; } good: #ifdef NAMEI_DIAGNOSTIC printf("found\n"); #endif dp = ndp->ni_vp; /* * Check to see if the vnode has been mounted on; * if so find the root of the mounted filesystem. */ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && (cnp->cn_flags & NOCROSSMOUNT) == 0) { if (vfs_busy(mp, 0)) continue; vput(dp); if (dp != ndp->ni_dvp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); vrefact(vp_crossmp); ndp->ni_dvp = vp_crossmp; error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags, cnp->cn_flags), &tdp); vfs_unbusy(mp); if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) panic("vp_crossmp exclusively locked or reclaimed"); if (error) { dpunlocked = 1; goto bad2; } ndp->ni_vp = dp = tdp; } /* * Check for symbolic link */ if ((dp->v_type == VLNK) && ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) || *ndp->ni_next == '/')) { cnp->cn_flags |= ISSYMLINK; if (VN_IS_DOOMED(dp)) { /* * We can't know whether the directory was mounted with * NOSYMFOLLOW, so we can't follow safely. */ error = ENOENT; goto bad2; } if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { error = EACCES; goto bad2; } /* * Symlink code always expects an unlocked dvp. */ if (ndp->ni_dvp != ndp->ni_vp) { VOP_UNLOCK(ndp->ni_dvp); ni_dvp_unlocked = 1; } goto success; } nextname: /* * Not a symbolic link that we will follow. Continue with the * next component if there is any; otherwise, we're done. */ KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', ("lookup: invalid path state.")); if (relookup) { relookup = 0; ndp->ni_pathlen = prev_ni_pathlen; ndp->ni_next = prev_ni_next; if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); goto dirloop; } if (cnp->cn_flags & ISDOTDOT) { error = nameicap_check_dotdot(ndp, ndp->ni_vp); if (error != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif goto bad2; } } if (*ndp->ni_next == '/') { cnp->cn_nameptr = ndp->ni_next; while (*cnp->cn_nameptr == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); goto dirloop; } /* * If we're processing a path with a trailing slash, * check that the end result is a directory. */ if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) { error = ENOTDIR; goto bad2; } /* * Disallow directory write attempts on read-only filesystems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EROFS; goto bad2; } if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } if (!wantparent) { ni_dvp_unlocked = 2; if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) { VOP_UNLOCK(ndp->ni_dvp); ni_dvp_unlocked = 1; } if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG_VNODE1(dp); else if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_VNODE2(dp); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp); success: /* * FIXME: for lookups which only cross a mount point to fetch the * root vnode, ni_dvp will be set to vp_crossmp. This can be a problem * if either WANTPARENT or LOCKPARENT is set. */ /* * Because of shared lookup we may have the vnode shared locked, but * the caller may want it to be exclusively locked. */ if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) && VOP_ISLOCKED(dp) != LK_EXCLUSIVE) { vn_lock(dp, LK_UPGRADE | LK_RETRY); if (VN_IS_DOOMED(dp)) { error = ENOENT; goto bad2; } } if (ndp->ni_vp != NULL) { if ((cnp->cn_flags & ISDOTDOT) == 0) nameicap_tracker_add(ndp, ndp->ni_vp); if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS) goto bad_eexist; } return (0); bad2: if (ni_dvp_unlocked != 2) { if (dp != ndp->ni_dvp && !ni_dvp_unlocked) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); } bad: if (!dpunlocked) vput(dp); ndp->ni_vp = NULL; return (error); bad_eexist: /* * FAILIFEXISTS handling. * * XXX namei called with LOCKPARENT but not LOCKLEAF has the strange * behaviour of leaving the vnode unlocked if the target is the same * vnode as the parent. */ MPASS((cnp->cn_flags & ISSYMLINK) == 0); if (ndp->ni_vp == ndp->ni_dvp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); ndp->ni_dvp = NULL; ndp->ni_vp = NULL; NDFREE(ndp, NDF_ONLY_PNBUF); return (EEXIST); } /* * relookup - lookup a path name component * Used by lookup to re-acquire things. */ int relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { struct vnode *dp = NULL; /* the directory we are searching */ int rdonly; /* lookup read-only flag bit */ int error = 0; KASSERT(cnp->cn_flags & ISLASTCN, ("relookup: Not given last component.")); /* * Setup: break out flag bits into variables. */ KASSERT((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) != 0, ("relookup: parent not wanted")); rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; dp = dvp; cnp->cn_lkflags = LK_EXCLUSIVE; vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. */ #ifdef NAMEI_DIAGNOSTIC printf("{%s}: ", cnp->cn_nameptr); #endif /* * Check for "" which represents the root directory after slash * removal. */ if (cnp->cn_nameptr[0] == '\0') { /* * Support only LOOKUP for "/" because lookup() * can't succeed for CREATE, DELETE and RENAME. */ KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP")); KASSERT(dp->v_type == VDIR, ("dp is not a directory")); if (!(cnp->cn_flags & LOCKLEAF)) VOP_UNLOCK(dp); *vpp = dp; /* XXX This should probably move to the top of function. */ if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); return (0); } if (cnp->cn_flags & ISDOTDOT) panic ("relookup: lookup on dot-dot"); /* * We now have a segment name to search for, and a directory to search. */ #ifdef NAMEI_DIAGNOSTIC vn_printf(dp, "search in "); #endif if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { KASSERT(*vpp == NULL, ("leaf should be empty")); if (error != EJUSTRETURN) goto bad; /* * If creating and at end of pathname, then can consider * allowing file to be created. */ if (rdonly) { error = EROFS; goto bad; } /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); if ((cnp->cn_flags & LOCKPARENT) == 0) VOP_UNLOCK(dp); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory vnode in ndp->ni_dvp. */ return (0); } dp = *vpp; /* * Disallow directory write attempts on read-only filesystems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { if (dvp == dp) vrele(dvp); else vput(dvp); error = EROFS; goto bad; } /* * Set the parent lock/ref state to the requested state. */ if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) VOP_UNLOCK(dvp); /* * Check for symbolic link */ KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), ("relookup: symlink found.\n")); /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp); return (0); bad: vput(dp); *vpp = NULL; return (error); } /* * Free data allocated by namei(); see namei(9) for details. */ void NDFREE_PNBUF(struct nameidata *ndp) { if ((ndp->ni_cnd.cn_flags & HASBUF) != 0) { MPASS((ndp->ni_cnd.cn_flags & (SAVENAME | SAVESTART)) != 0); uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } } /* * NDFREE_PNBUF replacement for callers that know there is no buffer. * * This is a hack. Preferably the VFS layer would not produce anything more * than it was asked to do. Unfortunately several non-LOOKUP cases can add the * HASBUF flag to the result. Even then an interface could be implemented where * the caller specifies what they expected to see in the result and what they * are going to take care of. * * In the meantime provide this kludge as a trivial replacement for NDFREE_PNBUF * calls scattered throughout the kernel where we know for a fact the flag must not * be seen. */ #ifdef INVARIANTS void NDFREE_NOTHING(struct nameidata *ndp) { struct componentname *cnp; cnp = &ndp->ni_cnd; KASSERT(cnp->cn_nameiop == LOOKUP, ("%s: got non-LOOKUP op %d\n", __func__, cnp->cn_nameiop)); KASSERT((cnp->cn_flags & (SAVENAME | HASBUF)) == 0, ("%s: bad flags \%" PRIx64 "\n", __func__, cnp->cn_flags)); } #endif void (NDFREE)(struct nameidata *ndp, const u_int flags) { int unlock_dvp; int unlock_vp; unlock_dvp = 0; unlock_vp = 0; if (!(flags & NDF_NO_FREE_PNBUF)) { NDFREE_PNBUF(ndp); } if (!(flags & NDF_NO_VP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) unlock_vp = 1; if (!(flags & NDF_NO_DVP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKPARENT) && ndp->ni_dvp != ndp->ni_vp) unlock_dvp = 1; if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { if (unlock_vp) { vput(ndp->ni_vp); unlock_vp = 0; } else vrele(ndp->ni_vp); ndp->ni_vp = NULL; } if (unlock_vp) VOP_UNLOCK(ndp->ni_vp); if (!(flags & NDF_NO_DVP_RELE) && (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { if (unlock_dvp) { vput(ndp->ni_dvp); unlock_dvp = 0; } else vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; } if (unlock_dvp) VOP_UNLOCK(ndp->ni_dvp); if (!(flags & NDF_NO_STARTDIR_RELE) && (ndp->ni_cnd.cn_flags & SAVESTART)) { vrele(ndp->ni_startdir); ndp->ni_startdir = NULL; } } #ifdef INVARIANTS /* * Validate the final state of ndp after the lookup. * * Historically filesystems were allowed to modify cn_flags. Most notably they * can add SAVENAME to the request, resulting in HASBUF and pushing subsequent * clean up to the consumer. In practice this seems to only concern != LOOKUP * operations. * * As a step towards stricter API contract this routine validates the state to * clean up. Note validation is a work in progress with the intent of becoming * stricter over time. */ #define NDMODIFYINGFLAGS (LOCKLEAF | LOCKPARENT | WANTPARENT | SAVENAME | SAVESTART | HASBUF) void NDVALIDATE(struct nameidata *ndp) { struct componentname *cnp; u_int64_t used, orig; cnp = &ndp->ni_cnd; orig = cnp->cn_origflags; used = cnp->cn_flags; switch (cnp->cn_nameiop) { case LOOKUP: /* * For plain lookup we require strict conformance -- nothing * to clean up if it was not requested by the caller. */ orig &= NDMODIFYINGFLAGS; used &= NDMODIFYINGFLAGS; if ((orig & (SAVENAME | SAVESTART)) != 0) orig |= HASBUF; if (orig != used) { goto out_mismatch; } break; case CREATE: case DELETE: case RENAME: /* * Some filesystems set SAVENAME to provoke HASBUF, accomodate * for it until it gets fixed. */ orig &= NDMODIFYINGFLAGS; orig |= (SAVENAME | HASBUF); used &= NDMODIFYINGFLAGS; used |= (SAVENAME | HASBUF); if (orig != used) { goto out_mismatch; } break; } return; out_mismatch: panic("%s: mismatched flags for op %d: added %" PRIx64 ", " "removed %" PRIx64" (%" PRIx64" != %" PRIx64"; stored %" PRIx64" != %" PRIx64")", __func__, cnp->cn_nameiop, used & ~orig, orig &~ used, orig, used, cnp->cn_origflags, cnp->cn_flags); } #endif /* * Determine if there is a suitable alternate filename under the specified * prefix for the specified path. If the create flag is set, then the * alternate prefix will be used so long as the parent directory exists. * This is used by the various compatibility ABIs so that Linux binaries prefer * files under /compat/linux for example. The chosen path (whether under * the prefix or under /) is returned in a kernel malloc'd buffer pointed * to by pathbuf. The caller is responsible for free'ing the buffer from * the M_TEMP bucket if one is returned. */ int kern_alternate_path(struct thread *td, const char *prefix, const char *path, enum uio_seg pathseg, char **pathbuf, int create, int dirfd) { struct nameidata nd, ndroot; char *ptr, *buf, *cp; size_t len, sz; int error; buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); *pathbuf = buf; /* Copy the prefix into the new pathname as a starting point. */ len = strlcpy(buf, prefix, MAXPATHLEN); if (len >= MAXPATHLEN) { *pathbuf = NULL; free(buf, M_TEMP); return (EINVAL); } sz = MAXPATHLEN - len; ptr = buf + len; /* Append the filename to the prefix. */ if (pathseg == UIO_SYSSPACE) error = copystr(path, ptr, sz, &len); else error = copyinstr(path, ptr, sz, &len); if (error) { *pathbuf = NULL; free(buf, M_TEMP); return (error); } /* Only use a prefix with absolute pathnames. */ if (*ptr != '/') { error = EINVAL; goto keeporig; } if (dirfd != AT_FDCWD) { /* * We want the original because the "prefix" is * included in the already opened dirfd. */ bcopy(ptr, buf, len); return (0); } /* * We know that there is a / somewhere in this pathname. * Search backwards for it, to find the file's parent dir * to see if it exists in the alternate tree. If it does, * and we want to create a file (cflag is set). We don't * need to worry about the root comparison in this case. */ if (create) { for (cp = &ptr[len] - 1; *cp != '/'; cp--); *cp = '\0'; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td); error = namei(&nd); *cp = '/'; if (error != 0) goto keeporig; } else { NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td); error = namei(&nd); if (error != 0) goto keeporig; /* * We now compare the vnode of the prefix to the one * vnode asked. If they resolve to be the same, then we * ignore the match so that the real root gets used. * This avoids the problem of traversing "../.." to find the * root directory and never finding it, because "/" resolves * to the emulation root directory. This is expensive :-( */ NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix, td); /* We shouldn't ever get an error from this namei(). */ error = namei(&ndroot); if (error == 0) { if (nd.ni_vp == ndroot.ni_vp) error = ENOENT; NDFREE(&ndroot, NDF_ONLY_PNBUF); vrele(ndroot.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); keeporig: /* If there was an error, use the original path name. */ if (error) bcopy(ptr, buf, len); return (error); } diff --git a/sys/sys/tslog.h b/sys/sys/tslog.h index 4b2971e4e643..af3c0931cac8 100644 --- a/sys/sys/tslog.h +++ b/sys/sys/tslog.h @@ -1,62 +1,69 @@ /*- * Copyright (c) 2017 Colin Percival * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _TSLOG_H_ #define _TSLOG_H_ #ifdef TSLOG #include #include #endif #define TS_ENTER 0 #define TS_EXIT 1 #define TS_THREAD 2 #define TS_EVENT 3 #define TSENTER() TSRAW(curthread, TS_ENTER, __func__, NULL) #define TSENTER2(x) TSRAW(curthread, TS_ENTER, __func__, x) #define TSEXIT() TSRAW(curthread, TS_EXIT, __func__, NULL) #define TSEXIT2(x) TSRAW(curthread, TS_EXIT, __func__, x) #define TSTHREAD(td, x) TSRAW(td, TS_THREAD, x, NULL) #define TSEVENT(x) TSRAW(curthread, TS_EVENT, x, NULL) #define TSEVENT2(x, y) TSRAW(curthread, TS_EVENT, x, y) #define TSLINE() TSEVENT2(__FILE__, __XSTRING(__LINE__)) #define TSWAIT(x) TSEVENT2("WAIT", x); #define TSUNWAIT(x) TSEVENT2("UNWAIT", x); #define TSHOLD(x) TSEVENT2("HOLD", x); #define TSRELEASE(x) TSEVENT2("RELEASE", x); +#define TSFORK(p, pp) TSRAW_USER(p, pp, NULL, NULL) +#define TSEXEC(p, name) TSRAW_USER(p, (pid_t)(-1), name, NULL) +#define TSNAMEI(p, name) TSRAW_USER(p, (pid_t)(-1), NULL, name) +#define TSPROCEXIT(p) TSRAW_USER(p, (pid_t)(-1), NULL, NULL) #ifdef TSLOG #define TSRAW(a, b, c, d) tslog(a, b, c, d) void tslog(void *, int, const char *, const char *); +#define TSRAW_USER(a, b, c, d) tslog_user(a, b, c, d) +void tslog_user(pid_t, pid_t, const char *, const char *); #else #define TSRAW(a, b, c, d) /* Timestamp logging disabled */ +#define TSRAW_USER(a, b, c, d) /* Timestamp logging disabled */ #endif #endif /* _TSLOG_H_ */