Index: lib/libkvm/Makefile =================================================================== --- lib/libkvm/Makefile +++ lib/libkvm/Makefile @@ -11,7 +11,7 @@ WARNS?= 3 SRCS= kvm.c kvm_cptime.c kvm_getloadavg.c \ - kvm_getswapinfo.c kvm_pcpu.c kvm_proc.c kvm_vnet.c \ + kvm_getswapinfo.c kvm_pcpu.c kvm_private.c kvm_proc.c kvm_vnet.c \ kvm_minidump_aarch64.c \ kvm_amd64.c kvm_minidump_amd64.c \ kvm_arm.c kvm_minidump_arm.c \ Index: lib/libkvm/kvm.c =================================================================== --- lib/libkvm/kvm.c +++ lib/libkvm/kvm.c @@ -66,114 +66,12 @@ SET_DECLARE(kvm_arch, struct kvm_arch); -/* from src/lib/libc/gen/nlist.c */ -int __fdnlist(int, struct nlist *); - -static int -kvm_fdnlist(kvm_t *kd, struct kvm_nlist *list) -{ - kvaddr_t addr; - int error, nfail; - - if (kd->resolve_symbol == NULL) { - struct nlist *nl; - int count, i; - - for (count = 0; list[count].n_name != NULL && - list[count].n_name[0] != '\0'; count++) - ; - nl = calloc(count + 1, sizeof(*nl)); - for (i = 0; i < count; i++) - nl[i].n_name = list[i].n_name; - nfail = __fdnlist(kd->nlfd, nl); - for (i = 0; i < count; i++) { - list[i].n_type = nl[i].n_type; - list[i].n_value = nl[i].n_value; - } - free(nl); - return (nfail); - } - - nfail = 0; - while (list->n_name != NULL && list->n_name[0] != '\0') { - error = kd->resolve_symbol(list->n_name, &addr); - if (error != 0) { - nfail++; - list->n_value = 0; - list->n_type = 0; - } else { - list->n_value = addr; - list->n_type = N_DATA | N_EXT; - } - list++; - } - return (nfail); -} - char * kvm_geterr(kvm_t *kd) { return (kd->errbuf); } -#include - -/* - * Report an error using printf style arguments. "program" is kd->program - * on hard errors, and 0 on soft errors, so that under sun error emulation, - * only hard errors are printed out (otherwise, programs like gdb will - * generate tons of error messages when trying to access bogus pointers). - */ -void -_kvm_err(kvm_t *kd, const char *program, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - if (program != NULL) { - (void)fprintf(stderr, "%s: ", program); - (void)vfprintf(stderr, fmt, ap); - (void)fputc('\n', stderr); - } else - (void)vsnprintf(kd->errbuf, - sizeof(kd->errbuf), fmt, ap); - - va_end(ap); -} - -void -_kvm_syserr(kvm_t *kd, const char *program, const char *fmt, ...) -{ - va_list ap; - int n; - - va_start(ap, fmt); - if (program != NULL) { - (void)fprintf(stderr, "%s: ", program); - (void)vfprintf(stderr, fmt, ap); - (void)fprintf(stderr, ": %s\n", strerror(errno)); - } else { - char *cp = kd->errbuf; - - (void)vsnprintf(cp, sizeof(kd->errbuf), fmt, ap); - n = strlen(cp); - (void)snprintf(&cp[n], sizeof(kd->errbuf) - n, ": %s", - strerror(errno)); - } - va_end(ap); -} - -void * -_kvm_malloc(kvm_t *kd, size_t n) -{ - void *p; - - if ((p = calloc(n, sizeof(char))) == NULL) - _kvm_err(kd, kd->program, "can't allocate %zu bytes: %s", - n, strerror(errno)); - return (p); -} - static int _kvm_read_kernel_ehdr(kvm_t *kd) { @@ -210,166 +108,6 @@ } } -int -_kvm_probe_elf_kernel(kvm_t *kd, int class, int machine) -{ - - return (kd->nlehdr.e_ident[EI_CLASS] == class && - kd->nlehdr.e_type == ET_EXEC && - kd->nlehdr.e_machine == machine); -} - -int -_kvm_is_minidump(kvm_t *kd) -{ - char minihdr[8]; - - if (kd->rawdump) - return (0); - if (pread(kd->pmfd, &minihdr, 8, 0) == 8 && - memcmp(&minihdr, "minidump", 8) == 0) - return (1); - return (0); -} - -/* - * The powerpc backend has a hack to strip a leading kerneldump - * header from the core before treating it as an ELF header. - * - * We can add that here if we can get a change to libelf to support - * an initial offset into the file. Alternatively we could patch - * savecore to extract cores from a regular file instead. - */ -int -_kvm_read_core_phdrs(kvm_t *kd, size_t *phnump, GElf_Phdr **phdrp) -{ - GElf_Ehdr ehdr; - GElf_Phdr *phdr; - Elf *elf; - size_t i, phnum; - - elf = elf_begin(kd->pmfd, ELF_C_READ, NULL); - if (elf == NULL) { - _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); - return (-1); - } - if (elf_kind(elf) != ELF_K_ELF) { - _kvm_err(kd, kd->program, "invalid core"); - goto bad; - } - if (gelf_getclass(elf) != kd->nlehdr.e_ident[EI_CLASS]) { - _kvm_err(kd, kd->program, "invalid core"); - goto bad; - } - if (gelf_getehdr(elf, &ehdr) == NULL) { - _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); - goto bad; - } - if (ehdr.e_type != ET_CORE) { - _kvm_err(kd, kd->program, "invalid core"); - goto bad; - } - if (ehdr.e_machine != kd->nlehdr.e_machine) { - _kvm_err(kd, kd->program, "invalid core"); - goto bad; - } - - if (elf_getphdrnum(elf, &phnum) == -1) { - _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); - goto bad; - } - - phdr = calloc(phnum, sizeof(*phdr)); - if (phdr == NULL) { - _kvm_err(kd, kd->program, "failed to allocate phdrs"); - goto bad; - } - - for (i = 0; i < phnum; i++) { - if (gelf_getphdr(elf, i, &phdr[i]) == NULL) { - _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); - goto bad; - } - } - elf_end(elf); - *phnump = phnum; - *phdrp = phdr; - return (0); - -bad: - elf_end(elf); - return (-1); -} - -static void -_kvm_hpt_insert(struct hpt *hpt, uint64_t pa, off_t off) -{ - struct hpte *hpte; - uint32_t fnv = FNV1_32_INIT; - - fnv = fnv_32_buf(&pa, sizeof(pa), fnv); - fnv &= (HPT_SIZE - 1); - hpte = malloc(sizeof(*hpte)); - hpte->pa = pa; - hpte->off = off; - hpte->next = hpt->hpt_head[fnv]; - hpt->hpt_head[fnv] = hpte; -} - -void -_kvm_hpt_init(kvm_t *kd, struct hpt *hpt, void *base, size_t len, off_t off, - int page_size, int word_size) -{ - uint64_t bits, idx, pa; - uint64_t *base64; - uint32_t *base32; - - base64 = base; - base32 = base; - for (idx = 0; idx < len / word_size; idx++) { - if (word_size == sizeof(uint64_t)) - bits = _kvm64toh(kd, base64[idx]); - else - bits = _kvm32toh(kd, base32[idx]); - pa = idx * word_size * NBBY * page_size; - for (; bits != 0; bits >>= 1, pa += page_size) { - if ((bits & 1) == 0) - continue; - _kvm_hpt_insert(hpt, pa, off); - off += page_size; - } - } -} - -off_t -_kvm_hpt_find(struct hpt *hpt, uint64_t pa) -{ - struct hpte *hpte; - uint32_t fnv = FNV1_32_INIT; - - fnv = fnv_32_buf(&pa, sizeof(pa), fnv); - fnv &= (HPT_SIZE - 1); - for (hpte = hpt->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) { - if (pa == hpte->pa) - return (hpte->off); - } - return (-1); -} - -void -_kvm_hpt_free(struct hpt *hpt) -{ - struct hpte *hpte, *next; - int i; - - for (i = 0; i < HPT_SIZE; i++) { - for (hpte = hpt->hpt_head[i]; hpte != NULL; hpte = next) { - next = hpte->next; - free(hpte); - } - } -} - static kvm_t * _kvm_open(kvm_t *kd, const char *uf, const char *mf, int flag, char *errout) { @@ -545,212 +283,15 @@ free((void *) kd->argspc); if (kd->argv != 0) free((void *)kd->argv); + if (kd->pt_map != NULL) + _kvm_unmap(kd->pt_map, kd->pt_map_size); + if (kd->pt_sparse_pages != NULL) + _kvm_unmap(kd->pt_sparse_pages, kd->pt_sparse_size); free((void *)kd); return (0); } -/* - * Walk the list of unresolved symbols, generate a new list and prefix the - * symbol names, try again, and merge back what we could resolve. - */ -static int -kvm_fdnlist_prefix(kvm_t *kd, struct kvm_nlist *nl, int missing, - const char *prefix, kvaddr_t (*validate_fn)(kvm_t *, kvaddr_t)) -{ - struct kvm_nlist *n, *np, *p; - char *cp, *ce; - const char *ccp; - size_t len; - int slen, unresolved; - - /* - * Calculate the space we need to malloc for nlist and names. - * We are going to store the name twice for later lookups: once - * with the prefix and once the unmodified name delmited by \0. - */ - len = 0; - unresolved = 0; - for (p = nl; p->n_name && p->n_name[0]; ++p) { - if (p->n_type != N_UNDF) - continue; - len += sizeof(struct kvm_nlist) + strlen(prefix) + - 2 * (strlen(p->n_name) + 1); - unresolved++; - } - if (unresolved == 0) - return (unresolved); - /* Add space for the terminating nlist entry. */ - len += sizeof(struct kvm_nlist); - unresolved++; - - /* Alloc one chunk for (nlist, [names]) and setup pointers. */ - n = np = malloc(len); - bzero(n, len); - if (n == NULL) - return (missing); - cp = ce = (char *)np; - cp += unresolved * sizeof(struct kvm_nlist); - ce += len; - - /* Generate shortened nlist with special prefix. */ - unresolved = 0; - for (p = nl; p->n_name && p->n_name[0]; ++p) { - if (p->n_type != N_UNDF) - continue; - *np = *p; - /* Save the new\0orig. name so we can later match it again. */ - slen = snprintf(cp, ce - cp, "%s%s%c%s", prefix, - (prefix[0] != '\0' && p->n_name[0] == '_') ? - (p->n_name + 1) : p->n_name, '\0', p->n_name); - if (slen < 0 || slen >= ce - cp) - continue; - np->n_name = cp; - cp += slen + 1; - np++; - unresolved++; - } - - /* Do lookup on the reduced list. */ - np = n; - unresolved = kvm_fdnlist(kd, np); - - /* Check if we could resolve further symbols and update the list. */ - if (unresolved >= 0 && unresolved < missing) { - /* Find the first freshly resolved entry. */ - for (; np->n_name && np->n_name[0]; np++) - if (np->n_type != N_UNDF) - break; - /* - * The lists are both in the same order, - * so we can walk them in parallel. - */ - for (p = nl; np->n_name && np->n_name[0] && - p->n_name && p->n_name[0]; ++p) { - if (p->n_type != N_UNDF) - continue; - /* Skip expanded name and compare to orig. one. */ - ccp = np->n_name + strlen(np->n_name) + 1; - if (strcmp(ccp, p->n_name) != 0) - continue; - /* Update nlist with new, translated results. */ - p->n_type = np->n_type; - if (validate_fn) - p->n_value = (*validate_fn)(kd, np->n_value); - else - p->n_value = np->n_value; - missing--; - /* Find next freshly resolved entry. */ - for (np++; np->n_name && np->n_name[0]; np++) - if (np->n_type != N_UNDF) - break; - } - } - /* We could assert missing = unresolved here. */ - - free(n); - return (unresolved); -} - -int -_kvm_nlist(kvm_t *kd, struct kvm_nlist *nl, int initialize) -{ - struct kvm_nlist *p; - int nvalid; - struct kld_sym_lookup lookup; - int error; - const char *prefix = ""; - char symname[1024]; /* XXX-BZ symbol name length limit? */ - int tried_vnet, tried_dpcpu; - - /* - * If we can't use the kld symbol lookup, revert to the - * slow library call. - */ - if (!ISALIVE(kd)) { - error = kvm_fdnlist(kd, nl); - if (error <= 0) /* Hard error or success. */ - return (error); - - if (_kvm_vnet_initialized(kd, initialize)) - error = kvm_fdnlist_prefix(kd, nl, error, - VNET_SYMPREFIX, _kvm_vnet_validaddr); - - if (error > 0 && _kvm_dpcpu_initialized(kd, initialize)) - error = kvm_fdnlist_prefix(kd, nl, error, - DPCPU_SYMPREFIX, _kvm_dpcpu_validaddr); - - return (error); - } - - /* - * We can use the kld lookup syscall. Go through each nlist entry - * and look it up with a kldsym(2) syscall. - */ - nvalid = 0; - tried_vnet = 0; - tried_dpcpu = 0; -again: - for (p = nl; p->n_name && p->n_name[0]; ++p) { - if (p->n_type != N_UNDF) - continue; - - lookup.version = sizeof(lookup); - lookup.symvalue = 0; - lookup.symsize = 0; - - error = snprintf(symname, sizeof(symname), "%s%s", prefix, - (prefix[0] != '\0' && p->n_name[0] == '_') ? - (p->n_name + 1) : p->n_name); - if (error < 0 || error >= (int)sizeof(symname)) - continue; - lookup.symname = symname; - if (lookup.symname[0] == '_') - lookup.symname++; - - if (kldsym(0, KLDSYM_LOOKUP, &lookup) != -1) { - p->n_type = N_TEXT; - if (_kvm_vnet_initialized(kd, initialize) && - strcmp(prefix, VNET_SYMPREFIX) == 0) - p->n_value = - _kvm_vnet_validaddr(kd, lookup.symvalue); - else if (_kvm_dpcpu_initialized(kd, initialize) && - strcmp(prefix, DPCPU_SYMPREFIX) == 0) - p->n_value = - _kvm_dpcpu_validaddr(kd, lookup.symvalue); - else - p->n_value = lookup.symvalue; - ++nvalid; - /* lookup.symsize */ - } - } - - /* - * Check the number of entries that weren't found. If they exist, - * try again with a prefix for virtualized or DPCPU symbol names. - */ - error = ((p - nl) - nvalid); - if (error && _kvm_vnet_initialized(kd, initialize) && !tried_vnet) { - tried_vnet = 1; - prefix = VNET_SYMPREFIX; - goto again; - } - if (error && _kvm_dpcpu_initialized(kd, initialize) && !tried_dpcpu) { - tried_dpcpu = 1; - prefix = DPCPU_SYMPREFIX; - goto again; - } - - /* - * Return the number of entries that weren't found. If they exist, - * also fill internal error buffer. - */ - error = ((p - nl) - nvalid); - if (error) - _kvm_syserr(kd, kd->program, "kvm_nlist"); - return (error); -} - int kvm_nlist2(kvm_t *kd, struct kvm_nlist *nl) { Index: lib/libkvm/kvm_minidump_aarch64.c =================================================================== --- lib/libkvm/kvm_minidump_aarch64.c +++ lib/libkvm/kvm_minidump_aarch64.c @@ -50,7 +50,6 @@ struct vmstate { struct minidumphdr hdr; - struct hpt hpt; uint64_t *page_map; }; @@ -67,8 +66,7 @@ { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - free(vm->page_map); + _kvm_unmap(vm->page_map, vm->hdr.pmapsize); free(vm); kd->vmst = NULL; } @@ -77,8 +75,7 @@ _aarch64_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint64_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -114,50 +111,22 @@ /* Skip header and msgbuf */ off = AARCH64_PAGE_SIZE + aarch64_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, - "cannot allocate %d bytes for bitmap", - vmst->hdr.bitmapsize); - return (-1); - } - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, - "cannot read %d bytes for page bitmap", - vmst->hdr.bitmapsize); - free(bitmap); + /* build physical address lookup table for sparse pages */ + sparse_off = off + aarch64_round_page(vmst->hdr.bitmapsize) + + aarch64_round_page(vmst->hdr.pmapsize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + AARCH64_PAGE_SIZE, sizeof(uint64_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += aarch64_round_page(vmst->hdr.bitmapsize); - vmst->page_map = _kvm_malloc(kd, vmst->hdr.pmapsize); - if (vmst->page_map == NULL) { - _kvm_err(kd, kd->program, - "cannot allocate %d bytes for page_map", + if (_kvm_map(kd, vmst->hdr.pmapsize, off, (void **)&vmst->page_map) == -1) { + _kvm_err(kd, kd->program, "cannot map %d bytes for page_map", vmst->hdr.pmapsize); - free(bitmap); return (-1); } - /* This is the end of the dump, savecore may have truncated it. */ - /* - * XXX: This doesn't make sense. The pmap is not at the end, - * and if it is truncated we don't have any actual data (it's - * all stored after the bitmap and pmap. -- jhb - */ - if (pread(kd->pmfd, vmst->page_map, vmst->hdr.pmapsize, off) < - AARCH64_PAGE_SIZE) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page_map", - vmst->hdr.pmapsize); - free(bitmap); - return (-1); - } - off += vmst->hdr.pmapsize; - - /* build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - AARCH64_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += aarch64_round_page(vmst->hdr.pmapsize); return (0); } @@ -178,7 +147,7 @@ if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) { a = (va - vm->hdr.dmapbase + vm->hdr.dmapphys) & ~AARCH64_PAGE_MASK; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_aarch64_minidump_vatop: " "direct map address 0x%jx not in minidump", @@ -198,7 +167,7 @@ goto invalid; } a = l3 & ~AARCH64_ATTR_MASK; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_aarch64_minidump_vatop: " "physical address 0x%jx not in minidump", Index: lib/libkvm/kvm_minidump_amd64.c =================================================================== --- lib/libkvm/kvm_minidump_amd64.c +++ lib/libkvm/kvm_minidump_amd64.c @@ -49,7 +49,6 @@ struct vmstate { struct minidumphdr hdr; - struct hpt hpt; amd64_pte_t *page_map; }; @@ -66,9 +65,7 @@ { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - if (vm->page_map) - free(vm->page_map); + _kvm_unmap(vm->page_map, vm->hdr.pmapsize); free(vm); kd->vmst = NULL; } @@ -77,8 +74,7 @@ _amd64_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint64_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -116,37 +112,21 @@ /* Skip header and msgbuf */ off = AMD64_PAGE_SIZE + amd64_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize); - return (-1); - } - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize); - free(bitmap); + sparse_off = off + amd64_round_page(vmst->hdr.bitmapsize) + + amd64_round_page(vmst->hdr.pmapsize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + AMD64_PAGE_SIZE, sizeof(uint64_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += amd64_round_page(vmst->hdr.bitmapsize); - vmst->page_map = _kvm_malloc(kd, vmst->hdr.pmapsize); - if (vmst->page_map == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for page_map", vmst->hdr.pmapsize); - free(bitmap); + if (_kvm_map(kd, vmst->hdr.pmapsize, off, (void **)&vmst->page_map) == -1) { + _kvm_err(kd, kd->program, "cannot map %d bytes for page_map", + vmst->hdr.pmapsize); return (-1); } - if (pread(kd->pmfd, vmst->page_map, vmst->hdr.pmapsize, off) != - (ssize_t)vmst->hdr.pmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page_map", vmst->hdr.pmapsize); - free(bitmap); - return (-1); - } - off += vmst->hdr.pmapsize; - - /* build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - AMD64_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += amd64_round_page(vmst->hdr.pmapsize); return (0); } @@ -175,7 +155,7 @@ goto invalid; } a = pte & AMD64_PG_FRAME; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_amd64_minidump_vatop_v1: physical address 0x%jx not in minidump", @@ -186,7 +166,7 @@ return (AMD64_PAGE_SIZE - offset); } else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) { a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_amd64_minidump_vatop_v1: direct map address 0x%jx not in minidump", @@ -235,20 +215,12 @@ } if ((pde & AMD64_PG_PS) == 0) { a = pde & AMD64_PG_FRAME; - ofs = _kvm_hpt_find(&vm->hpt, a); - if (ofs == -1) { - _kvm_err(kd, kd->program, - "_amd64_minidump_vatop: pt physical address 0x%jx not in minidump", - (uintmax_t)a); - goto invalid; - } /* TODO: Just read the single PTE */ - if (pread(kd->pmfd, &pt, AMD64_PAGE_SIZE, ofs) != - AMD64_PAGE_SIZE) { + if (_kvm_pt_read(kd, a, AMD64_PAGE_SIZE, pt) == -1) { _kvm_err(kd, kd->program, "cannot read %d bytes for page table", AMD64_PAGE_SIZE); - return (-1); + goto invalid; } pteindex = (va >> AMD64_PAGE_SHIFT) & (AMD64_NPTEPG - 1); @@ -263,7 +235,7 @@ a = pde & AMD64_PG_PS_FRAME; a += (va & AMD64_PDRMASK) ^ offset; } - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_amd64_minidump_vatop: physical address 0x%jx not in minidump", @@ -274,7 +246,7 @@ return (AMD64_PAGE_SIZE - offset); } else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) { a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_amd64_minidump_vatop: direct map address 0x%jx not in minidump", Index: lib/libkvm/kvm_minidump_arm.c =================================================================== --- lib/libkvm/kvm_minidump_arm.c +++ lib/libkvm/kvm_minidump_arm.c @@ -51,7 +51,6 @@ struct vmstate { struct minidumphdr hdr; - struct hpt hpt; void *ptemap; unsigned char ei_data; }; @@ -69,9 +68,7 @@ { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - if (vm->ptemap) - free(vm->ptemap); + _kvm_unmap(vm->ptemap, vm->hdr.ptesize); free(vm); kd->vmst = NULL; } @@ -80,8 +77,7 @@ _arm_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint32_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -122,44 +118,21 @@ /* Skip header and msgbuf */ off = ARM_PAGE_SIZE + arm_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for " - "bitmap", vmst->hdr.bitmapsize); - return (-1); - } - - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", - vmst->hdr.bitmapsize); - free(bitmap); + sparse_off = off + arm_round_page(vmst->hdr.bitmapsize) + + arm_round_page(vmst->hdr.ptesize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + ARM_PAGE_SIZE, sizeof(uint32_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += arm_round_page(vmst->hdr.bitmapsize); - vmst->ptemap = _kvm_malloc(kd, vmst->hdr.ptesize); - if (vmst->ptemap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for " - "ptemap", vmst->hdr.ptesize); - free(bitmap); - return (-1); - } - - if (pread(kd->pmfd, vmst->ptemap, vmst->hdr.ptesize, off) != - (ssize_t)vmst->hdr.ptesize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", + if (_kvm_map(kd, vmst->hdr.ptesize, off, (void **)&vmst->ptemap) == -1) { + _kvm_err(kd, kd->program, "cannot map %d bytes for ptemap", vmst->hdr.ptesize); - free(bitmap); return (-1); } - - off += vmst->hdr.ptesize; - - /* Build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - ARM_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += arm_round_page(vmst->hdr.ptesize); return (0); } @@ -184,6 +157,8 @@ if (va >= vm->hdr.kernbase) { pteindex = (va - vm->hdr.kernbase) >> ARM_PAGE_SHIFT; + if (pteindex >= vm->hdr.ptesize / sizeof(*ptemap)) + goto invalid; pte = _kvm32toh(kd, ptemap[pteindex]); if ((pte & ARM_L2_TYPE_MASK) == ARM_L2_TYPE_INV) { _kvm_err(kd, kd->program, @@ -207,7 +182,7 @@ a = pte & ARM_L2_S_FRAME; } - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_arm_minidump_kvatop: " "physical address 0x%jx not in minidump", Index: lib/libkvm/kvm_minidump_i386.c =================================================================== --- lib/libkvm/kvm_minidump_i386.c +++ lib/libkvm/kvm_minidump_i386.c @@ -49,7 +49,6 @@ struct vmstate { struct minidumphdr hdr; - struct hpt hpt; void *ptemap; }; @@ -66,9 +65,7 @@ { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - if (vm->ptemap) - free(vm->ptemap); + _kvm_unmap(vm->ptemap, vm->hdr.ptesize); free(vm); kd->vmst = NULL; } @@ -77,8 +74,7 @@ _i386_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint32_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -110,37 +106,21 @@ /* Skip header and msgbuf */ off = I386_PAGE_SIZE + i386_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize); - return (-1); - } - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize); - free(bitmap); + sparse_off = off + i386_round_page(vmst->hdr.bitmapsize) + + i386_round_page(vmst->hdr.ptesize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + I386_PAGE_SIZE, sizeof(uint32_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += i386_round_page(vmst->hdr.bitmapsize); - vmst->ptemap = _kvm_malloc(kd, vmst->hdr.ptesize); - if (vmst->ptemap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for ptemap", vmst->hdr.ptesize); - free(bitmap); - return (-1); - } - if (pread(kd->pmfd, vmst->ptemap, vmst->hdr.ptesize, off) != - (ssize_t)vmst->hdr.ptesize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", vmst->hdr.ptesize); - free(bitmap); + if (_kvm_map(kd, vmst->hdr.ptesize, off, (void **)&vmst->ptemap) == -1) { + _kvm_err(kd, kd->program, "cannot map %d bytes for ptemap", + vmst->hdr.ptesize); return (-1); } - off += vmst->hdr.ptesize; - - /* build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - I386_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += i386_round_page(vmst->hdr.ptesize); return (0); } @@ -162,6 +142,8 @@ if (va >= vm->hdr.kernbase) { pteindex = (va - vm->hdr.kernbase) >> I386_PAGE_SHIFT; + if (pteindex >= vm->hdr.ptesize / sizeof(*ptemap)) + goto invalid; pte = le64toh(ptemap[pteindex]); if ((pte & I386_PG_V) == 0) { _kvm_err(kd, kd->program, @@ -169,7 +151,7 @@ goto invalid; } a = pte & I386_PG_FRAME_PAE; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_i386_minidump_vatop_pae: physical address 0x%jx not in minidump", @@ -207,6 +189,8 @@ if (va >= vm->hdr.kernbase) { pteindex = (va - vm->hdr.kernbase) >> I386_PAGE_SHIFT; + if (pteindex >= vm->hdr.ptesize / sizeof(*ptemap)) + goto invalid; pte = le32toh(ptemap[pteindex]); if ((pte & I386_PG_V) == 0) { _kvm_err(kd, kd->program, @@ -214,7 +198,7 @@ goto invalid; } a = pte & I386_PG_FRAME; - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_i386_minidump_vatop: physical address 0x%jx not in minidump", Index: lib/libkvm/kvm_minidump_mips.c =================================================================== --- lib/libkvm/kvm_minidump_mips.c +++ lib/libkvm/kvm_minidump_mips.c @@ -52,7 +52,6 @@ struct vmstate { struct minidumphdr hdr; - struct hpt hpt; void *ptemap; int pte_size; }; @@ -74,9 +73,7 @@ { struct vmstate *vm = kd->vmst; - _kvm_hpt_free(&vm->hpt); - if (vm->ptemap) - free(vm->ptemap); + _kvm_unmap(vm->ptemap, vm->hdr.ptesize); free(vm); kd->vmst = NULL; } @@ -85,8 +82,7 @@ _mips_minidump_initvtop(kvm_t *kd) { struct vmstate *vmst; - uint32_t *bitmap; - off_t off; + off_t off, sparse_off; vmst = _kvm_malloc(kd, sizeof(*vmst)); if (vmst == NULL) { @@ -129,44 +125,21 @@ /* Skip header and msgbuf */ off = MIPS_PAGE_SIZE + mips_round_page(vmst->hdr.msgbufsize); - bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize); - if (bitmap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for " - "bitmap", vmst->hdr.bitmapsize); - return (-1); - } - - if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) != - (ssize_t)vmst->hdr.bitmapsize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", - vmst->hdr.bitmapsize); - free(bitmap); + sparse_off = off + mips_round_page(vmst->hdr.bitmapsize) + + mips_round_page(vmst->hdr.ptesize); + if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off, + MIPS_PAGE_SIZE, sizeof(uint32_t)) == -1) { + _kvm_err(kd, kd->program, "cannot load core bitmap"); return (-1); } off += mips_round_page(vmst->hdr.bitmapsize); - vmst->ptemap = _kvm_malloc(kd, vmst->hdr.ptesize); - if (vmst->ptemap == NULL) { - _kvm_err(kd, kd->program, "cannot allocate %d bytes for " - "ptemap", vmst->hdr.ptesize); - free(bitmap); - return (-1); - } - - if (pread(kd->pmfd, vmst->ptemap, vmst->hdr.ptesize, off) != - (ssize_t)vmst->hdr.ptesize) { - _kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", + if (_kvm_map(kd, vmst->hdr.ptesize, off, (void **)&vmst->ptemap) == -1) { + _kvm_err(kd, kd->program, "cannot map %d bytes for ptemap", vmst->hdr.ptesize); - free(bitmap); return (-1); } - - off += vmst->hdr.ptesize; - - /* Build physical address hash table for sparse pages */ - _kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off, - MIPS_PAGE_SIZE, sizeof(*bitmap)); - free(bitmap); + off += mips_round_page(vmst->hdr.ptesize); return (0); } @@ -221,9 +194,13 @@ if (va >= vm->hdr.kernbase) { pteindex = (va - vm->hdr.kernbase) >> MIPS_PAGE_SHIFT; if (vm->pte_size == 64) { + if (pteindex >= vm->hdr.ptesize / sizeof(*ptemap64)) + goto invalid; pte = _kvm64toh(kd, ptemap64[pteindex]); a = MIPS64_PTE_TO_PA(pte); } else { + if (pteindex >= vm->hdr.ptesize / sizeof(*ptemap32)) + goto invalid; pte = _kvm32toh(kd, ptemap32[pteindex]); a = MIPS32_PTE_TO_PA(pte); } @@ -239,7 +216,7 @@ } found: - ofs = _kvm_hpt_find(&vm->hpt, a); + ofs = _kvm_pt_find(kd, a); if (ofs == -1) { _kvm_err(kd, kd->program, "_mips_minidump_kvatop: physical " "address 0x%jx not in minidump", (uintmax_t)a); Index: lib/libkvm/kvm_private.h =================================================================== --- lib/libkvm/kvm_private.h +++ lib/libkvm/kvm_private.h @@ -97,23 +97,22 @@ uintptr_t *dpcpu_off; /* base array, indexed by CPU ID */ u_int dpcpu_curcpu; /* CPU we're currently working with */ kvaddr_t dpcpu_curoff; /* dpcpu base of current CPU */ -}; -/* - * Page table hash used by minidump backends to map physical addresses - * to file offsets. - */ -struct hpte { - struct hpte *next; - uint64_t pa; - off_t off; + /* Page table lookup structures. */ + uint64_t *pt_map; + size_t pt_map_size; + off_t pt_sparse_off; + uint64_t pt_sparse_size; + void *pt_sparse_pages; + uint32_t *pt_popcounts; + unsigned int pt_page_size; + unsigned int pt_word_size; }; -#define HPT_SIZE 1024 - -struct hpt { - struct hpte *hpt_head[HPT_SIZE]; -}; +/* Page table lookup constants. */ +#define POPCOUNT_BITS 1024 +#define BITS_IN(v) (sizeof(v) * NBBY) +#define POPCOUNTS_IN(v) (POPCOUNT_BITS / BITS_IN(v)) /* * Functions used internally by kvm, but across kvm modules. @@ -154,6 +153,9 @@ int _kvm_probe_elf_kernel(kvm_t *, int, int); int _kvm_is_minidump(kvm_t *); int _kvm_read_core_phdrs(kvm_t *, size_t *, GElf_Phdr **); -void _kvm_hpt_init(kvm_t *, struct hpt *, void *, size_t, off_t, int, int); -off_t _kvm_hpt_find(struct hpt *, uint64_t); -void _kvm_hpt_free(struct hpt *); +int _kvm_pt_init(kvm_t *, size_t, off_t, off_t, int, int); +off_t _kvm_pt_find(kvm_t *, uint64_t); + +int _kvm_map(kvm_t *, size_t, off_t, void **); +void _kvm_unmap(void *, size_t); +int _kvm_pt_read(kvm_t *, uint64_t, size_t, void *); Index: lib/libkvm/kvm_private.c =================================================================== --- lib/libkvm/kvm_private.c +++ lib/libkvm/kvm_private.c @@ -34,12 +34,6 @@ #include __FBSDID("$FreeBSD$"); -#if defined(LIBC_SCCS) && !defined(lint) -#if 0 -static char sccsid[] = "@(#)kvm.c 8.2 (Berkeley) 2/13/94"; -#endif -#endif /* LIBC_SCCS and not lint */ - #include #include @@ -52,6 +46,7 @@ #include +#include #include #include #include @@ -61,63 +56,19 @@ #include #include #include +#include + +#include #include "kvm_private.h" -SET_DECLARE(kvm_arch, struct kvm_arch); +/* + * Routines private to libkvm. + */ /* from src/lib/libc/gen/nlist.c */ int __fdnlist(int, struct nlist *); -static int -kvm_fdnlist(kvm_t *kd, struct kvm_nlist *list) -{ - kvaddr_t addr; - int error, nfail; - - if (kd->resolve_symbol == NULL) { - struct nlist *nl; - int count, i; - - for (count = 0; list[count].n_name != NULL && - list[count].n_name[0] != '\0'; count++) - ; - nl = calloc(count + 1, sizeof(*nl)); - for (i = 0; i < count; i++) - nl[i].n_name = list[i].n_name; - nfail = __fdnlist(kd->nlfd, nl); - for (i = 0; i < count; i++) { - list[i].n_type = nl[i].n_type; - list[i].n_value = nl[i].n_value; - } - free(nl); - return (nfail); - } - - nfail = 0; - while (list->n_name != NULL && list->n_name[0] != '\0') { - error = kd->resolve_symbol(list->n_name, &addr); - if (error != 0) { - nfail++; - list->n_value = 0; - list->n_type = 0; - } else { - list->n_value = addr; - list->n_type = N_DATA | N_EXT; - } - list++; - } - return (nfail); -} - -char * -kvm_geterr(kvm_t *kd) -{ - return (kd->errbuf); -} - -#include - /* * Report an error using printf style arguments. "program" is kd->program * on hard errors, and 0 on soft errors, so that under sun error emulation, @@ -174,40 +125,23 @@ return (p); } -static int -_kvm_read_kernel_ehdr(kvm_t *kd) +int +_kvm_map(kvm_t *kd, size_t len, off_t off, void **addrp) { - Elf *elf; + void *addr; - if (elf_version(EV_CURRENT) == EV_NONE) { - _kvm_err(kd, kd->program, "Unsupported libelf"); - return (-1); - } - elf = elf_begin(kd->nlfd, ELF_C_READ, NULL); - if (elf == NULL) { - _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); - return (-1); - } - if (elf_kind(elf) != ELF_K_ELF) { - _kvm_err(kd, kd->program, "kernel is not an ELF file"); - return (-1); - } - if (gelf_getehdr(elf, &kd->nlehdr) == NULL) { - _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); - elf_end(elf); + addr = mmap(NULL, len, PROT_READ, MAP_PRIVATE, kd->pmfd, off); + if (addr == MAP_FAILED) return (-1); - } - elf_end(elf); + *addrp = addr; + return (0); +} - switch (kd->nlehdr.e_ident[EI_DATA]) { - case ELFDATA2LSB: - case ELFDATA2MSB: - return (0); - default: - _kvm_err(kd, kd->program, - "unsupported ELF data encoding for kernel"); - return (-1); - } +void +_kvm_unmap(void *addr, size_t len) +{ + if (addr != NULL) + (void) munmap(addr, len); } int @@ -301,253 +235,251 @@ return (-1); } -static void -_kvm_hpt_insert(struct hpt *hpt, uint64_t pa, off_t off) +/* + * Transform v such that only bits [bit0, bitN) may be set. Generates a + * bitmask covering the number of bits, then shifts so +bit0+ is the first. + */ +static uint64_t +bitmask_range(uint64_t v, uint64_t bit0, uint64_t bitN) { - struct hpte *hpte; - uint32_t fnv = FNV1_32_INIT; - - fnv = fnv_32_buf(&pa, sizeof(pa), fnv); - fnv &= (HPT_SIZE - 1); - hpte = malloc(sizeof(*hpte)); - hpte->pa = pa; - hpte->off = off; - hpte->next = hpt->hpt_head[fnv]; - hpt->hpt_head[fnv] = hpte; -} + if (bit0 == 0 && bitN == BITS_IN(v)) + return (v); -void -_kvm_hpt_init(kvm_t *kd, struct hpt *hpt, void *base, size_t len, off_t off, - int page_size, int word_size) -{ - uint64_t bits, idx, pa; - uint64_t *base64; - uint32_t *base32; - - base64 = base; - base32 = base; - for (idx = 0; idx < len / word_size; idx++) { - if (word_size == sizeof(uint64_t)) - bits = _kvm64toh(kd, base64[idx]); - else - bits = _kvm32toh(kd, base32[idx]); - pa = idx * word_size * NBBY * page_size; - for (; bits != 0; bits >>= 1, pa += page_size) { - if ((bits & 1) == 0) - continue; - _kvm_hpt_insert(hpt, pa, off); - off += page_size; - } - } + return (v & (((1ULL << (bitN - bit0)) - 1ULL) << bit0)); } -off_t -_kvm_hpt_find(struct hpt *hpt, uint64_t pa) +/* + * Returns the number of bits set in a given u64. For explanation, see: + * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + * https://en.wikipedia.org/wiki/Hamming_weight + * + * Can also use POPCNT CPU instruction, but this is portable and fast enough. + */ +static uint64_t +popcount64(uint64_t v) { - struct hpte *hpte; - uint32_t fnv = FNV1_32_INIT; - - fnv = fnv_32_buf(&pa, sizeof(pa), fnv); - fnv &= (HPT_SIZE - 1); - for (hpte = hpt->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) { - if (pa == hpte->pa) - return (hpte->off); - } - return (-1); + v -= ((v >> 1) & 0x5555555555555555ULL); + v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); + v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return ((uint64_t)(v * 0x0101010101010101ULL) >> 56); } -void -_kvm_hpt_free(struct hpt *hpt) +/* + * Returns the number of bits in a given byte array range starting at a + * given base, from bit0 to bitN. bit0 may be non-zero in the case of + * counting backwards from bitN. + */ +static uint64_t +popcount_bytes(uint64_t *addr, uint32_t bit0, uint32_t bitN) { - struct hpte *hpte, *next; - int i; + uint32_t res = bitN - bit0; + uint64_t count = 0; + uint32_t bound; - for (i = 0; i < HPT_SIZE; i++) { - for (hpte = hpt->hpt_head[i]; hpte != NULL; hpte = next) { - next = hpte->next; - free(hpte); - } + /* Align to 64-bit boundary on the left side if needed. */ + if ((bit0 % BITS_IN(*addr)) != 0) { + bound = MIN(bitN, roundup2(bit0, BITS_IN(*addr))); + count += popcount64(bitmask_range(*addr, bit0, bound)); + res -= (bound - bit0); + addr++; + } + + while (res > 0) { + bound = MIN(res, BITS_IN(*addr)); + count += popcount64(bitmask_range(*addr, 0, bound)); + res -= bound; + addr++; } + + return (count); } -static kvm_t * -_kvm_open(kvm_t *kd, const char *uf, const char *mf, int flag, char *errout) +int +_kvm_pt_init(kvm_t *kd, size_t map_len, off_t map_off, off_t sparse_off, + int page_size, int word_size) { - struct kvm_arch **parch; - struct stat st; - - kd->vmfd = -1; - kd->pmfd = -1; - kd->nlfd = -1; - kd->vmst = NULL; - kd->procbase = NULL; - kd->argspc = NULL; - kd->argv = NULL; - - if (uf == NULL) - uf = getbootfile(); - else if (strlen(uf) >= MAXPATHLEN) { - _kvm_err(kd, kd->program, "exec file name too long"); - goto failed; - } - if (flag & ~O_RDWR) { - _kvm_err(kd, kd->program, "bad flags arg"); - goto failed; - } - if (mf == NULL) - mf = _PATH_MEM; + uint64_t *addr; + uint32_t *popcount_bin; + int bin_popcounts = 0; + uint64_t pc_bins, res; - if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) { - _kvm_syserr(kd, kd->program, "%s", mf); - goto failed; - } - if (fstat(kd->pmfd, &st) < 0) { - _kvm_syserr(kd, kd->program, "%s", mf); - goto failed; - } - if (S_ISREG(st.st_mode) && st.st_size <= 0) { - errno = EINVAL; - _kvm_syserr(kd, kd->program, "empty file"); - goto failed; - } - if (S_ISCHR(st.st_mode)) { - /* - * If this is a character special device, then check that - * it's /dev/mem. If so, open kmem too. (Maybe we should - * make it work for either /dev/mem or /dev/kmem -- in either - * case you're working with a live kernel.) - */ - if (strcmp(mf, _PATH_DEVNULL) == 0) { - kd->vmfd = open(_PATH_DEVNULL, O_RDONLY | O_CLOEXEC); - return (kd); - } else if (strcmp(mf, _PATH_MEM) == 0) { - if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC)) < - 0) { - _kvm_syserr(kd, kd->program, "%s", _PATH_KMEM); - goto failed; - } - return (kd); - } - } /* - * This is a crash dump. - * Open the namelist fd and determine the architecture. + * Map the bitmap specified by the arguments. */ - if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) { - _kvm_syserr(kd, kd->program, "%s", uf); - goto failed; - } - if (_kvm_read_kernel_ehdr(kd) < 0) - goto failed; - if (strncmp(mf, _PATH_FWMEM, strlen(_PATH_FWMEM)) == 0) - kd->rawdump = 1; - SET_FOREACH(parch, kvm_arch) { - if ((*parch)->ka_probe(kd)) { - kd->arch = *parch; - break; - } - } - if (kd->arch == NULL) { - _kvm_err(kd, kd->program, "unsupported architecture"); - goto failed; - } + if (_kvm_map(kd, map_len, map_off, (void **)&kd->pt_map) == -1) + return (-1); + kd->pt_map_size = map_len; /* - * Non-native kernels require a symbol resolver. + * Generate a popcount cache for every POPCOUNT_BITS in the bitmap, + * so lookups only have to calculate the number of bits set between + * a cache point and their bit. This reduces lookups to O(1), + * without significantly increasing memory requirements. + * + * Round up the number of bins so that 'upper half' lookups work for + * the final bin, if needed. The first popcount is 0, since no bits + * precede bit 0, so add 1 for that also. Without this, extra work + * would be needed to handle the first PTEs in _kvm_pt_find(). */ - if (!kd->arch->ka_native(kd) && kd->resolve_symbol == NULL) { - _kvm_err(kd, kd->program, - "non-native kernel requires a symbol resolver"); - goto failed; + addr = kd->pt_map; + res = map_len; + pc_bins = 1 + (res * NBBY + POPCOUNT_BITS / 2) / POPCOUNT_BITS; + kd->pt_popcounts = calloc(pc_bins, sizeof(uint32_t)); + if (kd->pt_popcounts == NULL) + return (-1); + + for (popcount_bin = &kd->pt_popcounts[1]; res > 0; + addr++, res -= sizeof(*addr)) { + *popcount_bin += popcount_bytes(addr, 0, + MIN(res * NBBY, BITS_IN(*addr))); + if (++bin_popcounts == POPCOUNTS_IN(*addr)) { + popcount_bin++; + *popcount_bin = *(popcount_bin - 1); + bin_popcounts = 0; + } } + assert(pc_bins * sizeof(*popcount_bin) == + ((uintptr_t)popcount_bin - (uintptr_t)kd->pt_popcounts)); + /* - * Initialize the virtual address translation machinery. - */ - if (kd->arch->ka_initvtop(kd) < 0) - goto failed; - return (kd); -failed: - /* - * Copy out the error if doing sane error semantics. + * Map the sparse page map. This is useful for reading specific + * pages via _kvm_pt_read. */ - if (errout != NULL) - strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX); - (void)kvm_close(kd); + kd->pt_sparse_off = sparse_off; + kd->pt_sparse_size = (uint64_t)*popcount_bin * PAGE_SIZE; + if (_kvm_map(kd, kd->pt_sparse_size, kd->pt_sparse_off, + (void **)&kd->pt_sparse_pages) == -1) + return (-1); + + kd->pt_page_size = page_size; + kd->pt_word_size = word_size; return (0); } -kvm_t * -kvm_openfiles(const char *uf, const char *mf, const char *sf __unused, int flag, - char *errout) +/* + * Find the offset for the given physical page address; returns -1 otherwise. + * + * A page's offset is represented by the sparse page base offset plus the + * number of bits set before its bit multiplied by PAGE_SIZE. This means + * that if a page exists in the dump, it's necessary to know how many pages + * in the dump precede it. Reduce this O(n) counting to O(1) by caching the + * number of bits set at POPCOUNT_BITS intervals. + * + * Then to find the number of pages before the requested address, simply + * index into the cache and count the number of bits set between that cache + * bin and the page's bit. Halve the number of bytes that have to be + * checked by also counting down from the next higher bin if it's closer. + */ +off_t +_kvm_pt_find(kvm_t *kd, uint64_t pa) { - kvm_t *kd; + uint64_t *bitmap = kd->pt_map; + uint64_t pte_bit_id = pa / PAGE_SIZE; + uint64_t pte_u64 = pte_bit_id / BITS_IN(*bitmap); + uint64_t popcount_id = pte_bit_id / POPCOUNT_BITS; + uint64_t pte_mask = 1ULL << (pte_bit_id % BITS_IN(*bitmap)); + uint64_t bitN; + uint32_t count; + + /* Check whether the page address requested is in the dump. */ + if (pte_bit_id >= (kd->pt_map_size * NBBY) || + (bitmap[pte_u64] & pte_mask) == 0) + return (-1); - if ((kd = calloc(1, sizeof(*kd))) == NULL) { - if (errout != NULL) - (void)strlcpy(errout, strerror(errno), - _POSIX2_LINE_MAX); - return (0); + /* + * Add/sub popcounts from the bitmap until the PTE's bit is reached. + * For bits that are in the upper half between the calculated + * popcount id and the next one, use the next one and subtract to + * minimize the number of popcounts required. + */ + if ((pte_bit_id % POPCOUNT_BITS) < (POPCOUNT_BITS / 2)) { + count = kd->pt_popcounts[popcount_id] + popcount_bytes( + bitmap + popcount_id * POPCOUNTS_IN(*bitmap), + 0, pte_bit_id - popcount_id * POPCOUNT_BITS); + } else { + /* + * Counting in reverse is trickier, since we must avoid + * reading from bytes that are not in range, and invert. + */ + uint64_t pte_u64_bit_off = pte_u64 * BITS_IN(*bitmap); + + popcount_id++; + bitN = MIN(popcount_id * POPCOUNT_BITS, + kd->pt_map_size * BITS_IN(uint8_t)); + count = kd->pt_popcounts[popcount_id] - popcount_bytes( + bitmap + pte_u64, + pte_bit_id - pte_u64_bit_off, bitN - pte_u64_bit_off); } - return (_kvm_open(kd, uf, mf, flag, errout)); -} -kvm_t * -kvm_open(const char *uf, const char *mf, const char *sf __unused, int flag, - const char *errstr) -{ - kvm_t *kd; + /* + * This can only happen if the core is truncated. Treat these + * entries as if they don't exist, since their backing doesn't. + */ + if (count >= (kd->pt_sparse_size / PAGE_SIZE)) + return (-1); - if ((kd = calloc(1, sizeof(*kd))) == NULL) { - if (errstr != NULL) - (void)fprintf(stderr, "%s: %s\n", - errstr, strerror(errno)); - return (0); - } - kd->program = errstr; - return (_kvm_open(kd, uf, mf, flag, NULL)); + return (kd->pt_sparse_off + (uint64_t)count * PAGE_SIZE); } -kvm_t * -kvm_open2(const char *uf, const char *mf, int flag, char *errout, - int (*resolver)(const char *, kvaddr_t *)) +int +_kvm_pt_read(kvm_t *kd, uint64_t pa, size_t length, void *buf) { - kvm_t *kd; + off_t off; + void *src; - if ((kd = calloc(1, sizeof(*kd))) == NULL) { - if (errout != NULL) - (void)strlcpy(errout, strerror(errno), - _POSIX2_LINE_MAX); - return (0); - } - kd->resolve_symbol = resolver; - return (_kvm_open(kd, uf, mf, flag, errout)); + off = _kvm_pt_find(kd, pa); + /* Make sure the request doesn't go off the end. */ + if (off == -1) + return (-1); + off -= kd->pt_sparse_off; + if ((uint64_t)off + length > kd->pt_sparse_size) + return (-1); + + src = (void *)((uintptr_t)kd->pt_sparse_pages + off); + memcpy(buf, src, length); + return (0); } -int -kvm_close(kvm_t *kd) +static int +kvm_fdnlist(kvm_t *kd, struct kvm_nlist *list) { - int error = 0; - - if (kd->vmst != NULL) - kd->arch->ka_freevtop(kd); - if (kd->pmfd >= 0) - error |= close(kd->pmfd); - if (kd->vmfd >= 0) - error |= close(kd->vmfd); - if (kd->nlfd >= 0) - error |= close(kd->nlfd); - if (kd->procbase != 0) - free((void *)kd->procbase); - if (kd->argbuf != 0) - free((void *) kd->argbuf); - if (kd->argspc != 0) - free((void *) kd->argspc); - if (kd->argv != 0) - free((void *)kd->argv); - free((void *)kd); + kvaddr_t addr; + int error, nfail; - return (0); + if (kd->resolve_symbol == NULL) { + struct nlist *nl; + int count, i; + + for (count = 0; list[count].n_name != NULL && + list[count].n_name[0] != '\0'; count++) + ; + nl = calloc(count + 1, sizeof(*nl)); + for (i = 0; i < count; i++) + nl[i].n_name = list[i].n_name; + nfail = __fdnlist(kd->nlfd, nl); + for (i = 0; i < count; i++) { + list[i].n_type = nl[i].n_type; + list[i].n_value = nl[i].n_value; + } + free(nl); + return (nfail); + } + + nfail = 0; + while (list->n_name != NULL && list->n_name[0] != '\0') { + error = kd->resolve_symbol(list->n_name, &addr); + if (error != 0) { + nfail++; + list->n_value = 0; + list->n_type = 0; + } else { + list->n_value = addr; + list->n_type = N_DATA | N_EXT; + } + list++; + } + return (nfail); } /* @@ -750,152 +682,3 @@ _kvm_syserr(kd, kd->program, "kvm_nlist"); return (error); } - -int -kvm_nlist2(kvm_t *kd, struct kvm_nlist *nl) -{ - - /* - * If called via the public interface, permit initialization of - * further virtualized modules on demand. - */ - return (_kvm_nlist(kd, nl, 1)); -} - -int -kvm_nlist(kvm_t *kd, struct nlist *nl) -{ - struct kvm_nlist *kl; - int count, i, nfail; - - /* - * Avoid reporting truncated addresses by failing for non-native - * cores. - */ - if (!kvm_native(kd)) { - _kvm_err(kd, kd->program, "kvm_nlist of non-native vmcore"); - return (-1); - } - - for (count = 0; nl[count].n_name != NULL && nl[count].n_name[0] != '\0'; - count++) - ; - if (count == 0) - return (0); - kl = calloc(count + 1, sizeof(*kl)); - for (i = 0; i < count; i++) - kl[i].n_name = nl[i].n_name; - nfail = kvm_nlist2(kd, kl); - for (i = 0; i < count; i++) { - nl[i].n_type = kl[i].n_type; - nl[i].n_other = 0; - nl[i].n_desc = 0; - nl[i].n_value = kl[i].n_value; - } - return (nfail); -} - -ssize_t -kvm_read(kvm_t *kd, u_long kva, void *buf, size_t len) -{ - - return (kvm_read2(kd, kva, buf, len)); -} - -ssize_t -kvm_read2(kvm_t *kd, kvaddr_t kva, void *buf, size_t len) -{ - int cc; - ssize_t cr; - off_t pa; - char *cp; - - if (ISALIVE(kd)) { - /* - * We're using /dev/kmem. Just read straight from the - * device and let the active kernel do the address translation. - */ - errno = 0; - if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) { - _kvm_err(kd, 0, "invalid address (0x%jx)", - (uintmax_t)kva); - return (-1); - } - cr = read(kd->vmfd, buf, len); - if (cr < 0) { - _kvm_syserr(kd, 0, "kvm_read"); - return (-1); - } else if (cr < (ssize_t)len) - _kvm_err(kd, kd->program, "short read"); - return (cr); - } - - cp = buf; - while (len > 0) { - cc = kd->arch->ka_kvatop(kd, kva, &pa); - if (cc == 0) - return (-1); - if (cc > (ssize_t)len) - cc = len; - errno = 0; - if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) { - _kvm_syserr(kd, 0, _PATH_MEM); - break; - } - cr = read(kd->pmfd, cp, cc); - if (cr < 0) { - _kvm_syserr(kd, kd->program, "kvm_read"); - break; - } - /* - * If ka_kvatop returns a bogus value or our core file is - * truncated, we might wind up seeking beyond the end of the - * core file in which case the read will return 0 (EOF). - */ - if (cr == 0) - break; - cp += cr; - kva += cr; - len -= cr; - } - - return (cp - (char *)buf); -} - -ssize_t -kvm_write(kvm_t *kd, u_long kva, const void *buf, size_t len) -{ - int cc; - - if (ISALIVE(kd)) { - /* - * Just like kvm_read, only we write. - */ - errno = 0; - if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) { - _kvm_err(kd, 0, "invalid address (%lx)", kva); - return (-1); - } - cc = write(kd->vmfd, buf, len); - if (cc < 0) { - _kvm_syserr(kd, 0, "kvm_write"); - return (-1); - } else if ((size_t)cc < len) - _kvm_err(kd, kd->program, "short write"); - return (cc); - } else { - _kvm_err(kd, kd->program, - "kvm_write not implemented for dead kernels"); - return (-1); - } - /* NOTREACHED */ -} - -int -kvm_native(kvm_t *kd) -{ - - if (ISALIVE(kd)) - return (1); - return (kd->arch->ka_native(kd)); -}