diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c
--- a/sys/amd64/amd64/minidump_machdep.c
+++ b/sys/amd64/amd64/minidump_machdep.c
@@ -52,6 +52,7 @@
 #include
 #include
 #include
+#include

 CTASSERT(sizeof(struct kerneldumpheader) == 512);

@@ -163,10 +164,11 @@
 cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
 {
 	uint32_t pmapsize;
-	vm_offset_t va;
+	vm_offset_t va, kva_end;
 	int error;
 	uint64_t *pml4, *pdp, *pd, *pt, pa;
-	int i, ii, j, k, n;
+	uint64_t pdpe, pde, pte;
+	int ii, j, k, n;
 	int retry_count;
 	struct minidumphdr mdhdr;

@@ -174,10 +176,18 @@
 retry:
 	retry_count++;

-	/* Walk page table pages, set bits in vm_page_dump */
+	/* Snapshot the KVA upper bound in case it grows. */
+	kva_end = MAX(KERNBASE + nkpt * NBPDR, kernel_vm_end);
+
+	/*
+	 * Walk the kernel page table pages, setting the active entries in the
+	 * dump bitmap.
+	 *
+	 * NB: for a live dump, we may be racing with updates to the page
+	 * tables, so care must be taken to read each entry only once.
+	 */
 	pmapsize = 0;
-	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR,
-	    kernel_vm_end); ) {
+	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; ) {
 		/*
 		 * We always write a page, even if it is zero. Each
 		 * page written corresponds to 1GB of space
@@ -186,8 +196,8 @@
 		ii = pmap_pml4e_index(va);
 		pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii;
 		pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
-		i = pmap_pdpe_index(va);
-		if ((pdp[i] & PG_V) == 0) {
+		pdpe = atomic_load_64(&pdp[pmap_pdpe_index(va)]);
+		if ((pdpe & PG_V) == 0) {
 			va += NBPDP;
 			continue;
 		}
@@ -195,9 +205,9 @@
 		/*
 		 * 1GB page is represented as 512 2MB pages in a dump.
 		 */
-		if ((pdp[i] & PG_PS) != 0) {
+		if ((pdpe & PG_PS) != 0) {
 			va += NBPDP;
-			pa = pdp[i] & PG_PS_FRAME;
+			pa = pdpe & PG_PS_FRAME;
 			for (n = 0; n < NPDEPG * NPTEPG; n++) {
 				if (vm_phys_is_dumpable(pa))
 					dump_add_page(pa);
@@ -206,16 +216,16 @@
 			continue;
 		}

-		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
+		pd = (uint64_t *)PHYS_TO_DMAP(pdpe & PG_FRAME);
 		for (n = 0; n < NPDEPG; n++, va += NBPDR) {
-			j = pmap_pde_index(va);
+			pde = atomic_load_64(&pd[pmap_pde_index(va)]);
-			if ((pd[j] & PG_V) == 0)
+			if ((pde & PG_V) == 0)
 				continue;
-			if ((pd[j] & PG_PS) != 0) {
+			if ((pde & PG_PS) != 0) {
 				/* This is an entire 2M page. */
-				pa = pd[j] & PG_PS_FRAME;
+				pa = pde & PG_PS_FRAME;
 				for (k = 0; k < NPTEPG; k++) {
 					if (vm_phys_is_dumpable(pa))
 						dump_add_page(pa);
@@ -224,17 +234,18 @@
 				continue;
 			}

-			pa = pd[j] & PG_FRAME;
+			pa = pde & PG_FRAME;
 			/* set bit for this PTE page */
 			if (vm_phys_is_dumpable(pa))
 				dump_add_page(pa);
 			/* and for each valid page in this 2MB block */
-			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
+			pt = (uint64_t *)PHYS_TO_DMAP(pde & PG_FRAME);
 			for (k = 0; k < NPTEPG; k++) {
-				if ((pt[k] & PG_V) == 0)
+				pte = atomic_load_64(&pt[k]);
+				if ((pte & PG_V) == 0)
 					continue;
-				pa = pt[k] & PG_FRAME;
-				if (vm_phys_is_dumpable(pa))
+				pa = pte & PG_FRAME;
+				if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
 					dump_add_page(pa);
 			}
 		}
@@ -247,7 +258,7 @@
 	dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
 	VM_PAGE_DUMP_FOREACH(pa) {
 		/* Clear out undumpable pages now if needed */
-		if (vm_phys_is_dumpable(pa)) {
+		if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) {
 			dumpsize += PAGE_SIZE;
 		} else {
 			dump_drop_page(pa);
@@ -309,15 +320,14 @@
 	/* Dump kernel page directory pages */
 	bzero(fakepd, sizeof(fakepd));
-	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR,
-	    kernel_vm_end); va += NBPDP) {
+	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += NBPDP) {
 		ii = pmap_pml4e_index(va);
 		pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii;
 		pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
-		i = pmap_pdpe_index(va);
+		pdpe = atomic_load_64(&pdp[pmap_pdpe_index(va)]);
 		/* We always write a page, even if it is zero */
-		if ((pdp[i] & PG_V) == 0) {
+		if ((pdpe & PG_V) == 0) {
 			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
 			if (error)
 				goto fail;
@@ -329,9 +339,9 @@
 		}

 		/* 1GB page is represented as 512 2MB pages in a dump */
-		if ((pdp[i] & PG_PS) != 0) {
+		if ((pdpe & PG_PS) != 0) {
 			/* PDPE and PDP have identical layout in this case */
-			fakepd[0] = pdp[i];
+			fakepd[0] = pdpe;
 			for (j = 1; j < NPDEPG; j++)
 				fakepd[j] = fakepd[j - 1] + NBPDR;
 			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
@@ -345,8 +355,14 @@
 			continue;
 		}

-		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
-		error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
+		pa = pdpe & PG_FRAME;
+		if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) {
+			pd = (uint64_t *)PHYS_TO_DMAP(pa);
+			error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
+		} else {
+			/* Malformed pa, write the zeroed fakepd. */
+			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
+		}
 		if (error)
 			goto fail;
 		error = blk_flush(di);
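
The amd64 hunks above apply one pattern throughout: snapshot the KVA upper bound once, then load each page-table entry exactly once (atomic_load_64()) and make every decision on that local copy. Below is a minimal user-space sketch of that read-once idea. It is illustrative only and not part of the patch; the pt_entry_t type and the PG_V/PG_PS values are stand-ins, not the kernel's definitions.

/*
 * Illustrative user-space sketch of the read-once pattern used above.
 * The types and flag values are stand-ins, not the kernel's definitions.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define	PG_V	0x001			/* "valid" bit, illustrative value */
#define	PG_PS	0x080			/* "superpage" bit, illustrative value */
#define	NPTEPG	512

typedef _Atomic uint64_t pt_entry_t;

/* Count valid 4K entries while another thread may be rewriting the table. */
static int
count_valid(pt_entry_t *pt)
{
	uint64_t pte;
	int i, n;

	n = 0;
	for (i = 0; i < NPTEPG; i++) {
		/*
		 * Load the entry exactly once and test only the local copy;
		 * re-reading pt[i] could observe two different values if a
		 * concurrent update lands between the PG_V test and the use
		 * of the frame address.
		 */
		pte = atomic_load_explicit(&pt[i], memory_order_relaxed);
		if ((pte & PG_V) == 0)
			continue;
		if ((pte & PG_PS) != 0)
			continue;	/* superpage handled elsewhere */
		n++;
	}
	return (n);
}

int
main(void)
{
	static pt_entry_t pt[NPTEPG];

	atomic_store(&pt[3], 0x1000 | PG_V);
	atomic_store(&pt[7], 0x200000 | PG_V | PG_PS);
	printf("%d valid 4K entries\n", count_valid(pt));
	return (0);
}
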
diff --git a/sys/arm/arm/minidump_machdep.c b/sys/arm/arm/minidump_machdep.c
--- a/sys/arm/arm/minidump_machdep.c
+++ b/sys/arm/arm/minidump_machdep.c
@@ -159,7 +159,7 @@
 	uint64_t dumpsize, *dump_avail_buf;
 	uint32_t ptesize;
 	uint32_t pa, prev_pa = 0, count = 0;
-	vm_offset_t va;
+	vm_offset_t va, kva_end;
 	int error, i;
 	char *addr;

@@ -174,9 +174,15 @@
 	 */
 	dcache_wbinv_poc_all();

-	/* Walk page table pages, set bits in vm_page_dump */
+	/* Snapshot the KVA upper bound in case it grows. */
+	kva_end = kernel_vm_end;
+
+	/*
+	 * Walk the kernel page table pages, setting the active entries in the
+	 * dump bitmap.
+	 */
 	ptesize = 0;
-	for (va = KERNBASE; va < kernel_vm_end; va += PAGE_SIZE) {
+	for (va = KERNBASE; va < kva_end; va += PAGE_SIZE) {
 		pa = pmap_dump_kextract(va, NULL);
 		if (pa != 0 && vm_phys_is_dumpable(pa))
 			dump_add_page(pa);
@@ -255,7 +261,7 @@
 	/* Dump kernel page table pages */
 	addr = dumpbuf;
-	for (va = KERNBASE; va < kernel_vm_end; va += PAGE_SIZE) {
+	for (va = KERNBASE; va < kva_end; va += PAGE_SIZE) {
 		pmap_dump_kextract(va, (pt2_entry_t *)addr);
 		addr += sizeof(pt2_entry_t);
 		if (addr == dumpbuf + sizeof(dumpbuf)) {
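
On arm the only change is the snapshot of kernel_vm_end into kva_end, so the sizing pass and the later page-table write pass iterate over exactly the same range even if KVA grows in between. A small stand-alone sketch of why that matters follows; it is illustrative only, and kernel_vm_end here is an ordinary variable standing in for the kernel's growable KVA limit.

/*
 * Why the bound is snapshotted: the same limit must be used when sizing
 * the dump and when writing it, or the header and the payload disagree.
 */
#include <stdint.h>
#include <stdio.h>

#define	PAGE_SIZE	4096
#define	KERNBASE	0xc0000000UL

static uintptr_t kernel_vm_end = KERNBASE + 64 * 1024 * 1024;

int
main(void)
{
	uintptr_t va, kva_end;
	uint64_t sized = 0, written = 0;

	/* Pass 1: size the page-table portion of the dump. */
	kva_end = kernel_vm_end;		/* snapshot once */
	for (va = KERNBASE; va < kva_end; va += PAGE_SIZE)
		sized += sizeof(uint32_t);

	/* KVA grows in between (e.g. another CPU faults in new mappings). */
	kernel_vm_end += 4 * 1024 * 1024;

	/* Pass 2: write it, over the same snapshotted range. */
	for (va = KERNBASE; va < kva_end; va += PAGE_SIZE)
		written += sizeof(uint32_t);

	printf("sized=%llu written=%llu (must match)\n",
	    (unsigned long long)sized, (unsigned long long)written);
	return (0);
}
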
diff --git a/sys/arm64/arm64/minidump_machdep.c b/sys/arm64/arm64/minidump_machdep.c
--- a/sys/arm64/arm64/minidump_machdep.c
+++ b/sys/arm64/arm64/minidump_machdep.c
@@ -51,6 +51,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -150,9 +151,9 @@
 cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
 {
 	struct minidumphdr mdhdr;
-	pd_entry_t *l0, *l1, *l2;
-	pt_entry_t *l3;
-	vm_offset_t va;
+	pd_entry_t *l0, *l1, l1e, *l2, l2e;
+	pt_entry_t *l3, l3e;
+	vm_offset_t va, kva_end;
 	vm_paddr_t pa;
 	uint32_t pmapsize;
 	int error, i, j, retry_count;
@@ -162,31 +163,46 @@
 	retry_count++;
 	error = 0;
 	pmapsize = 0;
-	for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
+
+	/* Snapshot the KVA upper bound in case it grows. */
+	kva_end = kernel_vm_end;
+
+	/*
+	 * Walk the kernel page table pages, setting the active entries in the
+	 * dump bitmap.
+	 *
+	 * NB: for a live dump, we may be racing with updates to the page
+	 * tables, so care must be taken to read each entry only once.
+	 */
+	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += L2_SIZE) {
 		pmapsize += PAGE_SIZE;
 		if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3))
 			continue;

-		if ((*l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
-			pa = *l1 & ~ATTR_MASK;
+		l1e = atomic_load_64(l1);
+		l2e = atomic_load_64(l2);
+		if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
+			pa = l1e & ~ATTR_MASK;
 			for (i = 0; i < Ln_ENTRIES * Ln_ENTRIES;
 			    i++, pa += PAGE_SIZE)
 				if (vm_phys_is_dumpable(pa))
 					dump_add_page(pa);
 			pmapsize += (Ln_ENTRIES - 1) * PAGE_SIZE;
 			va += L1_SIZE - L2_SIZE;
-		} else if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
-			pa = *l2 & ~ATTR_MASK;
+		} else if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) {
+			pa = l2e & ~ATTR_MASK;
 			for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) {
 				if (vm_phys_is_dumpable(pa))
 					dump_add_page(pa);
 			}
-		} else if ((*l2 & ATTR_DESCR_MASK) == L2_TABLE) {
+		} else if ((l2e & ATTR_DESCR_MASK) == L2_TABLE) {
 			for (i = 0; i < Ln_ENTRIES; i++) {
-				if ((l3[i] & ATTR_DESCR_MASK) != L3_PAGE)
+				l3e = atomic_load_64(&l3[i]);
+				if ((l3e & ATTR_DESCR_MASK) != L3_PAGE)
 					continue;
-				pa = l3[i] & ~ATTR_MASK;
-				if (vm_phys_is_dumpable(pa))
+				pa = l3e & ~ATTR_MASK;
+				if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
 					dump_add_page(pa);
 			}
 		}
@@ -198,7 +214,7 @@
 	dumpsize += round_page(sizeof(dump_avail));
 	dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
 	VM_PAGE_DUMP_FOREACH(pa) {
-		if (vm_phys_is_dumpable(pa))
+		if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
 			dumpsize += PAGE_SIZE;
 		else
 			dump_drop_page(pa);
@@ -260,7 +276,7 @@
 	/* Dump kernel page directory pages */
 	bzero(&tmpbuffer, sizeof(tmpbuffer));
-	for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
+	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += L2_SIZE) {
 		if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) {
 			/* We always write a page, even if it is zero */
 			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
@@ -270,12 +286,17 @@
 			error = blk_flush(di);
 			if (error)
 				goto fail;
-		} else if ((*l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
+			continue;
+		}
+
+		l1e = atomic_load_64(l1);
+		l2e = atomic_load_64(l2);
+		if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
 			/*
 			 * Handle a 1GB block mapping: write out 512 fake L2
 			 * pages.
 			 */
-			pa = (*l1 & ~ATTR_MASK) | (va & L1_OFFSET);
+			pa = (l1e & ~ATTR_MASK) | (va & L1_OFFSET);

 			for (i = 0; i < Ln_ENTRIES; i++) {
 				for (j = 0; j < Ln_ENTRIES; j++) {
@@ -294,8 +315,8 @@
 				goto fail;
 			bzero(&tmpbuffer, sizeof(tmpbuffer));
 			va += L1_SIZE - L2_SIZE;
-		} else if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
-			pa = (*l2 & ~ATTR_MASK) | (va & L2_OFFSET);
+		} else if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) {
+			pa = (l2e & ~ATTR_MASK) | (va & L2_OFFSET);

 			/* Generate fake l3 entries based upon the l1 entry */
 			for (i = 0; i < Ln_ENTRIES; i++) {
@@ -312,9 +333,17 @@
 			bzero(&tmpbuffer, sizeof(tmpbuffer));
 			continue;
 		} else {
-			pa = *l2 & ~ATTR_MASK;
+			pa = l2e & ~ATTR_MASK;

-			error = blk_write(di, NULL, pa, PAGE_SIZE);
+			/*
+			 * We always write a page, even if it is zero. If pa
+			 * is malformed, write the zeroed tmpbuffer.
+			 */
+			if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
+				error = blk_write(di, NULL, pa, PAGE_SIZE);
+			else
+				error = blk_write(di, (char *)&tmpbuffer, 0,
+				    PAGE_SIZE);
 			if (error)
 				goto fail;
 		}
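
Both the amd64 and arm64 writers above expand a 1GB block mapping into 512 2MB entries so the minidump always contains a uniform page-table layer. A minimal sketch of that fakepd generation follows, with illustrative constants and flag values standing in for the kernel's NPDEPG/NBPDR and real PTE attribute bits.

/*
 * Sketch of how a 1GB block mapping is represented as 512 2MB entries in
 * the dump, mirroring the fakepd/tmpbuffer generation above.
 */
#include <assert.h>
#include <stdint.h>

#define	NPDEPG	512
#define	NBPDR	(2UL * 1024 * 1024)	/* 2MB covered by each entry */

int
main(void)
{
	uint64_t fakepd[NPDEPG];
	uint64_t pdpe = 0x40000000 | 0x83;	/* 1GB frame + illustrative flags */
	int j;

	/*
	 * The first fake entry inherits the block's frame and attributes;
	 * each subsequent entry advances the frame by one 2MB step.
	 */
	fakepd[0] = pdpe;
	for (j = 1; j < NPDEPG; j++)
		fakepd[j] = fakepd[j - 1] + NBPDR;

	assert((fakepd[511] & ~0xfffULL) == 0x40000000 + 511 * NBPDR);
	return (0);
}
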
diff --git a/sys/i386/i386/minidump_machdep_base.c b/sys/i386/i386/minidump_machdep_base.c
--- a/sys/i386/i386/minidump_machdep_base.c
+++ b/sys/i386/i386/minidump_machdep_base.c
@@ -157,17 +157,26 @@
 {
 	uint64_t dumpsize;
 	uint32_t ptesize;
-	vm_offset_t va;
+	vm_offset_t va, kva_end;
 	int error;
 	uint64_t pa;
-	pd_entry_t *pd;
-	pt_entry_t *pt;
+	pd_entry_t *pd, pde;
+	pt_entry_t *pt, pte;
 	int j, k;
 	struct minidumphdr mdhdr;

-	/* Walk page table pages, set bits in vm_page_dump */
+	/* Snapshot the KVA upper bound in case it grows. */
+	kva_end = kernel_vm_end;
+
+	/*
+	 * Walk the kernel page table pages, setting the active entries in the
+	 * dump bitmap.
+	 *
+	 * NB: for a live dump, we may be racing with updates to the page
+	 * tables, so care must be taken to read each entry only once.
+	 */
 	ptesize = 0;
-	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
+	for (va = KERNBASE; va < kva_end; va += NBPDR) {
 		/*
 		 * We always write a page, even if it is zero. Each
 		 * page written corresponds to 2MB of space
@@ -175,9 +184,10 @@
 		ptesize += PAGE_SIZE;
 		pd = IdlePTD;	/* always mapped! */
 		j = va >> PDRSHIFT;
-		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+		pde = pte_load(&pd[va >> PDRSHIFT]);
+		if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
 			/* This is an entire 2M page. */
-			pa = pd[j] & PG_PS_FRAME;
+			pa = pde & PG_PS_FRAME;
 			for (k = 0; k < NPTEPG; k++) {
 				if (vm_phys_is_dumpable(pa))
 					dump_add_page(pa);
@@ -185,12 +195,13 @@
 			}
 			continue;
 		}
-		if ((pd[j] & PG_V) == PG_V) {
+		if ((pde & PG_V) == PG_V) {
 			/* set bit for each valid page in this 2MB block */
-			pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
+			pt = pmap_kenter_temporary(pde & PG_FRAME, 0);
 			for (k = 0; k < NPTEPG; k++) {
-				if ((pt[k] & PG_V) == PG_V) {
-					pa = pt[k] & PG_FRAME;
+				pte = pte_load(&pt[k]);
+				if ((pte & PG_V) == PG_V) {
+					pa = pte & PG_FRAME;
 					if (vm_phys_is_dumpable(pa))
 						dump_add_page(pa);
 				}
@@ -266,13 +277,13 @@
 		goto fail;

 	/* Dump kernel page table pages */
-	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
+	for (va = KERNBASE; va < kva_end; va += NBPDR) {
 		/* We always write a page, even if it is zero */
 		pd = IdlePTD;	/* always mapped! */
-		j = va >> PDRSHIFT;
-		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+		pde = pte_load(&pd[va >> PDRSHIFT]);
+		if ((pde & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
 			/* This is a single 2M block. Generate a fake PTP */
-			pa = pd[j] & PG_PS_FRAME;
+			pa = pde & PG_PS_FRAME;
 			for (k = 0; k < NPTEPG; k++) {
 				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
 			}
@@ -285,8 +296,8 @@
 				goto fail;
 			continue;
 		}
-		if ((pd[j] & PG_V) == PG_V) {
-			pa = pd[j] & PG_FRAME;
+		if ((pde & PG_V) == PG_V) {
+			pa = pde & PG_FRAME;
 			error = blk_write(di, 0, pa, PAGE_SIZE);
 			if (error)
 				goto fail;
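
The i386 writer synthesizes a fake page-table page for a PG_PS mapping so the dump still carries page-granularity entries. A compact sketch of that fakept construction follows, using illustrative constants and flag values rather than the i386 pmap's actual definitions.

/*
 * Sketch of the "fake PTP" written for a superpage mapping above: a page
 * table page is synthesized so the dump always contains small-page entries.
 */
#include <assert.h>
#include <stdint.h>

#define	PAGE_SIZE	4096
#define	NPTEPG		512		/* entries per PTP, illustrative */
#define	PG_V		0x001
#define	PG_RW		0x002
#define	PG_A		0x020
#define	PG_M		0x040

int
main(void)
{
	uint32_t fakept[NPTEPG];
	uint64_t pa = 0x800000;		/* frame taken from the superpage PDE */
	int k;

	/* Each fake PTE maps one small page of the superpage, marked dirty. */
	for (k = 0; k < NPTEPG; k++)
		fakept[k] = (uint32_t)(pa + (uint64_t)k * PAGE_SIZE) |
		    PG_V | PG_RW | PG_A | PG_M;

	assert(fakept[1] == ((0x800000 + PAGE_SIZE) | PG_V | PG_RW | PG_A | PG_M));
	return (0);
}
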
diff --git a/sys/riscv/riscv/minidump_machdep.c b/sys/riscv/riscv/minidump_machdep.c
--- a/sys/riscv/riscv/minidump_machdep.c
+++ b/sys/riscv/riscv/minidump_machdep.c
@@ -155,11 +155,11 @@
 int
 cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
 {
-	pd_entry_t *l1, *l2;
-	pt_entry_t *l3;
+	pd_entry_t *l1, *l2, l2e;
+	pt_entry_t *l3, l3e;
 	struct minidumphdr mdhdr;
 	uint32_t pmapsize;
-	vm_offset_t va;
+	vm_offset_t va, kva_max;
 	vm_paddr_t pa;
 	int error;
 	int i;
@@ -171,8 +171,17 @@
 	error = 0;
 	pmapsize = 0;

-	/* Build set of dumpable pages from kernel pmap */
-	for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
+	/* Snapshot the KVA upper bound in case it grows. */
+	kva_max = kernel_vm_end;
+
+	/*
+	 * Walk the kernel page table pages, setting the active entries in the
+	 * dump bitmap.
+	 *
+	 * NB: for a live dump, we may be racing with updates to the page
+	 * tables, so care must be taken to read each entry only once.
+	 */
+	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_max; va += L2_SIZE) {
 		pmapsize += PAGE_SIZE;
 		if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3))
 			continue;
@@ -182,18 +191,20 @@
 			continue;

 		/* l2 may be a superpage */
-		if ((*l2 & PTE_RWX) != 0) {
-			pa = (*l2 >> PTE_PPN1_S) << L2_SHIFT;
+		l2e = atomic_load_64(l2);
+		if ((l2e & PTE_RWX) != 0) {
+			pa = (l2e >> PTE_PPN1_S) << L2_SHIFT;
 			for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) {
 				if (vm_phys_is_dumpable(pa))
 					dump_add_page(pa);
 			}
 		} else {
 			for (i = 0; i < Ln_ENTRIES; i++) {
-				if ((l3[i] & PTE_V) == 0)
+				l3e = atomic_load_64(&l3[i]);
+				if ((l3e & PTE_V) == 0)
 					continue;
-				pa = (l3[i] >> PTE_PPN0_S) * PAGE_SIZE;
-				if (vm_phys_is_dumpable(pa))
+				pa = (l3e >> PTE_PPN0_S) * PAGE_SIZE;
+				if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
 					dump_add_page(pa);
 			}
 		}
@@ -206,7 +217,7 @@
 	dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
 	VM_PAGE_DUMP_FOREACH(pa) {
 		/* Clear out undumpable pages now if needed */
-		if (vm_phys_is_dumpable(pa))
+		if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
 			dumpsize += PAGE_SIZE;
 		else
 			dump_drop_page(pa);
@@ -268,7 +279,7 @@
 	/* Dump kernel page directory pages */
 	bzero(&tmpbuffer, sizeof(tmpbuffer));
-	for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
+	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_max; va += L2_SIZE) {
 		if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) {
 			/* We always write a page, even if it is zero */
 			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
@@ -278,10 +289,14 @@
 			error = blk_flush(di);
 			if (error)
 				goto fail;
-		} else if ((*l2 & PTE_RWX) != 0) {
+			continue;
+		}
+
+		l2e = atomic_load_64(l2);
+		if ((l2e & PTE_RWX) != 0) {
 			/* Generate fake l3 entries based on the l2 superpage */
 			for (i = 0; i < Ln_ENTRIES; i++) {
-				tmpbuffer[i] = (*l2 | (i << PTE_PPN0_S));
+				tmpbuffer[i] = (l2e | (i << PTE_PPN0_S));
 			}
 			/* We always write a page, even if it is zero */
 			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
@@ -293,10 +308,17 @@
 				goto fail;
 			bzero(&tmpbuffer, sizeof(tmpbuffer));
 		} else {
-			pa = (*l2 >> PTE_PPN0_S) * PAGE_SIZE;
-
-			/* We always write a page, even if it is zero */
-			error = blk_write(di, NULL, pa, PAGE_SIZE);
+			pa = (l2e >> PTE_PPN0_S) * PAGE_SIZE;
+
+			/*
+			 * We always write a page, even if it is zero. If pa
+			 * is malformed, write the zeroed tmpbuffer.
+			 */
+			if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
+				error = blk_write(di, NULL, pa, PAGE_SIZE);
+			else
+				error = blk_write(di, (char *)&tmpbuffer, 0,
+				    PAGE_SIZE);
 			if (error)
 				goto fail;
 		}
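
The common thread in the final hunks for amd64, arm64 and riscv is that a physical address derived from a possibly-racy page-table entry is validated (PHYS_IN_DMAP() plus vm_phys_is_dumpable()) before being written, and a zeroed page is substituted otherwise so the dump layout stays self-consistent. Below is a user-space sketch of that fallback; every helper here is a stub, not the kernel's PHYS_IN_DMAP(), vm_phys_is_dumpable() or blk_write().

/*
 * Sketch of the write-or-substitute-zeros fallback applied above. The
 * "direct map" is a plain array and the dump target is a regular file.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	PAGE_SIZE	4096
#define	PHYS_PAGES	16		/* size of the fake "physical memory" */

static uint8_t phys_mem[PHYS_PAGES * PAGE_SIZE];	/* fake direct map */
static const uint8_t zero_page[PAGE_SIZE];

static bool
phys_in_dmap(uint64_t pa)
{
	/* Stand-in for PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa). */
	return (pa / PAGE_SIZE < PHYS_PAGES);
}

/* Write one page of the dump; never fail the dump over a bad address. */
static void
write_page(FILE *fp, uint64_t pa)
{
	const void *src;

	/*
	 * A page is always written so offsets in the dump stay fixed; a
	 * malformed address just contributes a page of zeros instead.
	 */
	src = phys_in_dmap(pa) ? &phys_mem[pa] : zero_page;
	fwrite(src, 1, PAGE_SIZE, fp);
}

int
main(void)
{
	FILE *fp = fopen("minidump.sketch", "wb");

	if (fp == NULL)
		return (1);
	memset(&phys_mem[2 * PAGE_SIZE], 0xaa, PAGE_SIZE);
	write_page(fp, 2 * PAGE_SIZE);		/* valid address */
	write_page(fp, UINT64_MAX & ~0xfffULL);	/* garbage from a racy PTE */
	fclose(fp);
	return (0);
}
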