Page MenuHomeFreeBSD

D14282.id39229.diff
No OneTemporary

D14282.id39229.diff

Index: sys/amd64/amd64/initcpu.c
===================================================================
--- sys/amd64/amd64/initcpu.c
+++ sys/amd64/amd64/initcpu.c
@@ -218,7 +218,7 @@
if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
cr4 |= CR4_SMEP;
load_cr4(cr4);
- if ((amd_feature & AMDID_NX) != 0) {
+ if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
msr = rdmsr(MSR_EFER) | EFER_NXE;
wrmsr(MSR_EFER, msr);
pg_nx = PG_NX;
Index: sys/amd64/amd64/mpboot.S
===================================================================
--- sys/amd64/amd64/mpboot.S
+++ sys/amd64/amd64/mpboot.S
@@ -221,15 +221,31 @@
/*
* From here on down is executed in the kernel .text section.
- *
- * Load a real %cr3 that has all the direct map stuff and switches
- * off the 1GB replicated mirror. Load a stack pointer and jump
- * into AP startup code in C.
*/
.text
.code64
.p2align 4,0
entry_64:
+ /*
+ * If the BSP reported NXE support, enable EFER.NXE for all APs
+ * prior to loading %cr3. This avoids page faults if the AP
+ * encounters memory marked with the NX bit prior to detecting and
+ * enabling NXE support.
+ */
+ movq pg_nx, %rbx
+ testq %rbx, %rbx
+ je 1f
+ movl $MSR_EFER, %ecx
+ rdmsr
+ orl $EFER_NXE, %eax
+ wrmsr
+
+1:
+ /*
+ * Load a real %cr3 that has all the direct map stuff and switches
+ * off the 1GB replicated mirror. Load a stack pointer and jump
+ * into AP startup code in C.
+ */
movq KPML4phys, %rax
movq %rax, %cr3
movq bootSTK, %rsp
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -397,6 +397,12 @@
static struct md_page *pv_table;
static struct md_page pv_dummy;
+static int pmap_kernelro = 1;
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, kernelro, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &pmap_kernelro, 0,
+ "Map the read-only portions of the kernel with read-only permissions");
+
/*
* All those kernel PT submaps that BSD is so fond of
*/
@@ -887,15 +893,71 @@
nkpt = pt_pages;
}
+/*
+ * Returns the write/execute permission bits (some combination of X86_PG_RW
+ * and pg_nx) for a physical page that is part of the initial boot allocations.
+ *
+ * With pmap_kernelro set, a page with kernel text is marked read-only and a
+ * page with kernel read-only data is marked read-only/not-executable; with
+ * it clear, both also get X86_PG_RW. A page with only read-write data is
+ * marked read-write/not-executable. A page below the kernel, or in or above
+ * the 2M page containing _end, gets X86_PG_RW alone.
+ *
+ * This function operates on 2M pages, since we map the kernel space that way.
+ *
+ * Note that this doesn't currently provide any protection for modules.
+ */
+static inline pt_entry_t
+bootaddr_rwx(vm_paddr_t pa)
+{
+
+ /*
+ * Everything in the same 2M page as the start of the kernel
+ * should be static. On the other hand, things in the same 2M
+ * page as the end of the kernel could be read-write/executable,
+ * as the kernel image is not guaranteed to end on a 2M boundary.
+ */
+ if (pa < trunc_2mpage(btext - KERNBASE) ||
+ pa >= trunc_2mpage(_end - KERNBASE))
+ return (X86_PG_RW);
+ /*
+ * The linker should ensure that the read-only and read-write
+ * portions don't share the same 2M page, so this shouldn't
+ * impact read-only data. However, in any case, any page with
+ * read-write data needs to be read-write.
+ */
+ if (pa >= trunc_2mpage(brwsection - KERNBASE))
+ return (X86_PG_RW | pg_nx);
+ /*
+ * Mark any 2M page containing kernel text as read-only. Mark
+ * other pages with read-only data as read-only and not executable.
+ * (It is likely a small portion of the read-only data section will
+ * be marked as read-only, but executable. This should be acceptable
+ * since the read-only protection will keep the data from changing.)
+ * Note that fixups to the .text section will still work until we
+ * set CR0.WP.
+ */
+ if (pa < round_2mpage(etext - KERNBASE))
+ return (pmap_kernelro ? 0 : X86_PG_RW);
+ return (pg_nx | (pmap_kernelro ? 0 : X86_PG_RW));
+}
+
static void
create_pagetables(vm_paddr_t *firstaddr)
{
- int i, j, ndm1g, nkpdpe;
+ int i, j, ndm1g, nkpdpe, nkdmpde;
pt_entry_t *pt_p;
pd_entry_t *pd_p;
pdp_entry_t *pdp_p;
pml4_entry_t *p4_p;
+ uint64_t DMPDkernphys;
+ /*
+ * Determine if we are marking the read-only portion with read-only
+ * permissions.
+ */
+ TUNABLE_INT_FETCH("vm.pmap.kernelro", &pmap_kernelro);
+
/* Allocate page table pages for the direct map */
ndmpdp = howmany(ptoa(Maxmem), NBPDP);
if (ndmpdp < 4) /* Minimum 4GB of dirmap */
@@ -913,8 +975,14 @@
}
DMPDPphys = allocpages(firstaddr, ndmpdpphys);
ndm1g = 0;
- if ((amd_feature & AMDID_PAGE1GB) != 0)
+ if ((amd_feature & AMDID_PAGE1GB) != 0) {
ndm1g = ptoa(Maxmem) >> PDPSHIFT;
+ if (pmap_kernelro) {
+ nkdmpde = howmany((vm_offset_t)(brwsection - KERNBASE),
+ NBPDP);
+ DMPDkernphys = allocpages(firstaddr, nkdmpde);
+ }
+ }
if (ndm1g < ndmpdp)
DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g);
dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
@@ -940,11 +1008,10 @@
KPDphys = allocpages(firstaddr, nkpdpe);
/* Fill in the underlying page table pages */
- /* Nominally read-only (but really R/W) from zero to physfree */
/* XXX not fully used, underneath 2M pages */
pt_p = (pt_entry_t *)KPTphys;
for (i = 0; ptoa(i) < *firstaddr; i++)
- pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | pg_g;
+ pt_p[i] = ptoa(i) | X86_PG_V | pg_g | bootaddr_rwx(ptoa(i));
/* Now map the page tables at their location within PTmap */
pd_p = (pd_entry_t *)KPDphys;
@@ -954,8 +1021,8 @@
/* Map from zero to end of allocations under 2M pages */
/* This replaces some of the KPTphys entries above */
for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
- pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS |
- pg_g;
+ pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g |
+ bootaddr_rwx(i << PDRSHIFT);
/*
* Because we map the physical blocks in 2M pages, adjust firstaddr
@@ -995,6 +1062,22 @@
for (j = 0; i < ndmpdp; i++, j++) {
pdp_p[i] = DMPDphys + ptoa(j);
pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_U;
+ }
+
+ /*
+ * Instead of using a 1G page for the memory containing the kernel,
+ * use 2M pages with appropriate permissions. (If using 1G pages,
+ * this will partially overwrite the PDPEs above.)
+ */
+ if (ndm1g && pmap_kernelro) {
+ pd_p = (pd_entry_t *)DMPDkernphys;
+ for (i = 0; i < NPDPEPG; i++)
+ pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g |
+ X86_PG_M | X86_PG_A | pg_nx |
+ bootaddr_rwx(i << PDRSHIFT);
+ for (i = 0; i < nkdmpde; i++)
+ pdp_p[i] = (DMPDkernphys + ptoa(i)) | X86_PG_RW |
+ X86_PG_V | PG_U;
}
/* And recursively map PML4 to itself in order to get PTmap */
Index: sys/amd64/include/cpu.h
===================================================================
--- sys/amd64/include/cpu.h
+++ sys/amd64/include/cpu.h
@@ -68,7 +68,9 @@
};
extern struct cpu_ops cpu_ops;
+extern char brwsection[];
extern char btext[];
+extern char _end[];
extern char etext[];
/* Resume hook for VMM. */
Index: sys/conf/kern.pre.mk
===================================================================
--- sys/conf/kern.pre.mk
+++ sys/conf/kern.pre.mk
@@ -120,6 +120,10 @@
LDFLAGS+= -Wl,--build-id=sha1
.endif
+.if ${MACHINE_CPUARCH} == "amd64"
+LDFLAGS+= -z max-page-size=2097152 -z common-page-size=4096
+.endif
+
NORMAL_C= ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.IMPSRC}
NORMAL_S= ${CC:N${CCACHE_BIN}} -c ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}
PROFILE_C= ${CC} -c ${CFLAGS} ${WERROR} ${.IMPSRC}
Index: sys/conf/ldscript.amd64
===================================================================
--- sys/conf/ldscript.amd64
+++ sys/conf/ldscript.amd64
@@ -80,6 +80,7 @@
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+ PROVIDE (brwsection = .);
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }

File Metadata

Mime Type
text/plain
Expires
Wed, Nov 12, 5:20 AM (19 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25194885
Default Alt Text
D14282.id39229.diff (8 KB)

Event Timeline