Index: head/sys/amd64/amd64/bios.c =================================================================== --- head/sys/amd64/amd64/bios.c (revision 112840) +++ head/sys/amd64/amd64/bios.c (revision 112841) @@ -1,676 +1,680 @@ /*- * Copyright (c) 1997 Michael Smith * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Code for dealing with the BIOS in x86 PC systems. */ #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #include #endif #define BIOS_START 0xe0000 #define BIOS_SIZE 0x20000 /* exported lookup results */ struct bios32_SDentry PCIbios; struct PnPBIOS_table *PnPBIOStable; static u_int bios32_SDCI; /* start fairly early */ static void bios32_init(void *junk); SYSINIT(bios32, SI_SUB_CPU, SI_ORDER_ANY, bios32_init, NULL); /* * bios32_init * * Locate various bios32 entities. 
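 *
 * In outline (see the code below): the BIOS region 0xe0000-0xfffff is
 * scanned for the paragraph-aligned "_32_" signature (the BIOS32
 * Service Directory) and for "$PnP" (the PnP BIOS table); each
 * candidate table is then validated by an 8-bit checksum, i.e. the
 * byte sum over its stated length must be zero modulo 256.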
*/ static void bios32_init(void *junk) { u_long sigaddr; struct bios32_SDheader *sdh; struct PnPBIOS_table *pt; u_int8_t ck, *cv; int i; char *p; /* * BIOS32 Service Directory, PCI BIOS */ /* look for the signature */ if ((sigaddr = bios_sigsearch(0, "_32_", 4, 16, 0)) != 0) { /* get a virtual pointer to the structure */ sdh = (struct bios32_SDheader *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)sdh, ck = 0, i = 0; i < (sdh->len * 16); i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if ((ck == 0) && (BIOS_START <= sdh->entry ) && (sdh->entry < (BIOS_START + BIOS_SIZE))) { bios32_SDCI = BIOS_PADDRTOVADDR(sdh->entry); if (bootverbose) { printf("bios32: Found BIOS32 Service Directory header at %p\n", sdh); printf("bios32: Entry = 0x%x (%x) Rev = %d Len = %d\n", sdh->entry, bios32_SDCI, sdh->revision, sdh->len); } /* Allow user override of PCI BIOS search */ if (((p = getenv("machdep.bios.pci")) == NULL) || strcmp(p, "disable")) { /* See if there's a PCI BIOS entrypoint here */ PCIbios.ident.id = 0x49435024; /* PCI systems should have this */ if (!bios32_SDlookup(&PCIbios) && bootverbose) printf("pcibios: PCI BIOS entry at 0x%x+0x%x\n", PCIbios.base, PCIbios.entry); } if (p != NULL) freeenv(p); } else { printf("bios32: Bad BIOS32 Service Directory\n"); } } /* * PnP BIOS * * Allow user override of PnP BIOS search */ if ((((p = getenv("machdep.bios.pnp")) == NULL) || strcmp(p, "disable")) && ((sigaddr = bios_sigsearch(0, "$PnP", 4, 16, 0)) != 0)) { /* get a virtual pointer to the structure */ pt = (struct PnPBIOS_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)pt, ck = 0, i = 0; i < pt->len; i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if (ck == 0) { PnPBIOStable = pt; if (bootverbose) { printf("pnpbios: Found PnP BIOS data at %p\n", pt); printf("pnpbios: Entry = %x:%x Rev = %d.%d\n", pt->pmentrybase, pt->pmentryoffset, pt->version >> 4, pt->version & 0xf); if ((pt->control & 0x3) == 0x01) printf("pnpbios: Event flag at %x\n", pt->evflagaddr); if (pt->oemdevid != 0) printf("pnpbios: OEM ID %x\n", pt->oemdevid); } } else { printf("pnpbios: Bad PnP BIOS data checksum\n"); } } if (p != NULL) freeenv(p); if (bootverbose) { /* look for other known signatures */ printf("Other BIOS signatures found:\n"); } } /* * bios32_SDlookup * * Query the BIOS32 Service Directory for the service named in (ent), * returns nonzero if the lookup fails. The caller must fill in * (ent->ident), the remainder are populated on a successful lookup. */ int bios32_SDlookup(struct bios32_SDentry *ent) { struct bios_regs args; if (bios32_SDCI == 0) return (1); args.eax = ent->ident.id; /* set up arguments */ args.ebx = args.ecx = args.edx = 0; bios32(&args, bios32_SDCI, GSEL(GCODE_SEL, SEL_KPL)); if ((args.eax & 0xff) == 0) { /* success? */ ent->base = args.ebx; ent->len = args.ecx; ent->entry = args.edx; ent->ventry = BIOS_PADDRTOVADDR(ent->base + ent->entry); return (0); /* all OK */ } return (1); /* failed */ } /* * bios_sigsearch * * Search some or all of the BIOS region for a signature string. * * (start) Optional offset returned from this function * (for searching for multiple matches), or NULL * to start the search from the base of the BIOS. * Note that this will be a _physical_ address in * the range 0xe0000 - 0xfffff. * (sig) is a pointer to the byte(s) of the signature. * (siglen) number of bytes in the signature. * (paralen) signature paragraph (alignment) size. * (sigofs) offset of the signature within the paragraph.
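 *
 * For example, the call bios_sigsearch(0, "_32_", 4, 16, 0) made in
 * bios32_init() above scans the whole BIOS region, testing each
 * 16-byte paragraph for the 4-byte signature "_32_" at offset 0.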
* * Returns the _physical_ address of the found signature, 0 if the * signature was not found. */ u_int32_t bios_sigsearch(u_int32_t start, u_char *sig, int siglen, int paralen, int sigofs) { u_char *sp, *end; /* compute the starting address */ if ((start >= BIOS_START) && (start <= (BIOS_START + BIOS_SIZE))) { sp = (char *)BIOS_PADDRTOVADDR(start); } else if (start == 0) { sp = (char *)BIOS_PADDRTOVADDR(BIOS_START); } else { return 0; /* bogus start address */ } /* compute the end address */ end = (u_char *)BIOS_PADDRTOVADDR(BIOS_START + BIOS_SIZE); /* loop searching */ while ((sp + sigofs + siglen) < end) { /* compare here */ if (!bcmp(sp + sigofs, sig, siglen)) { /* convert back to physical address */ return((u_int32_t)BIOS_VADDRTOPADDR(sp)); } sp += paralen; } return(0); } /* * do not staticize, used by bioscall.s */ union { struct { u_short offset; u_short segment; } vec16; struct { u_int offset; u_short segment; } vec32; } bioscall_vector; /* bios jump vector */ void set_bios_selectors(struct bios_segments *seg, int flags) { struct soft_segment_descriptor ssd = { 0, /* segment base address (overwritten) */ 0, /* length (overwritten) */ SDT_MEMERA, /* segment type (overwritten) */ 0, /* priority level */ 1, /* descriptor present */ 0, 0, 1, /* descriptor size (overwritten) */ 0 /* granularity == byte units */ }; union descriptor *p_gdt; #ifdef SMP p_gdt = &gdt[PCPU_GET(cpuid) * NGDT]; #else p_gdt = gdt; #endif ssd.ssd_base = seg->code32.base; ssd.ssd_limit = seg->code32.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE32_SEL].sd); ssd.ssd_def32 = 0; if (flags & BIOSCODE_FLAG) { ssd.ssd_base = seg->code16.base; ssd.ssd_limit = seg->code16.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE16_SEL].sd); } ssd.ssd_type = SDT_MEMRWA; if (flags & BIOSDATA_FLAG) { ssd.ssd_base = seg->data.base; ssd.ssd_limit = seg->data.limit; ssdtosd(&ssd, &p_gdt[GBIOSDATA_SEL].sd); } if (flags & BIOSUTIL_FLAG) { ssd.ssd_base = seg->util.base; ssd.ssd_limit = seg->util.limit; ssdtosd(&ssd, &p_gdt[GBIOSUTIL_SEL].sd); } if (flags & BIOSARGS_FLAG) { ssd.ssd_base = seg->args.base; ssd.ssd_limit = seg->args.limit; ssdtosd(&ssd, &p_gdt[GBIOSARGS_SEL].sd); } } extern int vm86pa; extern void bios16_jmp(void); /* * this routine is really greedy with selectors, and uses 5: * * 32-bit code selector: to return to kernel * 16-bit code selector: for running code * data selector: for 16-bit data * util selector: extra utility selector * args selector: to handle pointers * * the util selector is set from the util16 entry in bios16_args, if a * "U" specifier is seen. * * See for description of format specifiers */ int bios16(struct bios_args *args, char *fmt, ...) { char *p, *stack, *stack_top; va_list ap; int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; pt_entry_t *pte; pd_entry_t *ptd; arg_start = 0xffffffff; arg_end = 0; /* * Some BIOS entrypoints attempt to copy the largest-case * argument frame (in order to generalise handling for * different entry types). If our argument frame is * smaller than this, the BIOS will reach off the top of * our constructed stack segment. Pad the top of the stack * with some garbage to avoid this. 
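 * (This is why the frame below is built starting 32 bytes short of the
 * top of the one-page stack segment: stack = PAGE_SIZE - 32.)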
*/ stack = (caddr_t)PAGE_SIZE - 32; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); arg_start = min(arg_start, i); arg_end = max(arg_end, i); flags |= BIOSARGS_FLAG; stack -= 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); stack -= 4; break; case 'U': /* 16-bit selector */ flags |= BIOSUTIL_FLAG; /* FALLTHROUGH */ case 'D': /* 16-bit selector */ case 'C': /* 16-bit selector */ stack -= 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); stack -= 2; break; default: return (EINVAL); } } if (flags & BIOSARGS_FLAG) { if (arg_end - arg_start > ctob(16)) return (EACCES); args->seg.args.base = arg_start; args->seg.args.limit = 0xffff; } args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; args->seg.code32.limit = 0xffff; ptd = (pd_entry_t *)rcr3(); - if (ptd == (u_int *)IdlePTD) { +#ifdef PAE + if (ptd == IdlePDPT) { +#else + if (ptd == IdlePTD) { +#endif /* * no page table, so create one and install it. */ pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* * this is a user-level page table */ pte = PTmap; } /* * install pointer to page 0. we don't need to flush the tlb, * since there should not be a previous mapping for page 0. */ *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V; stack_top = stack; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); *(u_int *)stack = (i - arg_start) | (GSEL(GBIOSARGS_SEL, SEL_KPL) << 16); stack += 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); *(u_int *)stack = i; stack += 4; break; case 'U': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSUTIL_SEL, SEL_KPL); stack += 2; break; case 'D': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSDATA_SEL, SEL_KPL); stack += 2; break; case 'C': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSCODE16_SEL, SEL_KPL); stack += 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); *(u_short *)stack = i; stack += 2; break; default: return (EINVAL); } } set_bios_selectors(&args->seg, flags); bioscall_vector.vec16.offset = (u_short)args->entry; bioscall_vector.vec16.segment = GSEL(GBIOSCODE16_SEL, SEL_KPL); i = bios16_call(&args->r, stack_top); if (pte == PTmap) { *pte = 0; /* remove entry */ } else { *ptd = 0; /* remove page table */ free(pte, M_TEMP); /* ... and free it */ } /* * XXX only needs to be invlpg(0) but that doesn't work on the 386 */ pmap_invalidate_all(kernel_pmap); return (i); } #ifdef DEV_ISA /* * PnP BIOS interface; enumerate devices only known to the system * BIOS and save information about them for later use. 
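 *
 * The pnp_sysdev structure below mirrors the device node record that
 * the PnP BIOS "get system device node" call returns: a 16-bit node
 * size, a handle, an EISA-style device ID, a type code and an
 * attribute word, followed by variable-length resource data in
 * devdata[].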
*/ struct pnp_sysdev { u_int16_t size; u_int8_t handle; u_int32_t devid; u_int8_t type[3]; u_int16_t attrib; #define PNPATTR_NODISABLE (1<<0) /* can't be disabled */ #define PNPATTR_NOCONFIG (1<<1) /* can't be configured */ #define PNPATTR_OUTPUT (1<<2) /* can be primary output */ #define PNPATTR_INPUT (1<<3) /* can be primary input */ #define PNPATTR_BOOTABLE (1<<4) /* can be booted from */ #define PNPATTR_DOCK (1<<5) /* is a docking station */ #define PNPATTR_REMOVEABLE (1<<6) /* device is removable */ #define PNPATTR_CONFIG_STATIC (0) #define PNPATTR_CONFIG_DYNAMIC (1) #define PNPATTR_CONFIG_DYNONLY (3) #define PNPATTR_CONFIG(a) (((a) >> 7) & 0x3) /* device-specific data comes here */ u_int8_t devdata[0]; } __packed; /* We have to cluster arguments within a 64k range for the bios16 call */ struct pnp_sysdevargs { u_int16_t next; struct pnp_sysdev node; }; /* * This function is called after the bus has assigned resource * locations for a logical device. */ static void pnpbios_set_config(void *arg, struct isa_config *config, int enable) { } /* * Quiz the PnP BIOS, build a list of PNP IDs and resource data. */ static void pnpbios_identify(driver_t *driver, device_t parent) { struct PnPBIOS_table *pt = PnPBIOStable; struct bios_args args; struct pnp_sysdev *pd; struct pnp_sysdevargs *pda; u_int16_t ndevs, bigdev; int error, currdev; u_int8_t *devnodebuf, tag; u_int32_t *devid, *compid; int idx, left; device_t dev; /* no PnP BIOS information */ if (pt == NULL) return; /* ACPI already active */ if (devclass_get_softc(devclass_find("ACPI"), 0) != NULL) return; /* get count of PnP devices */ bzero(&args, sizeof(args)); args.seg.code16.base = BIOS_PADDRTOVADDR(pt->pmentrybase); args.seg.code16.limit = 0xffff; /* XXX ? */ args.seg.data.base = BIOS_PADDRTOVADDR(pt->pmdataseg); args.seg.data.limit = 0xffff; args.entry = pt->pmentryoffset; if ((error = bios16(&args, PNP_COUNT_DEVNODES, &ndevs, &bigdev)) || (args.r.eax & 0xff)) printf("pnpbios: error %d/%x getting device count/size limit\n", error, args.r.eax); ndevs &= 0xff; /* clear high byte garbage */ if (bootverbose) printf("pnpbios: %d devices, largest %d bytes\n", ndevs, bigdev); devnodebuf = malloc(bigdev + (sizeof(struct pnp_sysdevargs) - sizeof(struct pnp_sysdev)), M_DEVBUF, M_NOWAIT); pda = (struct pnp_sysdevargs *)devnodebuf; pd = &pda->node; for (currdev = 0, left = ndevs; (currdev != 0xff) && (left > 0); left--) { bzero(pd, bigdev); pda->next = currdev; /* get current configuration */ if ((error = bios16(&args, PNP_GET_DEVNODE, &pda->next, &pda->node, 1))) { printf("pnpbios: error %d making BIOS16 call\n", error); break; } if ((error = (args.r.eax & 0xff))) { if (bootverbose) printf("pnpbios: %s 0x%x fetching node %d\n", error & 0x80 ? "error" : "warning", error, currdev); if (error & 0x80) break; } currdev = pda->next; if (pd->size < sizeof(struct pnp_sysdev)) { printf("pnpbios: bogus system node data, aborting scan\n"); break; } /* * If we are in APIC_IO mode, we should ignore the ISA PIC if it * shows up. Likewise, in !APIC_IO mode, we should ignore the * APIC (less important). * This is significant because the ISA PIC will claim IRQ 2 (which * it uses for chaining), while in APIC mode this is a valid IRQ * available for general use.
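 * (PNP0000 is the EISA ID of the AT interrupt controller and PNP0003
 * that of the APIC, hence the pnp_eisaformat() comparisons below.)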
*/ #ifdef APIC_IO if (!strcmp(pnp_eisaformat(pd->devid), "PNP0000")) /* ISA PIC */ continue; #else if (!strcmp(pnp_eisaformat(pd->devid), "PNP0003")) /* APIC */ continue; #endif /* Add the device and parse its resources */ dev = BUS_ADD_CHILD(parent, ISA_ORDER_PNP, NULL, -1); isa_set_vendorid(dev, pd->devid); isa_set_logicalid(dev, pd->devid); /* * It appears that some PnP BIOS doesn't allow us to re-enable * the embedded system device once it is disabled. We shall * mark all system device nodes as "cannot be disabled", regardless * of actual settings in the device attribute byte. * XXX isa_set_configattr(dev, ((pd->attrib & PNPATTR_NODISABLE) ? 0 : ISACFGATTR_CANDISABLE) | ((!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0)); */ isa_set_configattr(dev, (!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0); ISA_SET_CONFIG_CALLBACK(parent, dev, pnpbios_set_config, 0); pnp_parse_resources(dev, &pd->devdata[0], pd->size - sizeof(struct pnp_sysdev), 0); if (!device_get_desc(dev)) device_set_desc_copy(dev, pnp_eisaformat(pd->devid)); /* Find device IDs */ devid = &pd->devid; compid = NULL; /* look for a compatible device ID too */ left = pd->size - sizeof(struct pnp_sysdev); idx = 0; while (idx < left) { tag = pd->devdata[idx++]; if (PNP_RES_TYPE(tag) == 0) { /* Small resource */ switch (PNP_SRES_NUM(tag)) { case PNP_TAG_COMPAT_DEVICE: compid = (u_int32_t *)(pd->devdata + idx); if (bootverbose) printf("pnpbios: node %d compat ID 0x%08x\n", pd->handle, *compid); /* FALLTHROUGH */ case PNP_TAG_END: idx = left; break; default: idx += PNP_SRES_LEN(tag); break; } } else /* Large resource, skip it */ idx += *(u_int16_t *)(pd->devdata + idx) + 2; } if (bootverbose) { printf("pnpbios: handle %d device ID %s (%08x)", pd->handle, pnp_eisaformat(*devid), *devid); if (compid != NULL) printf(" compat ID %s (%08x)", pnp_eisaformat(*compid), *compid); printf("\n"); } } } static device_method_t pnpbios_methods[] = { /* Device interface */ DEVMETHOD(device_identify, pnpbios_identify), { 0, 0 } }; static driver_t pnpbios_driver = { "pnpbios", pnpbios_methods, 1, /* no softc */ }; static devclass_t pnpbios_devclass; DRIVER_MODULE(pnpbios, isa, pnpbios_driver, pnpbios_devclass, 0, 0); #endif /* DEV_ISA */ Index: head/sys/amd64/amd64/locore.S =================================================================== --- head/sys/amd64/amd64/locore.S (revision 112840) +++ head/sys/amd64/amd64/locore.S (revision 112841) @@ -1,892 +1,927 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ * * originally from: locore.s, by William F. Jolitz * * Substantially rewritten by David Greenman, Rod Grimes, * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp * and many others. */ #include "opt_bootp.h" #include "opt_compat.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include "assym.s" /* * XXX * * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. Perhaps newer versions * will have more pleasant appearance. */ /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). * * NOTE: PTDpde, PTmap, and PTD are being defined as address symbols. * In C you access them directly, and not with a '*'. Storage is not being * allocated. They will magically address the correct locations in KVM * which C will treat as normal variables of the type they are defined in * machine/pmap.h, i.e. PTDpde = XX ; to set a PDE entry, NOT *PTDpde = XX; */ .globl PTmap,PTD,PTDpde .set PTmap,(PTDPTDI << PDRSHIFT) .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. * See the note above. It is true here as well. */ .globl APTmap,APTD,APTDpde .set APTmap,APTDPTDI << PDRSHIFT .set APTD,APTmap + (APTDPTDI * PAGE_SIZE) .set APTDpde,PTD + (APTDPTDI * PDESIZE) #ifdef SMP /* * Define layout of per-cpu address space. * This is "constructed" in locore.s on the BSP and in mp_machdep.c * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! 
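 *
 * SMP_prvspace below is the base of that per-cpu window; the local
 * APIC is mapped at its last page, which is what the lapic symbol
 * (SMP_prvspace + (NPTEPG-1) * PAGE_SIZE) encodes.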
*/ .globl SMP_prvspace, lapic .set SMP_prvspace,(MPPTDI << PDRSHIFT) .set lapic,SMP_prvspace + (NPTEPG-1) * PAGE_SIZE #endif /* SMP */ /* * Compiled KERNBASE location */ .globl kernbase .set kernbase,KERNBASE /* * Globals */ .data ALIGN_DATA /* just to be sure */ .globl HIDENAME(tmpstk) .space 0x2000 /* space for tmpstk - temporary stack */ HIDENAME(tmpstk): .globl bootinfo bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ .globl KERNend KERNend: .long 0 /* phys addr end of kernel (just after bss) */ physfree: .long 0 /* phys addr of next free page */ #ifdef SMP .globl cpu0prvpage cpu0pp: .long 0 /* phys addr cpu0 private pg */ cpu0prvpage: .long 0 /* relocated version */ .globl SMPpt SMPptpa: .long 0 /* phys addr SMP page table */ SMPpt: .long 0 /* relocated version */ #endif /* SMP */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif KPTphys: .long 0 /* phys addr of kernel page tables */ .globl proc0uarea, proc0kstack proc0uarea: .long 0 /* address of proc 0 uarea space */ proc0kstack: .long 0 /* address of proc 0 kstack space */ p0upa: .long 0 /* phys addr of proc0's UAREA */ p0kpa: .long 0 /* phys addr of proc0's STACK */ vm86phystk: .long 0 /* PA of vm86/bios stack */ .globl vm86paddr, vm86pa vm86paddr: .long 0 /* address of vm86 region */ vm86pa: .long 0 /* phys addr of vm86 region */ #ifdef PC98 .globl pc98_system_parameter pc98_system_parameter: .space 0x240 #endif /********************************************************************** * * Some handy macros * */ #define R(foo) ((foo)-KERNBASE) #define ALLOCPAGES(foo) \ movl R(physfree), %esi ; \ movl $((foo)*PAGE_SIZE), %eax ; \ addl %esi, %eax ; \ movl %eax, R(physfree) ; \ movl %esi, %edi ; \ movl $((foo)*PAGE_SIZE),%ecx ; \ xorl %eax,%eax ; \ cld ; \ rep ; \ stosb /* * fillkpt * eax = page frame address * ebx = index into page table * ecx = how many pages to map * base = base address of page dir/table * prot = protection bits */ #define fillkpt(base, prot) \ shll $PTESHIFT,%ebx ; \ addl base,%ebx ; \ orl $PG_V,%eax ; \ orl prot,%eax ; \ 1: movl %eax,(%ebx) ; \ addl $PAGE_SIZE,%eax ; /* increment physical address */ \ addl $PTESIZE,%ebx ; /* next pte */ \ loop 1b /* * fillkptphys(prot) * eax = physical address * ecx = how many pages to map * prot = protection bits */ #define fillkptphys(prot) \ movl %eax, %ebx ; \ shrl $PAGE_SHIFT, %ebx ; \ fillkpt(R(KPTphys), prot) .text /********************************************************************** * * This is where the bootblocks start us, set the ball rolling... * */ NON_GPROF_ENTRY(btext) #ifdef PC98 /* save SYSTEM PARAMETER for resume (NS/T or other) */ movl $0xa1400,%esi movl $R(pc98_system_parameter),%edi movl $0x0240,%ecx cld rep movsb #else /* IBM-PC */ /* Tell the bios to warmboot next time */ movw $0x1234,0x472 #endif /* PC98 */ /* Set up a real frame in case the double return in newboot is executed. */ pushl %ebp movl %esp, %ebp /* Don't trust what the BIOS gives for eflags. */ pushl $PSL_KERNEL popfl /* * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap * to set %cs, %ds, %es and %ss. */ mov %ds, %ax mov %ax, %fs mov %ax, %gs call recover_bootinfo /* Get onto a stack that we can trust. */ /* * XXX this step is delayed in case recover_bootinfo needs to return via * the old stack, but it need not be, since recover_bootinfo actually * returns via the old frame. 
movl $R(HIDENAME(tmpstk)),%esp #ifdef PC98 /* pc98_machine_type & M_EPSON_PC98 */ testb $0x02,R(pc98_system_parameter)+220 jz 3f /* epson_machine_id <= 0x0b */ cmpb $0x0b,R(pc98_system_parameter)+224 ja 3f /* count up memory */ movl $0x100000,%eax /* next, tally remaining memory */ movl $0xFFF-0x100,%ecx 1: movl 0(%eax),%ebx /* save location to check */ movl $0xa55a5aa5,0(%eax) /* write test pattern */ cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ jne 2f movl %ebx,0(%eax) /* restore memory */ addl $PAGE_SIZE,%eax loop 1b 2: subl $0x100000,%eax shrl $17,%eax movb %al,R(pc98_system_parameter)+1 3: movw R(pc98_system_parameter+0x86),%ax movw %ax,R(cpu_id) #endif call identify_cpu /* clear bss */ /* * XXX this should be done a little earlier. * * XXX we don't check that there is memory for our bss and page tables * before using it. * * XXX the boot program somewhat bogusly clears the bss. We still have * to do it in case we were unzipped by kzipboot. Then the boot program * only clears kzipboot's bss. * * XXX the gdt and idt are still somewhere in the boot program. We * depend on the convention that the boot program is below 1MB and we * are above 1MB to keep the gdt and idt away from the bss and page * tables. */ movl $R(end),%ecx movl $R(edata),%edi subl %edi,%ecx xorl %eax,%eax cld rep stosb call create_pagetables /* * If the CPU has support for VME, turn it on. */ testl $CPUID_VME, R(cpu_feature) jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high virtualized address */ ret /* now running relocated at KERNBASE where the system is linked to run */ begin: /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ /* bootstrap stack end location */ leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ /* * Clean up the stack in a way that db_numargs() understands, so * that backtraces in ddb don't underrun the stack. Traps for * inaccessible memory are more fatal than usual this early. */ addl $4,%esp call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ addl $0,%esp /* for db_numargs() again */ /* * Signal trampoline, copied to top of user stack */ NON_GPROF_ENTRY(sigcode) calll *SIGF_HANDLER(%esp) leal SIGF_UC(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC_EFLAGS(%eax) jne 1f movl UC_GS(%eax),%gs /* restore %gs */ 1: movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #ifdef COMPAT_FREEBSD4 ALIGN_TEXT freebsd4_sigcode: calll *SIGF_HANDLER(%esp) leal SIGF_UC4(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC4_EFLAGS(%eax) jne 1f movl UC4_GS(%eax),%gs /* restore %gs */ 1: movl $344,%eax /* 4.x SYS_sigreturn */ pushl %eax /* junk to fake return addr.
*/ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #endif #ifdef COMPAT_43 ALIGN_TEXT osigcode: call *SIGF_HANDLER(%esp) /* call signal handler */ lea SIGF_SC(%esp),%eax /* get sigcontext */ pushl %eax testl $PSL_VM,SC_PS(%eax) jne 9f movl SC_GS(%eax),%gs /* restore %gs */ 9: movl $103,%eax /* 3.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ 0: jmp 0b #endif /* COMPAT_43 */ ALIGN_TEXT esigcode: .data .globl szsigcode szsigcode: .long esigcode-sigcode #ifdef COMPAT_FREEBSD4 .globl szfreebsd4_sigcode szfreebsd4_sigcode: .long esigcode-freebsd4_sigcode #endif #ifdef COMPAT_43 .globl szosigcode szosigcode: .long esigcode-osigcode #endif .text /********************************************************************** * * Recover the bootinfo passed to us from the boot program * */ recover_bootinfo: /* * This code is called in different ways depending on what loaded * and started the kernel. This is used to detect how we get the * arguments from the other code and what we do with them. * * Old disk boot blocks: * (*btext)(howto, bootdev, cyloffset, esym); * [return address == 0, and can NOT be returned to] * [cyloffset was not supported by the FreeBSD boot code * and always passed in as 0] * [esym is also known as total in the boot code, and * was never properly supported by the FreeBSD boot code] * * Old diskless netboot code: * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); * [return address != 0, and can NOT be returned to] * If we are being booted by this code it will NOT work, * so we are just going to halt if we find this case. * * New uniform boot code: * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) * [return address != 0, and can be returned to] * * There may seem to be a lot of wasted arguments in here, but * that is so the newer boot code can still load very old kernels * and old boot code can load new kernels. */ /* * The old style disk boot blocks fake a frame on the stack and * did an lret to get here. The frame on the stack has a return * address of 0. */ cmpl $0,4(%ebp) je olddiskboot /* * We have some form of return address, so this is either the * old diskless netboot code, or the new uniform code. That can * be detected by looking at the 5th argument, if it is 0 * we are being booted by the new uniform boot code. */ cmpl $0,24(%ebp) je newboot /* * Seems we have been loaded by the old diskless boot code, we * don't stand a chance of running as the diskless structure * changed considerably between the two, so just halt. */ hlt /* * We have been loaded by the new uniform boot code. * Let's check the bootinfo version, and if we do not understand * it we return to the loader with a status of 1 to indicate this error */ newboot: movl 28(%ebp),%ebx /* &bootinfo.version */ movl BI_VERSION(%ebx),%eax cmpl $1,%eax /* We only understand version 1 */ je 1f movl $1,%eax /* Return status */ leave /* * XXX this returns to our caller's caller (as is required) since * we didn't set up a frame and our caller did. */ ret 1: /* * If we have a kernelname copy it in */ movl BI_KERNELNAME(%ebx),%esi cmpl $0,%esi je 2f /* No kernelname */ movl $MAXPATHLEN,%ecx /* Brute force!!! */ movl $R(kernelname),%edi cmpb $'/',(%esi) /* Make sure it starts with a slash */ je 1f movb $'/',(%edi) incl %edi decl %ecx 1: cld rep movsb 2: /* * Determine the size of the boot loader's copy of the bootinfo * struct. 
This is impossible to do properly because old versions * of the struct don't contain a size field and there are 2 old * versions with the same version number. */ movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ je got_bi_size /* no, sizeless version */ movl BI_SIZE(%ebx),%ecx got_bi_size: /* * Copy the common part of the bootinfo struct */ movl %ebx,%esi movl $R(bootinfo),%edi cmpl $BOOTINFO_SIZE,%ecx jbe got_common_bi_size movl $BOOTINFO_SIZE,%ecx got_common_bi_size: cld rep movsb #ifdef NFS_ROOT #ifndef BOOTP_NFSV3 /* * If we have a nfs_diskless structure copy it in */ movl BI_NFS_DISKLESS(%ebx),%esi cmpl $0,%esi je olddiskboot movl $R(nfs_diskless),%edi movl $NFSDISKLESS_SIZE,%ecx cld rep movsb movl $R(nfs_diskless_valid),%edi movl $1,(%edi) #endif #endif /* * The old style disk boot. * (*btext)(howto, bootdev, cyloffset, esym); * Note that the newer boot code just falls into here to pick * up howto and bootdev, cyloffset and esym are no longer used */ olddiskboot: movl 8(%ebp),%eax movl %eax,R(boothowto) movl 12(%ebp),%eax movl %eax,R(bootdev) ret /********************************************************************** * * Identify the CPU and initialize anything special about it * */ identify_cpu: /* Try to toggle alignment check flag; does not exist on 386. */ pushfl popl %eax movl %eax,%ecx orl $PSL_AC,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_AC,%eax pushl %ecx popfl testl %eax,%eax jnz try486 /* NexGen CPU does not have alignment check flag. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jz trynexgen popfl movl $CPU_386,R(cpu) jmp 3f trynexgen: popfl movl $CPU_NX586,R(cpu) movl $0x4778654e,R(cpu_vendor) # store vendor string movl $0x72446e65,R(cpu_vendor+4) movl $0x6e657669,R(cpu_vendor+8) movl $0,R(cpu_vendor+12) jmp 3f try486: /* Try to toggle identification flag; does not exist on early 486s. */ pushfl popl %eax movl %eax,%ecx xorl $PSL_ID,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_ID,%eax pushl %ecx popfl testl %eax,%eax jnz trycpuid movl $CPU_486,R(cpu) /* * Check Cyrix CPU * Cyrix CPUs do not change the undefined flags following * execution of the divide instruction which divides 5 by 2. * * Note: CPUID is enabled on M2, so it passes another way. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jnc trycyrix popfl jmp 3f /* You may use Intel CPU. */ trycyrix: popfl /* * IBM Blue Lightning CPU also doesn't change the undefined flags. * Because IBM doesn't disclose the information for the Blue Lightning * CPU, we couldn't distinguish it from Cyrix's (including IBM * brand of Cyrix CPUs). */ movl $0x69727943,R(cpu_vendor) # store vendor string movl $0x736e4978,R(cpu_vendor+4) movl $0x64616574,R(cpu_vendor+8) jmp 3f trycpuid: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid # cpuid 0 movl %eax,R(cpu_high) # highest capability movl %ebx,R(cpu_vendor) # store vendor string movl %edx,R(cpu_vendor+4) movl %ecx,R(cpu_vendor+8) movb $0,R(cpu_vendor+12) movl $1,%eax cpuid # cpuid 1 movl %eax,R(cpu_id) # store cpu_id movl %ebx,R(cpu_procinfo) # store cpu_procinfo movl %edx,R(cpu_feature) # store cpu_feature rorl $8,%eax # extract family type andl $15,%eax cmpl $5,%eax jae 1f /* less than Pentium; must be 486 */ movl $CPU_486,R(cpu) jmp 3f 1: /* a Pentium?
*/ cmpl $5,%eax jne 2f movl $CPU_586,R(cpu) jmp 3f 2: /* Greater than Pentium...call it a Pentium Pro */ movl $CPU_686,R(cpu) 3: ret /********************************************************************** * * Create the first page directory and its page tables. * */ create_pagetables: /* Find end of kernel image (rounded up to a page boundary). */ movl $R(_end),%esi /* Include symbols, if any. */ movl R(bootinfo+BI_ESYMTAB),%edi testl %edi,%edi je over_symalloc movl %edi,%esi movl $KERNBASE,%edi addl %edi,R(bootinfo+BI_SYMTAB) addl %edi,R(bootinfo+BI_ESYMTAB) over_symalloc: /* If we are told where the end of the kernel space is, believe it. */ movl R(bootinfo+BI_KERNEND),%edi testl %edi,%edi je no_kernend movl %edi,%esi no_kernend: addl $PAGE_MASK,%esi andl $~PAGE_MASK,%esi movl %esi,R(KERNend) /* save end of kernel */ movl %esi,R(physfree) /* next free page is at end of kernel */ /* Allocate Kernel Page Tables */ ALLOCPAGES(NKPT) movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) /* Allocate UPAGES */ ALLOCPAGES(UAREA_PAGES) movl %esi,R(p0upa) addl $KERNBASE, %esi movl %esi, R(proc0uarea) ALLOCPAGES(KSTACK_PAGES) movl %esi,R(p0kpa) addl $KERNBASE, %esi movl %esi, R(proc0kstack) ALLOCPAGES(1) /* vm86/bios stack */ movl %esi,R(vm86phystk) ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ movl %esi,R(vm86pa) addl $KERNBASE, %esi movl %esi, R(vm86paddr) #ifdef SMP /* Allocate cpu0's private data page */ ALLOCPAGES(1) movl %esi,R(cpu0pp) addl $KERNBASE, %esi movl %esi, R(cpu0prvpage) /* relocated to KVM space */ /* Allocate SMP page table page */ ALLOCPAGES(1) movl %esi,R(SMPptpa) addl $KERNBASE, %esi movl %esi, R(SMPpt) /* relocated to KVM space */ #endif /* SMP */ /* Map read-only from zero to the end of the kernel text section */ xorl %eax, %eax xorl %edx,%edx movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map read-write, data, bss and symbols */ movl $R(etext),%eax addl $PAGE_MASK, %eax andl $~PAGE_MASK, %eax movl $PG_RW,%edx movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) /* Map proc0's UPAGES in the physical way ... */ movl R(p0upa), %eax movl $(UAREA_PAGES), %ecx fillkptphys($PG_RW) /* Map proc0's KSTACK in the physical way ... */ movl R(p0kpa), %eax movl $(KSTACK_PAGES), %ecx fillkptphys($PG_RW) /* Map ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkptphys($PG_RW) /* Map space for the vm86 region */ movl R(vm86phystk), %eax movl $4, %ecx fillkptphys($PG_RW) /* Map page 0 into the vm86 page table */ movl $0, %eax movl $0, %ebx movl $1, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) /* ...likewise for the ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) #ifdef SMP /* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ movl R(cpu0pp), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map SMP page table page into global kmem FWIW */ movl R(SMPptpa), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map the private page into the SMP page table */ movl R(cpu0pp), %eax movl $0, %ebx /* pte offset = 0 */ movl $1, %ecx /* one private page coming right up */ fillkpt(R(SMPptpa), $PG_RW) /* ... 
and put the page table table in the pde. */ movl R(SMPptpa), %eax movl $MPPTDI, %ebx movl $1, %ecx fillkpt(R(IdlePTD), $PG_RW) /* Fakeup VA for the local apic to allow early traps. */ ALLOCPAGES(1) movl %esi, %eax movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ movl R(KPTphys), %eax xorl %ebx, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install pde's for pt's */ movl R(KPTphys), %eax movl $KPTDI, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install a pde recursively mapping page directory as a page table */ movl R(IdlePTD), %eax movl $PTDPTDI, %ebx movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) + +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif ret Index: head/sys/amd64/amd64/locore.s =================================================================== --- head/sys/amd64/amd64/locore.s (revision 112840) +++ head/sys/amd64/amd64/locore.s (revision 112841) @@ -1,892 +1,927 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ * * originally from: locore.s, by William F. Jolitz * * Substantially rewritten by David Greenman, Rod Grimes, * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp * and many others. */ #include "opt_bootp.h" #include "opt_compat.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include "assym.s" /* * XXX * * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. 
Perhaps newer versions * will have more pleasant appearance. */ /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). * * NOTE: PTDpde, PTmap, and PTD are being defined as address symbols. * In C you access them directly, and not with a '*'. Storage is not being * allocated. They will magically address the correct locations in KVM * which C will treat as normal variables of the type they are defined in * machine/pmap.h, i.e. PTDpde = XX ; to set a PDE entry, NOT *PTDpde = XX; */ .globl PTmap,PTD,PTDpde .set PTmap,(PTDPTDI << PDRSHIFT) .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. * See the note above. It is true here as well. */ .globl APTmap,APTD,APTDpde .set APTmap,APTDPTDI << PDRSHIFT .set APTD,APTmap + (APTDPTDI * PAGE_SIZE) .set APTDpde,PTD + (APTDPTDI * PDESIZE) #ifdef SMP /* * Define layout of per-cpu address space. * This is "constructed" in locore.s on the BSP and in mp_machdep.c * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! */ .globl SMP_prvspace, lapic .set SMP_prvspace,(MPPTDI << PDRSHIFT) .set lapic,SMP_prvspace + (NPTEPG-1) * PAGE_SIZE #endif /* SMP */ /* * Compiled KERNBASE location */ .globl kernbase .set kernbase,KERNBASE /* * Globals */ .data ALIGN_DATA /* just to be sure */ .globl HIDENAME(tmpstk) .space 0x2000 /* space for tmpstk - temporary stack */ HIDENAME(tmpstk): .globl bootinfo bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ .globl KERNend KERNend: .long 0 /* phys addr end of kernel (just after bss) */ physfree: .long 0 /* phys addr of next free page */ #ifdef SMP .globl cpu0prvpage cpu0pp: .long 0 /* phys addr cpu0 private pg */ cpu0prvpage: .long 0 /* relocated version */ .globl SMPpt SMPptpa: .long 0 /* phys addr SMP page table */ SMPpt: .long 0 /* relocated version */ #endif /* SMP */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif KPTphys: .long 0 /* phys addr of kernel page tables */ .globl proc0uarea, proc0kstack proc0uarea: .long 0 /* address of proc 0 uarea space */ proc0kstack: .long 0 /* address of proc 0 kstack space */ p0upa: .long 0 /* phys addr of proc0's UAREA */ p0kpa: .long 0 /* phys addr of proc0's STACK */ vm86phystk: .long 0 /* PA of vm86/bios stack */ .globl vm86paddr, vm86pa vm86paddr: .long 0 /* address of vm86 region */ vm86pa: .long 0 /* phys addr of vm86 region */ #ifdef PC98 .globl pc98_system_parameter pc98_system_parameter: .space 0x240 #endif /********************************************************************** * * Some handy macros * */ #define R(foo) ((foo)-KERNBASE) #define ALLOCPAGES(foo) \ movl R(physfree), %esi ; \ movl $((foo)*PAGE_SIZE), %eax ; \ addl %esi, %eax ; \ movl %eax, R(physfree) ; \ movl %esi, %edi ; \ movl $((foo)*PAGE_SIZE),%ecx ; \ xorl %eax,%eax ; \ cld ; \ rep ; \ stosb /* * fillkpt * eax = page frame address * ebx = index into page table * ecx = how many pages to map * base = base address of page dir/table * prot = protection bits */ #define fillkpt(base, prot) \ shll $PTESHIFT,%ebx ; \ addl base,%ebx ; \ orl $PG_V,%eax ; \ orl prot,%eax ; \ 1: movl %eax,(%ebx) ; \ addl $PAGE_SIZE,%eax ; /* increment physical address */ \ addl $PTESIZE,%ebx ; /* next pte */ \ loop 1b /* * fillkptphys(prot) * eax = physical address * ecx = 
how many pages to map * prot = protection bits */ #define fillkptphys(prot) \ movl %eax, %ebx ; \ shrl $PAGE_SHIFT, %ebx ; \ fillkpt(R(KPTphys), prot) .text /********************************************************************** * * This is where the bootblocks start us, set the ball rolling... * */ NON_GPROF_ENTRY(btext) #ifdef PC98 /* save SYSTEM PARAMETER for resume (NS/T or other) */ movl $0xa1400,%esi movl $R(pc98_system_parameter),%edi movl $0x0240,%ecx cld rep movsb #else /* IBM-PC */ /* Tell the bios to warmboot next time */ movw $0x1234,0x472 #endif /* PC98 */ /* Set up a real frame in case the double return in newboot is executed. */ pushl %ebp movl %esp, %ebp /* Don't trust what the BIOS gives for eflags. */ pushl $PSL_KERNEL popfl /* * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap * to set %cs, %ds, %es and %ss. */ mov %ds, %ax mov %ax, %fs mov %ax, %gs call recover_bootinfo /* Get onto a stack that we can trust. */ /* * XXX this step is delayed in case recover_bootinfo needs to return via * the old stack, but it need not be, since recover_bootinfo actually * returns via the old frame. */ movl $R(HIDENAME(tmpstk)),%esp #ifdef PC98 /* pc98_machine_type & M_EPSON_PC98 */ testb $0x02,R(pc98_system_parameter)+220 jz 3f /* epson_machine_id <= 0x0b */ cmpb $0x0b,R(pc98_system_parameter)+224 ja 3f /* count up memory */ movl $0x100000,%eax /* next, talley remaining memory */ movl $0xFFF-0x100,%ecx 1: movl 0(%eax),%ebx /* save location to check */ movl $0xa55a5aa5,0(%eax) /* write test pattern */ cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ jne 2f movl %ebx,0(%eax) /* restore memory */ addl $PAGE_SIZE,%eax loop 1b 2: subl $0x100000,%eax shrl $17,%eax movb %al,R(pc98_system_parameter)+1 3: movw R(pc98_system_parameter+0x86),%ax movw %ax,R(cpu_id) #endif call identify_cpu /* clear bss */ /* * XXX this should be done a little earlier. * * XXX we don't check that there is memory for our bss and page tables * before using it. * * XXX the boot program somewhat bogusly clears the bss. We still have * to do it in case we were unzipped by kzipboot. Then the boot program * only clears kzipboot's bss. * * XXX the gdt and idt are still somewhere in the boot program. We * depend on the convention that the boot program is below 1MB and we * are above 1MB to keep the gdt and idt away from the bss and page * tables. */ movl $R(end),%ecx movl $R(edata),%edi subl %edi,%ecx xorl %eax,%eax cld rep stosb call create_pagetables /* * If the CPU has support for VME, turn it on. */ testl $CPUID_VME, R(cpu_feature) jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! 
*/ pushl $begin /* jump to high virtualized address */ ret /* now running relocated at KERNBASE where the system is linked to run */ begin: /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ /* bootstrap stack end location */ leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ /* * Clean up the stack in a way that db_numargs() understands, so * that backtraces in ddb don't underrun the stack. Traps for * inaccessible memory are more fatal than usual this early. */ addl $4,%esp call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ addl $0,%esp /* for db_numargs() again */ /* * Signal trampoline, copied to top of user stack */ NON_GPROF_ENTRY(sigcode) calll *SIGF_HANDLER(%esp) leal SIGF_UC(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC_EFLAGS(%eax) jne 1f movl UC_GS(%eax),%gs /* restore %gs */ 1: movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #ifdef COMPAT_FREEBSD4 ALIGN_TEXT freebsd4_sigcode: calll *SIGF_HANDLER(%esp) leal SIGF_UC4(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC4_EFLAGS(%eax) jne 1f movl UC4_GS(%eax),%gs /* restore %gs */ 1: movl $344,%eax /* 4.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #endif #ifdef COMPAT_43 ALIGN_TEXT osigcode: call *SIGF_HANDLER(%esp) /* call signal handler */ lea SIGF_SC(%esp),%eax /* get sigcontext */ pushl %eax testl $PSL_VM,SC_PS(%eax) jne 9f movl SC_GS(%eax),%gs /* restore %gs */ 9: movl $103,%eax /* 3.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ 0: jmp 0b #endif /* COMPAT_43 */ ALIGN_TEXT esigcode: .data .globl szsigcode szsigcode: .long esigcode-sigcode #ifdef COMPAT_FREEBSD4 .globl szfreebsd4_sigcode szfreebsd4_sigcode: .long esigcode-freebsd4_sigcode #endif #ifdef COMPAT_43 .globl szosigcode szosigcode: .long esigcode-osigcode #endif .text /********************************************************************** * * Recover the bootinfo passed to us from the boot program * */ recover_bootinfo: /* * This code is called in different ways depending on what loaded * and started the kernel. This is used to detect how we get the * arguments from the other code and what we do with them. * * Old disk boot blocks: * (*btext)(howto, bootdev, cyloffset, esym); * [return address == 0, and can NOT be returned to] * [cyloffset was not supported by the FreeBSD boot code * and always passed in as 0] * [esym is also known as total in the boot code, and * was never properly supported by the FreeBSD boot code] * * Old diskless netboot code: * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); * [return address != 0, and can NOT be returned to] * If we are being booted by this code it will NOT work, * so we are just going to halt if we find this case. * * New uniform boot code: * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) * [return address != 0, and can be returned to] * * There may seem to be a lot of wasted arguments in here, but * that is so the newer boot code can still load very old kernels * and old boot code can load new kernels. 
*/ /* * The old style disk boot blocks fake a frame on the stack and * did an lret to get here. The frame on the stack has a return * address of 0. */ cmpl $0,4(%ebp) je olddiskboot /* * We have some form of return address, so this is either the * old diskless netboot code, or the new uniform code. That can * be detected by looking at the 5th argument, if it is 0 * we are being booted by the new uniform boot code. */ cmpl $0,24(%ebp) je newboot /* * Seems we have been loaded by the old diskless boot code, we * don't stand a chance of running as the diskless structure * changed considerably between the two, so just halt. */ hlt /* * We have been loaded by the new uniform boot code. * Let's check the bootinfo version, and if we do not understand * it we return to the loader with a status of 1 to indicate this error */ newboot: movl 28(%ebp),%ebx /* &bootinfo.version */ movl BI_VERSION(%ebx),%eax cmpl $1,%eax /* We only understand version 1 */ je 1f movl $1,%eax /* Return status */ leave /* * XXX this returns to our caller's caller (as is required) since * we didn't set up a frame and our caller did. */ ret 1: /* * If we have a kernelname copy it in */ movl BI_KERNELNAME(%ebx),%esi cmpl $0,%esi je 2f /* No kernelname */ movl $MAXPATHLEN,%ecx /* Brute force!!! */ movl $R(kernelname),%edi cmpb $'/',(%esi) /* Make sure it starts with a slash */ je 1f movb $'/',(%edi) incl %edi decl %ecx 1: cld rep movsb 2: /* * Determine the size of the boot loader's copy of the bootinfo * struct. This is impossible to do properly because old versions * of the struct don't contain a size field and there are 2 old * versions with the same version number. */ movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ je got_bi_size /* no, sizeless version */ movl BI_SIZE(%ebx),%ecx got_bi_size: /* * Copy the common part of the bootinfo struct */ movl %ebx,%esi movl $R(bootinfo),%edi cmpl $BOOTINFO_SIZE,%ecx jbe got_common_bi_size movl $BOOTINFO_SIZE,%ecx got_common_bi_size: cld rep movsb #ifdef NFS_ROOT #ifndef BOOTP_NFSV3 /* * If we have a nfs_diskless structure copy it in */ movl BI_NFS_DISKLESS(%ebx),%esi cmpl $0,%esi je olddiskboot movl $R(nfs_diskless),%edi movl $NFSDISKLESS_SIZE,%ecx cld rep movsb movl $R(nfs_diskless_valid),%edi movl $1,(%edi) #endif #endif /* * The old style disk boot. * (*btext)(howto, bootdev, cyloffset, esym); * Note that the newer boot code just falls into here to pick * up howto and bootdev, cyloffset and esym are no longer used */ olddiskboot: movl 8(%ebp),%eax movl %eax,R(boothowto) movl 12(%ebp),%eax movl %eax,R(bootdev) ret /********************************************************************** * * Identify the CPU and initialize anything special about it * */ identify_cpu: /* Try to toggle alignment check flag; does not exist on 386. */ pushfl popl %eax movl %eax,%ecx orl $PSL_AC,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_AC,%eax pushl %ecx popfl testl %eax,%eax jnz try486 /* NexGen CPU does not have aligment check flag. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jz trynexgen popfl movl $CPU_386,R(cpu) jmp 3f trynexgen: popfl movl $CPU_NX586,R(cpu) movl $0x4778654e,R(cpu_vendor) # store vendor string movl $0x72446e65,R(cpu_vendor+4) movl $0x6e657669,R(cpu_vendor+8) movl $0,R(cpu_vendor+12) jmp 3f try486: /* Try to toggle identification flag; does not exist on early 486s. 
*/ pushfl popl %eax movl %eax,%ecx xorl $PSL_ID,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_ID,%eax pushl %ecx popfl testl %eax,%eax jnz trycpuid movl $CPU_486,R(cpu) /* * Check Cyrix CPU * Cyrix CPUs do not change the undefined flags following * execution of the divide instruction which divides 5 by 2. * * Note: CPUID is enabled on M2, so it passes another way. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jnc trycyrix popfl jmp 3f /* You may use Intel CPU. */ trycyrix: popfl /* * IBM Bluelighting CPU also doesn't change the undefined flags. * Because IBM doesn't disclose the information for Bluelighting * CPU, we couldn't distinguish it from Cyrix's (including IBM * brand of Cyrix CPUs). */ movl $0x69727943,R(cpu_vendor) # store vendor string movl $0x736e4978,R(cpu_vendor+4) movl $0x64616574,R(cpu_vendor+8) jmp 3f trycpuid: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid # cpuid 0 movl %eax,R(cpu_high) # highest capability movl %ebx,R(cpu_vendor) # store vendor string movl %edx,R(cpu_vendor+4) movl %ecx,R(cpu_vendor+8) movb $0,R(cpu_vendor+12) movl $1,%eax cpuid # cpuid 1 movl %eax,R(cpu_id) # store cpu_id movl %ebx,R(cpu_procinfo) # store cpu_procinfo movl %edx,R(cpu_feature) # store cpu_feature rorl $8,%eax # extract family type andl $15,%eax cmpl $5,%eax jae 1f /* less than Pentium; must be 486 */ movl $CPU_486,R(cpu) jmp 3f 1: /* a Pentium? */ cmpl $5,%eax jne 2f movl $CPU_586,R(cpu) jmp 3f 2: /* Greater than Pentium...call it a Pentium Pro */ movl $CPU_686,R(cpu) 3: ret /********************************************************************** * * Create the first page directory and its page tables. * */ create_pagetables: /* Find end of kernel image (rounded up to a page boundary). */ movl $R(_end),%esi /* Include symbols, if any. */ movl R(bootinfo+BI_ESYMTAB),%edi testl %edi,%edi je over_symalloc movl %edi,%esi movl $KERNBASE,%edi addl %edi,R(bootinfo+BI_SYMTAB) addl %edi,R(bootinfo+BI_ESYMTAB) over_symalloc: /* If we are told where the end of the kernel space is, believe it. 
*/ movl R(bootinfo+BI_KERNEND),%edi testl %edi,%edi je no_kernend movl %edi,%esi no_kernend: addl $PAGE_MASK,%esi andl $~PAGE_MASK,%esi movl %esi,R(KERNend) /* save end of kernel */ movl %esi,R(physfree) /* next free page is at end of kernel */ /* Allocate Kernel Page Tables */ ALLOCPAGES(NKPT) movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) /* Allocate UPAGES */ ALLOCPAGES(UAREA_PAGES) movl %esi,R(p0upa) addl $KERNBASE, %esi movl %esi, R(proc0uarea) ALLOCPAGES(KSTACK_PAGES) movl %esi,R(p0kpa) addl $KERNBASE, %esi movl %esi, R(proc0kstack) ALLOCPAGES(1) /* vm86/bios stack */ movl %esi,R(vm86phystk) ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ movl %esi,R(vm86pa) addl $KERNBASE, %esi movl %esi, R(vm86paddr) #ifdef SMP /* Allocate cpu0's private data page */ ALLOCPAGES(1) movl %esi,R(cpu0pp) addl $KERNBASE, %esi movl %esi, R(cpu0prvpage) /* relocated to KVM space */ /* Allocate SMP page table page */ ALLOCPAGES(1) movl %esi,R(SMPptpa) addl $KERNBASE, %esi movl %esi, R(SMPpt) /* relocated to KVM space */ #endif /* SMP */ /* Map read-only from zero to the end of the kernel text section */ xorl %eax, %eax xorl %edx,%edx movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map read-write, data, bss and symbols */ movl $R(etext),%eax addl $PAGE_MASK, %eax andl $~PAGE_MASK, %eax movl $PG_RW,%edx movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) /* Map proc0's UPAGES in the physical way ... */ movl R(p0upa), %eax movl $(UAREA_PAGES), %ecx fillkptphys($PG_RW) /* Map proc0's KSTACK in the physical way ... */ movl R(p0kpa), %eax movl $(KSTACK_PAGES), %ecx fillkptphys($PG_RW) /* Map ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkptphys($PG_RW) /* Map space for the vm86 region */ movl R(vm86phystk), %eax movl $4, %ecx fillkptphys($PG_RW) /* Map page 0 into the vm86 page table */ movl $0, %eax movl $0, %ebx movl $1, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) /* ...likewise for the ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) #ifdef SMP /* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ movl R(cpu0pp), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map SMP page table page into global kmem FWIW */ movl R(SMPptpa), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map the private page into the SMP page table */ movl R(cpu0pp), %eax movl $0, %ebx /* pte offset = 0 */ movl $1, %ecx /* one private page coming right up */ fillkpt(R(SMPptpa), $PG_RW) /* ... and put the page table table in the pde. */ movl R(SMPptpa), %eax movl $MPPTDI, %ebx movl $1, %ecx fillkpt(R(IdlePTD), $PG_RW) /* Fakeup VA for the local apic to allow early traps. 
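 *
 * (Aside: a rough C rendering of what the fillkptphys()/fillkpt()
 * macros used throughout this routine do; the names here only
 * approximate the assembler macros defined earlier in the file:
 *
 *	for (i = 0; i < npages; i++)
 *		pt[ndx + i] = (base + i * PAGE_SIZE) | PG_V | prot;
 *
 * i.e. enter npages translations with the given protection bits,
 * with fillkptphys() deriving ndx from the physical address itself.
 * Under PAE the extra top level allocated above, IdlePDPT, is a
 * page-directory pointer table of only 4 entries of 8 bytes each,
 * which is why its XXX note says just 32 bytes of the page are
 * really needed.)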
*/ ALLOCPAGES(1) movl %esi, %eax movl $(NPTEPG-1), %ebx /* pte offset = NPTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ movl R(KPTphys), %eax xorl %ebx, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install pde's for pt's */ movl R(KPTphys), %eax movl $KPTDI, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install a pde recursively mapping page directory as a page table */ movl R(IdlePTD), %eax movl $PTDPTDI, %ebx movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) + +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif ret Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c (revision 112840) +++ head/sys/amd64/amd64/machdep.c (revision 112841) @@ -1,2731 +1,2741 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #include #ifdef PERFMON #include #endif #ifdef SMP #include #include #endif #include #include #include #include #include #include extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #if defined(CPU_DISABLE_SSE) #undef CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif long Maxmem = 0; vm_paddr_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; #ifndef SMP static struct pcpu __pcpu; #endif struct mtx icu_lock; static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); /* * Display any holes after the first chunk of extended memory. 
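 *
 * (Aside: the (uintmax_t) casts around ptoa() above matter once PAE
 * permits more than 4 GB of physical memory, because ptoa() is just
 * a left shift by PAGE_SHIFT and would otherwise truncate to 32
 * bits.  A worked example with 6 GB of RAM:
 *
 *	Maxmem = 0x180000 pages of 4 KB
 *	(u_int)(0x180000 << 12)    = 0x80000000	  wrapped, wrong
 *	(uintmax_t)0x180000 << 12  = 0x180000000  correct
 * )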
*/ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifndef SMP /* For SMP, we delay the cpu_setregs() until after SMP startup. */ cpu_setregs(); #endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. 
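 *
 * (Aside, for orientation only: the two argument lists built above
 * correspond, roughly, to the two handler shapes the process sees:
 *
 *	void handler(int sig, int code, struct sigcontext *scp);
 *						old FreeBSD style
 *	void handler(int sig, siginfo_t *info, void *ucp);
 *						SA_SIGINFO style
 *
 * with the ps_siginfo test above selecting between them.)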
* We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. 
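 *
 * (Aside: PSL_VIF and PSL_VIP are bits 19 and 20 of EFLAGS.  When
 * the CPU lacks VME support the hardware copies are meaningless, so
 * the merge below takes every bit except VIF/VIP from the trapframe
 * and those two bits from the software copy kept in vm86_eflags;
 * schematically:
 *
 *	mc_eflags = (tf_eflags & ~(PSL_VIF | PSL_VIP))
 *		  | (vm86_eflags & (PSL_VIF | PSL_VIP));
 * )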
*/ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); } #endif /* COMPAT_FREEBSD4 */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, sig, mask, code); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. 
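 *
 * (Aside: the "& ~0xF" above rounds the frame pointer down to a
 * 16-byte boundary, e.g. sp = 0xbfbfe7fc yields sfp = 0xbfbfe7f0;
 * rounding down is safe because the stack grows toward lower
 * addresses.)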
*/ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. 
tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (scp->sc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. 
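 *
 * (Aside: EFL_SECURE(), defined near the top of this file, rejects
 * any eflags change outside PSL_USERCHANGE.  For example, a forged
 * frame that tries to grant itself I/O privilege fails the check:
 *
 *	old eflags = 0x00000202		IF set, IOPL = 0
 *	new eflags = 0x00003202		IOPL = 3 requested
 *	new ^ old  = 0x00003000		the IOPL bits, which are
 *					not user-changeable
 *
 * so the syscall returns EINVAL instead of honoring the frame.)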
*/ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
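 *
 * (Aside: CS_SECURE() checks only the privilege level of the
 * selector, ISPL(cs) == SEL_UPL, i.e. the low two bits must be 3:
 *
 *	cs = 0x001f  ->  RPL 3, accepted (a normal user %cs)
 *	cs = 0x0008  ->  RPL 0, rejected with SIGBUS and EINVAL
 *
 * everything else about the selector is left for the hardware to
 * fault on, as noted above.)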
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(void) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) { enable_intr(); } else { /* * we must absolutely guarentee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } } } /* * Clear registers on exec */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. 
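 *
 * (Aside: the enable bits for DR0-DR3 live in the low byte of DR7,
 * so a stale breakpoint, say DR0 still aimed into the old image's
 * text with its enable bit set, would raise spurious debug traps at
 * arbitrary addresses in the newly exec'd program; reset_dbregs()
 * below brings the hardware copies back in line with the zeroed
 * pcb fields.)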
*/ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); /* Initialize the npx (if any) for the current process. */ /* * XXX the above load_cr0() also initializes it and is a layering * violation if NPX is configured. It drops the npx partially * and this would be fatal if we were interrupted now, and decided * to force the state to the pcb, and checked the invariant * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL). * ALL of this can happen except the check. The check used to * happen and be fatal later when we didn't complete the drop * before returning to user mode. This should be fixed properly * soon. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); #ifdef SMP cr0 |= CR0_NE; /* Done by npxinit() */ #endif cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ #ifndef I386_CPU cr0 |= CR0_WP | CR0_AM; #endif load_cr0(cr0); load_gs(_udatasel); } static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); u_long bootdev; /* not a dev_t - encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in dev_t format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0uarea; extern vm_offset_t proc0kstack; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* 
segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment 
base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), 
IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, physmap_idx, pa_indx; int hasbrokenint12; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; char *cp; struct bios_smap *smap; hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Some newer BIOSes has broken INT 12H implementation which cause * kernel panic immediately. In this case, we need to scan SMAP * with INT 15:E820 first, then determine base memory size. */ if (hasbrokenint12) { goto int15e820; } /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; int15e820: /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. 
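 *
 * (Aside, for orientation: each INT 15h/E820 call in the loop below
 * fills in one struct bios_smap, which has the shape
 *
 *	struct bios_smap {
 *		u_int64_t base;		first byte of the region
 *		u_int64_t length;	size in bytes
 *		u_int32_t type;		0x01 means usable RAM
 *	} __packed;
 *
 * with %ebx carrying the BIOS's continuation value from call to
 * call; the do/while loop runs until the BIOS hands back zero.)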
*/ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); /* * get memory map with INT 15:E820 */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; +#ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } +#endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: ; } while (vmf.vmf_ebx != 0); /* * Perform "base memory" related probes & setup based on SMAP */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) { basemem = 640; } if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ i386_mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. 
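 *
 * (Aside, a worked example of the hw.physmem parsing below: the
 * suffix cases deliberately fall through, so 'g' shifts by 30, 'm'
 * by 20 and 'k' by 10.  Setting hw.physmem="512m" gives
 *
 *	AllowMem = 512 << 20 = 0x20000000 bytes
 *	Maxmem   = atop(AllowMem) = 0x20000 pages
 *
 * while a value whose shift overflows is caught by the
 * AllowMem < sanity test and ignored.)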
*/ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.physmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); freeenv(cp); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; pte = CMAP1; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; int *ptr = (int *)CADDR1; /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). 
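 *
 * (Aside: after the page-by-page test above, phys_avail[] holds
 * start/end pairs of good physical memory, terminated by a pair of
 * zeroes.  An illustrative, made-up layout:
 *
 *	phys_avail[0] = 0x00001000	page 0 masked off above
 *	phys_avail[1] = 0x0009f000	end of base memory
 *	phys_avail[2] = 0x00400000	first page past the kernel
 *	phys_avail[3] = 0x1ffff000	end of that chunk
 *	phys_avail[4] = 0, [5] = 0	terminator
 *
 * The trimming below then steals the tail of the last chunk for the
 * message buffer.)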
*/ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, off, x; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif struct pcpu *pc; proc0.p_uarea = proc0uarea; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; atdevbase = ISA_HOLE_START + KERNBASE; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup(&proc0, &ksegrp0, &kse0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); #ifdef SMP pc = &SMP_prvspace[0].pcpu; gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct privatespace) - 1); #else pc = &__pcpu; gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct pcpu) - 1); #endif gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); /* make ldt memory segments */ /* * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... 
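 *
 * (Aside: the atop(0 - 1) used above for the kernel code and data
 * limits evaluates to atop(0xffffffff) = 0xfffff; with the
 * granularity bit set that is a limit of 0x100000 pages of 4 KB,
 * i.e. flat 4 GB segments, which is what the note about text
 * protection being disabled refers to.)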
*/ ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA isa_defaultirq(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
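 *
 * (Aside on the setidt() calls above: SDT_SYS386TGT installs a trap
 * gate and SDT_SYS386IGT an interrupt gate; the latter clears IF on
 * entry, which is why the debug, breakpoint and page fault vectors
 * use it.  The DPL argument controls which rings may raise the
 * vector with an explicit int instruction, e.g.
 *
 *	setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT,
 *	    SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 *
 * uses SEL_UPL so ring 3 code can execute int $0x80 for system
 * calls, while the fault vectors stay at SEL_KPL.)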
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured, and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb
*pcb; struct trapframe *tp; tp = td->td_frame; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = td->td_pcb; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. 
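*/

/*
 * set_regs() above rejects an unsafe %eflags/%cs pair outright;
 * set_mcontext() below instead merges only the user-changeable flag
 * bits into the live value. A small sketch of that merge; the demo
 * mask is an assumption built from the architectural user flags
 * (CF, PF, AF, ZF, SF, TF, DF, OF), not FreeBSD's PSL_USERCHANGE.
 */
#include <stdint.h>

#define DEMO_USERCHANGE	0x00000dd5	/* CF|PF|AF|ZF|SF|TF|DF|OF */

static uint32_t
merge_user_eflags(uint32_t new_efl, uint32_t cur_efl)
{
	/* Privileged bits (IOPL, IF, ...) always come from cur_efl. */
	return ((new_efl & DEMO_USERCHANGE) | (cur_efl & ~DEMO_USERCHANGE));
}

/*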
*/ int get_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; tp = td->td_frame; mcp->mc_onstack = sigonstack(tp->tf_esp); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_ebx = tp->tf_ebx; mcp->mc_edx = tp->tf_edx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eax = tp->tf_eax; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_eflags = tp->tf_eflags; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. 
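*/

/*
 * The do/while in get_fpcontext() above bumps the pointer in 4-byte
 * steps until it reaches a 16-byte boundary, because fxsave faults on
 * a misaligned save area and the spare fields after mc_fpstate leave
 * room for the shift. The loop is equivalent to this one-liner (a
 * sketch, not the kernel's idiom):
 */
#include <stdint.h>

static void *
align16(void *p)
{
	return ((void *)(((uintptr_t)p + 15) & ~(uintptr_t)15));
}

/*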
*/ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; u_int32_t mask1, mask2; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * results in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; i++, mask1 <<= 2, mask2 <<= 2) if ((dbregs->dr[7] & mask1) == mask2) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (suser(td) != 0) { if (dbregs->dr[7] & 0x3) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise.
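*/

/*
 * The "dr[7] & 0x3" style tests in set_dbregs() above and the
 * mask1/mask2 loop both follow from the DR7 layout: two enable bits
 * (local/global) per debug register in bits 0-15, then a 2-bit R/W
 * field and a 2-bit LEN field per register in bits 16-31, where R/W
 * value 10b is the undefined pattern being rejected. Illustrative
 * helpers making that indexing explicit:
 */
#include <stdint.h>

static int
dr7_bp_enabled(uint32_t dr7, int n)	/* n = 0..3 */
{
	return (((dr7 >> (n * 2)) & 0x3) != 0);
}

static uint32_t
dr7_bp_control(uint32_t dr7, int n)	/* R/W + LEN nibble for bp n */
{
	return ((dr7 >> (16 + n * 4)) & 0xf);
}

/*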
*/ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This makes a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/amd64/amd64/mpboot.S =================================================================== --- head/sys/amd64/amd64/mpboot.S (revision 112840) +++ head/sys/amd64/amd64/mpboot.S (revision 112841) @@ -1,272 +1,282 @@ /* * Copyright (c) 1995, Jack F. Vogel * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Jack F. Vogel * 4. The name of the developer may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * mpboot.s: FreeBSD machine support for the Intel MP Spec * multiprocessor systems. * * $FreeBSD$ */ #include <machine/asmacros.h> /* miscellaneous asm macros */ #include <machine/apic.h> #include <machine/specialreg.h> #include "assym.s" +#define R(x) ((x)-KERNBASE) + /* * this code MUST be enabled here and in mp_machdep.c * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) && !defined(PC98) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define CHECKPOINT(A,D) \ movb $(A),%al ; \ outb %al,$CMOS_REG ; \ movb $(D),%al ; \ outb %al,$CMOS_DATA #else #define CHECKPOINT(A,D) #endif /* CHECK_POINTS */ /* * the APs enter here from their trampoline code (bootMP, below) */ .p2align 4 NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) /* Now enable paging mode */ - movl IdlePTD-KERNBASE, %eax +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else + movl R(IdlePTD), %eax movl %eax,%cr3 +#endif movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ movl bootSTK,%esp /* boot stack end loc. */ pushl $mp_begin /* jump to high mem */ ret /* * Wait for the booting CPU to signal startup */ mp_begin: /* now running relocated at KERNBASE */ CHECKPOINT(0x37, 4) call init_secondary /* load i386 tables */ CHECKPOINT(0x38, 5) /* * If the [BSP] CPU has support for VME, turn it on. */ testl $CPUID_VME, cpu_feature /* XXX WRONG! BSP! */ jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* disable the APIC, just to be SURE */ movl lapic+LA_SVR, %eax /* get spurious vector reg. */ andl $~APIC_SVR_SWEN, %eax /* clear software enable bit */ movl %eax, lapic+LA_SVR /* signal our startup to the BSP */ movl lapic+LA_VER, %eax /* our version reg contents */ movl %eax, cpu_apic_versions /* into [ 0 ] */ incl mp_ncpus /* signal BSP */ CHECKPOINT(0x39, 6) /* Now, let's prepare for some REAL WORK :-) This doesn't return. */ call ap_init /* * This is the embedded trampoline or bootstrap that is * copied into 'real-mode' low memory, it is where the * secondary processor "wakes up". When it is executed * the processor will eventually jump into the routine * MPentry, which resides in normal kernel text above * 1Meg.
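*/

/*
 * What the PAE branch above buys: %cr3 now points at a tiny 4-entry
 * page-directory-pointer table, and a 32-bit virtual address splits
 * 2+9+9+12 instead of the classic 10+10+12, with 64-bit entries that
 * leave room for physical addresses above 4 GB. A sketch of the index
 * arithmetic (function and names are illustrative):
 */
#include <stdint.h>

static void
pae_va_split(uint32_t va, unsigned *pdpt_i, unsigned *pd_i, unsigned *pt_i,
    unsigned *off)
{
	*pdpt_i = va >> 30;		/* 1 of 4 PDPT entries, 1 GB each */
	*pd_i = (va >> 21) & 0x1ff;	/* 1 of 512 PDEs, 2 MB each */
	*pt_i = (va >> 12) & 0x1ff;	/* 1 of 512 PTEs, 4 KB each */
	*off = va & 0xfff;		/* byte offset within the page */
}

/*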
-jackv */ .data ALIGN_DATA /* just to be sure */ BOOTMP1: NON_GPROF_ENTRY(bootMP) .code16 cli CHECKPOINT(0x34, 1) /* First guarantee a 'clean slate' */ xorl %eax, %eax movl %eax, %ebx movl %eax, %ecx movl %eax, %edx movl %eax, %esi movl %eax, %edi /* set up data segments */ mov %cs, %ax mov %ax, %ds mov %ax, %es mov %ax, %fs mov %ax, %gs mov %ax, %ss mov $(boot_stk-bootMP), %esp /* Now load the global descriptor table */ lgdt MP_GDTptr-bootMP /* Enable protected mode */ movl %cr0, %eax orl $CR0_PE, %eax movl %eax, %cr0 /* * make intrasegment jump to flush the processor pipeline and * reload CS register */ pushl $0x18 pushl $(protmode-bootMP) lretl .code32 protmode: CHECKPOINT(0x35, 2) /* * we are NOW running for the first time with %eip * having the full physical address, BUT we still * are using a segment descriptor with the origin * not matching the booting kernel. * * SO NOW... for the BIG Jump into kernel's segment * and physical text above 1 Meg. */ mov $0x10, %ebx movw %bx, %ds movw %bx, %es movw %bx, %fs movw %bx, %gs movw %bx, %ss .globl bigJump bigJump: /* this will be modified by mpInstallTramp() */ ljmp $0x08, $0 /* far jmp to MPentry() */ dead: hlt /* We should never get here */ jmp dead /* * MP boot strap Global Descriptor Table */ .p2align 4 .globl MP_GDT .globl bootCodeSeg .globl bootDataSeg MP_GDT: nulldesc: /* offset = 0x0 */ .word 0x0 .word 0x0 .byte 0x0 .byte 0x0 .byte 0x0 .byte 0x0 kernelcode: /* offset = 0x08 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0K */ .byte 0x9f /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..31 */ kerneldata: /* offset = 0x10 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0k */ .byte 0x93 /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..31 */ bootcode: /* offset = 0x18 */ .word 0xffff /* segment limit 0..15 */ bootCodeSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16..23; set for 0x000xx000 */ .byte 0x9e /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..31 */ bootdata: /* offset = 0x20 */ .word 0xffff bootDataSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16..23; set for 0x000xx000 */ .byte 0x92 .byte 0xcf .byte 0x0 /* * GDT pointer for the lgdt call */ .globl mp_gdtbase MP_GDTptr: mp_gdtlimit: .word 0x0028 mp_gdtbase: /* this will be modified by mpInstallTramp() */ .long 0 .space 0x100 /* space for boot_stk - 1st temporary stack */ boot_stk: BOOTMP2: .globl bootMP_size bootMP_size: .long BOOTMP2 - BOOTMP1 Index: head/sys/amd64/amd64/pmap.c =================================================================== --- head/sys/amd64/amd64/pmap.c (revision 112840) +++ head/sys/amd64/amd64/pmap.c (revision 112841) @@ -1,3425 +1,3473 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD$ */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Manages physical address maps. 
* * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_kstack_pages.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/mman.h> #include <sys/msgbuf.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/sx.h> #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> #ifdef SMP #include <sys/smp.h> #endif #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_kern.h> #include <vm/vm_page.h> #include <vm/vm_map.h> #include <vm/vm_object.h> #include <vm/vm_extern.h> #include <vm/vm_pageout.h> #include <vm/vm_pager.h> #include <vm/uma.h> #include <machine/cpu.h> #include <machine/cputypes.h> #include <machine/md_var.h> #include <machine/specialreg.h> #if defined(SMP) || defined(APIC_IO) #include <machine/smp.h> #include <machine/apic.h> #include <machine/segments.h> #include <machine/tss.h> #endif /* SMP || APIC_IO */ #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif #define MINPV 2048 #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) static int protection_codes[8]; struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_paddr_t avail_start; /* PA of first available physical page */ vm_paddr_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed?
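*/

/*
 * The pmap_pde()/pdir_pde() macros above are this shift spelled out:
 * without PAE a 32-bit VA splits 10+10+12, PDRSHIFT is 22, and one PDE
 * covers 4 MB. A sketch with the classic constants (names are
 * illustrative):
 */
#include <stdint.h>

#define DEMO_PDRSHIFT	22	/* 10-bit page directory index */
#define DEMO_NPTEPG	1024	/* 4-byte PTEs per page table page */

static void
i386_va_split(uint32_t va, unsigned *pde_i, unsigned *pte_i, unsigned *off)
{
	*pde_i = va >> DEMO_PDRSHIFT;			/* 4 MB per PDE */
	*pte_i = (va >> 12) & (DEMO_NPTEPG - 1);	/* 4 KB per PTE */
	*off = va & 0xfff;
}

/*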
*/ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ static int nkpt; vm_offset_t kernel_vm_end; extern u_int32_t KERNend; +#ifdef PAE +static uma_zone_t pdptzone; +#endif + /* * Data for the pv entry allocation mechanism */ static uma_zone_t pvzone; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; int pmap_pagedaemon_waken; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; static pt_entry_t *CMAP2, *CMAP3, *ptmmap; caddr_t CADDR1 = 0, ptvmmap = 0; static caddr_t CADDR2, CADDR3; static struct mtx CMAPCADDR12_lock; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp = 0; /* * Crashdump maps. */ static pt_entry_t *pt_crashdumpmap; static caddr_t crashdumpmap; #ifdef SMP extern pt_entry_t *SMPpt; #endif static pt_entry_t *PMAP1 = 0; static pt_entry_t *PADDR1 = 0; static PMAP_INLINE void free_pv_entry(pv_entry_t pv); static pv_entry_t get_pv_entry(void); static void i386_protection_init(void); static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va); static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex); static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); -static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#ifdef PAE +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif static pd_entry_t pdir4mb; CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * Move the kernel virtual free pointer to the next * 4MB. This is used to help improve performance * by using a large (4MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) return newaddr; #endif #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); #endif return newaddr; } /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(firstaddr, loadaddr) vm_paddr_t firstaddr; vm_paddr_t loadaddr; { vm_offset_t va; pt_entry_t *pte; int i; avail_start = firstaddr; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. 
It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize protection array. */ i386_protection_init(); /* * Initialize the kernel pmap (which is statically allocated). */ kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the idle process page zeroing. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP2, CADDR2, 1) SYSMAP(caddr_t, CMAP3, CADDR3, 1) mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF); /* * Crashdump maps. */ SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. * XXX ptmmap is not used. */ SYSMAP(caddr_t, ptmmap, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. * XXX msgbufmap is not used. */ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, atop(round_page(MSGBUF_SIZE))) /* * ptemap is used for pmap_pte_quick */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); virtual_avail = va; *CMAP1 = *CMAP2 = 0; for (i = 0; i < NKPT; i++) PTD[i] = 0; pgeflag = 0; #ifndef DISABLE_PG_G if (cpu_feature & CPUID_PGE) pgeflag = PG_G; #endif #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) { printf("Warning: Pentium 4 cpu: PG_G disabled (global flag)\n"); pgeflag = 0; } #endif /* * Initialize the 4MB page size flag */ pseflag = 0; /* * The 4MB page version of the initial * kernel page mapping. */ pdir4mb = 0; #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) pseflag = PG_PS; #endif #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) { printf("Warning: Pentium 4 cpu: PG_PS disabled (4MB pages)\n"); pseflag = 0; } #endif #ifndef DISABLE_PSE if (pseflag) { pd_entry_t ptditmp; /* * Note that we have enabled PSE mode */ ptditmp = *(PTmap + i386_btop(KERNBASE)); ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; } #endif #ifndef SMP /* * Turn on PGE/PSE. SMP does this later on since the * 4K page tables are required for AP boot (for now). * XXX fixme. */ pmap_set_opt(); #endif #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! 
(non-SMP hardware?)"); /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif invltlb(); } /* * Enable 4MB page mode for MP startup. Turn on PG_G support. * BSP will run this after all the AP's have started up. */ void pmap_set_opt(void) { pt_entry_t *pte; vm_offset_t va, endva; if (pgeflag && (cpu_feature & CPUID_PGE)) { load_cr4(rcr4() | CR4_PGE); invltlb(); /* Insurance */ } #ifndef DISABLE_PSE if (pseflag && (cpu_feature & CPUID_PSE)) { load_cr4(rcr4() | CR4_PSE); invltlb(); /* Insurance */ } #endif if (PCPU_GET(cpuid) == 0) { #ifndef DISABLE_PSE if (pdir4mb) { kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; invltlb(); /* Insurance */ } #endif if (pgeflag) { /* Turn on PG_G for text, data, bss pages. */ va = (vm_offset_t)btext; #ifndef DISABLE_PSE if (pseflag && (cpu_feature & CPUID_PSE)) { if (va < KERNBASE + (1 << PDRSHIFT)) va = KERNBASE + (1 << PDRSHIFT); } #endif endva = KERNBASE + KERNend; while (va < endva) { pte = vtopte(va); if (*pte) *pte |= pgeflag; va += PAGE_SIZE; } invltlb(); /* Insurance */ } /* * We do not need to broadcast the invltlb here, because * each AP does it the moment it is released from the boot * lock. See ap_init(). */ } } static void * -pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { *flags = UMA_SLAB_PRIV; return (void *)kmem_alloc(kernel_map, bytes); } +#ifdef PAE +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); +} +#endif + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_paddr_t phys_start, phys_end; { int i; int initial_pvs; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ for(i = 0; i < vm_page_array_size; i++) { vm_page_t m; m = &vm_page_array[i]; TAILQ_INIT(&m->md.pv_list); m->md.pv_list_count = 0; } /* * init the pv free list */ initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); - uma_zone_set_allocf(pvzone, pmap_allocf); + uma_zone_set_allocf(pvzone, pmap_pv_allocf); uma_prealloc(pvzone, initial_pvs); +#ifdef PAE + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif + /* * Now it is safe to enable pv_table recording. */ pmap_initialized = TRUE; } /* * Initialize the address space (zone) for the pv_entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ void pmap_init2() { int shpgperproc = PMAP_SHPGPERPROC; TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_page_array_size; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); } /*************************************************** * Low level helper routines..... 
***************************************************/ #if defined(PMAP_DIAGNOSTIC) /* * This code checks for non-writeable/modified pages. * This should be an invalid condition. */ static int pmap_nw_modified(pt_entry_t ptea) { int pte; pte = (int) ptea; if ((pte & (PG_M|PG_RW)) == PG_M) return 1; else return 0; } #endif /* * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ static PMAP_INLINE int pmap_track_modified(vm_offset_t va) { if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) return 1; else return 0; } #ifdef I386_CPU /* * i386 only has "invalidate everything" and no SMP to worry about. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } #else /* !I386_CPU */ #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { u_int cpumask; u_int other_cpus; critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invlpg(va); if (pmap->pm_active & other_cpus) smp_masked_invlpg(pmap->pm_active & other_cpus, va); } critical_exit(); } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { u_int cpumask; u_int other_cpus; vm_offset_t addr; critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); if (pmap->pm_active & other_cpus) smp_masked_invlpg_range(pmap->pm_active & other_cpus, sva, eva); } critical_exit(); } void pmap_invalidate_all(pmap_t pmap) { u_int cpumask; u_int other_cpus; #ifdef SWTCH_OPTIM_STATS tlb_flush_count++; #endif critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invltlb(); if (pmap->pm_active & other_cpus) smp_masked_invltlb(pmap->pm_active & other_cpus); } critical_exit(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. 
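*/

/*
 * All of the invalidation variants above and below reduce to two
 * hardware primitives: invlpg, which drops a single TLB entry, and a
 * %cr3 reload, which drops every non-global entry. A hedged
 * stand-alone sketch in the inline-asm style this file already uses
 * elsewhere:
 */
static __inline void
demo_invlpg(unsigned int va)
{
	__asm __volatile("invlpg %0" : : "m" (*(char *)va) : "memory");
}

static __inline void
demo_invltlb(void)
{
	unsigned int tmp;

	/* Writing %cr3 back to itself flushes non-global TLB entries. */
	__asm __volatile("movl %%cr3,%0; movl %0,%%cr3" : "=r" (tmp)
	    : : "memory");
}

/*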
*/ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || pmap->pm_active) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } #endif /* !SMP */ #endif /* !I386_CPU */ /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. */ pt_entry_t * pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return vtopte(va); newpf = *pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1); } return PADDR1 + (i386_btop(va) & (NPTEPG - 1)); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; if (pmap == 0) return 0; pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) { rtval = (pde & ~PDRMASK) | (va & PDRMASK); return rtval; } pte = pmap_pte_quick(pmap, va); rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); return rtval; } return 0; } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); *pte = pa | PG_RW | PG_V | pgeflag; } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); *pte = 0; } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; va = sva = *virt; while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. 
*/ void pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; retry: m = vm_page_lookup(object, pindex); if (m != NULL) { vm_page_lock_queues(); if (vm_page_sleep_if_busy(m, FALSE, "pplookp")) goto retry; vm_page_unlock_queues(); } return m; } #ifndef KSTACK_MAX_PAGES #define KSTACK_MAX_PAGES 32 #endif /* * Create the kernel stack (including pcb for i386) for a new thread. * This routine directly affects the fork perf for a process and * create performance for a thread. */ void pmap_new_thread(struct thread *td, int pages) { int i; vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_page_t m; vm_offset_t ks; /* Bounds check */ if (pages <= 1) pages = KSTACK_PAGES; else if (pages > KSTACK_MAX_PAGES) pages = KSTACK_MAX_PAGES; /* * allocate object for the kstack */ ksobj = vm_object_allocate(OBJT_DEFAULT, pages); td->td_kstack_obj = ksobj; /* get a kernel virtual address for the kstack for this thread */ #ifdef KSTACK_GUARD ks = kmem_alloc_nofault(kernel_map, (pages + 1) * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); if (*vtopte(ks) != 0) pmap_qremove(ks, 1); ks += PAGE_SIZE; td->td_kstack = ks; #else /* get a kernel virtual address for the kstack for this thread */ ks = kmem_alloc_nofault(kernel_map, pages * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); td->td_kstack = ks; #endif /* * Knowing the number of pages allocated is useful when you * want to deallocate them. */ td->td_kstack_pages = pages; /* * For the length of the stack, link in a real page of ram for each * page of stack. */ for (i = 0; i < pages; i++) { /* * Get a kernel stack page */ m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED); ma[i] = m; vm_page_lock_queues(); vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); m->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); } pmap_qenter(ks, ma, pages); } /* * Dispose the kernel stack for a thread that has exited. * This routine directly impacts the exit perf of a process and thread. */ void pmap_dispose_thread(td) struct thread *td; { int i; int pages; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; pmap_qremove(ks, pages); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); vm_page_lock_queues(); vm_page_busy(m); vm_page_unwire(m, 0); vm_page_free(m); vm_page_unlock_queues(); } /* * Free the space that this stack was mapped to in the kernel * address map. */ #ifdef KSTACK_GUARD kmem_free(kernel_map, ks - PAGE_SIZE, (pages + 1) * PAGE_SIZE); #else kmem_free(kernel_map, ks, pages * PAGE_SIZE); #endif vm_object_deallocate(ksobj); } /* * Set up a variable sized alternate kstack. Though it may look MI, it may * need to be different on certain arches like ia64. 
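*/

/*
 * The KSTACK_GUARD path in pmap_new_thread() above allocates one page
 * of KVA more than the stack needs and leaves that first page
 * unmapped, so running off the bottom of the stack faults immediately
 * instead of silently corrupting the neighbouring allocation. The
 * layout arithmetic, as a sketch (4 KB pages assumed):
 */
#define DEMO_PAGE_SIZE	4096

static unsigned long
kstack_usable_base(unsigned long raw_kva)
{
	/* raw_kva .. raw_kva + PAGE_SIZE stays unmapped as the guard. */
	return (raw_kva + DEMO_PAGE_SIZE);
}

/*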
*/ void pmap_new_altkstack(struct thread *td, int pages) { /* shuffle the original stack */ td->td_altkstack_obj = td->td_kstack_obj; td->td_altkstack = td->td_kstack; td->td_altkstack_pages = td->td_kstack_pages; pmap_new_thread(td, pages); } void pmap_dispose_altkstack(td) struct thread *td; { pmap_dispose_thread(td); /* restore the original kstack */ td->td_kstack = td->td_altkstack; td->td_kstack_obj = td->td_altkstack_obj; td->td_kstack_pages = td->td_altkstack_pages; td->td_altkstack = 0; td->td_altkstack_obj = NULL; td->td_altkstack_pages = 0; } /* * Allow the Kernel stack for a thread to be prejudicially paged out. */ void pmap_swapout_thread(td) struct thread *td; { int i; int pages; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; pmap_qremove(ks, pages); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_swapout_thread: kstack already missing?"); vm_page_lock_queues(); vm_page_dirty(m); vm_page_unwire(m, 0); vm_page_unlock_queues(); } } /* * Bring the kernel stack for a specified thread back in. */ void pmap_swapin_thread(td) struct thread *td; { int i, rv; int pages; vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; for (i = 0; i < pages; i++) { m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(ksobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid); m = vm_page_lookup(ksobj, i); m->valid = VM_PAGE_BITS_ALL; } ma[i] = m; vm_page_lock_queues(); vm_page_wire(m); vm_page_wakeup(m); vm_page_unlock_queues(); } pmap_qenter(ks, ma, pages); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt")) vm_page_lock_queues(); if (m->hold_count == 0) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if (pmap_is_current(pmap)) { /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); } /* * If the page is finally unwired, simply free it. */ --m->wire_count; if (m->wire_count == 0) { vm_page_busy(m); vm_page_free_zero(m); atomic_subtract_int(&cnt.v_wire_count, 1); } return 1; } return 0; } static PMAP_INLINE int pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { vm_page_unhold(m); if (m->hold_count == 0) return _pmap_unwire_pte_hold(pmap, m); else return 0; } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. 
*/ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) { unsigned ptepindex; if (va >= VM_MAXUSER_ADDRESS) return 0; if (mpte == NULL) { ptepindex = (va >> PDRSHIFT); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; } else { while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL && vm_page_sleep_if_busy(mpte, FALSE, "pulook")) vm_page_lock_queues(); } } return pmap_unwire_pte_hold(pmap, mpte); } void pmap_pinit0(pmap) struct pmap *pmap; { pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg[NPGPTD]; vm_paddr_t pa; int i; /* * No need to allocate page table space yet but we do need a valid * page directory table. */ - if (pmap->pm_pdir == NULL) + if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, NBPTD); +#ifdef PAE + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif + } /* * allocate object for the ptes */ if (pmap->pm_pteobj == NULL) pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + NPGPTD); /* * allocate the page directory page(s) */ for (i = 0; i < NPGPTD; i++) { ptdpg[i] = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); vm_page_lock_queues(); vm_page_flag_clear(ptdpg[i], PG_BUSY); ptdpg[i]->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) { if ((ptdpg[i]->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); } mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* Wire in kernel global address entries. */ /* XXX copies current process, does not fill in MPPTDI */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); #ifdef SMP pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; #endif /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; +#ifdef PAE + pmap->pm_pdpt[i] = pa | PG_V; +#endif } pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Wire in kernel global address entries. To avoid a race condition * between pmap initialization and pmap_growkernel, this procedure * should be called after the vmspace is attached to the process * but before this pmap is activated. */ void pmap_pinit2(pmap) struct pmap *pmap; { /* XXX: Remove this stub when no longer called */ } /* * this routine is called if the page table page is not * mapped correctly. 
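*/

/*
 * The "self-referential" entries installed by pmap_pinit() above are
 * what make vtopte()-style lookups cheap: because one PDE points back
 * at the page directory itself, every page table page shows up in a
 * fixed 4 MB window of KVA, and the address of the PTE mapping va is
 * pure shift-and-add. A sketch with an illustrative self-map slot:
 */
#include <stdint.h>

#define DEMO_PTDPTDI	0x2ff				/* example slot */
#define DEMO_PTMAP	((uint32_t)DEMO_PTDPTDI << 22)	/* window base */

static uint32_t
demo_vtopte(uint32_t va)
{
	return (DEMO_PTMAP + (va >> 12) * sizeof(uint32_t));
}

/*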
*/ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; unsigned ptepindex; { vm_paddr_t ptepa; vm_offset_t pteva; vm_page_t m; /* * Find or fabricate a new pagetable page */ m = vm_page_grab(pmap->pm_pteobj, ptepindex, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY); KASSERT(m->queue == PQ_NONE, ("_pmap_allocpte: %p->queue != PQ_NONE", m)); /* * Increment the hold count for the page table page * (denoting a new mapping.) */ m->hold_count++; /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); /* * Try to use the new mapping, but if we cannot, then * do it with the routine that maps the page explicitly. */ if ((m->flags & PG_ZERO) == 0) { if (pmap_is_current(pmap)) { pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex); bzero((caddr_t) pteva, PAGE_SIZE); } else { pmap_zero_page(m); } } vm_page_lock_queues(); m->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(m, PG_ZERO); vm_page_wakeup(m); vm_page_unlock_queues(); return m; } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va) { unsigned ptepindex; pd_entry_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; pmap_invalidate_all(kernel_pmap); } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { /* * In order to get the page table page, try the * hint first. */ if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { m = pmap->pm_pteobj->root; } else { m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); } m->hold_count++; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
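*/

/*
 * pmap_growkernel() below rounds addr up to the next page directory
 * boundary with roundup2(addr, PAGE_SIZE * NPTEPG), i.e. 4096 * 1024 =
 * 4 MB. The macro only works for power-of-two alignments; a worked
 * sketch:
 */
#define DEMO_ROUNDUP2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

/* Example: DEMO_ROUNDUP2(0xc1234567u, 4u << 20) == 0xc1400000. */

/*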
*/ void pmap_release(pmap_t pmap) { vm_object_t object; vm_page_t m; int i; object = pmap->pm_pteobj; KASSERT(object->ref_count == 1, ("pmap_release: pteobj reference count %d != 1", object->ref_count)); KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); bzero(pmap->pm_pdir + KPTDI, nkpt * sizeof(*pmap->pm_pdir)); for (i = 0; i < NPGPTD; i++) { pmap->pm_pdir[PTDPTDI + i] = 0; pmap->pm_pdir[APTDPTDI + i] = 0; } #ifdef SMP pmap->pm_pdir[MPPTDI] = 0; #endif pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); vm_page_lock_queues(); for (i = 0; i < NPGPTD; i++) { m = TAILQ_FIRST(&object->memq); +#ifdef PAE + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_busy(m); vm_page_free_zero(m); } KASSERT(TAILQ_EMPTY(&object->memq), ("pmap_release: leaking page table pages")); vm_page_unlock_queues(); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct pmap *pmap; int s; vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; s = splhigh(); mtx_assert(&kernel_map->system_mtx, MA_OWNED); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); nkpt++; } } addr = roundup2(addr, PAGE_SIZE * NPTEPG); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } /* * This index is bogus, but out of the way */ nkpg = vm_page_alloc(NULL, nkpt, VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(PTD, kernel_vm_end) = newpdir; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { *pmap_pde(pmap, kernel_vm_end) = newpdir; } mtx_unlock_spin(&allpmaps_lock); kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ static PMAP_INLINE void free_pv_entry(pv_entry_t pv) { pv_entry_count--; uma_zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. 
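 *
 * Aside: pmap_growkernel() above advances kernel_vm_end one page table at
 * a time with the idiom "(x + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1)".
 * With 4K pages and 1024 ptes per page table that is a 4MB granule; note
 * that the expression moves to the *next* boundary even when x is already
 * aligned, which is what the loop wants.  A standalone restatement:
 */

#include <assert.h>
#include <stdint.h>

#define NBPDR 0x400000u		/* 4MB mapped per page table (non-PAE) */

static uint32_t
next_ptpage_boundary(uint32_t x)
{
	return ((x + NBPDR) & ~(NBPDR - 1));
}

int
main(void)
{
	assert(next_ptpage_boundary(0xc0000001) == 0xc0400000);
	/* an already-aligned address still advances a full step */
	assert(next_ptpage_boundary(0xc0400000) == 0xc0800000);
	return (0);
}

/*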
*/ static pv_entry_t get_pv_entry(void) { pv_entry_count++; if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) && (pmap_pagedaemon_waken == 0)) { pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } return uma_zalloc(pvzone, M_NOWAIT); } /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ static int pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { pv_entry_t pv; int rtval; int s; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (m->md.pv_list_count < pmap->pm_stats.resident_count) { TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pmap == pv->pv_pmap && va == pv->pv_va) break; } } else { TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { if (va == pv->pv_va) break; } } rtval = 0; if (pv) { rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_WRITEABLE); TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } splx(s); return rtval; } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) { int s; pv_entry_t pv; s = splvm(); pv = get_pv_entry(); pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_ptem = mpte; TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); m->md.pv_list_count++; splx(s); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) { pt_entry_t oldpte; vm_page_t m; - oldpte = atomic_readandclear_int(ptq); + oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) oldpte)) { printf( "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", va, oldpte); } #endif if (pmap_track_modified(va)) vm_page_dirty(m); } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); return pmap_remove_entry(pmap, m, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va) { pt_entry_t *pte; if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; int anyvalid; if (pmap == NULL) return; if (pmap->pm_stats.resident_count == 0) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva); return; } anyvalid = 0; for (; sva < eva; sva = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. 
*/ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anyvalid = 1; continue; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (; sva != pdnxt; sva += PAGE_SIZE) { if ((pte = pmap_pte_quick(pmap, sva)) == NULL || *pte == 0) continue; anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva)) break; } } if (anyvalid) pmap_invalidate_all(pmap); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { register pv_entry_t pv; pt_entry_t *pte, tpte; int s; #if defined(PMAP_DIAGNOSTIC) /* * XXX This makes pmap_remove_all() illegal for non-managed pages! */ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); } #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - tpte = atomic_readandclear_int(pte); + tpte = pte_load_clear(pte); if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf( "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", pv->pv_va, tpte); } #endif if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } pmap_invalidate_page(pv->pv_pmap, pv->pv_va); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } vm_page_flag_clear(m, PG_WRITEABLE); splx(s); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; int anychanged; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; anychanged = 0; for (; sva < eva; sva = pdnxt) { unsigned pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. 
*/ if ((ptpaddr & PG_PS) != 0) { pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anychanged = 1; continue; } if (pdnxt > eva) pdnxt = eva; for (; sva != pdnxt; sva += PAGE_SIZE) { pt_entry_t pbits; pt_entry_t *pte; vm_page_t m; if ((pte = pmap_pte_quick(pmap, sva)) == NULL) continue; pbits = *pte; if (pbits & PG_MANAGED) { m = NULL; if (pbits & PG_A) { m = PHYS_TO_VM_PAGE(pbits); vm_page_flag_set(m, PG_REFERENCED); pbits &= ~PG_A; } if ((pbits & PG_M) != 0 && pmap_track_modified(sva)) { if (m == NULL) m = PHYS_TO_VM_PAGE(pbits); vm_page_dirty(m); pbits &= ~PG_M; } } pbits &= ~PG_RW; if (pbits != *pte) { *pte = pbits; anychanged = 1; } } } if (anychanged) pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { vm_paddr_t pa; register pt_entry_t *pte; vm_paddr_t opa; pt_entry_t origpte, newpte; vm_page_t mpte; if (pmap == NULL) return; va &= PG_FRAME; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va); } #if 0 && defined(PMAP_DIAGNOSTIC) else { pd_entry_t *pdeaddr = pmap_pde(pmap, va); origpte = *pdeaddr; if ((origpte & PG_V) == 0) { panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", pmap->pm_pdir[PTDPTDI], origpte, va); } } #endif pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; origpte = *pte; opa = origpte & PG_FRAME; if (origpte & PG_PS) panic("pmap_enter: attempted pmap_enter on 4MB page"); /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) origpte)) { printf( "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", va, origpte); } #endif /* * Remove extra pte reference */ if (mpte) mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; pmap_invalidate_page(pmap, va); } return; } /* * We might be turning off write access to the page, * so we go ahead and sense modify status. 
*/ if (origpte & PG_MANAGED) { if ((origpte & PG_M) && pmap_track_modified(va)) { vm_page_t om; om = PHYS_TO_VM_PAGE(opa); vm_page_dirty(om); } pa |= PG_MANAGED; } goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { int err; vm_page_lock_queues(); err = pmap_remove_pte(pmap, pte, va); vm_page_unlock_queues(); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_initialized && (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { pmap_insert_entry(pmap, va, mpte, m); pa |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { pmap_invalidate_page(pmap, va); } } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { unsigned ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->hold_count++; } else { retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) panic("pmap_enter_quick: unexpected mapping into 4MB page"); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; } else { mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); } if (mpte == NULL) goto retry; mpte->hold_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte != NULL) { vm_page_lock_queues(); pmap_unwire_pte_hold(pmap, mpte); vm_page_unlock_queues(); } return 0; } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) pmap_insert_entry(pmap, va, mpte, m); /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m); /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) *pte = pa | PG_V | PG_U; else *pte = pa | PG_V | PG_U | PG_MANAGED; return mpte; } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. 
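 *
 * Aside: the "validate:" tail of pmap_enter() above builds the new pte by
 * or-ing flag bits into the page frame address.  On non-PAE i386 the
 * frame occupies bits 12-31 and the low 12 bits are flags; the values
 * below are the architectural bit assignments behind the PG_ macros.  A
 * standalone sketch of the composition:
 */

#include <stdint.h>
#include <stdio.h>

#define PG_V	 0x001		/* present */
#define PG_RW	 0x002		/* writable */
#define PG_U	 0x004		/* user accessible */
#define PG_A	 0x020		/* accessed */
#define PG_FRAME 0xfffff000u	/* frame mask, non-PAE */

int
main(void)
{
	uint32_t pa = 0x12345000;
	uint32_t newpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_U | PG_A;

	printf("pte %#x: frame %#x, flags %#x\n", (unsigned)newpte,
	    (unsigned)(newpte & PG_FRAME), (unsigned)(newpte & ~PG_FRAME));
	return (0);
}

/*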
*/ void * pmap_kenter_temporary(vm_offset_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); #ifndef I386_CPU invlpg(va); #else invltlb(); #endif return ((void *)crashdumpmap); } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int limit) { vm_offset_t tmpidx; int psize; vm_page_t p, mpte; if (pmap == NULL || object == NULL) return; /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ if (pseflag && (object->type == OBJT_DEVICE) && ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { int i; vm_page_t m[1]; unsigned int ptepindex; int npdes; pd_entry_t ptepa; if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) return; retry: p = vm_page_lookup(object, pindex); if (p != NULL) { vm_page_lock_queues(); if (vm_page_sleep_if_busy(p, FALSE, "init4p")) goto retry; } else { p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); if (p == NULL) return; m[0] = p; if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { vm_page_lock_queues(); vm_page_free(p); vm_page_unlock_queues(); return; } p = vm_page_lookup(object, pindex); vm_page_lock_queues(); vm_page_wakeup(p); } vm_page_unlock_queues(); ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) { return; } p->valid = VM_PAGE_BITS_ALL; pmap->pm_stats.resident_count += size >> PAGE_SHIFT; npdes = size >> PDRSHIFT; for(i = 0; i < npdes; i++) { pmap->pm_pdir[ptepindex] = ptepa | PG_U | PG_RW | PG_V | PG_PS; ptepa += NBPDR; ptepindex += 1; } pmap_invalidate_all(kernel_pmap); return; } psize = i386_btop(size); if ((object->type != OBJT_VNODE) || ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) { if (object->size < pindex) return; psize = object->size - pindex; } mpte = NULL; if ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->pindex < pindex) { p = vm_page_splay(pindex, object->root); if ((object->root = p)->pindex < pindex) p = TAILQ_NEXT(p, listq); } } /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. */ for (; p != NULL && (tmpidx = p->pindex - pindex) < psize; p = TAILQ_NEXT(p, listq)) { /* * don't allow a madvise to blow away our really * free pages by allocating pv entries. */ if ((limit & MAP_PREFAULT_MADVISE) && cnt.v_free_count < cnt.v_free_reserved) { break; } vm_page_lock_queues(); if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), p, mpte); vm_page_lock_queues(); vm_page_wakeup(p); } vm_page_unlock_queues(); } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a process's address space. It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time.
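 *
 * Aside: the OBJT_DEVICE fast path in pmap_object_init_pt() above only
 * installs 4MB PG_PS mappings when the virtual address, the size, and
 * (checked later via ptepa) the physical base all sit on superpage
 * boundaries.  The test is the usual power-of-two mask trick, sketched
 * standalone (NBPDR is the non-PAE value):
 */

#include <stdbool.h>
#include <stdint.h>

#define NBPDR 0x400000u			/* 4MB superpage (non-PAE) */

static bool
superpage_ok(uint32_t addr, uint32_t size, uint32_t pa)
{
	/* va, length, and physical base must all be 4MB aligned */
	return ((addr & (NBPDR - 1)) == 0 &&
	    (size & (NBPDR - 1)) == 0 &&
	    (pa & (NBPDR - 1)) == 0);
}

/*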
*/ #define PFBAK 4 #define PFFOR 4 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -1 * PAGE_SIZE, 1 * PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE, -3 * PAGE_SIZE, 3 * PAGE_SIZE, -4 * PAGE_SIZE, 4 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; vm_page_t m, mpte; vm_object_t object; if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; pt_entry_t *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if ((*pmap_pde(pmap, addr)) == 0) continue; pte = vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give up when a page is not in memory */ if (m == NULL) break; vm_page_lock_queues(); if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->busy == 0) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((m->queue - m->pc) == PQ_CACHE) { vm_page_deactivate(m); } vm_page_busy(m); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, addr, m, mpte); vm_page_lock_queues(); vm_page_wakeup(m); } vm_page_unlock_queues(); } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register pt_entry_t *pte; if (pmap == NULL) return; pte = pmap_pte_quick(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; vm_page_t m; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; unsigned ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); /* * Don't let optional prefaulting of pages make us go * way below the low water mark of free pages or way * above the high water mark of used pv entries.
*/ if (cnt.v_free_count < cnt.v_free_reserved || pv_entry_count > pv_entry_high_water) break; pdnxt = (addr + NBPDR) & ~PDRMASK; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } continue; } srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); if ((srcmpte == NULL) || (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) continue; if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { /* * We have to check after allocpte for the * pte still being around... allocpte can * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); dst_pte = pmap_pte_quick(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and * accessed (referenced) bits * during the copy. */ m = PHYS_TO_VM_PAGE(ptetemp); *dst_pte = ptetemp & ~(PG_M | PG_A); dst_pmap->pm_stats.resident_count++; pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { vm_page_lock_queues(); pmap_unwire_pte_hold(dst_pmap, dstmpte); vm_page_unlock_queues(); } if (dstmpte->hold_count >= srcmpte->hold_count) break; } addr += PAGE_SIZE; src_pte++; } } } #ifdef SMP /* * pmap_zpi_switchin*() * * These functions allow us to avoid doing IPIs altogether in certain * temporary page-mapping situations (page zeroing). Instead, to deal * with being preempted and moved onto a different cpu, we invalidate * the page when the scheduler switches us in. This does not occur * very often so we remain relatively optimal with very little effort. */ static void pmap_zpi_switchin12(void) { invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); } static void pmap_zpi_switchin2(void) { invlpg((u_int)CADDR2); } static void pmap_zpi_switchin3(void) { invlpg((u_int)CADDR3); } #endif /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin2; #endif invlpg((u_int)CADDR2); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); else #endif bzero(CADDR2, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page.
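 *
 * Aside: pmap_zero_page() above follows a fixed pattern -- point a
 * reserved pte (CMAP2) at the target frame, flush the stale TLB entry for
 * the matching window address (CADDR2), zero through the window, then
 * unmap.  A compilable outline with stand-ins (the fake_* names are
 * hypothetical; the real code manipulates a live pte and the TLB):
 */

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096
#define PG_V  0x001
#define PG_RW 0x002
#define PG_A  0x020
#define PG_M  0x040

static uint32_t fake_cmap2;		/* stands in for the CMAP2 pte */
static char fake_caddr2[PAGE_SIZE];	/* stands in for the CADDR2 window */

static void
zero_frame_outline(uint32_t pa)
{
	fake_cmap2 = pa | PG_V | PG_RW | PG_A | PG_M;	/* 1. map frame */
	/* 2. the kernel would invlpg the window here */
	memset(fake_caddr2, 0, PAGE_SIZE);		/* 3. zero it */
	fake_cmap2 = 0;					/* 4. unmap */
}

/*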
*/ void pmap_zero_page_area(vm_page_t m, int off, int size) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin2; #endif invlpg((u_int)CADDR2); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); else #endif bzero((char *)CADDR2 + off, size); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) panic("pmap_zero_page: CMAP3 busy"); *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin3; #endif invlpg((u_int)CADDR3); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR3); else #endif bzero(CADDR3, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP3 = 0; } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t src, vm_page_t dst) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*CMAP2) panic("pmap_copy_page: CMAP2 busy"); *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A; *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin12; #endif invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); #endif bcopy(CADDR1, CADDR2, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP1 = 0; *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap, m) pmap_t pmap; vm_page_t m; { pv_entry_t pv; int loops = 0; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } loops++; if (loops >= 16) break; } splx(s); return (FALSE); } #define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from the specified address space; * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space.
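 *
 * Aside: pmap_page_exists_quick() above deliberately stops after 16 pv
 * entries; an approximate answer is fine for page aging, and the cap
 * bounds the cost per page.  The same bounded-scan shape on a plain
 * singly linked list, standalone:
 */

#include <stdbool.h>
#include <stddef.h>

struct pv {
	const void	*pv_pmap;
	struct pv	*pv_next;
};

static bool
exists_quick(const struct pv *head, const void *pmap)
{
	int loops = 0;
	const struct pv *pv;

	for (pv = head; pv != NULL; pv = pv->pv_next) {
		if (pv->pv_pmap == pmap)
			return (true);
		if (++loops >= 16)	/* cap the scan; misses are tolerable */
			break;
	}
	return (false);
}

/*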
*/ void pmap_remove_pages(pmap, sva, eva) pmap_t pmap; vm_offset_t sva, eva; { pt_entry_t *pte, tpte; vm_page_t m; pv_entry_t pv, npv; int s; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); s = splvm(); for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY pte = vtopte(pv->pv_va); #else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif tpte = *pte; if (tpte == 0) { printf("TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; } m = PHYS_TO_VM_PAGE(tpte); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pv->pv_pmap->pm_stats.resident_count--; *pte = 0; /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { vm_page_dirty(m); } npv = TAILQ_NEXT(pv, pv_plist); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); m->md.pv_list_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_FIRST(&m->md.pv_list) == NULL) { vm_page_flag_clear(m, PG_WRITEABLE); } pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } splx(s); pmap_invalidate_all(pmap); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { pv_entry_t pv; pt_entry_t *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ if (!pmap_track_modified(pv->pv_va)) continue; #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (*pte & PG_M) { splx(s); return TRUE; } } splx(s); return (FALSE); } /* * this routine is used to modify bits in ptes */ static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem) { register pv_entry_t pv; register pt_entry_t *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || (!setem && bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) return; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); /* * Loop over all current mappings, setting/clearing as appropriate. If * setting RO, do we need to clear the VAC?
*/ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { /* * don't write protect pager mappings */ if (!setem && (bit == PG_RW)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (setem) { *pte |= bit; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } else { pt_entry_t pbits = *pte; if (pbits & bit) { if (bit == PG_RW) { if (pbits & PG_M) { vm_page_dirty(m); } *pte = pbits & ~(PG_M|PG_RW); } else { *pte = pbits & ~bit; } pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } } } if (!setem && bit == PG_RW) vm_page_flag_clear(m, PG_WRITEABLE); splx(s); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(vm_page_t m, vm_prot_t prot) { if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { pmap_changebit(m, PG_RW, FALSE); } else { pmap_remove_all(m); } } } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { register pv_entry_t pv, pvf, pvn; pt_entry_t *pte; int s; int rtval = 0; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return (rtval); s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); if (!pmap_track_modified(pv->pv_va)) continue; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { *pte &= ~PG_A; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); rtval++; if (rtval > 4) { break; } } } while ((pv = pvn) != NULL && pv != pvf); } splx(s); return (rtval); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { pmap_changebit(m, PG_M, FALSE); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { pmap_changebit(m, PG_A, FALSE); } /* * Miscellaneous support routines follow */ static void i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: /* * Read access is also 0. There isn't any execute bit, * so just make it readable. */ case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = 0; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
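 *
 * Aside: i386_protection_init() above collapses the eight
 * VM_PROT_{READ,WRITE,EXECUTE} combinations into just two pte patterns,
 * because i386 page table entries carry no execute bit: anything readable
 * or executable maps to 0, anything writable adds PG_RW.  The whole table
 * reduces to one test, sketched standalone:
 */

#include <stdio.h>

#define VM_PROT_WRITE 0x2	/* matches the vm_prot_t bit assignment */
#define PG_RW	      0x002

int
main(void)
{
	int protection_codes[8];
	int prot;

	for (prot = 0; prot < 8; prot++)
		protection_codes[prot] = (prot & VM_PROT_WRITE) ? PG_RW : 0;
	for (prot = 0; prot < 8; prot++)
		printf("prot %d -> pte bits %#x\n", prot, protection_codes[prot]);
	return (0);
}

/*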
*/ void * pmap_mapdev(pa, size) vm_paddr_t pa; vm_size_t size; { vm_offset_t va, tmpva, offset; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); GIANT_REQUIRED; va = kmem_alloc_pageable(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; for (tmpva = va; size > 0; ) { pmap_kenter(tmpva, pa); size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, va, tmpva); return ((void *)(va + offset)); } void pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { vm_offset_t base, offset, tmpva; pt_entry_t *pte; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { pte = vtopte(tmpva); *pte = 0; } pmap_invalidate_range(kernel_pmap, va, tmpva); kmem_free(kernel_map, base, size); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { pt_entry_t *ptep, pte; vm_page_t m; int val = 0; ptep = pmap_pte_quick(pmap, addr); if (ptep == 0) { return 0; } if ((pte = *ptep) != 0) { vm_paddr_t pa; val = MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) return val; pa = pte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); /* * Modified by us */ if (pte & PG_M) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; else { /* * Modified by someone else */ vm_page_lock_queues(); if (m->dirty || pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; vm_page_unlock_queues(); } /* * Referenced by us */ if (pte & PG_A) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; else { /* * Referenced by someone else */ vm_page_lock_queues(); if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } vm_page_unlock_queues(); } } return val; } void pmap_activate(struct thread *td) { struct proc *p = td->td_proc; pmap_t pmap; u_int32_t cr3; pmap = vmspace_pmap(td->td_proc->p_vmspace); #if defined(SMP) pmap->pm_active |= PCPU_GET(cpumask); #else pmap->pm_active |= 1; #endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else cr3 = vtophys(pmap->pm_pdir); +#endif /* XXXKSE this is wrong. * pmap_activate is for the current thread on the current cpu */ if (p->p_flag & P_THREADED) { /* Make sure all other cr3 entries are updated. */ /* what if they are running? 
XXXKSE (maybe abort them) */ FOREACH_THREAD_IN_PROC(p, td) { td->td_pcb->pcb_cr3 = cr3; } } else { td->td_pcb->pcb_cr3 = cr3; } load_cr3(cr3); #ifdef SWTCH_OPTIM_STATS tlb_flush_count++; #endif } vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { return addr; } addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return addr; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return npte; } pte = pmap_pte_quick(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return npte; } #endif #if defined(DEBUG) static void pads(pmap_t pm); void pmap_pvdump(vm_offset_t pa); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { int i, j; vm_paddr_t va; pt_entry_t *ptep; if (pm == kernel_pmap) return; for (i = 0; i < NPDEPTD; i++) if (pm->pm_pdir[i]) for (j = 0; j < NPTEPG; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte_quick(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *ptep); }; } void pmap_pvdump(pa) vm_paddr_t pa; { pv_entry_t pv; vm_page_t m; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif Index: head/sys/amd64/amd64/vm_machdep.c =================================================================== --- head/sys/amd64/amd64/vm_machdep.c (revision 112840) +++ head/sys/amd64/amd64/vm_machdep.c (revision 112841) @@ -1,559 +1,567 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD$ */ #include "opt_npx.h" #ifdef PC98 #include "opt_pc98.h" #endif #include "opt_reset.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PC98 #include #else #include #endif static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif extern int _ucodesel, _udatasel; /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * is ready to run and return to user mode. */ void cpu_fork(td1, p2, td2, flags) register struct thread *td1; register struct proc *p2; struct thread *td2; int flags; { register struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; #ifdef DEV_NPX register_t savecrit; #endif p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt = mdp1->md_ldt; if (pldt && pldt->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_free(td1); } } return; } /* Ensure that p1's pcb is up to date. */ #ifdef DEV_NPX if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); savecrit = intr_disable(); if (PCPU_GET(fpcurthread) == td1) npxsave(&td1->td_pcb->pcb_save); intr_restore(savecrit); #endif /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *)(td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td2->td_pcb = pcb2; /* Copy p1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -16 is so we can expand the trapframe if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * Set registers for trampoline to user mode.
Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = 0; /* Copy the LDT, if necessary. */ mtx_lock_spin(&sched_lock); if (mdp2->md_ldt != 0) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&sched_lock); /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_set_fork_handler(td, func, arg) struct thread *td; void (*func)(void *); void *arg; { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { struct mdproc *mdp; mdp = &td->td_proc->p_md; if (mdp->md_ldt) user_ldt_free(td); reset_dbregs(); } void cpu_thread_exit(struct thread *td) { struct pcb *pcb = td->td_pcb; #ifdef DEV_NPX npxexit(td); #endif if (pcb->pcb_flags & PCB_DBREGS) { /* * disable all hardware breakpoints */ reset_dbregs(); pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != 0) { /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */ /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ mtx_lock(&Giant); kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext, ctob(IOPAGES + 1)); mtx_unlock(&Giant); pcb->pcb_ext = 0; } } void cpu_sched_exit(td) register struct thread *td; { } void cpu_thread_setup(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Put enough state in the new thread's PCB to get it to go back to * userret(), where we can intercept it again to set the return (upcall) * address and stack, along with those from upcalls that are from other sources * such as those generated in thread_userret() itself.
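 *
 * Aside: the PAE hunks in cpu_fork() above and cpu_set_upcall() below make
 * the same substitution -- under PAE, %cr3 must hold the physical address
 * of the small page-directory-pointer table rather than the page directory
 * itself.  Sketched with a hypothetical struct whose field names mirror
 * this commit (fake_vtophys is a stand-in, not the kernel interface):
 */

#include <stdint.h>

struct fake_pmap {
	void	*pm_pdir;	/* page directory page(s) */
	void	*pm_pdpt;	/* PAE: page-directory-pointer table */
};

/* stand-in for vtophys(); identity translation, illustration only */
static uint32_t
fake_vtophys(void *va)
{
	return ((uint32_t)(uintptr_t)va);
}

static uint32_t
cr3_for(struct fake_pmap *pm, int pae)
{
	return (pae ? fake_vtophys(pm->pm_pdpt) : fake_vtophys(pm->pm_pdir));
}

/*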
*/ void cpu_set_upcall(struct thread *td, void *pcb) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. * * XXXKSE It might be a good idea to simply skip this as * the values of the other registers may be unimportant. * This would remove any requirement for knowing the KSE * at this time (see the matching comment below for * more analysis) (need a good safe default). */ bcopy(pcb, pcb2, sizeof(*pcb2)); /* * Create a new fresh stack for the new thread. * The -16 is so we can expand the trapframe if we go to vm86. * Don't forget to set this stack value into whatever supplies * the address for the fault handlers. * The contexts are filled in at the time we actually DO the * upcall as only then do we know which KSE we got. */ td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. XXXKSE ??? * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the uts * function. */ td->td_frame->tf_esp = (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16; td->td_frame->tf_eip = (int)ku->ku_func; /* * Pass the address of the mailbox for this kse to the uts * function as a parameter on the stack. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)ku->ku_mailbox); } void cpu_wait(p) struct proc *p; { } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } /* * Force reset the processor by invalidating the entire address space! */ #ifdef SMP static void cpu_reset_proxy() { cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ stop_cpus((1<" */ invltlb(); /* NOTREACHED */ while(1); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. 
* Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(addr) vm_offset_t addr; { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } Index: head/sys/amd64/include/bus_amd64.h =================================================================== --- head/sys/amd64/include/bus_amd64.h (revision 112840) +++ head/sys/amd64/include/bus_amd64.h (revision 112841) @@ -1,1216 +1,1224 @@ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _I386_BUS_AT386_H_ #define _I386_BUS_AT386_H_ #include /* * To remain compatible with NetBSD's interface, default to both memio and * pio when neither of them is defined. */ #if !defined(_I386_BUS_PIO_H_) && !defined(_I386_BUS_MEMIO_H_) #define _I386_BUS_PIO_H_ #define _I386_BUS_MEMIO_H_ #endif /* * Values for the i386 bus space tag, not to be used directly by MI code. */ #define I386_BUS_SPACE_IO 0 /* space is i/o space */ #define I386_BUS_SPACE_MEM 1 /* space is mem space */ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) /* * Access methods for bus resources and address space. */ typedef int bus_space_tag_t; typedef u_int bus_space_handle_t; /* * Map a region of device bus space into CPU virtual address space. */ #define BUS_SPACE_MAP_CACHEABLE 0x01 #define BUS_SPACE_MAP_LINEAR 0x02 int bus_space_map(bus_space_tag_t t, bus_addr_t addr, bus_size_t size, int flags, bus_space_handle_t *bshp); /* * Unmap a region of device bus space. */ static __inline void bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_unmap(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } /* * Get a new handle for a subregion of an already-mapped area of bus space. */ static __inline int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp); static __inline int bus_space_subregion(bus_space_tag_t t __unused, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size __unused, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } /* * Allocate a region of memory that is accessible to devices in bus space. */ int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t align, bus_size_t boundary, int flags, bus_addr_t *addrp, bus_space_handle_t *bshp); /* * Free a region of bus space accessible memory. 
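 *
 * Aside: the bus_addr_t change above is the MI-visible half of PAE --
 * device addresses handed to busdma may now exceed 32 bits, while
 * bus_size_t stays 32-bit since no single mapping can outgrow the 4GB
 * virtual address space.  A standalone restatement of the typedefs:
 */

#include <stdint.h>
#include <stdio.h>

#ifdef PAE
typedef uint64_t bus_addr_t;
#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL
#else
typedef uint32_t bus_addr_t;
#define BUS_SPACE_MAXADDR 0xFFFFFFFFu
#endif
typedef uint32_t bus_size_t;

int
main(void)
{
	printf("bus_addr_t: %zu bytes, max address %#jx\n",
	    sizeof(bus_addr_t), (uintmax_t)BUS_SPACE_MAXADDR);
	return (0);
}

/*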
*/ static __inline void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_free(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } #if defined(_I386_BUS_PIO_H_) || defined(_I386_BUS_MEMIO_H_) /* * Read a 1, 2, 4, or 8 byte quantity from bus space * described by tag/handle/offset. */ static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined (_I386_BUS_PIO_H_) #if defined (_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inb(handle + offset)); #endif #if defined (_I386_BUS_MEMIO_H_) return (*(volatile u_int8_t *)(handle + offset)); #endif } static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inw(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int16_t *)(handle + offset)); #endif } static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inl(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int32_t *)(handle + offset)); #endif } #if 0 /* Cause a link error for bus_space_read_8 */ #define bus_space_read_8(t, h, o) !!! bus_space_read_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle/offset and copy into buffer provided. 
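 *
 * The _multi_ forms re-read the same bus location `count' times, which is
 * what a FIFO-style data register wants.  A sketch with hypothetical
 * driver state `bst'/`bsh' and an assumed data-port offset of 0x10:
 *
 *	u_int32_t buf[16];
 *
 *	bus_space_read_multi_4(bst, bsh, 0x10, buf, 16);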
*/ static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movb (%2),%%al \n\ stosb \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movw (%2),%%ax \n\ stosw \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movl (%2),%%eax \n\ stosl \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_multi_8 */ #define bus_space_read_multi_8 !!! bus_space_read_multi_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle and starting at `offset' and copy into * buffer provided. 
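 *
 * Unlike the _multi_ forms, the _region_ forms advance the bus offset for
 * each datum, copying a window of consecutive device locations.  Under
 * the same hypothetical names as above:
 *
 *	u_int8_t shadow[64];
 *
 *	bus_space_read_region_1(bst, bsh, 0, shadow, sizeof(shadow));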
*/ static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inb %w2,%%al \n\ stosb \n\ incl %2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inw %w2,%%ax \n\ stosw \n\ addl $2,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inl %w2,%%eax \n\ stosl \n\ addl $4,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_region_8 */ #define bus_space_read_region_8 !!! bus_space_read_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `value' to bus space * described by tag/handle/offset. 
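 *
 * A one-line usage sketch, with the handle `bsh' and the 0x04
 * command-register offset both hypothetical:
 *
 *	bus_space_write_2(bst, bsh, 0x04, 0x0001);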
*/ static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value); static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value); static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value); static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outb(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int8_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outw(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int16_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outl(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int32_t *)(bsh + offset) = value; #endif } #if 0 /* Cause a link error for bus_space_write_8 */ #define bus_space_write_8 !!! bus_space_write_8 not implemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer * provided to bus space described by tag/handle/offset. */ static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ movb %%al,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ movw %%ax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if 
defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ movl %%eax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_write_multi_8 */ #define bus_space_write_multi_8(t, h, o, a, c) \ !!! bus_space_write_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer provided * to bus space described by tag/handle starting at `offset'. */ static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ outb %%al,%w0 \n\ incl %0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ outw %%ax,%w0 \n\ addl $2,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ outl %%eax,%w0 \n\ addl $4,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for 
bus_space_write_region_8 */ #define bus_space_write_region_8 \ !!! bus_space_write_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle/offset `count' times. */ static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_multi_8 */ #define bus_space_set_multi_8 !!! bus_space_set_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle starting at `offset'. 
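 *
 * A sketch: clear an assumed 4KB frame-buffer window through a
 * hypothetical memory-space handle `memh', one 32-bit word at a time:
 *
 *	bus_space_set_region_4(I386_BUS_SPACE_MEM, memh, 0, 0, 0x1000 >> 2);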
*/ static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr++) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr++) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 2) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 2) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 4) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 4) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_region_8 */ #define bus_space_set_region_8 !!! bus_space_set_region_8 unimplemented !!! #endif /* * Copy `count' 1, 2, 4, or 8 byte values from bus space starting * at tag/bsh1/off1 to bus space starting at tag/bsh2/off2. 
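 *
 * The copy is overlap-safe in the memmove() sense: the implementation
 * below picks the copy direction from the two addresses.  A sketch that
 * scrolls an assumed 80x25 text screen up by one row (handle `vgah'
 * hypothetical):
 *
 *	bus_space_copy_region_2(I386_BUS_SPACE_MEM, vgah, 80 * 2, vgah, 0,
 *	    80 * 24);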
*/ static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) outb(addr2, inb(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; 
count--, addr1 += 4, addr2 += 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } } #endif } #endif /* defined(_I386_BUS_PIO_H_) || defined(_I386_MEM_IO_H_) */ #if 0 /* Cause a link error for bus_space_copy_8 */ #define bus_space_copy_region_8 !!! bus_space_copy_region_8 unimplemented !!! #endif /* * Bus read/write barrier methods. * * void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh, * bus_size_t offset, bus_size_t len, int flags); * * * Note that BUS_SPACE_BARRIER_WRITE doesn't do anything other than * prevent reordering by the compiler; all Intel x86 processors currently * retire operations outside the CPU in program order. */ #define BUS_SPACE_BARRIER_READ 0x01 /* force read barrier */ #define BUS_SPACE_BARRIER_WRITE 0x02 /* force write barrier */ static __inline void bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused, bus_size_t offset __unused, bus_size_t len __unused, int flags) { #ifdef __GNUC__ if (flags & BUS_SPACE_BARRIER_READ) __asm __volatile("lock; addl $0,0(%%esp)" : : : "memory"); else __asm __volatile("" : : : "memory"); #endif } #endif /* _I386_BUS_AT386_H_ */ Index: head/sys/amd64/include/bus_at386.h =================================================================== --- head/sys/amd64/include/bus_at386.h (revision 112840) +++ head/sys/amd64/include/bus_at386.h (revision 112841) @@ -1,1216 +1,1224 @@ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _I386_BUS_AT386_H_ #define _I386_BUS_AT386_H_ #include /* * To remain compatible with NetBSD's interface, default to both memio and * pio when neither of them is defined. */ #if !defined(_I386_BUS_PIO_H_) && !defined(_I386_BUS_MEMIO_H_) #define _I386_BUS_PIO_H_ #define _I386_BUS_MEMIO_H_ #endif /* * Values for the i386 bus space tag, not to be used directly by MI code. */ #define I386_BUS_SPACE_IO 0 /* space is i/o space */ #define I386_BUS_SPACE_MEM 1 /* space is mem space */ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) /* * Access methods for bus resources and address space. */ typedef int bus_space_tag_t; typedef u_int bus_space_handle_t; /* * Map a region of device bus space into CPU virtual address space. 
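 *
 * A hedged sketch of mapping a device's registers, with the physical
 * address and size invented for the example:
 *
 *	bus_space_handle_t memh;
 *
 *	if (bus_space_map(I386_BUS_SPACE_MEM, 0xfe000000, 0x1000, 0,
 *	    &memh) != 0)
 *		return (ENXIO);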
*/ #define BUS_SPACE_MAP_CACHEABLE 0x01 #define BUS_SPACE_MAP_LINEAR 0x02 int bus_space_map(bus_space_tag_t t, bus_addr_t addr, bus_size_t size, int flags, bus_space_handle_t *bshp); /* * Unmap a region of device bus space. */ static __inline void bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_unmap(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } /* * Get a new handle for a subregion of an already-mapped area of bus space. */ static __inline int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp); static __inline int bus_space_subregion(bus_space_tag_t t __unused, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size __unused, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } /* * Allocate a region of memory that is accessible to devices in bus space. */ int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t align, bus_size_t boundary, int flags, bus_addr_t *addrp, bus_space_handle_t *bshp); /* * Free a region of bus space accessible memory. */ static __inline void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_free(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } #if defined(_I386_BUS_PIO_H_) || defined(_I386_BUS_MEMIO_H_) /* * Read a 1, 2, 4, or 8 byte quantity from bus space * described by tag/handle/offset. */ static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined (_I386_BUS_PIO_H_) #if defined (_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inb(handle + offset)); #endif #if defined (_I386_BUS_MEMIO_H_) return (*(volatile u_int8_t *)(handle + offset)); #endif } static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inw(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int16_t *)(handle + offset)); #endif } static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inl(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int32_t *)(handle + offset)); #endif } #if 0 /* Cause a link error for bus_space_read_8 */ #define bus_space_read_8(t, h, o) !!! bus_space_read_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle/offset and copy into buffer provided. 
*/ static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movb (%2),%%al \n\ stosb \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movw (%2),%%ax \n\ stosw \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movl (%2),%%eax \n\ stosl \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_multi_8 */ #define bus_space_read_multi_8 !!! bus_space_read_multi_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle and starting at `offset' and copy into * buffer provided. 
*/ static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inb %w2,%%al \n\ stosb \n\ incl %2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inw %w2,%%ax \n\ stosw \n\ addl $2,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inl %w2,%%eax \n\ stosl \n\ addl $4,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_region_8 */ #define bus_space_read_region_8 !!! bus_space_read_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `value' to bus space * described by tag/handle/offset. 
*/ static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value); static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value); static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value); static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outb(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int8_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outw(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int16_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outl(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int32_t *)(bsh + offset) = value; #endif } #if 0 /* Cause a link error for bus_space_write_8 */ #define bus_space_write_8 !!! bus_space_write_8 not implemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer * provided to bus space described by tag/handle/offset. */ static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ movb %%al,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ movw %%ax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if 
defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ movl %%eax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_write_multi_8 */ #define bus_space_write_multi_8(t, h, o, a, c) \ !!! bus_space_write_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer provided * to bus space described by tag/handle starting at `offset'. */ static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ outb %%al,%w0 \n\ incl %0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ outw %%ax,%w0 \n\ addl $2,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ outl %%eax,%w0 \n\ addl $4,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for 
bus_space_write_region_8 */ #define bus_space_write_region_8 \ !!! bus_space_write_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle/offset `count' times. */ static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_multi_8 */ #define bus_space_set_multi_8 !!! bus_space_set_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle starting at `offset'. 
*/ static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr++) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr++) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 2) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 2) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 4) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 4) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_region_8 */ #define bus_space_set_region_8 !!! bus_space_set_region_8 unimplemented !!! #endif /* * Copy `count' 1, 2, 4, or 8 byte values from bus space starting * at tag/bsh1/off1 to bus space starting at tag/bsh2/off2. 
*/ static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) outb(addr2, inb(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; 
count--, addr1 += 4, addr2 += 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } } #endif } #endif /* defined(_I386_BUS_PIO_H_) || defined(_I386_MEM_IO_H_) */ #if 0 /* Cause a link error for bus_space_copy_8 */ #define bus_space_copy_region_8 !!! bus_space_copy_region_8 unimplemented !!! #endif /* * Bus read/write barrier methods. * * void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh, * bus_size_t offset, bus_size_t len, int flags); * * * Note that BUS_SPACE_BARRIER_WRITE doesn't do anything other than * prevent reordering by the compiler; all Intel x86 processors currently * retire operations outside the CPU in program order. */ #define BUS_SPACE_BARRIER_READ 0x01 /* force read barrier */ #define BUS_SPACE_BARRIER_WRITE 0x02 /* force write barrier */ static __inline void bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused, bus_size_t offset __unused, bus_size_t len __unused, int flags) { #ifdef __GNUC__ if (flags & BUS_SPACE_BARRIER_READ) __asm __volatile("lock; addl $0,0(%%esp)" : : : "memory"); else __asm __volatile("" : : : "memory"); #endif } #endif /* _I386_BUS_AT386_H_ */ Index: head/sys/amd64/include/pmap.h =================================================================== --- head/sys/amd64/include/pmap.h (revision 112840) +++ head/sys/amd64/include/pmap.h (revision 112841) @@ -1,265 +1,317 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ /* * Page-directory and page-table entries follow this format, with a few * of the fields not present here and there, depending on a lot of things. */ /* ---- Intel Nomenclature ---- */ #define PG_V 0x001 /* P Valid */ #define PG_RW 0x002 /* R/W Read/Write */ #define PG_U 0x004 /* U/S User/Supervisor */ #define PG_NC_PWT 0x008 /* PWT Write through */ #define PG_NC_PCD 0x010 /* PCD Cache disable */ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ #define PG_MANAGED PG_AVAIL2 #define PG_FRAME (~((vm_paddr_t)PAGE_MASK)) #define PG_PROT (PG_RW|PG_U) /* all protection bits. */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* * Page Protection Exception bits */ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ /* * Size of Kernel address space. This is the number of page table pages * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). */ #ifndef KVA_PAGES +#ifdef PAE +#define KVA_PAGES 512 +#else #define KVA_PAGES 256 #endif +#endif /* * Pte related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT))) /* * The *PTDI values control the layout of virtual memory * * SMP_PRIVPAGES: The per-cpu address space is 0xff800000 -> 0xffbfffff */ #define APTDPTDI (NPDEPTD-NPGPTD) /* alt ptd entry that points to APTD */ #ifdef SMP #define MPPTDI (APTDPTDI-1) /* per cpu ptd entry */ #define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */ #else #define KPTDI (APTDPTDI-NKPDE) /* start of kernel virtual pde's */ #endif /* SMP */ #define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */ /* * XXX doesn't really belong here I guess... */ #define ISA_HOLE_START 0xa0000 #define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) #ifndef LOCORE #include <sys/queue.h> -typedef u_int32_t pd_entry_t; -typedef u_int32_t pt_entry_t; +#ifdef PAE +typedef uint64_t pdpt_entry_t; +typedef uint64_t pd_entry_t; +typedef uint64_t pt_entry_t; + +#define PTESHIFT (3) +#define PDESHIFT (3) + +#else + +typedef uint32_t pd_entry_t; +typedef uint32_t pt_entry_t; + #define PTESHIFT (2) #define PDESHIFT (2) +#endif + /* * Address of current and alternate address space page table maps * and directories.
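 *
 * Because the map is recursive (one pde points at the page directory
 * itself, per the header comment above), the pte mapping a given virtual
 * address sits at a fixed, computable spot inside PTmap, which is all
 * that vtopte() below does:
 *
 *	pte = PTmap + i386_btop(va)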
#ifdef _KERNEL extern pt_entry_t PTmap[], APTmap[]; extern pd_entry_t PTD[], APTD[]; extern pd_entry_t PTDpde[], APTDpde[]; +#ifdef PAE +extern pdpt_entry_t *IdlePDPT; +#endif extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ #endif #ifdef _KERNEL /* * virtual address to page table entry and * to physical address. Likewise for alternate address space. * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. */ #define vtopte(va) (PTmap + i386_btop(va)) #define avtopte(va) (APTmap + i386_btop(va)) /* * Routine: pmap_kextract * Function: * Extract the physical page address associated with a * kernel virtual address. */ static __inline vm_paddr_t pmap_kextract(vm_offset_t va) { vm_paddr_t pa; if ((pa = (vm_offset_t) PTD[va >> PDRSHIFT]) & PG_PS) { pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1)); } else { pa = *vtopte(va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); } return pa; } #define vtophys(va) pmap_kextract(((vm_offset_t) (va))) + +#ifdef PAE + +static __inline pt_entry_t +pte_load_clear(pt_entry_t *pte) +{ + pt_entry_t r; + + r = *pte; + __asm __volatile( + "1:\n" + "\tcmpxchg8b %1\n" + "\tjnz 1b" + : "+A" (r) + : "m" (*pte), "b" (0), "c" (0)); + return (r); +} + +#else + +#define pte_load_clear(pte) atomic_readandclear_int(pte) + #endif +#endif + /* * Pmap stuff */ struct pv_entry; struct md_page { int pv_list_count; TAILQ_HEAD(,pv_entry) pv_list; }; struct pmap { pd_entry_t *pm_pdir; /* KVA of page directory */ vm_object_t pm_pteobj; /* Container for pte's */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ +#ifdef PAE + pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer + table */ +#endif }; #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) #define pmap_resident_count(pmap) (pmap)->pm_stats.resident_count typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #endif /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_table. */ typedef struct pv_entry { pmap_t pv_pmap; /* pmap where mapping lies */ vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_list; TAILQ_ENTRY(pv_entry) pv_plist; vm_page_t pv_ptem; /* VM page for pte */ } *pv_entry_t; #ifdef _KERNEL #define NPPROVMTRR 8 #define PPRO_VMTRRphysBase0 0x200 #define PPRO_VMTRRphysMask0 0x201 struct ppro_vmtrr { u_int64_t base, mask; }; extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR]; extern caddr_t CADDR1; extern pt_entry_t *CMAP1; extern vm_paddr_t avail_end; extern vm_paddr_t avail_start; extern vm_offset_t clean_eva; extern vm_offset_t clean_sva; extern vm_paddr_t phys_avail[]; extern char *ptvmmap; /* poor name!
*/ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; void pmap_bootstrap(vm_paddr_t, vm_paddr_t); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void pmap_kremove(vm_offset_t); void *pmap_mapdev(vm_paddr_t, vm_size_t); void pmap_unmapdev(vm_offset_t, vm_size_t); pt_entry_t *pmap_pte_quick(pmap_t, vm_offset_t) __pure2; void pmap_set_opt(void); void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 (revision 112840) +++ head/sys/conf/options.i386 (revision 112841) @@ -1,184 +1,187 @@ # $FreeBSD$ # Options specific to the i386 platform kernels MATH_EMULATE opt_math_emulate.h GPL_MATH_EMULATE opt_math_emulate.h DISABLE_PSE opt_pmap.h PMAP_SHPGPERPROC opt_pmap.h DISABLE_PG_G opt_pmap.h PPC_PROBE_CHIPSET opt_ppc.h PPC_DEBUG opt_ppc.h MAXMEM PERFMON opt_perfmon.h POWERFAIL_NMI opt_trap.h AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h I586_PMC_GUPROF opt_i586_guprof.h COMPAT_OLDISA BROKEN_KEYBOARD_RESET opt_reset.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h IBCS2 opt_dontuse.h COMPAT_LINUX opt_dontuse.h COMPAT_SVR4 opt_dontuse.h DEBUG_SVR4 opt_svr4.h PECOFF_SUPPORT opt_dontuse.h PECOFF_DEBUG opt_pecoff.h # i386 SMP options APIC_IO opt_global.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h +# Physical address extensions and support for >4G ram. As above. +PAE opt_global.h + CLK_CALIBRATION_LOOP opt_clock.h CLK_USE_I8254_CALIBRATION opt_clock.h CLK_USE_TSC_CALIBRATION opt_clock.h TIMER_FREQ opt_clock.h NO_F00F_HACK opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h CPU_ELAN opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h NO_MEMORY_HOLE opt_cpu.h CPU_ENABLE_SSE opt_cpu.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_DISABLE_SSE opt_cpu.h CPU_DISABLE_CMPXCHG opt_global.h # Options for the AMD Elan CPU ELAN_PPS opt_cpu.h ELAN_XTAL opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. 
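For reference, the PAE knob added above is spelled like any other global option in a kernel configuration file; a hypothetical config fragment (not part of this change) would be:

    options         PAE
    options         KVA_PAGES=512   # optional; 512 is already the PAE default

Since both land in opt_global.h, changing either means recompiling the whole kernel, which is what the "As above" remark is pointing at.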
I386_CPU opt_global.h I486_CPU opt_global.h I586_CPU opt_global.h I686_CPU opt_global.h MAXCONS opt_syscons.h SC_ALT_MOUSE_IMAGE opt_syscons.h SC_CUT_SPACES2TABS opt_syscons.h SC_CUT_SEPCHARS opt_syscons.h SC_DEBUG_LEVEL opt_syscons.h SC_DFLT_FONT opt_syscons.h SC_DISABLE_DDBKEY opt_syscons.h SC_DISABLE_REBOOT opt_syscons.h SC_HISTORY_SIZE opt_syscons.h SC_KERNEL_CONS_ATTR opt_syscons.h SC_KERNEL_CONS_REV_ATTR opt_syscons.h SC_MOUSE_CHAR opt_syscons.h SC_NO_CUTPASTE opt_syscons.h SC_NO_FONT_LOADING opt_syscons.h SC_NO_HISTORY opt_syscons.h SC_NO_SYSMOUSE opt_syscons.h SC_NORM_ATTR opt_syscons.h SC_NORM_REV_ATTR opt_syscons.h SC_PIXEL_MODE opt_syscons.h SC_RENDER_DEBUG opt_syscons.h SC_TWOBUTTON_MOUSE opt_syscons.h SC_NO_SUSPEND_VTYSWITCH opt_syscons.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA opt_vesa.h VESA_DEBUG opt_vesa.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h PSM_DEBUG opt_psm.h ATKBD_DFLT_KEYMAP opt_atkbd.h KBD_DISABLE_KEYMAP_LOAD opt_kbd.h KBD_INSTALL_CDEV opt_kbd.h KBD_MAXRETRY opt_kbd.h KBD_MAXWAIT opt_kbd.h KBD_RESETDELAY opt_kbd.h KBDIO_DEBUG opt_kbd.h EISA_SLOTS opt_eisa.h # pcvt(4) has a bunch of options FAT_CURSOR opt_pcvt.h XSERVER opt_pcvt.h PCVT_24LINESDEF opt_pcvt.h PCVT_CTRL_ALT_DEL opt_pcvt.h PCVT_META_ESC opt_pcvt.h PCVT_NSCREENS opt_pcvt.h PCVT_PRETTYSCRNS opt_pcvt.h PCVT_SCANSET opt_pcvt.h PCVT_SCREENSAVER opt_pcvt.h PCVT_USEKBDSEC opt_pcvt.h PCVT_VT220KEYB opt_pcvt.h PCVT_GREENSAVER opt_pcvt.h # Video spigot SPIGOT_UNSECURE opt_spigot.h # ------------------------------- # isdn4bsd: passive ISA cards # ------------------------------- TEL_S0_8 opt_i4b.h TEL_S0_16 opt_i4b.h TEL_S0_16_3 opt_i4b.h AVM_A1 opt_i4b.h USR_STI opt_i4b.h ITKIX1 opt_i4b.h ELSA_PCC16 opt_i4b.h # ------------------------------- # isdn4bsd: passive ISA PnP cards # ------------------------------- CRTX_S0_P opt_i4b.h DRN_NGO opt_i4b.h TEL_S0_16_3_P opt_i4b.h SEDLBAUER opt_i4b.h DYNALINK opt_i4b.h ASUSCOM_IPAC opt_i4b.h ELSA_QS1ISA opt_i4b.h SIEMENS_ISURF2 opt_i4b.h EICON_DIVA opt_i4b.h COMPAQ_M610 opt_i4b.h # ------------------------------- # isdn4bsd: passive PCI cards # ------------------------------- ELSA_QS1PCI opt_i4b.h # ------------------------------- # isdn4bsd: misc options # ------------------------------- # temporary workaround for SMP machines I4B_SMP_WORKAROUND opt_i4b.h # enable VJ compression code for ipr i/f IPR_VJ opt_i4b.h IPR_LOG opt_i4b.h # Device options DEV_NPX opt_npx.h DEV_SPLASH opt_splash.h # ------------------------------- # EOF # ------------------------------- Index: head/sys/i386/i386/bios.c =================================================================== --- head/sys/i386/i386/bios.c (revision 112840) +++ head/sys/i386/i386/bios.c (revision 112841) @@ -1,676 +1,680 @@ /*- * Copyright (c) 1997 Michael Smith * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
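Both header probes in bios32_init() below rely on the same validation scheme: the BIOS32 Service Directory ("_32_") and PnP ("$PnP") structures carry a checksum byte chosen so that every byte of the structure sums to zero mod 256. A standalone sketch of that check:

    #include <stdint.h>

    /* Returns nonzero when the structure checksums to zero mod 256. */
    static int
    bios_cksum_ok(const uint8_t *p, int len)
    {
            uint8_t ck = 0;

            while (len-- > 0)
                    ck += *p++;
            return (ck == 0);
    }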
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Code for dealing with the BIOS in x86 PC systems. */ #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #include #endif #define BIOS_START 0xe0000 #define BIOS_SIZE 0x20000 /* exported lookup results */ struct bios32_SDentry PCIbios; struct PnPBIOS_table *PnPBIOStable; static u_int bios32_SDCI; /* start fairly early */ static void bios32_init(void *junk); SYSINIT(bios32, SI_SUB_CPU, SI_ORDER_ANY, bios32_init, NULL); /* * bios32_init * * Locate various bios32 entities. */ static void bios32_init(void *junk) { u_long sigaddr; struct bios32_SDheader *sdh; struct PnPBIOS_table *pt; u_int8_t ck, *cv; int i; char *p; /* * BIOS32 Service Directory, PCI BIOS */ /* look for the signature */ if ((sigaddr = bios_sigsearch(0, "_32_", 4, 16, 0)) != 0) { /* get a virtual pointer to the structure */ sdh = (struct bios32_SDheader *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)sdh, ck = 0, i = 0; i < (sdh->len * 16); i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if ((ck == 0) && (BIOS_START <= sdh->entry ) && (sdh->entry < (BIOS_START + BIOS_SIZE))) { bios32_SDCI = BIOS_PADDRTOVADDR(sdh->entry); if (bootverbose) { printf("bios32: Found BIOS32 Service Directory header at %p\n", sdh); printf("bios32: Entry = 0x%x (%x) Rev = %d Len = %d\n", sdh->entry, bios32_SDCI, sdh->revision, sdh->len); } /* Allow user override of PCI BIOS search */ if (((p = getenv("machdep.bios.pci")) == NULL) || strcmp(p, "disable")) { /* See if there's a PCI BIOS entrypoint here */ PCIbios.ident.id = 0x49435024; /* PCI systems should have this */ if (!bios32_SDlookup(&PCIbios) && bootverbose) printf("pcibios: PCI BIOS entry at 0x%x+0x%x\n", PCIbios.base, PCIbios.entry); } if (p != NULL) freeenv(p); } else { printf("bios32: Bad BIOS32 Service Directory\n"); } } /* * PnP BIOS * * Allow user override of PnP BIOS search */ if ((((p = getenv("machdep.bios.pnp")) == NULL) || strcmp(p, "disable")) && ((sigaddr = bios_sigsearch(0, "$PnP", 4, 16, 0)) != 0)) { /* get a virtual pointer to the structure */ pt = (struct PnPBIOS_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)pt, ck = 0, i = 0; i < pt->len; i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if (ck == 0) { PnPBIOStable = pt; if (bootverbose) { printf("pnpbios: Found PnP BIOS data at %p\n", pt); printf("pnpbios: Entry = %x:%x Rev = %d.%d\n", pt->pmentrybase, pt->pmentryoffset, pt->version >> 4, pt->version & 0xf); if ((pt->control & 0x3) == 0x01) printf("pnpbios: Event flag at %x\n", pt->evflagaddr); if (pt->oemdevid != 0) printf("pnpbios: OEM ID %x\n", pt->oemdevid); } } else { printf("pnpbios: Bad PnP BIOS 
data checksum\n"); } } if (p != NULL) freeenv(p); if (bootverbose) { /* look for other know signatures */ printf("Other BIOS signatures found:\n"); } } /* * bios32_SDlookup * * Query the BIOS32 Service Directory for the service named in (ent), * returns nonzero if the lookup fails. The caller must fill in * (ent->ident), the remainder are populated on a successful lookup. */ int bios32_SDlookup(struct bios32_SDentry *ent) { struct bios_regs args; if (bios32_SDCI == 0) return (1); args.eax = ent->ident.id; /* set up arguments */ args.ebx = args.ecx = args.edx = 0; bios32(&args, bios32_SDCI, GSEL(GCODE_SEL, SEL_KPL)); if ((args.eax & 0xff) == 0) { /* success? */ ent->base = args.ebx; ent->len = args.ecx; ent->entry = args.edx; ent->ventry = BIOS_PADDRTOVADDR(ent->base + ent->entry); return (0); /* all OK */ } return (1); /* failed */ } /* * bios_sigsearch * * Search some or all of the BIOS region for a signature string. * * (start) Optional offset returned from this function * (for searching for multiple matches), or NULL * to start the search from the base of the BIOS. * Note that this will be a _physical_ address in * the range 0xe0000 - 0xfffff. * (sig) is a pointer to the byte(s) of the signature. * (siglen) number of bytes in the signature. * (paralen) signature paragraph (alignment) size. * (sigofs) offset of the signature within the paragraph. * * Returns the _physical_ address of the found signature, 0 if the * signature was not found. */ u_int32_t bios_sigsearch(u_int32_t start, u_char *sig, int siglen, int paralen, int sigofs) { u_char *sp, *end; /* compute the starting address */ if ((start >= BIOS_START) && (start <= (BIOS_START + BIOS_SIZE))) { sp = (char *)BIOS_PADDRTOVADDR(start); } else if (start == 0) { sp = (char *)BIOS_PADDRTOVADDR(BIOS_START); } else { return 0; /* bogus start address */ } /* compute the end address */ end = (u_char *)BIOS_PADDRTOVADDR(BIOS_START + BIOS_SIZE); /* loop searching */ while ((sp + sigofs + siglen) < end) { /* compare here */ if (!bcmp(sp + sigofs, sig, siglen)) { /* convert back to physical address */ return((u_int32_t)BIOS_VADDRTOPADDR(sp)); } sp += paralen; } return(0); } /* * do not staticize, used by bioscall.s */ union { struct { u_short offset; u_short segment; } vec16; struct { u_int offset; u_short segment; } vec32; } bioscall_vector; /* bios jump vector */ void set_bios_selectors(struct bios_segments *seg, int flags) { struct soft_segment_descriptor ssd = { 0, /* segment base address (overwritten) */ 0, /* length (overwritten) */ SDT_MEMERA, /* segment type (overwritten) */ 0, /* priority level */ 1, /* descriptor present */ 0, 0, 1, /* descriptor size (overwritten) */ 0 /* granularity == byte units */ }; union descriptor *p_gdt; #ifdef SMP p_gdt = &gdt[PCPU_GET(cpuid) * NGDT]; #else p_gdt = gdt; #endif ssd.ssd_base = seg->code32.base; ssd.ssd_limit = seg->code32.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE32_SEL].sd); ssd.ssd_def32 = 0; if (flags & BIOSCODE_FLAG) { ssd.ssd_base = seg->code16.base; ssd.ssd_limit = seg->code16.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE16_SEL].sd); } ssd.ssd_type = SDT_MEMRWA; if (flags & BIOSDATA_FLAG) { ssd.ssd_base = seg->data.base; ssd.ssd_limit = seg->data.limit; ssdtosd(&ssd, &p_gdt[GBIOSDATA_SEL].sd); } if (flags & BIOSUTIL_FLAG) { ssd.ssd_base = seg->util.base; ssd.ssd_limit = seg->util.limit; ssdtosd(&ssd, &p_gdt[GBIOSUTIL_SEL].sd); } if (flags & BIOSARGS_FLAG) { ssd.ssd_base = seg->args.base; ssd.ssd_limit = seg->args.limit; ssdtosd(&ssd, &p_gdt[GBIOSARGS_SEL].sd); } } extern int vm86pa; extern void 
bios16_jmp(void); /* * this routine is really greedy with selectors, and uses 5: * * 32-bit code selector: to return to kernel * 16-bit code selector: for running code * data selector: for 16-bit data * util selector: extra utility selector * args selector: to handle pointers * * the util selector is set from the util16 entry in bios16_args, if a * "U" specifier is seen. * * See for description of format specifiers */ int bios16(struct bios_args *args, char *fmt, ...) { char *p, *stack, *stack_top; va_list ap; int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; pt_entry_t *pte; pd_entry_t *ptd; arg_start = 0xffffffff; arg_end = 0; /* * Some BIOS entrypoints attempt to copy the largest-case * argument frame (in order to generalise handling for * different entry types). If our argument frame is * smaller than this, the BIOS will reach off the top of * our constructed stack segment. Pad the top of the stack * with some garbage to avoid this. */ stack = (caddr_t)PAGE_SIZE - 32; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); arg_start = min(arg_start, i); arg_end = max(arg_end, i); flags |= BIOSARGS_FLAG; stack -= 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); stack -= 4; break; case 'U': /* 16-bit selector */ flags |= BIOSUTIL_FLAG; /* FALLTHROUGH */ case 'D': /* 16-bit selector */ case 'C': /* 16-bit selector */ stack -= 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); stack -= 2; break; default: return (EINVAL); } } if (flags & BIOSARGS_FLAG) { if (arg_end - arg_start > ctob(16)) return (EACCES); args->seg.args.base = arg_start; args->seg.args.limit = 0xffff; } args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; args->seg.code32.limit = 0xffff; ptd = (pd_entry_t *)rcr3(); - if (ptd == (u_int *)IdlePTD) { +#ifdef PAE + if (ptd == IdlePDPT) { +#else + if (ptd == IdlePTD) { +#endif /* * no page table, so create one and install it. */ pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* * this is a user-level page table */ pte = PTmap; } /* * install pointer to page 0. we don't need to flush the tlb, * since there should not be a previous mapping for page 0. */ *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V; stack_top = stack; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); *(u_int *)stack = (i - arg_start) | (GSEL(GBIOSARGS_SEL, SEL_KPL) << 16); stack += 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); *(u_int *)stack = i; stack += 4; break; case 'U': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSUTIL_SEL, SEL_KPL); stack += 2; break; case 'D': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSDATA_SEL, SEL_KPL); stack += 2; break; case 'C': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSCODE16_SEL, SEL_KPL); stack += 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); *(u_short *)stack = i; stack += 2; break; default: return (EINVAL); } } set_bios_selectors(&args->seg, flags); bioscall_vector.vec16.offset = (u_short)args->entry; bioscall_vector.vec16.segment = GSEL(GBIOSCODE16_SEL, SEL_KPL); i = bios16_call(&args->r, stack_top); if (pte == PTmap) { *pte = 0; /* remove entry */ } else { *ptd = 0; /* remove page table */ free(pte, M_TEMP); /* ... 
and free it */ } /* * XXX only needs to be invlpg(0) but that doesn't work on the 386 */ pmap_invalidate_all(kernel_pmap); return (i); } #ifdef DEV_ISA /* * PnP BIOS interface; enumerate devices only known to the system * BIOS and save information about them for later use. */ struct pnp_sysdev { u_int16_t size; u_int8_t handle; u_int32_t devid; u_int8_t type[3]; u_int16_t attrib; #define PNPATTR_NODISABLE (1<<0) /* can't be disabled */ #define PNPATTR_NOCONFIG (1<<1) /* can't be configured */ #define PNPATTR_OUTPUT (1<<2) /* can be primary output */ #define PNPATTR_INPUT (1<<3) /* can be primary input */ #define PNPATTR_BOOTABLE (1<<4) /* can be booted from */ #define PNPATTR_DOCK (1<<5) /* is a docking station */ #define PNPATTR_REMOVEABLE (1<<6) /* device is removeable */ #define PNPATTR_CONFIG_STATIC (0) #define PNPATTR_CONFIG_DYNAMIC (1) #define PNPATTR_CONFIG_DYNONLY (3) #define PNPATTR_CONFIG(a) (((a) >> 7) & 0x3) /* device-specific data comes here */ u_int8_t devdata[0]; } __packed; /* We have to cluster arguments within a 64k range for the bios16 call */ struct pnp_sysdevargs { u_int16_t next; struct pnp_sysdev node; }; /* * This function is called after the bus has assigned resource * locations for a logical device. */ static void pnpbios_set_config(void *arg, struct isa_config *config, int enable) { } /* * Quiz the PnP BIOS, build a list of PNP IDs and resource data. */ static void pnpbios_identify(driver_t *driver, device_t parent) { struct PnPBIOS_table *pt = PnPBIOStable; struct bios_args args; struct pnp_sysdev *pd; struct pnp_sysdevargs *pda; u_int16_t ndevs, bigdev; int error, currdev; u_int8_t *devnodebuf, tag; u_int32_t *devid, *compid; int idx, left; device_t dev; /* no PnP BIOS information */ if (pt == NULL) return; /* ACPI already active */ if (devclass_get_softc(devclass_find("ACPI"), 0) != NULL) return; /* get count of PnP devices */ bzero(&args, sizeof(args)); args.seg.code16.base = BIOS_PADDRTOVADDR(pt->pmentrybase); args.seg.code16.limit = 0xffff; /* XXX ? */ args.seg.data.base = BIOS_PADDRTOVADDR(pt->pmdataseg); args.seg.data.limit = 0xffff; args.entry = pt->pmentryoffset; if ((error = bios16(&args, PNP_COUNT_DEVNODES, &ndevs, &bigdev)) || (args.r.eax & 0xff)) printf("pnpbios: error %d/%x getting device count/size limit\n", error, args.r.eax); ndevs &= 0xff; /* clear high byte garbage */ if (bootverbose) printf("pnpbios: %d devices, largest %d bytes\n", ndevs, bigdev); devnodebuf = malloc(bigdev + (sizeof(struct pnp_sysdevargs) - sizeof(struct pnp_sysdev)), M_DEVBUF, M_NOWAIT); pda = (struct pnp_sysdevargs *)devnodebuf; pd = &pda->node; for (currdev = 0, left = ndevs; (currdev != 0xff) && (left > 0); left--) { bzero(pd, bigdev); pda->next = currdev; /* get current configuration */ if ((error = bios16(&args, PNP_GET_DEVNODE, &pda->next, &pda->node, 1))) { printf("pnpbios: error %d making BIOS16 call\n", error); break; } if ((error = (args.r.eax & 0xff))) { if (bootverbose) printf("pnpbios: %s 0x%x fetching node %d\n", error & 0x80 ? "error" : "warning", error, currdev); if (error & 0x80) break; } currdev = pda->next; if (pd->size < sizeof(struct pnp_sysdev)) { printf("pnpbios: bogus system node data, aborting scan\n"); break; } /* * If we are in APIC_IO mode, we should ignore the ISA PIC if it * shows up. Likewise, in !APIC_IO mode, we should ignore the * APIC (less important). * This is significant because the ISA PIC will claim IRQ 2 (which * it uses for chaining), while in APIC mode this is a valid IRQ * available for general use. 
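The two format-string walks in bios16() above do the same accounting twice: once to size the 16-bit stack frame handed to the BIOS and once to fill it in. A sketch of just the sizing pass (a hypothetical helper, with the same specifier meanings as the real code, counting the 32 bytes of guard padding left at the top of the stack):

    #include <stddef.h>

    /* 'p' and 'i' consume 32-bit slots; 'U', 'D', 'C' and 's' consume
     * 16-bit slots; anything else is invalid (bios16() returns EINVAL). */
    static size_t
    bios16_frame_bytes(const char *fmt)
    {
            size_t n = 32;          /* guard padding above the frame */

            for (; fmt != NULL && *fmt != '\0'; fmt++) {
                    switch (*fmt) {
                    case 'p':
                    case 'i':
                            n += 4;
                            break;
                    case 'U':
                    case 'D':
                    case 'C':
                    case 's':
                            n += 2;
                            break;
                    default:
                            return (0);
                    }
            }
            return (n);
    }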
*/ #ifdef APIC_IO if (!strcmp(pnp_eisaformat(pd->devid), "PNP0000")) /* ISA PIC */ continue; #else if (!strcmp(pnp_eisaformat(pd->devid), "PNP0003")) /* APIC */ continue; #endif /* Add the device and parse its resources */ dev = BUS_ADD_CHILD(parent, ISA_ORDER_PNP, NULL, -1); isa_set_vendorid(dev, pd->devid); isa_set_logicalid(dev, pd->devid); /* * It appears that some PnP BIOS doesn't allow us to re-enable * the embedded system device once it is disabled. We shall * mark all system device nodes as "cannot be disabled", regardless * of actual settings in the device attribute byte. * XXX isa_set_configattr(dev, ((pd->attrib & PNPATTR_NODISABLE) ? 0 : ISACFGATTR_CANDISABLE) | ((!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0)); */ isa_set_configattr(dev, (!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0); ISA_SET_CONFIG_CALLBACK(parent, dev, pnpbios_set_config, 0); pnp_parse_resources(dev, &pd->devdata[0], pd->size - sizeof(struct pnp_sysdev), 0); if (!device_get_desc(dev)) device_set_desc_copy(dev, pnp_eisaformat(pd->devid)); /* Find device IDs */ devid = &pd->devid; compid = NULL; /* look for a compatible device ID too */ left = pd->size - sizeof(struct pnp_sysdev); idx = 0; while (idx < left) { tag = pd->devdata[idx++]; if (PNP_RES_TYPE(tag) == 0) { /* Small resource */ switch (PNP_SRES_NUM(tag)) { case PNP_TAG_COMPAT_DEVICE: compid = (u_int32_t *)(pd->devdata + idx); if (bootverbose) printf("pnpbios: node %d compat ID 0x%08x\n", pd->handle, *compid); /* FALLTHROUGH */ case PNP_TAG_END: idx = left; break; default: idx += PNP_SRES_LEN(tag); break; } } else /* Large resource, skip it */ idx += *(u_int16_t *)(pd->devdata + idx) + 2; } if (bootverbose) { printf("pnpbios: handle %d device ID %s (%08x)", pd->handle, pnp_eisaformat(*devid), *devid); if (compid != NULL) printf(" compat ID %s (%08x)", pnp_eisaformat(*compid), *compid); printf("\n"); } } } static device_method_t pnpbios_methods[] = { /* Device interface */ DEVMETHOD(device_identify, pnpbios_identify), { 0, 0 } }; static driver_t pnpbios_driver = { "pnpbios", pnpbios_methods, 1, /* no softc */ }; static devclass_t pnpbios_devclass; DRIVER_MODULE(pnpbios, isa, pnpbios_driver, pnpbios_devclass, 0, 0); #endif /* DEV_ISA */ Index: head/sys/i386/i386/locore.s =================================================================== --- head/sys/i386/i386/locore.s (revision 112840) +++ head/sys/i386/i386/locore.s (revision 112841) @@ -1,892 +1,927 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ * * originally from: locore.s, by William F. Jolitz * * Substantially rewritten by David Greenman, Rod Grimes, * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp * and many others. */ #include "opt_bootp.h" #include "opt_compat.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include "assym.s" /* * XXX * * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. Perhaps newer versions * will have more pleasant appearance. */ /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). * * NOTE: PTDpde, PTmap, and PTD are being defined as address symbols. * In C you access them directly, and not with a '*'. Storage is not being * allocated. They will magically address the correct locations in KVM * which C will treat as normal variables of the type they are defined in * machine/pmap.h, i.e. PTDpde = XX ; to set a PDE entry, NOT *PTDpde = XX; */ .globl PTmap,PTD,PTDpde .set PTmap,(PTDPTDI << PDRSHIFT) .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. * See the note above. It is true here as well. */ .globl APTmap,APTD,APTDpde .set APTmap,APTDPTDI << PDRSHIFT .set APTD,APTmap + (APTDPTDI * PAGE_SIZE) .set APTDpde,PTD + (APTDPTDI * PDESIZE) #ifdef SMP /* * Define layout of per-cpu address space. * This is "constructed" in locore.s on the BSP and in mp_machdep.c * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! 
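The .set lines above implement the recursive map the header comment describes: installing the page directory as one of its own entries makes every page-table entry visible at a fixed virtual address, so vtopte() is pure arithmetic with no table walk. A sketch for the non-PAE layout, using a hypothetical slot number in place of PTDPTDI (the real value depends on the KVA layout):

    #include <stdint.h>

    typedef uint32_t pt_entry_t;

    #define PDRSHIFT        22              /* non-PAE: one pde maps 4MB */
    #define PAGE_SHIFT      12
    #define RECURSIVE_SLOT  0x3fdu          /* hypothetical pde index */

    /* With the PD mapped into itself, all ptes form one linear array. */
    #define PTmap_sketch    ((pt_entry_t *)(RECURSIVE_SLOT << PDRSHIFT))
    #define vtopte_sketch(va) (&PTmap_sketch[(uint32_t)(va) >> PAGE_SHIFT])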
*/ .globl SMP_prvspace, lapic .set SMP_prvspace,(MPPTDI << PDRSHIFT) .set lapic,SMP_prvspace + (NPTEPG-1) * PAGE_SIZE #endif /* SMP */ /* * Compiled KERNBASE location */ .globl kernbase .set kernbase,KERNBASE /* * Globals */ .data ALIGN_DATA /* just to be sure */ .globl HIDENAME(tmpstk) .space 0x2000 /* space for tmpstk - temporary stack */ HIDENAME(tmpstk): .globl bootinfo bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ .globl KERNend KERNend: .long 0 /* phys addr end of kernel (just after bss) */ physfree: .long 0 /* phys addr of next free page */ #ifdef SMP .globl cpu0prvpage cpu0pp: .long 0 /* phys addr cpu0 private pg */ cpu0prvpage: .long 0 /* relocated version */ .globl SMPpt SMPptpa: .long 0 /* phys addr SMP page table */ SMPpt: .long 0 /* relocated version */ #endif /* SMP */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif KPTphys: .long 0 /* phys addr of kernel page tables */ .globl proc0uarea, proc0kstack proc0uarea: .long 0 /* address of proc 0 uarea space */ proc0kstack: .long 0 /* address of proc 0 kstack space */ p0upa: .long 0 /* phys addr of proc0's UAREA */ p0kpa: .long 0 /* phys addr of proc0's STACK */ vm86phystk: .long 0 /* PA of vm86/bios stack */ .globl vm86paddr, vm86pa vm86paddr: .long 0 /* address of vm86 region */ vm86pa: .long 0 /* phys addr of vm86 region */ #ifdef PC98 .globl pc98_system_parameter pc98_system_parameter: .space 0x240 #endif /********************************************************************** * * Some handy macros * */ #define R(foo) ((foo)-KERNBASE) #define ALLOCPAGES(foo) \ movl R(physfree), %esi ; \ movl $((foo)*PAGE_SIZE), %eax ; \ addl %esi, %eax ; \ movl %eax, R(physfree) ; \ movl %esi, %edi ; \ movl $((foo)*PAGE_SIZE),%ecx ; \ xorl %eax,%eax ; \ cld ; \ rep ; \ stosb /* * fillkpt * eax = page frame address * ebx = index into page table * ecx = how many pages to map * base = base address of page dir/table * prot = protection bits */ #define fillkpt(base, prot) \ shll $PTESHIFT,%ebx ; \ addl base,%ebx ; \ orl $PG_V,%eax ; \ orl prot,%eax ; \ 1: movl %eax,(%ebx) ; \ addl $PAGE_SIZE,%eax ; /* increment physical address */ \ addl $PTESIZE,%ebx ; /* next pte */ \ loop 1b /* * fillkptphys(prot) * eax = physical address * ecx = how many pages to map * prot = protection bits */ #define fillkptphys(prot) \ movl %eax, %ebx ; \ shrl $PAGE_SHIFT, %ebx ; \ fillkpt(R(KPTphys), prot) .text /********************************************************************** * * This is where the bootblocks start us, set the ball rolling... * */ NON_GPROF_ENTRY(btext) #ifdef PC98 /* save SYSTEM PARAMETER for resume (NS/T or other) */ movl $0xa1400,%esi movl $R(pc98_system_parameter),%edi movl $0x0240,%ecx cld rep movsb #else /* IBM-PC */ /* Tell the bios to warmboot next time */ movw $0x1234,0x472 #endif /* PC98 */ /* Set up a real frame in case the double return in newboot is executed. */ pushl %ebp movl %esp, %ebp /* Don't trust what the BIOS gives for eflags. */ pushl $PSL_KERNEL popfl /* * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap * to set %cs, %ds, %es and %ss. */ mov %ds, %ax mov %ax, %fs mov %ax, %gs call recover_bootinfo /* Get onto a stack that we can trust. */ /* * XXX this step is delayed in case recover_bootinfo needs to return via * the old stack, but it need not be, since recover_bootinfo actually * returns via the old frame. 
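fillkpt() above is the workhorse of create_pagetables(): the PTESHIFT scaling of the index is what lets one macro write 4-byte or 8-byte entries, and fillkptphys() is the same loop with the index derived from the physical address. Roughly what the assembly does, as self-contained C (a sketch; constants are illustrative):

    #include <stdint.h>

    typedef uint32_t pt_entry_t;    /* 64-bit under PAE */
    typedef uint64_t vm_paddr_t;

    #define PAGE_SIZE_EX    4096
    #define PG_V_EX         0x001

    /* Write `count' consecutive entries starting at index `idx',
     * mapping ascending physical pages with the valid bit plus `prot'. */
    static void
    fillkpt_sketch(pt_entry_t *base, uint32_t idx, vm_paddr_t pa,
        uint32_t count, uint32_t prot)
    {
            while (count-- > 0) {
                    base[idx++] = (pt_entry_t)(pa | PG_V_EX | prot);
                    pa += PAGE_SIZE_EX;
            }
    }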
*/ movl $R(HIDENAME(tmpstk)),%esp #ifdef PC98 /* pc98_machine_type & M_EPSON_PC98 */ testb $0x02,R(pc98_system_parameter)+220 jz 3f /* epson_machine_id <= 0x0b */ cmpb $0x0b,R(pc98_system_parameter)+224 ja 3f /* count up memory */ movl $0x100000,%eax /* next, talley remaining memory */ movl $0xFFF-0x100,%ecx 1: movl 0(%eax),%ebx /* save location to check */ movl $0xa55a5aa5,0(%eax) /* write test pattern */ cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ jne 2f movl %ebx,0(%eax) /* restore memory */ addl $PAGE_SIZE,%eax loop 1b 2: subl $0x100000,%eax shrl $17,%eax movb %al,R(pc98_system_parameter)+1 3: movw R(pc98_system_parameter+0x86),%ax movw %ax,R(cpu_id) #endif call identify_cpu /* clear bss */ /* * XXX this should be done a little earlier. * * XXX we don't check that there is memory for our bss and page tables * before using it. * * XXX the boot program somewhat bogusly clears the bss. We still have * to do it in case we were unzipped by kzipboot. Then the boot program * only clears kzipboot's bss. * * XXX the gdt and idt are still somewhere in the boot program. We * depend on the convention that the boot program is below 1MB and we * are above 1MB to keep the gdt and idt away from the bss and page * tables. */ movl $R(end),%ecx movl $R(edata),%edi subl %edi,%ecx xorl %eax,%eax cld rep stosb call create_pagetables /* * If the CPU has support for VME, turn it on. */ testl $CPUID_VME, R(cpu_feature) jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high virtualized address */ ret /* now running relocated at KERNBASE where the system is linked to run */ begin: /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ /* bootstrap stack end location */ leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ /* * Clean up the stack in a way that db_numargs() understands, so * that backtraces in ddb don't underrun the stack. Traps for * inaccessible memory are more fatal than usual this early. */ addl $4,%esp call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ addl $0,%esp /* for db_numargs() again */ /* * Signal trampoline, copied to top of user stack */ NON_GPROF_ENTRY(sigcode) calll *SIGF_HANDLER(%esp) leal SIGF_UC(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC_EFLAGS(%eax) jne 1f movl UC_GS(%eax),%gs /* restore %gs */ 1: movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #ifdef COMPAT_FREEBSD4 ALIGN_TEXT freebsd4_sigcode: calll *SIGF_HANDLER(%esp) leal SIGF_UC4(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC4_EFLAGS(%eax) jne 1f movl UC4_GS(%eax),%gs /* restore %gs */ 1: movl $344,%eax /* 4.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. 
*/ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #endif #ifdef COMPAT_43 ALIGN_TEXT osigcode: call *SIGF_HANDLER(%esp) /* call signal handler */ lea SIGF_SC(%esp),%eax /* get sigcontext */ pushl %eax testl $PSL_VM,SC_PS(%eax) jne 9f movl SC_GS(%eax),%gs /* restore %gs */ 9: movl $103,%eax /* 3.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ 0: jmp 0b #endif /* COMPAT_43 */ ALIGN_TEXT esigcode: .data .globl szsigcode szsigcode: .long esigcode-sigcode #ifdef COMPAT_FREEBSD4 .globl szfreebsd4_sigcode szfreebsd4_sigcode: .long esigcode-freebsd4_sigcode #endif #ifdef COMPAT_43 .globl szosigcode szosigcode: .long esigcode-osigcode #endif .text /********************************************************************** * * Recover the bootinfo passed to us from the boot program * */ recover_bootinfo: /* * This code is called in different ways depending on what loaded * and started the kernel. This is used to detect how we get the * arguments from the other code and what we do with them. * * Old disk boot blocks: * (*btext)(howto, bootdev, cyloffset, esym); * [return address == 0, and can NOT be returned to] * [cyloffset was not supported by the FreeBSD boot code * and always passed in as 0] * [esym is also known as total in the boot code, and * was never properly supported by the FreeBSD boot code] * * Old diskless netboot code: * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); * [return address != 0, and can NOT be returned to] * If we are being booted by this code it will NOT work, * so we are just going to halt if we find this case. * * New uniform boot code: * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) * [return address != 0, and can be returned to] * * There may seem to be a lot of wasted arguments in here, but * that is so the newer boot code can still load very old kernels * and old boot code can load new kernels. */ /* * The old style disk boot blocks fake a frame on the stack and * did an lret to get here. The frame on the stack has a return * address of 0. */ cmpl $0,4(%ebp) je olddiskboot /* * We have some form of return address, so this is either the * old diskless netboot code, or the new uniform code. That can * be detected by looking at the 5th argument, if it is 0 * we are being booted by the new uniform boot code. */ cmpl $0,24(%ebp) je newboot /* * Seems we have been loaded by the old diskless boot code, we * don't stand a chance of running as the diskless structure * changed considerably between the two, so just halt. */ hlt /* * We have been loaded by the new uniform boot code. * Let's check the bootinfo version, and if we do not understand * it we return to the loader with a status of 1 to indicate this error */ newboot: movl 28(%ebp),%ebx /* &bootinfo.version */ movl BI_VERSION(%ebx),%eax cmpl $1,%eax /* We only understand version 1 */ je 1f movl $1,%eax /* Return status */ leave /* * XXX this returns to our caller's caller (as is required) since * we didn't set up a frame and our caller did. */ ret 1: /* * If we have a kernelname copy it in */ movl BI_KERNELNAME(%ebx),%esi cmpl $0,%esi je 2f /* No kernelname */ movl $MAXPATHLEN,%ecx /* Brute force!!! */ movl $R(kernelname),%edi cmpb $'/',(%esi) /* Make sure it starts with a slash */ je 1f movb $'/',(%edi) incl %edi decl %ecx 1: cld rep movsb 2: /* * Determine the size of the boot loader's copy of the bootinfo * struct. 
This is impossible to do properly because old versions * of the struct don't contain a size field and there are 2 old * versions with the same version number. */ movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ je got_bi_size /* no, sizeless version */ movl BI_SIZE(%ebx),%ecx got_bi_size: /* * Copy the common part of the bootinfo struct */ movl %ebx,%esi movl $R(bootinfo),%edi cmpl $BOOTINFO_SIZE,%ecx jbe got_common_bi_size movl $BOOTINFO_SIZE,%ecx got_common_bi_size: cld rep movsb #ifdef NFS_ROOT #ifndef BOOTP_NFSV3 /* * If we have a nfs_diskless structure copy it in */ movl BI_NFS_DISKLESS(%ebx),%esi cmpl $0,%esi je olddiskboot movl $R(nfs_diskless),%edi movl $NFSDISKLESS_SIZE,%ecx cld rep movsb movl $R(nfs_diskless_valid),%edi movl $1,(%edi) #endif #endif /* * The old style disk boot. * (*btext)(howto, bootdev, cyloffset, esym); * Note that the newer boot code just falls into here to pick * up howto and bootdev, cyloffset and esym are no longer used */ olddiskboot: movl 8(%ebp),%eax movl %eax,R(boothowto) movl 12(%ebp),%eax movl %eax,R(bootdev) ret /********************************************************************** * * Identify the CPU and initialize anything special about it * */ identify_cpu: /* Try to toggle alignment check flag; does not exist on 386. */ pushfl popl %eax movl %eax,%ecx orl $PSL_AC,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_AC,%eax pushl %ecx popfl testl %eax,%eax jnz try486 /* NexGen CPU does not have aligment check flag. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jz trynexgen popfl movl $CPU_386,R(cpu) jmp 3f trynexgen: popfl movl $CPU_NX586,R(cpu) movl $0x4778654e,R(cpu_vendor) # store vendor string movl $0x72446e65,R(cpu_vendor+4) movl $0x6e657669,R(cpu_vendor+8) movl $0,R(cpu_vendor+12) jmp 3f try486: /* Try to toggle identification flag; does not exist on early 486s. */ pushfl popl %eax movl %eax,%ecx xorl $PSL_ID,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_ID,%eax pushl %ecx popfl testl %eax,%eax jnz trycpuid movl $CPU_486,R(cpu) /* * Check Cyrix CPU * Cyrix CPUs do not change the undefined flags following * execution of the divide instruction which divides 5 by 2. * * Note: CPUID is enabled on M2, so it passes another way. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jnc trycyrix popfl jmp 3f /* You may use Intel CPU. */ trycyrix: popfl /* * IBM Bluelighting CPU also doesn't change the undefined flags. * Because IBM doesn't disclose the information for Bluelighting * CPU, we couldn't distinguish it from Cyrix's (including IBM * brand of Cyrix CPUs). */ movl $0x69727943,R(cpu_vendor) # store vendor string movl $0x736e4978,R(cpu_vendor+4) movl $0x64616574,R(cpu_vendor+8) jmp 3f trycpuid: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid # cpuid 0 movl %eax,R(cpu_high) # highest capability movl %ebx,R(cpu_vendor) # store vendor string movl %edx,R(cpu_vendor+4) movl %ecx,R(cpu_vendor+8) movb $0,R(cpu_vendor+12) movl $1,%eax cpuid # cpuid 1 movl %eax,R(cpu_id) # store cpu_id movl %ebx,R(cpu_procinfo) # store cpu_procinfo movl %edx,R(cpu_feature) # store cpu_feature rorl $8,%eax # extract family type andl $15,%eax cmpl $5,%eax jae 1f /* less than Pentium; must be 486 */ movl $CPU_486,R(cpu) jmp 3f 1: /* a Pentium? 
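identify_cpu above tells 386, 486, NexGen and Cyrix parts apart by their EFLAGS and division side effects before falling back to cpuid; the classification at the end ("rorl $8; andl $15") is a plain bit-field extraction of the family. The same decode in C (a sketch):

    #include <stdint.h>

    /* cpuid leaf 1 returns the processor family in bits 11:8 of %eax. */
    static unsigned
    cpuid_family(uint32_t cpu_id)
    {
            return ((cpu_id >> 8) & 0xf);   /* <5: 486, ==5: 586, >5: 686 */
    }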
*/ cmpl $5,%eax jne 2f movl $CPU_586,R(cpu) jmp 3f 2: /* Greater than Pentium...call it a Pentium Pro */ movl $CPU_686,R(cpu) 3: ret /********************************************************************** * * Create the first page directory and its page tables. * */ create_pagetables: /* Find end of kernel image (rounded up to a page boundary). */ movl $R(_end),%esi /* Include symbols, if any. */ movl R(bootinfo+BI_ESYMTAB),%edi testl %edi,%edi je over_symalloc movl %edi,%esi movl $KERNBASE,%edi addl %edi,R(bootinfo+BI_SYMTAB) addl %edi,R(bootinfo+BI_ESYMTAB) over_symalloc: /* If we are told where the end of the kernel space is, believe it. */ movl R(bootinfo+BI_KERNEND),%edi testl %edi,%edi je no_kernend movl %edi,%esi no_kernend: addl $PAGE_MASK,%esi andl $~PAGE_MASK,%esi movl %esi,R(KERNend) /* save end of kernel */ movl %esi,R(physfree) /* next free page is at end of kernel */ /* Allocate Kernel Page Tables */ ALLOCPAGES(NKPT) movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) /* Allocate UPAGES */ ALLOCPAGES(UAREA_PAGES) movl %esi,R(p0upa) addl $KERNBASE, %esi movl %esi, R(proc0uarea) ALLOCPAGES(KSTACK_PAGES) movl %esi,R(p0kpa) addl $KERNBASE, %esi movl %esi, R(proc0kstack) ALLOCPAGES(1) /* vm86/bios stack */ movl %esi,R(vm86phystk) ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ movl %esi,R(vm86pa) addl $KERNBASE, %esi movl %esi, R(vm86paddr) #ifdef SMP /* Allocate cpu0's private data page */ ALLOCPAGES(1) movl %esi,R(cpu0pp) addl $KERNBASE, %esi movl %esi, R(cpu0prvpage) /* relocated to KVM space */ /* Allocate SMP page table page */ ALLOCPAGES(1) movl %esi,R(SMPptpa) addl $KERNBASE, %esi movl %esi, R(SMPpt) /* relocated to KVM space */ #endif /* SMP */ /* Map read-only from zero to the end of the kernel text section */ xorl %eax, %eax xorl %edx,%edx movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map read-write, data, bss and symbols */ movl $R(etext),%eax addl $PAGE_MASK, %eax andl $~PAGE_MASK, %eax movl $PG_RW,%edx movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) /* Map proc0's UPAGES in the physical way ... */ movl R(p0upa), %eax movl $(UAREA_PAGES), %ecx fillkptphys($PG_RW) /* Map proc0's KSTACK in the physical way ... */ movl R(p0kpa), %eax movl $(KSTACK_PAGES), %ecx fillkptphys($PG_RW) /* Map ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkptphys($PG_RW) /* Map space for the vm86 region */ movl R(vm86phystk), %eax movl $4, %ecx fillkptphys($PG_RW) /* Map page 0 into the vm86 page table */ movl $0, %eax movl $0, %ebx movl $1, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) /* ...likewise for the ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) #ifdef SMP /* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ movl R(cpu0pp), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map SMP page table page into global kmem FWIW */ movl R(SMPptpa), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map the private page into the SMP page table */ movl R(cpu0pp), %eax movl $0, %ebx /* pte offset = 0 */ movl $1, %ecx /* one private page coming right up */ fillkpt(R(SMPptpa), $PG_RW) /* ... 
and put the page table table in the pde. */ movl R(SMPptpa), %eax movl $MPPTDI, %ebx movl $1, %ecx fillkpt(R(IdlePTD), $PG_RW) /* Fakeup VA for the local apic to allow early traps. */ ALLOCPAGES(1) movl %esi, %eax movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ movl R(KPTphys), %eax xorl %ebx, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install pde's for pt's */ movl R(KPTphys), %eax movl $KPTDI, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install a pde recursively mapping page directory as a page table */ movl R(IdlePTD), %eax movl $PTDPTDI, %ebx movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) + +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif ret Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 112840) +++ head/sys/i386/i386/machdep.c (revision 112841) @@ -1,2731 +1,2741 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
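The final locore.s hunk above fills in the PDPT: each of the NPGPTD entries points at one page-directory page, and a PDPT entry may not carry the R/W and U/S bits a normal pde has, which is why fillkpt() is invoked with a protection of $0x0 (the macro ors in PG_V itself). The same structure in self-contained C (a sketch; the constants are illustrative):

    #include <stdint.h>

    #define PAGE_SIZE_EX    4096
    #define NPGPTD_EX       4       /* PAE: four 1GB page directories */

    /* Four 8-byte entries on a 32-byte boundary; the allocation above
     * notes that a whole page is more than strictly needed. */
    static uint64_t pdpt[NPGPTD_EX] __attribute__((aligned(32)));

    static void
    pdpt_init(uint64_t pd_phys)     /* physical base of the PD pages */
    {
            int i;

            for (i = 0; i < NPGPTD_EX; i++)
                    pdpt[i] = (pd_phys + (uint64_t)i * PAGE_SIZE_EX) | 0x1;
    }

Note also the ordering earlier in btext: %cr3 is loaded with the PDPT rather than the page directory, and CR4.PAE is set before CR0.PG, since enabling paging first would have the CPU interpret the PDPT as a 32-bit page directory.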
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #include #ifdef PERFMON #include #endif #ifdef SMP #include #include #endif #include #include #include #include #include #include extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #if defined(CPU_DISABLE_SSE) #undef CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif long Maxmem = 0; vm_paddr_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; #ifndef SMP static struct pcpu __pcpu; #endif struct mtx icu_lock; static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); /* * Display any holes after the first chunk of extended memory. 
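The uintmax_t casts in cpu_startup() below are load-bearing once PAE allows more than 4GB of RAM: a byte count no longer fits in 32 bits, so ptoa() must be applied to a widened value. A self-contained illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT_EX   12

    int
    main(void)
    {
            uint32_t maxmem = 0x200000;     /* page count for 8GB of RAM */

            /* Wrong: the 32-bit shift wraps (the result needs 34 bits). */
            uint32_t bad = maxmem << PAGE_SHIFT_EX;
            /* Right: widen first, as (uintmax_t)Maxmem does below. */
            uintmax_t good = (uintmax_t)maxmem << PAGE_SHIFT_EX;

            printf("bad=%u good=%ju\n", bad, good); /* bad=0, good=8589934592 */
            return (0);
    }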
*/ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifndef SMP /* For SMP, we delay the cpu_setregs() until after SMP startup. */ cpu_setregs(); #endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. 
* We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. 
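The eflags splice in the vm86 branch above deserves a note: without VME, the virtual-interrupt bits PSL_VIF and PSL_VIP are maintained in software, so the signal frame gets the hardware eflags from the trapframe merged with the emulated bits from the vm86 state. The merge in isolation (a sketch with the real PSL bit values):

    #include <stdint.h>

    #define PSL_VIF 0x00080000      /* virtual interrupt flag */
    #define PSL_VIP 0x00100000      /* virtual interrupt pending */

    /* Hardware flags from the trapframe, virtual bits from vm86 state. */
    static uint32_t
    merge_vm86_eflags(uint32_t tf_eflags, uint32_t vm86_eflags)
    {
            return ((tf_eflags & ~(PSL_VIF | PSL_VIP)) |
                (vm86_eflags & (PSL_VIF | PSL_VIP)));
    }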
*/ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); } #endif /* COMPAT_FREEBSD4 */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, sig, mask, code); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. 
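
sendsig() above places the frame on the alternate stack when one is configured, armed for this signal, and not already in use, otherwise just below the interrupted %esp, and then rounds down to a 16-byte boundary with "& ~0xF". A standalone sketch of that placement arithmetic; the frame size and addresses are invented.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct fake_sigframe { char bytes[344]; };	/* size is illustrative */

static uintptr_t
place_frame(uintptr_t usp, uintptr_t altsp, size_t altsize, int use_altstack)
{
	uintptr_t sp;

	if (use_altstack)
		sp = altsp + altsize - sizeof(struct fake_sigframe);
	else
		sp = usp - sizeof(struct fake_sigframe);
	return (sp & ~(uintptr_t)0xF);		/* the "& ~0xF" above */
}

int
main(void)
{
	printf("frame at %#jx\n",
	    (uintmax_t)place_frame(0xbfbfe123, 0, 0, 0));
	printf("frame at %#jx\n",
	    (uintmax_t)place_frame(0, 0xbfa00000, 65536, 1));
	return (0);
}
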
*/ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = &sc; eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too.
tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (scp->sc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. 
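
The EFL_SECURE() test used by all the sigreturn paths accepts a proposed eflags value only if it differs from the current trapframe value in user-changeable bits, with PSL_RF masked out of both sides first as the comment above explains. A standalone model of the check; the exact mask below is an assumption standing in for the kernel's PSL_USERCHANGE.

#include <stdio.h>

/* assumed user-changeable set: CF PF AF ZF SF TF DF OF NT AC ID */
#define USERCHANGE_MASK	0x00244dd5u

static int
eflags_secure(unsigned int new_ef, unsigned int cur_ef)
{
	/* reject any difference outside the user-changeable bits */
	return (((new_ef ^ cur_ef) & ~USERCHANGE_MASK) == 0);
}

int
main(void)
{
	printf("%d\n", eflags_secure(0x00000246u, 0x00000202u)); /* 1: ok */
	printf("%d\n", eflags_secure(0x00003246u, 0x00000202u)); /* 0: IOPL */
	return (0);
}
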
*/ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(void) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) { enable_intr(); } else { /* * we must absolutely guarentee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } } } /* * Clear registers on exec */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. 
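
The cpu_idle() comment above describes a lost-wakeup window: the runnable check must happen with interrupts disabled, and "sti; hlt" works because sti takes effect only after the following instruction, so a pending wakeup interrupts the hlt rather than firing in the gap before it. A rough userland model of that ordering, with flags standing in for real interrupts.

#include <stdio.h>

static int runnable;

static void
idle_once(int wakeup_arrives_in_window)
{
	/* disable_intr(): from here on no wakeup can be taken */
	if (runnable) {
		/* enable_intr() and go schedule */
		printf("schedule\n");
		return;
	}
	if (wakeup_arrives_in_window)
		runnable = 1;	/* lost with a separate "enable; halt" pair */
	/*
	 * "sti; hlt": sti takes effect after the next instruction, so a
	 * pending wakeup interrupts the hlt instead of firing before it.
	 */
	printf(runnable ? "hlt woken immediately\n" : "hlt until interrupt\n");
}

int
main(void)
{
	idle_once(1);
	return (0);
}
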
*/ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); /* Initialize the npx (if any) for the current process. */ /* * XXX the above load_cr0() also initializes it and is a layering * violation if NPX is configured. It drops the npx partially * and this would be fatal if we were interrupted now, and decided * to force the state to the pcb, and checked the invariant * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL). * ALL of this can happen except the check. The check used to * happen and be fatal later when we didn't complete the drop * before returning to user mode. This should be fixed properly * soon. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); #ifdef SMP cr0 |= CR0_NE; /* Done by npxinit() */ #endif cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ #ifndef I386_CPU cr0 |= CR0_WP | CR0_AM; #endif load_cr0(cr0); load_gs(_udatasel); } static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); u_long bootdev; /* not a dev_t - encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in dev_t format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0uarea; extern vm_offset_t proc0kstack; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* 
segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment 
base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), 
IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, physmap_idx, pa_indx; int hasbrokenint12; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; char *cp; struct bios_smap *smap; hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Some newer BIOSes has broken INT 12H implementation which cause * kernel panic immediately. In this case, we need to scan SMAP * with INT 15:E820 first, then determine base memory size. */ if (hasbrokenint12) { goto int15e820; } /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; int15e820: /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. 
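
The fixup above marks the pages between the reported base-memory top and 640K present, writable, and user-accessible in the vm86 page table, so the BIOS can scribble on them. A standalone sketch of the PTE arithmetic; the PG_* values are written out to match the i386 ones, and the basemem figure is invented.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PG_V		0x001u	/* valid */
#define PG_RW		0x002u	/* writable */
#define PG_U		0x004u	/* user-accessible */

int
main(void)
{
	uint32_t pte[160];		/* identity map of the first 640K */
	unsigned basemem_kb = 632;	/* hypothetical INT 12h answer */
	unsigned i;

	/* basemem is in KB; dividing by 4 gives the first page index */
	for (i = basemem_kb / 4; i < 160; i++)
		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
	printf("first fixed-up pte: index %u -> %#x\n",
	    basemem_kb / 4, pte[basemem_kb / 4]);
	return (0);
}
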
*/ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); /* * get memory map with INT 15:E820 */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; +#ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } +#endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: ; } while (vmf.vmf_ebx != 0); /* * Perform "base memory" related probes & setup based on SMAP */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) { basemem = 640; } if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ i386_mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. 
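
The E820 loop above grows physmap[] under three rules: regions must arrive in ascending order, a region starting exactly at the end of the previous chunk extends it, and anything overlapping is dropped. A standalone model with invented sample regions; the capacity check is slightly restructured but equivalent.

#include <stdio.h>
#include <stdint.h>

#define PHYSMAP_SIZE	(2 * 8)

static uint64_t physmap[PHYSMAP_SIZE];
static int physmap_idx;

static void
add_region(uint64_t base, uint64_t length)
{
	int i;

	for (i = 0; i <= physmap_idx; i += 2)
		if (base < physmap[i + 1]) {	/* overlap/non-monotonic */
			printf("region %#jx ignored\n", (uintmax_t)base);
			return;
		}
	if (base == physmap[physmap_idx + 1]) {	/* extends the last chunk */
		physmap[physmap_idx + 1] += length;
		return;
	}
	if (physmap_idx + 2 == PHYSMAP_SIZE) {
		printf("too many segments\n");
		return;
	}
	physmap_idx += 2;
	physmap[physmap_idx] = base;
	physmap[physmap_idx + 1] = base + length;
}

int
main(void)
{
	int i;

	add_region(0x0, 0x9f000);		/* base memory */
	add_region(0x100000, 0x1fe00000);	/* extended memory */
	add_region(0x100000, 0x1000);		/* overlaps: dropped */
	for (i = 0; i <= physmap_idx; i += 2)
		printf("%#jx - %#jx\n", (uintmax_t)physmap[i],
		    (uintmax_t)physmap[i + 1]);
	return (0);
}
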
*/ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.physmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); freeenv(cp); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; pte = CMAP1; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; int *ptr = (int *)CADDR1; /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). 
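
The hw.physmem parsing above leans on deliberate switch fall-through: 'g' shifts by 10 three times, 'm' twice, 'k' once, and the wrap-around comparison catches values shifted past 64 bits. A userland sketch of the same logic using strtoull in place of strtouq.

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

static uint64_t
parse_physmem(const char *cp)
{
	char *ep;
	uint64_t allow, sanity;

	sanity = allow = strtoull(cp, &ep, 0);
	if (ep != cp && *ep != '\0') {
		switch (*ep) {
		case 'g': case 'G':
			allow <<= 10;	/* FALLTHROUGH */
		case 'm': case 'M':
			allow <<= 10;	/* FALLTHROUGH */
		case 'k': case 'K':
			allow <<= 10;
			break;
		default:
			allow = sanity = 0;
		}
		if (allow < sanity)	/* shifted past 2^64: overflow */
			allow = 0;
	}
	return (allow);			/* 0 means "invalid, ignore" */
}

int
main(void)
{
	printf("%" PRIu64 "\n", parse_physmem("512m"));	/* 536870912 */
	printf("%" PRIu64 "\n", parse_physmem("1x"));	/* 0 (invalid) */
	return (0);
}
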
*/ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, off, x; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif struct pcpu *pc; proc0.p_uarea = proc0uarea; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; atdevbase = ISA_HOLE_START + KERNBASE; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup(&proc0, &ksegrp0, &kse0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); #ifdef SMP pc = &SMP_prvspace[0].pcpu; gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct privatespace) - 1); #else pc = &__pcpu; gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct pcpu) - 1); #endif gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); /* make ldt memory segments */ /* * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... 
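
ssdtosd() and its inverse shuffle a 32-bit base (split 24+8) and a 20-bit limit (split 16+4) between the soft descriptor and the packed i386 layout. A standalone round-trip model; the struct names here are illustrative, not the segments.h definitions.

#include <stdio.h>
#include <stdint.h>

struct soft_desc { uint32_t base, limit; };
struct split_desc { uint32_t lobase, hibase, lolimit, hilimit; };

static struct split_desc
pack(struct soft_desc s)
{
	struct split_desc d;

	d.lobase = s.base & 0xffffff;	/* low 24 bits of the base */
	d.hibase = s.base >> 24;	/* high 8 bits */
	d.lolimit = s.limit & 0xffff;	/* low 16 bits of the limit */
	d.hilimit = s.limit >> 16;	/* high 4 bits */
	return (d);
}

static struct soft_desc
unpack(struct split_desc d)		/* mirrors sdtossd() */
{
	struct soft_desc s;

	s.base = (d.hibase << 24) | d.lobase;
	s.limit = (d.hilimit << 16) | d.lolimit;
	return (s);
}

int
main(void)
{
	struct soft_desc s = { 0x12345678, 0xfffff };	/* page granular 4G */
	struct soft_desc r = unpack(pack(s));

	printf("base %#x limit %#x\n", r.base, r.limit);
	return (0);
}
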
*/ ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA isa_defaultirq(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
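
setidt() above splits the 32-bit handler address into low and high 16-bit halves around the selector, type, and DPL fields; the DPL is SEL_UPL on gates such as int 0x80 so that user code may invoke them. A standalone model of that packing; the field names echo gate_descriptor but the struct itself is illustrative.

#include <stdio.h>
#include <stdint.h>

struct model_gate {
	uint16_t looffset, selector;
	uint8_t type, dpl, present;
	uint16_t hioffset;
};

static struct model_gate
make_gate(uint32_t func, uint16_t sel, uint8_t typ, uint8_t dpl)
{
	struct model_gate g;

	g.looffset = func & 0xffff;	/* low half of handler address */
	g.selector = sel;		/* kernel code selector */
	g.type = typ;			/* trap vs. interrupt gate */
	g.dpl = dpl;			/* 3 lets user mode "int" through */
	g.present = 1;
	g.hioffset = func >> 16;	/* high half of handler address */
	return (g);
}

int
main(void)
{
	struct model_gate g = make_gate(0xc0301234, 0x08, 15, 3);

	printf("offset = %#x\n",
	    ((uint32_t)g.hioffset << 16) | g.looffset);
	return (0);
}
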
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb 
*pcb; struct trapframe *tp; tp = td->td_frame; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = td->td_pcb; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. 
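
fill_fpregs_xmm()/set_fpregs_xmm() above translate between the fxsave image, where each 80-bit x87 register is padded to 16 bytes, and the fnsave image, where the registers are packed at 10 bytes, while the control/status words carry over unchanged. A simplified standalone model; the layouts are stand-ins, not the real save87/savexmm.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct env { uint16_t cw, sw, tw; };	/* shared control/status/tag words */
struct fp87 { struct env e; uint8_t ac[8][10]; };	/* fnsave-like */
struct fpxmm {
	struct env e;
	struct { uint8_t acc[10], pad[6]; } fp[8];	/* fxsave-like */
};

static void
xmm_to_87(const struct fpxmm *x, struct fp87 *s)
{
	int i;

	memset(s, 0, sizeof(*s));
	s->e = x->e;			/* copy control/status words */
	for (i = 0; i < 8; i++)		/* drop 6 pad bytes per register */
		memcpy(s->ac[i], x->fp[i].acc, 10);
}

int
main(void)
{
	struct fpxmm x = { { 0x037f, 0, 0xffff }, { { { 1 } } } };
	struct fp87 s;

	xmm_to_87(&x, &s);
	printf("cw=%#x first byte=%d\n", s.e.cw, s.ac[0][0]);
	return (0);
}
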
*/ int get_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; tp = td->td_frame; mcp->mc_onstack = sigonstack(tp->tf_esp); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_ebx = tp->tf_ebx; mcp->mc_edx = tp->tf_edx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eax = tp->tf_eax; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_eflags = tp->tf_eflags; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. 
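
The alignment workaround above relies on the fpstate buffer being at least 4-byte aligned: bumping the pointer in 4-byte steps is then guaranteed to reach a 16-byte boundary (which fxsave requires) within the spare slack, after which the state is copied back. A standalone demonstration of the stepping.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
	uint32_t buf[(512 + 16) / 4];	/* state plus spare trailing slack */
	char *addr = (char *)buf + 4;	/* 4-aligned, deliberately not 16 */

	while ((uintptr_t)addr & 0xF)	/* same stepping as the do/while */
		addr += 4;
	printf("aligned at offset %td\n", addr - (char *)buf);
	memset(addr, 0, 512);		/* stand-in for the fxsave-area use */
	return (0);
}
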
*/ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; u_int32_t mask1, mask2; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; i++, mask1 <<= 2, mask2 <<= 2) if ((dbregs->dr[7] & mask1) == mask2) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (suser(td) != 0) { if (dbregs->dr[7] & 0x3) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
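
The %dr7 sanity check in set_dbregs() above walks the eight 2-bit R/W and LEN fields in bits 16-31 and rejects the undefined pattern 10 in any of them; separately, breakpoints enabled by non-superuser callers must point below VM_MAXUSER_ADDRESS. A standalone model of the bit-pattern walk.

#include <stdio.h>

static int
dr7_ok(unsigned int dr7)
{
	unsigned int mask1, mask2;
	int i;

	for (i = 0, mask1 = 0x3 << 16, mask2 = 0x2 << 16; i < 8;
	    i++, mask1 <<= 2, mask2 <<= 2)
		if ((dr7 & mask1) == mask2)	/* field == 10: undefined */
			return (0);
	return (1);
}

int
main(void)
{
	printf("%d\n", dr7_ok(0x00000003));	/* dr0 enabled, execute: ok */
	printf("%d\n", dr7_ok(0x00020003));	/* R/W0 == 10: rejected */
	return (0);
}
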
*/ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i=0; i<nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/i386/i386/mpboot.s =================================================================== --- head/sys/i386/i386/mpboot.s (revision 112840) +++ head/sys/i386/i386/mpboot.s (revision 112841) @@ -1,272 +1,282 @@ /* * Copyright (c) 1995, Jack F. Vogel * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Jack F. Vogel * 4. The name of the developer may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * mpboot.s: FreeBSD machine support for the Intel MP Spec * multiprocessor systems. * * $FreeBSD$ */ #include /* miscellaneous asm macros */ #include #include #include "assym.s" +#define R(x) ((x)-KERNBASE) + /* * this code MUST be enabled here and in mp_machdep.c * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) && !defined(PC98) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define CHECKPOINT(A,D) \ movb $(A),%al ; \ outb %al,$CMOS_REG ; \ movb $(D),%al ; \ outb %al,$CMOS_DATA #else #define CHECKPOINT(A,D) #endif /* CHECK_POINTS */ /* * the APs enter here from their trampoline code (bootMP, below) */ .p2align 4 NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) /* Now enable paging mode */ - movl IdlePTD-KERNBASE, %eax +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else + movl R(IdlePTD), %eax movl %eax,%cr3 +#endif movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ movl bootSTK,%esp /* boot stack end loc. */ pushl $mp_begin /* jump to high mem */ ret /* * Wait for the booting CPU to signal startup */ mp_begin: /* now running relocated at KERNBASE */ CHECKPOINT(0x37, 4) call init_secondary /* load i386 tables */ CHECKPOINT(0x38, 5) /* * If the [BSP] CPU has support for VME, turn it on. */ testl $CPUID_VME, cpu_feature /* XXX WRONG! BSP! */ jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* disable the APIC, just to be SURE */ movl lapic+LA_SVR, %eax /* get spurious vector reg. */ andl $~APIC_SVR_SWEN, %eax /* clear software enable bit */ movl %eax, lapic+LA_SVR /* signal our startup to the BSP */ movl lapic+LA_VER, %eax /* our version reg contents */ movl %eax, cpu_apic_versions /* into [ 0 ] */ incl mp_ncpus /* signal BSP */ CHECKPOINT(0x39, 6) /* Now, let's prepare for some REAL WORK :-) This doesn't return. */ call ap_init /* * This is the embedded trampoline or bootstrap that is * copied into 'real-mode' low memory, it is where the * secondary processor "wakes up". When it is executed * the processor will eventually jump into the routine * MPentry, which resides in normal kernel text above * 1Meg. 
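*/

/*
 * For illustration only (not part of this diff): the virtual-to-physical
 * rebasing done by the R(x) macro added above.  Until CR0.PG is set the
 * AP can only use physical addresses, so linked kernel symbols must be
 * adjusted down by KERNBASE; on the PAE path, %cr3 is loaded with the
 * PDPT and CR4.PAE is set before paging is enabled.  The sample address
 * below is hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define KERNBASE 0xc0000000u	/* conventional i386 kernel base */
#define R(x) ((x) - KERNBASE)	/* same rebase as the macro above */

int
main(void)
{
	uint32_t idleptd_link_addr = 0xc0401000u;	/* hypothetical linked VA */

	printf("physical: 0x%08x\n", R(idleptd_link_addr));
	return (0);
}

/*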
-jackv */ .data ALIGN_DATA /* just to be sure */ BOOTMP1: NON_GPROF_ENTRY(bootMP) .code16 cli CHECKPOINT(0x34, 1) /* First guarantee a 'clean slate' */ xorl %eax, %eax movl %eax, %ebx movl %eax, %ecx movl %eax, %edx movl %eax, %esi movl %eax, %edi /* set up data segments */ mov %cs, %ax mov %ax, %ds mov %ax, %es mov %ax, %fs mov %ax, %gs mov %ax, %ss mov $(boot_stk-bootMP), %esp /* Now load the global descriptor table */ lgdt MP_GDTptr-bootMP /* Enable protected mode */ movl %cr0, %eax orl $CR0_PE, %eax movl %eax, %cr0 /* * make intrasegment jump to flush the processor pipeline and * reload CS register */ pushl $0x18 pushl $(protmode-bootMP) lretl .code32 protmode: CHECKPOINT(0x35, 2) /* * we are NOW running for the first time with %eip * having the full physical address, BUT we still * are using a segment descriptor with the origin * not matching the booting kernel. * * SO NOW... for the BIG Jump into kernel's segment * and physical text above 1 Meg. */ mov $0x10, %ebx movw %bx, %ds movw %bx, %es movw %bx, %fs movw %bx, %gs movw %bx, %ss .globl bigJump bigJump: /* this will be modified by mpInstallTramp() */ ljmp $0x08, $0 /* far jmp to MPentry() */ dead: hlt /* We should never get here */ jmp dead /* * MP boot strap Global Descriptor Table */ .p2align 4 .globl MP_GDT .globl bootCodeSeg .globl bootDataSeg MP_GDT: nulldesc: /* offset = 0x0 */ .word 0x0 .word 0x0 .byte 0x0 .byte 0x0 .byte 0x0 .byte 0x0 kernelcode: /* offset = 0x08 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0K */ .byte 0x9f /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..32 */ kerneldata: /* offset = 0x10 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0k */ .byte 0x93 /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..32 */ bootcode: /* offset = 0x18 */ .word 0xffff /* segment limit 0..15 */ bootCodeSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ .byte 0x9e /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /*segment base 24..32 */ bootdata: /* offset = 0x20 */ .word 0xffff bootDataSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ .byte 0x92 .byte 0xcf .byte 0x0 /* * GDT pointer for the lgdt call */ .globl mp_gdtbase MP_GDTptr: mp_gdtlimit: .word 0x0028 mp_gdtbase: /* this will be modified by mpInstallTramp() */ .long 0 .space 0x100 /* space for boot_stk - 1st temporary stack */ boot_stk: BOOTMP2: .globl bootMP_size bootMP_size: .long BOOTMP2 - BOOTMP1 Index: head/sys/i386/i386/pmap.c =================================================================== --- head/sys/i386/i386/pmap.c (revision 112840) +++ head/sys/i386/i386/pmap.c (revision 112841) @@ -1,3425 +1,3473 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD$ */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Manages physical address maps. 
* * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) || defined(APIC_IO) #include #include #include #include #endif /* SMP || APIC_IO */ #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif #define MINPV 2048 #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) static int protection_codes[8]; struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_paddr_t avail_start; /* PA of first available physical page */ vm_paddr_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
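*/

/*
 * For illustration only (not part of this diff): how the pmap_pde()-
 * style macros above split an i386 virtual address.  The constants
 * mirror the classic non-PAE layout (4KB pages, 4MB per page table).
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PDRSHIFT   22

int
main(void)
{
	uint32_t va = 0xc0412345u;	/* arbitrary example address */

	printf("pde index: %u\n", (unsigned)(va >> PDRSHIFT));
	printf("pte index: %u\n", (unsigned)((va >> PAGE_SHIFT) & 0x3ffu));
	printf("offset:    0x%03x\n", (unsigned)(va & 0xfffu));
	return (0);
}

/*
 * Boot-detected paging feature flags follow.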
*/ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ static int nkpt; vm_offset_t kernel_vm_end; extern u_int32_t KERNend; +#ifdef PAE +static uma_zone_t pdptzone; +#endif + /* * Data for the pv entry allocation mechanism */ static uma_zone_t pvzone; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; int pmap_pagedaemon_waken; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; static pt_entry_t *CMAP2, *CMAP3, *ptmmap; caddr_t CADDR1 = 0, ptvmmap = 0; static caddr_t CADDR2, CADDR3; static struct mtx CMAPCADDR12_lock; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp = 0; /* * Crashdump maps. */ static pt_entry_t *pt_crashdumpmap; static caddr_t crashdumpmap; #ifdef SMP extern pt_entry_t *SMPpt; #endif static pt_entry_t *PMAP1 = 0; static pt_entry_t *PADDR1 = 0; static PMAP_INLINE void free_pv_entry(pv_entry_t pv); static pv_entry_t get_pv_entry(void); static void i386_protection_init(void); static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va); static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex); static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); -static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#ifdef PAE +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif static pd_entry_t pdir4mb; CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * Move the kernel virtual free pointer to the next * 4MB. This is used to help improve performance * by using a large (4MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) return newaddr; #endif #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); #endif return newaddr; } /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(firstaddr, loadaddr) vm_paddr_t firstaddr; vm_paddr_t loadaddr; { vm_offset_t va; pt_entry_t *pte; int i; avail_start = firstaddr; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. 
It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize protection array. */ i386_protection_init(); /* * Initialize the kernel pmap (which is statically allocated). */ kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the idle process page zeroing. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP2, CADDR2, 1) SYSMAP(caddr_t, CMAP3, CADDR3, 1) mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF); /* * Crashdump maps. */ SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. * XXX ptmmap is not used. */ SYSMAP(caddr_t, ptmmap, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. * XXX msgbufmap is not used. */ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, atop(round_page(MSGBUF_SIZE))) /* * ptemap is used for pmap_pte_quick */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); virtual_avail = va; *CMAP1 = *CMAP2 = 0; for (i = 0; i < NKPT; i++) PTD[i] = 0; pgeflag = 0; #ifndef DISABLE_PG_G if (cpu_feature & CPUID_PGE) pgeflag = PG_G; #endif #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) { printf("Warning: Pentium 4 cpu: PG_G disabled (global flag)\n"); pgeflag = 0; } #endif /* * Initialize the 4MB page size flag */ pseflag = 0; /* * The 4MB page version of the initial * kernel page mapping. */ pdir4mb = 0; #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) pseflag = PG_PS; #endif #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) { printf("Warning: Pentium 4 cpu: PG_PS disabled (4MB pages)\n"); pseflag = 0; } #endif #ifndef DISABLE_PSE if (pseflag) { pd_entry_t ptditmp; /* * Note that we have enabled PSE mode */ ptditmp = *(PTmap + i386_btop(KERNBASE)); ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; } #endif #ifndef SMP /* * Turn on PGE/PSE. SMP does this later on since the * 4K page tables are required for AP boot (for now). * XXX fixme. */ pmap_set_opt(); #endif #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! 
(non-SMP hardware?)"); /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif invltlb(); } /* * Enable 4MB page mode for MP startup. Turn on PG_G support. * BSP will run this after all the AP's have started up. */ void pmap_set_opt(void) { pt_entry_t *pte; vm_offset_t va, endva; if (pgeflag && (cpu_feature & CPUID_PGE)) { load_cr4(rcr4() | CR4_PGE); invltlb(); /* Insurance */ } #ifndef DISABLE_PSE if (pseflag && (cpu_feature & CPUID_PSE)) { load_cr4(rcr4() | CR4_PSE); invltlb(); /* Insurance */ } #endif if (PCPU_GET(cpuid) == 0) { #ifndef DISABLE_PSE if (pdir4mb) { kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; invltlb(); /* Insurance */ } #endif if (pgeflag) { /* Turn on PG_G for text, data, bss pages. */ va = (vm_offset_t)btext; #ifndef DISABLE_PSE if (pseflag && (cpu_feature & CPUID_PSE)) { if (va < KERNBASE + (1 << PDRSHIFT)) va = KERNBASE + (1 << PDRSHIFT); } #endif endva = KERNBASE + KERNend; while (va < endva) { pte = vtopte(va); if (*pte) *pte |= pgeflag; va += PAGE_SIZE; } invltlb(); /* Insurance */ } /* * We do not need to broadcast the invltlb here, because * each AP does it the moment it is released from the boot * lock. See ap_init(). */ } } static void * -pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { *flags = UMA_SLAB_PRIV; return (void *)kmem_alloc(kernel_map, bytes); } +#ifdef PAE +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); +} +#endif + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_paddr_t phys_start, phys_end; { int i; int initial_pvs; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ for(i = 0; i < vm_page_array_size; i++) { vm_page_t m; m = &vm_page_array[i]; TAILQ_INIT(&m->md.pv_list); m->md.pv_list_count = 0; } /* * init the pv free list */ initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); - uma_zone_set_allocf(pvzone, pmap_allocf); + uma_zone_set_allocf(pvzone, pmap_pv_allocf); uma_prealloc(pvzone, initial_pvs); +#ifdef PAE + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif + /* * Now it is safe to enable pv_table recording. */ pmap_initialized = TRUE; } /* * Initialize the address space (zone) for the pv_entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ void pmap_init2() { int shpgperproc = PMAP_SHPGPERPROC; TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_page_array_size; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); } /*************************************************** * Low level helper routines..... 
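***************************************************/

/*
 * For illustration only (not part of this diff): the sizing arithmetic
 * pmap_init2() uses above.  The maxproc and page-count values below are
 * assumed purely to get a concrete number.
 */
#include <stdio.h>

int
main(void)
{
	int shpgperproc = 200;			/* the PMAP_SHPGPERPROC default */
	int maxproc = 6164;			/* assumed tunable value */
	int vm_page_array_size = 262144;	/* assumed: 1GB of 4KB pages */
	int pv_entry_max, pv_entry_high_water;

	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	pv_entry_high_water = 9 * (pv_entry_max / 10);	/* recover at 90% */
	printf("max %d, high water %d\n", pv_entry_max, pv_entry_high_water);
	return (0);
}

/***************************************************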
***************************************************/ #if defined(PMAP_DIAGNOSTIC) /* * This code checks for non-writeable/modified pages. * This should be an invalid condition. */ static int pmap_nw_modified(pt_entry_t ptea) { int pte; pte = (int) ptea; if ((pte & (PG_M|PG_RW)) == PG_M) return 1; else return 0; } #endif /* * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ static PMAP_INLINE int pmap_track_modified(vm_offset_t va) { if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) return 1; else return 0; } #ifdef I386_CPU /* * i386 only has "invalidate everything" and no SMP to worry about. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } #else /* !I386_CPU */ #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { u_int cpumask; u_int other_cpus; critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invlpg(va); if (pmap->pm_active & other_cpus) smp_masked_invlpg(pmap->pm_active & other_cpus, va); } critical_exit(); } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { u_int cpumask; u_int other_cpus; vm_offset_t addr; critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); if (pmap->pm_active & other_cpus) smp_masked_invlpg_range(pmap->pm_active & other_cpus, sva, eva); } critical_exit(); } void pmap_invalidate_all(pmap_t pmap) { u_int cpumask; u_int other_cpus; #ifdef SWTCH_OPTIM_STATS tlb_flush_count++; #endif critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invltlb(); if (pmap->pm_active & other_cpus) smp_masked_invltlb(pmap->pm_active & other_cpus); } critical_exit(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. 
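*/

/*
 * For illustration only (not part of this diff): the dispatch decision
 * shared by the SMP pmap_invalidate_*() functions above, reduced to
 * plain C.  The invlpg and IPI operations are stubbed with prints, and
 * the mask width is assumed to be 32 bits.
 */
#include <stdio.h>

typedef unsigned int cpumask_t;

static void
invalidate_page_dispatch(cpumask_t pm_active, cpumask_t self,
    cpumask_t all_cpus, unsigned int va)
{
	cpumask_t other_cpus = all_cpus & ~self;

	if (pm_active == (cpumask_t)-1 || pm_active == all_cpus) {
		printf("invlpg(0x%x) locally, IPI to all\n", va);
	} else {
		if (pm_active & self)
			printf("invlpg(0x%x) locally\n", va);
		if (pm_active & other_cpus)
			printf("IPI for 0x%x to mask 0x%x\n", va,
			    pm_active & other_cpus);
	}
}

int
main(void)
{
	invalidate_page_dispatch(0x1, 0x1, 0xf, 0xdeadb000u);
	invalidate_page_dispatch((cpumask_t)-1, 0x1, 0xf, 0xdeadb000u);
	return (0);
}

/*
 * Uniprocessor variants: a local invlpg/invltlb always suffices.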
*/ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || pmap->pm_active) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } #endif /* !SMP */ #endif /* !I386_CPU */ /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. */ pt_entry_t * pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return vtopte(va); newpf = *pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1); } return PADDR1 + (i386_btop(va) & (NPTEPG - 1)); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; if (pmap == 0) return 0; pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) { rtval = (pde & ~PDRMASK) | (va & PDRMASK); return rtval; } pte = pmap_pte_quick(pmap, va); rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); return rtval; } return 0; } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); *pte = pa | PG_RW | PG_V | pgeflag; } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); *pte = 0; } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; va = sva = *virt; while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. 
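*/

/*
 * For illustration only (not part of this diff): the batching pattern
 * pmap_qenter() uses below.  Filling N PTEs costs a single ranged
 * shootdown rather than N single-page IPIs; map_one() and the printf
 * stand in for the PTE store and the shootdown.
 */
#include <stdio.h>

#define PAGE_SIZE 4096u

static void
map_one(unsigned int va, unsigned int pa)
{
	(void)va; (void)pa;	/* would write pa | PG_RW | PG_V into the PTE */
}

static void
qenter_sketch(unsigned int sva, const unsigned int *pa, int count)
{
	unsigned int va = sva;
	int i;

	for (i = 0; i < count; i++, va += PAGE_SIZE)
		map_one(va, pa[i]);	/* no TLB work yet */
	printf("shootdown [0x%x, 0x%x)\n", sva, va);	/* one ranged IPI */
}

int
main(void)
{
	unsigned int pages[3] = { 0x1000, 0x8000, 0x3000 };

	qenter_sketch(0xc1000000u, pages, 3);
	return (0);
}

/*
 * pmap_qenter() and pmap_qremove() below both follow this pattern.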
*/ void pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; retry: m = vm_page_lookup(object, pindex); if (m != NULL) { vm_page_lock_queues(); if (vm_page_sleep_if_busy(m, FALSE, "pplookp")) goto retry; vm_page_unlock_queues(); } return m; } #ifndef KSTACK_MAX_PAGES #define KSTACK_MAX_PAGES 32 #endif /* * Create the kernel stack (including pcb for i386) for a new thread. * This routine directly affects the fork perf for a process and * create performance for a thread. */ void pmap_new_thread(struct thread *td, int pages) { int i; vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_page_t m; vm_offset_t ks; /* Bounds check */ if (pages <= 1) pages = KSTACK_PAGES; else if (pages > KSTACK_MAX_PAGES) pages = KSTACK_MAX_PAGES; /* * allocate object for the kstack */ ksobj = vm_object_allocate(OBJT_DEFAULT, pages); td->td_kstack_obj = ksobj; /* get a kernel virtual address for the kstack for this thread */ #ifdef KSTACK_GUARD ks = kmem_alloc_nofault(kernel_map, (pages + 1) * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); if (*vtopte(ks) != 0) pmap_qremove(ks, 1); ks += PAGE_SIZE; td->td_kstack = ks; #else /* get a kernel virtual address for the kstack for this thread */ ks = kmem_alloc_nofault(kernel_map, pages * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); td->td_kstack = ks; #endif /* * Knowing the number of pages allocated is useful when you * want to deallocate them. */ td->td_kstack_pages = pages; /* * For the length of the stack, link in a real page of ram for each * page of stack. */ for (i = 0; i < pages; i++) { /* * Get a kernel stack page */ m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED); ma[i] = m; vm_page_lock_queues(); vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); m->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); } pmap_qenter(ks, ma, pages); } /* * Dispose the kernel stack for a thread that has exited. * This routine directly impacts the exit perf of a process and thread. */ void pmap_dispose_thread(td) struct thread *td; { int i; int pages; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; pmap_qremove(ks, pages); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); vm_page_lock_queues(); vm_page_busy(m); vm_page_unwire(m, 0); vm_page_free(m); vm_page_unlock_queues(); } /* * Free the space that this stack was mapped to in the kernel * address map. */ #ifdef KSTACK_GUARD kmem_free(kernel_map, ks - PAGE_SIZE, (pages + 1) * PAGE_SIZE); #else kmem_free(kernel_map, ks, pages * PAGE_SIZE); #endif vm_object_deallocate(ksobj); } /* * Set up a variable sized alternate kstack. Though it may look MI, it may * need to be different on certain arches like ia64. 
*/ void pmap_new_altkstack(struct thread *td, int pages) { /* shuffle the original stack */ td->td_altkstack_obj = td->td_kstack_obj; td->td_altkstack = td->td_kstack; td->td_altkstack_pages = td->td_kstack_pages; pmap_new_thread(td, pages); } void pmap_dispose_altkstack(td) struct thread *td; { pmap_dispose_thread(td); /* restore the original kstack */ td->td_kstack = td->td_altkstack; td->td_kstack_obj = td->td_altkstack_obj; td->td_kstack_pages = td->td_altkstack_pages; td->td_altkstack = 0; td->td_altkstack_obj = NULL; td->td_altkstack_pages = 0; } /* * Allow the Kernel stack for a thread to be prejudicially paged out. */ void pmap_swapout_thread(td) struct thread *td; { int i; int pages; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; pmap_qremove(ks, pages); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_swapout_thread: kstack already missing?"); vm_page_lock_queues(); vm_page_dirty(m); vm_page_unwire(m, 0); vm_page_unlock_queues(); } } /* * Bring the kernel stack for a specified thread back in. */ void pmap_swapin_thread(td) struct thread *td; { int i, rv; int pages; vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; for (i = 0; i < pages; i++) { m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(ksobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid); m = vm_page_lookup(ksobj, i); m->valid = VM_PAGE_BITS_ALL; } ma[i] = m; vm_page_lock_queues(); vm_page_wire(m); vm_page_wakeup(m); vm_page_unlock_queues(); } pmap_qenter(ks, ma, pages); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt")) vm_page_lock_queues(); if (m->hold_count == 0) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if (pmap_is_current(pmap)) { /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); } /* * If the page is finally unwired, simply free it. */ --m->wire_count; if (m->wire_count == 0) { vm_page_busy(m); vm_page_free_zero(m); atomic_subtract_int(&cnt.v_wire_count, 1); } return 1; } return 0; } static PMAP_INLINE int pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { vm_page_unhold(m); if (m->hold_count == 0) return _pmap_unwire_pte_hold(pmap, m); else return 0; } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. 
*/ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) { unsigned ptepindex; if (va >= VM_MAXUSER_ADDRESS) return 0; if (mpte == NULL) { ptepindex = (va >> PDRSHIFT); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; } else { while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL && vm_page_sleep_if_busy(mpte, FALSE, "pulook")) vm_page_lock_queues(); } } return pmap_unwire_pte_hold(pmap, mpte); } void pmap_pinit0(pmap) struct pmap *pmap; { pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg[NPGPTD]; vm_paddr_t pa; int i; /* * No need to allocate page table space yet but we do need a valid * page directory table. */ - if (pmap->pm_pdir == NULL) + if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, NBPTD); +#ifdef PAE + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif + } /* * allocate object for the ptes */ if (pmap->pm_pteobj == NULL) pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + NPGPTD); /* * allocate the page directory page(s) */ for (i = 0; i < NPGPTD; i++) { ptdpg[i] = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); vm_page_lock_queues(); vm_page_flag_clear(ptdpg[i], PG_BUSY); ptdpg[i]->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) { if ((ptdpg[i]->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); } mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* Wire in kernel global address entries. */ /* XXX copies current process, does not fill in MPPTDI */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); #ifdef SMP pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; #endif /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; +#ifdef PAE + pmap->pm_pdpt[i] = pa | PG_V; +#endif } pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Wire in kernel global address entries. To avoid a race condition * between pmap initialization and pmap_growkernel, this procedure * should be called after the vmspace is attached to the process * but before this pmap is activated. */ void pmap_pinit2(pmap) struct pmap *pmap; { /* XXX: Remove this stub when no longer called */ } /* * this routine is called if the page table page is not * mapped correctly. 
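*/

/*
 * For illustration only (not part of this diff): the two PAE constraints
 * that pmap_pinit() asserts above.  The four-entry PDPT must be 32-byte
 * aligned (in PAE mode %cr3 keeps only bits 31:5 of its base) and must
 * sit below 4GB, since %cr3 is still a 32-bit register; that is why the
 * pdptzone allocates through contigmalloc with a 4GB upper bound.
 */
#include <stdio.h>
#include <stdint.h>

#define NPGPTD 4		/* PAE: four page-directory pages */
typedef uint64_t pdpt_entry_t;	/* PAE entries are 64 bits wide */

static int
pdpt_ok(uint64_t pdpt_pa)
{
	if (pdpt_pa & (NPGPTD * sizeof(pdpt_entry_t) - 1))
		return (0);	/* misaligned: low five bits in use */
	if (pdpt_pa >= (4ULL << 30))
		return (0);	/* unreachable through a 32-bit %cr3 */
	return (1);
}

int
main(void)
{
	printf("%d %d\n", pdpt_ok(0x1000), pdpt_ok(0x1010));
	return (0);
}

/*
 * Allocate and wire the page table page covering ptepindex.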
*/ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; unsigned ptepindex; { vm_paddr_t ptepa; vm_offset_t pteva; vm_page_t m; /* * Find or fabricate a new pagetable page */ m = vm_page_grab(pmap->pm_pteobj, ptepindex, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY); KASSERT(m->queue == PQ_NONE, ("_pmap_allocpte: %p->queue != PQ_NONE", m)); /* * Increment the hold count for the page table page * (denoting a new mapping.) */ m->hold_count++; /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); /* * Try to use the new mapping, but if we cannot, then * do it with the routine that maps the page explicitly. */ if ((m->flags & PG_ZERO) == 0) { if (pmap_is_current(pmap)) { pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex); bzero((caddr_t) pteva, PAGE_SIZE); } else { pmap_zero_page(m); } } vm_page_lock_queues(); m->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(m, PG_ZERO); vm_page_wakeup(m); vm_page_unlock_queues(); return m; } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va) { unsigned ptepindex; pd_entry_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; pmap_invalidate_all(kernel_pmap); } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { /* * In order to get the page table page, try the * hint first. */ if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { m = pmap->pm_pteobj->root; } else { m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); } m->hold_count++; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap_t pmap) { vm_object_t object; vm_page_t m; int i; object = pmap->pm_pteobj; KASSERT(object->ref_count == 1, ("pmap_release: pteobj reference count %d != 1", object->ref_count)); KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); bzero(pmap->pm_pdir + KPTDI, nkpt * sizeof(*pmap->pm_pdir)); for (i = 0; i < NPGPTD; i++) { pmap->pm_pdir[PTDPTDI + i] = 0; pmap->pm_pdir[APTDPTDI + i] = 0; } #ifdef SMP pmap->pm_pdir[MPPTDI] = 0; #endif pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); vm_page_lock_queues(); for (i = 0; i < NPGPTD; i++) { m = TAILQ_FIRST(&object->memq); +#ifdef PAE + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_busy(m); vm_page_free_zero(m); } KASSERT(TAILQ_EMPTY(&object->memq), ("pmap_release: leaking page table pages")); vm_page_unlock_queues(); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct pmap *pmap; int s; vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; s = splhigh(); mtx_assert(&kernel_map->system_mtx, MA_OWNED); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); nkpt++; } } addr = roundup2(addr, PAGE_SIZE * NPTEPG); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } /* * This index is bogus, but out of the way */ nkpg = vm_page_alloc(NULL, nkpt, VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(PTD, kernel_vm_end) = newpdir; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { *pmap_pde(pmap, kernel_vm_end) = newpdir; } mtx_unlock_spin(&allpmaps_lock); kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ static PMAP_INLINE void free_pv_entry(pv_entry_t pv) { pv_entry_count--; uma_zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. 
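*/

/*
 * For illustration only (not part of this diff): the boundary arithmetic
 * pmap_growkernel() relies on above.  Each page-table page maps
 * PAGE_SIZE * NPTEPG bytes of KVA (4MB in the non-PAE layout assumed
 * here); NBPT is a local shorthand, not a kernel name.
 */
#include <stdio.h>

#define PAGE_SIZE 4096u
#define NPTEPG    1024u			/* non-PAE: 1024 PTEs per page */
#define NBPT      (PAGE_SIZE * NPTEPG)	/* KVA mapped per PT page */

#define roundup2(x, y) (((x) + ((y) - 1)) & ~((y) - 1))	/* y: power of 2 */

int
main(void)
{
	unsigned int addr = 0xc2345678u;

	printf("grow to 0x%08x\n", roundup2(addr, NBPT));
	printf("next pt 0x%08x\n", (addr + NBPT) & ~(NBPT - 1u));
	return (0);
}

/*
 * pv entries are allocated M_NOWAIT, and the pagedaemon is woken the
 * first time the count crosses the high-water mark.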
*/ static pv_entry_t get_pv_entry(void) { pv_entry_count++; if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) && (pmap_pagedaemon_waken == 0)) { pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } return uma_zalloc(pvzone, M_NOWAIT); } /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ static int pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { pv_entry_t pv; int rtval; int s; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (m->md.pv_list_count < pmap->pm_stats.resident_count) { TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pmap == pv->pv_pmap && va == pv->pv_va) break; } } else { TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { if (va == pv->pv_va) break; } } rtval = 0; if (pv) { rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_WRITEABLE); TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } splx(s); return rtval; } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) { int s; pv_entry_t pv; s = splvm(); pv = get_pv_entry(); pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_ptem = mpte; TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); m->md.pv_list_count++; splx(s); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) { pt_entry_t oldpte; vm_page_t m; - oldpte = atomic_readandclear_int(ptq); + oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) oldpte)) { printf( "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", va, oldpte); } #endif if (pmap_track_modified(va)) vm_page_dirty(m); } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); return pmap_remove_entry(pmap, m, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va) { pt_entry_t *pte; if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; int anyvalid; if (pmap == NULL) return; if (pmap->pm_stats.resident_count == 0) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva); return; } anyvalid = 0; for (; sva < eva; sva = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. 
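*/

/*
 * For illustration only (not part of this diff): the contract behind the
 * pte_load_clear() calls introduced above.  The old entry must be read
 * and the slot zeroed in one atomic step, or a hardware PG_A/PG_M update
 * racing between the load and the store could be lost; with PAE enabled
 * a pt_entry_t is 64 bits wide, so the old 32-bit
 * atomic_readandclear_int() no longer covers the whole entry.  The
 * compiler builtin below is only a stand-in for the real primitive.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t pt_entry_t;	/* PAE-sized page table entry */

static pt_entry_t
pte_load_clear_sketch(pt_entry_t *ptep)
{
	/* atomically fetch the old PTE while storing zero */
	return (__atomic_exchange_n(ptep, (pt_entry_t)0, __ATOMIC_SEQ_CST));
}

int
main(void)
{
	pt_entry_t pte = 0x0000000123456067ULL;	/* sample 64-bit entry */
	pt_entry_t old = pte_load_clear_sketch(&pte);

	printf("old %llx now %llx\n", (unsigned long long)old,
	    (unsigned long long)pte);
	return (0);
}

/*
 * Advance sva to the first address covered by the next page table.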
*/ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anyvalid = 1; continue; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (; sva != pdnxt; sva += PAGE_SIZE) { if ((pte = pmap_pte_quick(pmap, sva)) == NULL || *pte == 0) continue; anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva)) break; } } if (anyvalid) pmap_invalidate_all(pmap); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { register pv_entry_t pv; pt_entry_t *pte, tpte; int s; #if defined(PMAP_DIAGNOSTIC) /* * XXX This makes pmap_remove_all() illegal for non-managed pages! */ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); } #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - tpte = atomic_readandclear_int(pte); + tpte = pte_load_clear(pte); if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf( "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", pv->pv_va, tpte); } #endif if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } pmap_invalidate_page(pv->pv_pmap, pv->pv_va); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } vm_page_flag_clear(m, PG_WRITEABLE); splx(s); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; int anychanged; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; anychanged = 0; for (; sva < eva; sva = pdnxt) { unsigned pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. 
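*/

/*
 * For illustration only (not part of this diff): the write-protection
 * step the small-page loop below applies, reduced to its essentials.
 * Referenced/modified state is harvested into the vm_page before PG_A,
 * PG_M and write access are cleared; the bit values mirror the i386 PTE
 * layout.
 */
#include <stdio.h>
#include <stdint.h>

#define PG_RW 0x002u
#define PG_A  0x020u
#define PG_M  0x040u

static uint32_t
protect_pte(uint32_t pbits, int *referenced, int *dirty)
{
	*referenced = (pbits & PG_A) != 0;	/* becomes PG_REFERENCED */
	*dirty = (pbits & PG_M) != 0;		/* becomes vm_page_dirty() */
	return (pbits & ~(PG_A | PG_M | PG_RW));
}

int
main(void)
{
	int r, d;
	uint32_t pte = protect_pte(0x00123067u, &r, &d);

	printf("pte 0x%08x referenced %d dirty %d\n", pte, r, d);
	return (0);
}

/*
 * A 4MB mapping is write-protected in place by clearing PG_RW and PG_M
 * in its PDE.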
*/ if ((ptpaddr & PG_PS) != 0) { pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anychanged = 1; continue; } if (pdnxt > eva) pdnxt = eva; for (; sva != pdnxt; sva += PAGE_SIZE) { pt_entry_t pbits; pt_entry_t *pte; vm_page_t m; if ((pte = pmap_pte_quick(pmap, sva)) == NULL) continue; pbits = *pte; if (pbits & PG_MANAGED) { m = NULL; if (pbits & PG_A) { m = PHYS_TO_VM_PAGE(pbits); vm_page_flag_set(m, PG_REFERENCED); pbits &= ~PG_A; } if ((pbits & PG_M) != 0 && pmap_track_modified(sva)) { if (m == NULL) m = PHYS_TO_VM_PAGE(pbits); vm_page_dirty(m); pbits &= ~PG_M; } } pbits &= ~PG_RW; if (pbits != *pte) { *pte = pbits; anychanged = 1; } } } if (anychanged) pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { vm_paddr_t pa; register pt_entry_t *pte; vm_paddr_t opa; pt_entry_t origpte, newpte; vm_page_t mpte; if (pmap == NULL) return; va &= PG_FRAME; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va); } #if 0 && defined(PMAP_DIAGNOSTIC) else { pd_entry_t *pdeaddr = pmap_pde(pmap, va); origpte = *pdeaddr; if ((origpte & PG_V) == 0) { panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", pmap->pm_pdir[PTDPTDI], origpte, va); } } #endif pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; origpte = *pte; opa = origpte & PG_FRAME; if (origpte & PG_PS) panic("pmap_enter: attempted pmap_enter on 4MB page"); /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) origpte)) { printf( "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", va, origpte); } #endif /* * Remove extra pte reference */ if (mpte) mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; pmap_invalidate_page(pmap, va); } return; } /* * We might be turning off write access to the page, * so we go ahead and sense modify status. 
*/ if (origpte & PG_MANAGED) { if ((origpte & PG_M) && pmap_track_modified(va)) { vm_page_t om; om = PHYS_TO_VM_PAGE(opa); vm_page_dirty(om); } pa |= PG_MANAGED; } goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { int err; vm_page_lock_queues(); err = pmap_remove_pte(pmap, pte, va); vm_page_unlock_queues(); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_initialized && (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { pmap_insert_entry(pmap, va, mpte, m); pa |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { pmap_invalidate_page(pmap, va); } } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { unsigned ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->hold_count++; } else { retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) panic("pmap_enter_quick: unexpected mapping into 4MB page"); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; } else { mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); } if (mpte == NULL) goto retry; mpte->hold_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte != NULL) { vm_page_lock_queues(); pmap_unwire_pte_hold(pmap, mpte); vm_page_unlock_queues(); } return 0; } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) pmap_insert_entry(pmap, va, mpte, m); /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m); /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) *pte = pa | PG_V | PG_U; else *pte = pa | PG_V | PG_U | PG_MANAGED; return mpte; } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. 
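*/

/*
 * For illustration only (not part of this diff): how the "validate:"
 * step of pmap_enter() above composes the new PTE.  Bit values mirror
 * the i386 layout; PG_W is the software wired bit kept in one of the
 * PTE's available bits.
 */
#include <stdio.h>
#include <stdint.h>

#define PG_V  0x001u
#define PG_RW 0x002u
#define PG_U  0x004u
#define PG_W  0x200u	/* software-defined wired bit */

static uint32_t
make_pte(uint32_t pa, int writable, int wired, int user, uint32_t pgeflag)
{
	uint32_t newpte = pa | PG_V | (writable ? PG_RW : 0u);

	if (wired)
		newpte |= PG_W;
	if (user)
		newpte |= PG_U;
	else
		newpte |= pgeflag;	/* kernel mappings may be global */
	return (newpte);
}

int
main(void)
{
	printf("0x%08x\n", make_pte(0x00123000u, 1, 0, 1, 0u));
	return (0);
}

/*
 * The mapping handed back lives in the crashdumpmap window.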
*/ void * pmap_kenter_temporary(vm_offset_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); #ifndef I386_CPU invlpg(va); #else invltlb(); #endif return ((void *)crashdumpmap); } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int limit) { vm_offset_t tmpidx; int psize; vm_page_t p, mpte; if (pmap == NULL || object == NULL) return; /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ if (pseflag && (object->type == OBJT_DEVICE) && ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { int i; vm_page_t m[1]; unsigned int ptepindex; int npdes; pd_entry_t ptepa; if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) return; retry: p = vm_page_lookup(object, pindex); if (p != NULL) { vm_page_lock_queues(); if (vm_page_sleep_if_busy(p, FALSE, "init4p")) goto retry; } else { p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); if (p == NULL) return; m[0] = p; if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { vm_page_lock_queues(); vm_page_free(p); vm_page_unlock_queues(); return; } p = vm_page_lookup(object, pindex); vm_page_lock_queues(); vm_page_wakeup(p); } vm_page_unlock_queues(); ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) { return; } p->valid = VM_PAGE_BITS_ALL; pmap->pm_stats.resident_count += size >> PAGE_SHIFT; npdes = size >> PDRSHIFT; for(i = 0; i < npdes; i++) { pmap->pm_pdir[ptepindex] = ptepa | PG_U | PG_RW | PG_V | PG_PS; ptepa += NBPDR; ptepindex += 1; } pmap_invalidate_all(kernel_pmap); return; } psize = i386_btop(size); if ((object->type != OBJT_VNODE) || ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) { if (object->size < pindex) return; psize = object->size - pindex; } mpte = NULL; if ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->pindex < pindex) { p = vm_page_splay(pindex, object->root); if ((object->root = p)->pindex < pindex) p = TAILQ_NEXT(p, listq); } } /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. */ for (; p != NULL && (tmpidx = p->pindex - pindex) < psize; p = TAILQ_NEXT(p, listq)) { /* * don't allow an madvise to blow away our really * free pages allocating pv entries. */ if ((limit & MAP_PREFAULT_MADVISE) && cnt.v_free_count < cnt.v_free_reserved) { break; } vm_page_lock_queues(); if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), p, mpte); vm_page_lock_queues(); vm_page_wakeup(p); } vm_page_unlock_queues(); } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time. 
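 *
 * The pageorder table defined just below makes pmap_prefault() probe
 * addresses that alternate around the faulting address, nearest
 * first; a minimal sketch of the visit order (illustrative only):
 *
 *	for (i = 0; i < PAGEORDER_SIZE; i++)
 *		candidate = addra + pmap_prefault_pageorder[i];
 *
 * which yields addra-4096, addra+4096, addra-8192, addra+8192, ...
 * so the pages closest to the fault are tried first.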
*/ #define PFBAK 4 #define PFFOR 4 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -1 * PAGE_SIZE, 1 * PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE, -3 * PAGE_SIZE, 3 * PAGE_SIZE, -4 * PAGE_SIZE, 4 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; vm_page_t m, mpte; vm_object_t object; if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; pt_entry_t *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if ((*pmap_pde(pmap, addr)) == 0) continue; pte = vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give-up when a page is not in memory */ if (m == NULL) break; vm_page_lock_queues(); if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->busy == 0) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((m->queue - m->pc) == PQ_CACHE) { vm_page_deactivate(m); } vm_page_busy(m); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, addr, m, mpte); vm_page_lock_queues(); vm_page_wakeup(m); } vm_page_unlock_queues(); } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register pt_entry_t *pte; if (pmap == NULL) return; pte = pmap_pte_quick(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; vm_page_t m; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; unsigned ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); /* * Don't let optional prefaulting of pages make us go * way below the low water mark of free pages or way * above high water mark of used pv entries. 
*/ if (cnt.v_free_count < cnt.v_free_reserved || pv_entry_count > pv_entry_high_water) break; pdnxt = (addr + NBPDR) & ~PDRMASK; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } continue; } srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); if ((srcmpte == NULL) || (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) continue; if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { /* * We have to check after allocpte for the * pte still being around... allocpte can * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); dst_pte = pmap_pte_quick(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and * accessed (referenced) bits * during the copy. */ m = PHYS_TO_VM_PAGE(ptetemp); *dst_pte = ptetemp & ~(PG_M | PG_A); dst_pmap->pm_stats.resident_count++; pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { vm_page_lock_queues(); pmap_unwire_pte_hold(dst_pmap, dstmpte); vm_page_unlock_queues(); } if (dstmpte->hold_count >= srcmpte->hold_count) break; } addr += PAGE_SIZE; src_pte++; } } } #ifdef SMP /* * pmap_zpi_switchin*() * * These functions allow us to avoid doing IPIs alltogether in certain * temporary page-mapping situations (page zeroing). Instead to deal * with being preempted and moved onto a different cpu we invalidate * the page when the scheduler switches us in. This does not occur * very often so we remain relatively optimal with very little effort. */ static void pmap_zpi_switchin12(void) { invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); } static void pmap_zpi_switchin2(void) { invlpg((u_int)CADDR2); } static void pmap_zpi_switchin3(void) { invlpg((u_int)CADDR3); } #endif /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin2; #endif invlpg((u_int)CADDR2); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); else #endif bzero(CADDR2, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. 
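 *
 * The mapping idiom shared by the pmap_zero_page*() and
 * pmap_copy_page() routines, reduced to a sketch (locking and the
 * I386_CPU/SMP invalidation variants omitted): install a PTE in a
 * reserved kernel slot, work through the matching VA, then unmap.
 */
static void
zero_via_window(pt_entry_t *cmap, caddr_t caddr, vm_page_t m)
{

	*cmap = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
	invlpg((u_int)caddr);		/* discard any stale TLB entry */
	bzero(caddr, PAGE_SIZE);
	*cmap = 0;			/* tear the window down */
}
/*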
*/ void pmap_zero_page_area(vm_page_t m, int off, int size) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin2; #endif invlpg((u_int)CADDR2); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); else #endif bzero((char *)CADDR2 + off, size); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) panic("pmap_zero_page: CMAP3 busy"); *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin3; #endif invlpg((u_int)CADDR3); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR3); else #endif bzero(CADDR3, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP3 = 0; } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t src, vm_page_t dst) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*CMAP2) panic("pmap_copy_page: CMAP2 busy"); *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A; *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin12; #endif invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); #endif bcopy(CADDR1, CADDR2, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP1 = 0; *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap, m) pmap_t pmap; vm_page_t m; { pv_entry_t pv; int loops = 0; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } loops++; if (loops >= 16) break; } splx(s); return (FALSE); } #define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. 
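 *
 * Caller-side sketch, assuming the usual process-teardown context
 * (the helper name and exact bounds are illustrative, not taken from
 * the actual exit path):
 */
static void
reclaim_user_mappings(struct proc *p)
{

	/* Must be the current process's pmap; see the warning below. */
	pmap_remove_pages(vmspace_pmap(p->p_vmspace), 0, VM_MAXUSER_ADDRESS);
}
/*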
*/ void pmap_remove_pages(pmap, sva, eva) pmap_t pmap; vm_offset_t sva, eva; { pt_entry_t *pte, tpte; vm_page_t m; pv_entry_t pv, npv; int s; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); s = splvm(); for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY pte = vtopte(pv->pv_va); #else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif tpte = *pte; if (tpte == 0) { printf("TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; } m = PHYS_TO_VM_PAGE(tpte); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pv->pv_pmap->pm_stats.resident_count--; *pte = 0; /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { vm_page_dirty(m); } npv = TAILQ_NEXT(pv, pv_plist); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); m->md.pv_list_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_FIRST(&m->md.pv_list) == NULL) { vm_page_flag_clear(m, PG_WRITEABLE); } pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } splx(s); pmap_invalidate_all(pmap); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { pv_entry_t pv; pt_entry_t *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ if (!pmap_track_modified(pv->pv_va)) continue; #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (*pte & PG_M) { splx(s); return TRUE; } } splx(s); return (FALSE); } /* * this routine is used to modify bits in ptes */ static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem) { register pv_entry_t pv; register pt_entry_t *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || (!setem && bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) return; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? 
*/ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { /* * don't write protect pager mappings */ if (!setem && (bit == PG_RW)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (setem) { *pte |= bit; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } else { pt_entry_t pbits = *pte; if (pbits & bit) { if (bit == PG_RW) { if (pbits & PG_M) { vm_page_dirty(m); } *pte = pbits & ~(PG_M|PG_RW); } else { *pte = pbits & ~bit; } pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } } } if (!setem && bit == PG_RW) vm_page_flag_clear(m, PG_WRITEABLE); splx(s); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(vm_page_t m, vm_prot_t prot) { if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { pmap_changebit(m, PG_RW, FALSE); } else { pmap_remove_all(m); } } } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { register pv_entry_t pv, pvf, pvn; pt_entry_t *pte; int s; int rtval = 0; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return (rtval); s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); if (!pmap_track_modified(pv->pv_va)) continue; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { *pte &= ~PG_A; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); rtval++; if (rtval > 4) { break; } } } while ((pv = pvn) != NULL && pv != pvf); } splx(s); return (rtval); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { pmap_changebit(m, PG_M, FALSE); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { pmap_changebit(m, PG_A, FALSE); } /* * Miscellaneous support routines follow */ static void i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: /* * Read access is also 0. There isn't any execute bit, * so just make it readable. */ case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = 0; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
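 *
 * Typical driver-side usage, sketched with a made-up physical base
 * and register offset; pmap_mapdev()/pmap_unmapdev() below are the
 * real interface.
 */
static u_int32_t
read_device_reg(vm_paddr_t bar, vm_size_t len, int reg)
{
	void *va;
	u_int32_t v;

	va = pmap_mapdev(bar, len);
	v = *(volatile u_int32_t *)((char *)va + reg);
	pmap_unmapdev((vm_offset_t)va, len);
	return (v);
}
/*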
*/ void * pmap_mapdev(pa, size) vm_paddr_t pa; vm_size_t size; { vm_offset_t va, tmpva, offset; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); GIANT_REQUIRED; va = kmem_alloc_pageable(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; for (tmpva = va; size > 0; ) { pmap_kenter(tmpva, pa); size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, va, tmpva); return ((void *)(va + offset)); } void pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { vm_offset_t base, offset, tmpva; pt_entry_t *pte; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { pte = vtopte(tmpva); *pte = 0; } pmap_invalidate_range(kernel_pmap, va, tmpva); kmem_free(kernel_map, base, size); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { pt_entry_t *ptep, pte; vm_page_t m; int val = 0; ptep = pmap_pte_quick(pmap, addr); if (ptep == 0) { return 0; } if ((pte = *ptep) != 0) { vm_paddr_t pa; val = MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) return val; pa = pte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); /* * Modified by us */ if (pte & PG_M) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; else { /* * Modified by someone else */ vm_page_lock_queues(); if (m->dirty || pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; vm_page_unlock_queues(); } /* * Referenced by us */ if (pte & PG_A) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; else { /* * Referenced by someone else */ vm_page_lock_queues(); if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } vm_page_unlock_queues(); } } return val; } void pmap_activate(struct thread *td) { struct proc *p = td->td_proc; pmap_t pmap; u_int32_t cr3; pmap = vmspace_pmap(td->td_proc->p_vmspace); #if defined(SMP) pmap->pm_active |= PCPU_GET(cpumask); #else pmap->pm_active |= 1; #endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else cr3 = vtophys(pmap->pm_pdir); +#endif /* XXXKSE this is wrong. * pmap_activate is for the current thread on the current cpu */ if (p->p_flag & P_THREADED) { /* Make sure all other cr3 entries are updated. */ /* what if they are running? 
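 * (as for cr3 itself: under PAE the register must hold the physical
 * address of the 32-byte page-directory-pointer table, which is what
 * the #ifdef above selects.  Distilled into a hypothetical helper,
 * not a function the source defines:)
 */
static __inline u_int32_t
pmap_cr3(pmap_t pmap)
{
#ifdef PAE
	/*
	 * The PDPT must reside below 4GB, so a 32-bit quantity is
	 * safe here; each of its four entries covers 1GB of VA.
	 */
	return (vtophys(pmap->pm_pdpt));
#else
	return (vtophys(pmap->pm_pdir));
#endif
}
/*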
XXXKSE (maybe abort them) */ FOREACH_THREAD_IN_PROC(p, td) { td->td_pcb->pcb_cr3 = cr3; } } else { td->td_pcb->pcb_cr3 = cr3; } load_cr3(cr3); #ifdef SWTCH_OPTIM_STATS tlb_flush_count++; #endif } vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { return addr; } addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return addr; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return npte; } pte = pmap_pte_quick(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return npte; } #endif #if defined(DEBUG) static void pads(pmap_t pm); void pmap_pvdump(vm_offset_t pa); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { int i, j; vm_paddr_t va; pt_entry_t *ptep; if (pm == kernel_pmap) return; for (i = 0; i < NPDEPTD; i++) if (pm->pm_pdir[i]) for (j = 0; j < NPTEPG; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte_quick(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *ptep); }; } void pmap_pvdump(pa) vm_paddr_t pa; { pv_entry_t pv; vm_page_t m; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif Index: head/sys/i386/i386/vm86bios.s =================================================================== --- head/sys/i386/i386/vm86bios.s (revision 112840) +++ head/sys/i386/i386/vm86bios.s (revision 112841) @@ -1,175 +1,178 @@ /*- * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_npx.h" #include /* miscellaneous asm macros */ #include #include "assym.s" #define SCR_NEWPTD PCB_ESI /* readability macros */ #define SCR_VMFRAME PCB_EBP /* see vm86.c for explanation */ #define SCR_STACK PCB_ESP #define SCR_PGTABLE PCB_EBX #define SCR_ARGFRAME PCB_EIP #define SCR_TSS0 PCB_SPARE #define SCR_TSS1 (PCB_SPARE+4) .data ALIGN_DATA .globl vm86pcb vm86pcb: .long 0 .text /* * vm86_bioscall(struct trapframe_vm86 *vm86) */ ENTRY(vm86_bioscall) movl vm86pcb,%edx /* scratch data area */ movl 4(%esp),%eax movl %eax,SCR_ARGFRAME(%edx) /* save argument pointer */ pushl %ebx pushl %ebp pushl %esi pushl %edi pushl %gs #ifdef DEV_NPX pushfl cli movl PCPU(CURTHREAD),%ecx cmpl %ecx,PCPU(FPCURTHREAD) /* do we need to save fp? */ jne 1f testl %ecx,%ecx je 1f /* no curproc/npxproc */ pushl %edx movl TD_PCB(%ecx),%ecx addl $PCB_SAVEFPU,%ecx pushl %ecx call npxsave popl %ecx popl %edx /* recover our pcb */ 1: popfl #endif movl SCR_VMFRAME(%edx),%ebx /* target frame location */ movl %ebx,%edi /* destination */ movl SCR_ARGFRAME(%edx),%esi /* source (set on entry) */ movl $VM86_FRAMESIZE/4,%ecx /* sizeof(struct vm86frame)/4 */ cld rep movsl /* copy frame to new stack */ movl PCPU(CURPCB),%eax pushl %eax /* save curpcb */ movl %edx,PCPU(CURPCB) /* set curpcb to vm86pcb */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl 0(%ebx),%eax movl %eax,SCR_TSS0(%edx) /* save first word */ movl 4(%ebx),%eax andl $~0x200, %eax /* flip 386BSY -> 386TSS */ movl %eax,SCR_TSS1(%edx) /* save second word */ movl PCB_EXT(%edx),%edi /* vm86 tssd entry */ movl 0(%edi),%eax movl %eax,0(%ebx) movl 4(%edi),%eax movl %eax,4(%ebx) movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si movl %cr3,%eax pushl %eax /* save address space */ movl IdlePTD,%ecx movl %ecx,%ebx addl $KERNBASE,%ebx /* va of Idle PTD */ movl 0(%ebx),%eax pushl %eax /* old ptde != 0 when booting */ pushl %ebx /* keep for reuse */ movl %esp,SCR_STACK(%edx) /* save current stack location */ movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */ movl %eax,0(%ebx) /* ... 
install as PTD entry 0 */ +#ifdef PAE + movl IdlePDPT,%ecx +#endif movl %ecx,%cr3 /* new page tables */ movl SCR_VMFRAME(%edx),%esp /* switch to new stack */ call vm86_prepcall /* finish setup */ /* * Return via doreti */ MEXITCOUNT jmp doreti /* * vm86_biosret(struct trapframe_vm86 *vm86) */ ENTRY(vm86_biosret) movl vm86pcb,%edx /* data area */ movl 4(%esp),%esi /* source */ movl SCR_ARGFRAME(%edx),%edi /* destination */ movl $VM86_FRAMESIZE/4,%ecx /* size */ cld rep movsl /* copy frame to original frame */ movl SCR_STACK(%edx),%esp /* back to old stack */ popl %ebx /* saved va of Idle PTD */ popl %eax movl %eax,0(%ebx) /* restore old pte */ popl %eax movl %eax,%cr3 /* install old page table */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl SCR_TSS0(%edx),%eax movl %eax,0(%ebx) /* restore first word */ movl SCR_TSS1(%edx),%eax movl %eax,4(%ebx) /* restore second word */ movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si popl PCPU(CURPCB) /* restore curpcb/curproc */ movl SCR_ARGFRAME(%edx),%edx /* original stack frame */ movl TF_TRAPNO(%edx),%eax /* return (trapno) */ popl %gs popl %edi popl %esi popl %ebp popl %ebx ret /* back to our normal program */ Index: head/sys/i386/i386/vm_machdep.c =================================================================== --- head/sys/i386/i386/vm_machdep.c (revision 112840) +++ head/sys/i386/i386/vm_machdep.c (revision 112841) @@ -1,559 +1,567 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD$ */ #include "opt_npx.h" #ifdef PC98 #include "opt_pc98.h" #endif #include "opt_reset.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PC98 #include #else #include #endif static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif extern int _ucodesel, _udatasel; /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(td1, p2, td2, flags) register struct thread *td1; register struct proc *p2; struct thread *td2; int flags; { register struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; #ifdef DEV_NPX register_t savecrit; #endif p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt = mdp1->md_ldt; if (pldt && pldt->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_free(td1); } } return; } /* Ensure that p1's pcb is up to date. */ #ifdef DEV_NPX if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); savecrit = intr_disable(); if (PCPU_GET(fpcurthread) == td1) npxsave(&td1->td_pcb->pcb_save); intr_restore(savecrit); #endif /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *)(td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td2->td_pcb = pcb2; /* Copy p1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -16 is so we can expand the trapframe if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = 0; /* Copy the LDT, if necessary. 
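 *
 * (Stepping back: the pcb/trapframe placement computed earlier in
 * this function, and again in cpu_thread_setup(), gives the kernel
 * stack the layout below, top of stack first.  frame_for() is a
 * hypothetical restatement of that arithmetic, not a routine this
 * file defines:
 *
 *	td_kstack + KSTACK_PAGES * PAGE_SIZE
 *	    struct pcb		(td_pcb)
 *	    16 spare bytes	(room to grow into a vm86 trapframe)
 *	    struct trapframe	(td_frame)
 * )
 */
static struct trapframe *
frame_for(struct thread *td)
{
	struct pcb *pcb;

	pcb = (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
	return ((struct trapframe *)((caddr_t)pcb - 16) - 1);
}
/*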
*/ mtx_lock_spin(&sched_lock); if (mdp2->md_ldt != 0) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&sched_lock); /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_set_fork_handler(td, func, arg) struct thread *td; void (*func)(void *); void *arg; { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { struct mdproc *mdp; mdp = &td->td_proc->p_md; if (mdp->md_ldt) user_ldt_free(td); reset_dbregs(); } void cpu_thread_exit(struct thread *td) { struct pcb *pcb = td->td_pcb; #ifdef DEV_NPX npxexit(td); #endif if (pcb->pcb_flags & PCB_DBREGS) { /* * disable all hardware breakpoints */ reset_dbregs(); pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != 0) { /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */ /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ mtx_lock(&Giant); kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext, ctob(IOPAGES + 1)); mtx_unlock(&Giant); pcb->pcb_ext = 0; } } void cpu_sched_exit(td) register struct thread *td; { } void cpu_thread_setup(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Pu t enough state in the new thread's PCB to get it to go back * userret(), where we can intercept it again to set the return (upcall) * Address and stack, along with those from upcals that are from other sources * such as those generated in thread_userret() itself. */ void cpu_set_upcall(struct thread *td, void *pcb) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. * * XXXKSE It might be a good idea to simply skip this as * the values of the other registers may be unimportant. * This would remove any requirement for knowing the KSE * at this time (see the matching comment below for * more analysis) (need a good safe default). */ bcopy(pcb, pcb2, sizeof(*pcb2)); /* * Create a new fresh stack for the new thread. * The -16 is so we can expand the trapframe if we go to vm86. * Don't forget to set this stack value into whatever supplies * the address for the fault handlers. * The contexts are filled in at the time we actually DO the * upcall as only then do we know which KSE we got. */ td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1; /* * Set registers for trampoline to user mode. 
Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. XXXKSE ??? * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the uts * function. */ td->td_frame->tf_esp = (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16; td->td_frame->tf_eip = (int)ku->ku_func; /* * Pass the address of the mailbox for this kse to the uts * function as a parameter on the stack. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)ku->ku_mailbox); } void cpu_wait(p) struct proc *p; { } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } /* * Force reset the processor by invalidating the entire address space! */ #ifdef SMP static void cpu_reset_proxy() { cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ stop_cpus((1<" */ invltlb(); /* NOTREACHED */ while(1); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. * Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(addr) vm_offset_t addr; { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } Index: head/sys/i386/include/_types.h =================================================================== --- head/sys/i386/include/_types.h (revision 112840) +++ head/sys/i386/include/_types.h (revision 112841) @@ -1,122 +1,126 @@ /*- * Copyright (c) 2002 Mike Barcroft * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)ansi.h 8.2 (Berkeley) 1/4/94 * From: @(#)types.h 8.3 (Berkeley) 1/5/94 * $FreeBSD$ */ #ifndef _MACHINE__TYPES_H_ #define _MACHINE__TYPES_H_ /* * Basic types upon which most other types are built. */ typedef __signed char __int8_t; typedef unsigned char __uint8_t; typedef short __int16_t; typedef unsigned short __uint16_t; typedef int __int32_t; typedef unsigned int __uint32_t; #if defined(lint) /* LONGLONG */ typedef long long __int64_t; /* LONGLONG */ typedef unsigned long long __uint64_t; #elif defined(__GNUC__) typedef int __attribute__((__mode__(__DI__))) __int64_t; typedef unsigned int __attribute__((__mode__(__DI__))) __uint64_t; #else /* LONGLONG */ typedef long long __int64_t; /* LONGLONG */ typedef unsigned long long __uint64_t; #endif /* * Standard type definitions. */ typedef unsigned long __clock_t; /* clock()... */ typedef __int32_t __critical_t; typedef double __double_t; typedef double __float_t; typedef __int32_t __intfptr_t; typedef __int64_t __intmax_t; typedef __int32_t __intptr_t; typedef __int32_t __int_fast8_t; typedef __int32_t __int_fast16_t; typedef __int32_t __int_fast32_t; typedef __int64_t __int_fast64_t; typedef __int8_t __int_least8_t; typedef __int16_t __int_least16_t; typedef __int32_t __int_least32_t; typedef __int64_t __int_least64_t; typedef __int32_t __ptrdiff_t; /* ptr1 - ptr2 */ typedef __int32_t __register_t; typedef __int32_t __segsz_t; /* segment size (in pages) */ typedef __uint32_t __size_t; /* sizeof() */ typedef __int32_t __ssize_t; /* byte count or error */ typedef __int32_t __time_t; /* time()... 
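 *
 * (Note: once PAE widens __vm_paddr_t to 64 bits a few lines below,
 * physical addresses no longer fit an int-sized printf format; the
 * convention this commit uses elsewhere is a uintmax_t cast, e.g.:
 *
 *	printf("pa %#jx\n", (uintmax_t)pa);
 *
 * rather than the old "%x".)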
*/ typedef __uint32_t __uintfptr_t; typedef __uint64_t __uintmax_t; typedef __uint32_t __uintptr_t; typedef __uint32_t __uint_fast8_t; typedef __uint32_t __uint_fast16_t; typedef __uint32_t __uint_fast32_t; typedef __uint64_t __uint_fast64_t; typedef __uint8_t __uint_least8_t; typedef __uint16_t __uint_least16_t; typedef __uint32_t __uint_least32_t; typedef __uint64_t __uint_least64_t; typedef __uint32_t __u_register_t; typedef __uint32_t __vm_offset_t; typedef __int64_t __vm_ooffset_t; +#ifdef PAE +typedef __uint64_t __vm_paddr_t; +#else typedef __uint32_t __vm_paddr_t; +#endif typedef __uint64_t __vm_pindex_t; typedef __uint32_t __vm_size_t; /* * Unusual type definitions. */ #if defined(__GNUC__) && (__GNUC__ == 2 && __GNUC_MINOR__ > 95 || __GNUC__ >= 3) typedef __builtin_va_list __va_list; /* internally known to gcc */ #else typedef char * __va_list; #endif /* post GCC 2.95 */ #if defined __GNUC__ && !defined(__GNUC_VA_LIST) && !defined(__NO_GNUC_VA_LIST) #define __GNUC_VA_LIST typedef __va_list __gnuc_va_list; /* compatibility w/GNU headers*/ #endif #endif /* !_MACHINE__TYPES_H_ */ Index: head/sys/i386/include/bus_at386.h =================================================================== --- head/sys/i386/include/bus_at386.h (revision 112840) +++ head/sys/i386/include/bus_at386.h (revision 112841) @@ -1,1216 +1,1224 @@ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _I386_BUS_AT386_H_ #define _I386_BUS_AT386_H_ #include /* * To remain compatible with NetBSD's interface, default to both memio and * pio when neither of them is defined. */ #if !defined(_I386_BUS_PIO_H_) && !defined(_I386_BUS_MEMIO_H_) #define _I386_BUS_PIO_H_ #define _I386_BUS_MEMIO_H_ #endif /* * Values for the i386 bus space tag, not to be used directly by MI code. */ #define I386_BUS_SPACE_IO 0 /* space is i/o space */ #define I386_BUS_SPACE_MEM 1 /* space is mem space */ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) /* * Access methods for bus resources and address space. */ typedef int bus_space_tag_t; typedef u_int bus_space_handle_t; /* * Map a region of device bus space into CPU virtual address space. */ #define BUS_SPACE_MAP_CACHEABLE 0x01 #define BUS_SPACE_MAP_LINEAR 0x02 int bus_space_map(bus_space_tag_t t, bus_addr_t addr, bus_size_t size, int flags, bus_space_handle_t *bshp); /* * Unmap a region of device bus space. */ static __inline void bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_unmap(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } /* * Get a new handle for a subregion of an already-mapped area of bus space. 
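 *
 * (A caveat following from the widened types above: with PAE a
 * bus_addr_t can exceed 32 bits, so code serving devices without
 * 64-bit addressing should compare against the 32-bit limit
 * explicitly.  A minimal illustrative predicate, not part of this
 * header:)
 */
static __inline int
bus_addr_fits_32bit(bus_addr_t pa)
{

	/* Addresses above BUS_SPACE_MAXADDR_32BIT need bouncing. */
	return (pa <= BUS_SPACE_MAXADDR_32BIT);
}
/*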
*/ static __inline int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp); static __inline int bus_space_subregion(bus_space_tag_t t __unused, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size __unused, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } /* * Allocate a region of memory that is accessible to devices in bus space. */ int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t align, bus_size_t boundary, int flags, bus_addr_t *addrp, bus_space_handle_t *bshp); /* * Free a region of bus space accessible memory. */ static __inline void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_free(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } #if defined(_I386_BUS_PIO_H_) || defined(_I386_BUS_MEMIO_H_) /* * Read a 1, 2, 4, or 8 byte quantity from bus space * described by tag/handle/offset. */ static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined (_I386_BUS_PIO_H_) #if defined (_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inb(handle + offset)); #endif #if defined (_I386_BUS_MEMIO_H_) return (*(volatile u_int8_t *)(handle + offset)); #endif } static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inw(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int16_t *)(handle + offset)); #endif } static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inl(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int32_t *)(handle + offset)); #endif } #if 0 /* Cause a link error for bus_space_read_8 */ #define bus_space_read_8(t, h, o) !!! bus_space_read_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle/offset and copy into buffer provided. 
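 *
 * Usage sketch for the routines declared below: the "multi" forms
 * re-read the same offset each iteration, which suits FIFO-style
 * data registers.  The offset 0 used here is made up.
 */
static __inline void
drain_rx_fifo(bus_space_tag_t t, bus_space_handle_t h, u_int32_t *buf,
    size_t n)
{

	bus_space_read_multi_4(t, h, 0, buf, n);
}
/*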
*/ static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movb (%2),%%al \n\ stosb \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movw (%2),%%ax \n\ stosw \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movl (%2),%%eax \n\ stosl \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_multi_8 */ #define bus_space_read_multi_8 !!! bus_space_read_multi_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle and starting at `offset' and copy into * buffer provided. 
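 *
 * In contrast to the "multi" forms, the "region" forms below advance
 * the offset by the access width per datum, copying a window of bus
 * space rather than polling a single register.  Illustrative
 * counterpart to the FIFO sketch above:
 */
static __inline void
copy_reg_window(bus_space_tag_t t, bus_space_handle_t h, u_int32_t *buf,
    size_t n)
{

	/* Reads offsets 0, 4, 8, ... (the offset choice is made up). */
	bus_space_read_region_4(t, h, 0, buf, n);
}
/*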
*/ static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inb %w2,%%al \n\ stosb \n\ incl %2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inw %w2,%%ax \n\ stosw \n\ addl $2,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inl %w2,%%eax \n\ stosl \n\ addl $4,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_region_8 */ #define bus_space_read_region_8 !!! bus_space_read_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `value' to bus space * described by tag/handle/offset. 
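 *
 * Usage sketch for the write side; the register offset and the
 * write-1-to-clear semantics are invented for illustration.
 */
static __inline void
ack_device_intr(bus_space_tag_t t, bus_space_handle_t h)
{

	bus_space_write_4(t, h, 0x04, 0xffffffff);
}
/*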
*/ static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value); static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value); static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value); static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outb(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int8_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outw(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int16_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outl(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int32_t *)(bsh + offset) = value; #endif } #if 0 /* Cause a link error for bus_space_write_8 */ #define bus_space_write_8 !!! bus_space_write_8 not implemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer * provided to bus space described by tag/handle/offset. */ static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ movb %%al,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ movw %%ax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if 
defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ movl %%eax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_write_multi_8 */ #define bus_space_write_multi_8(t, h, o, a, c) \ !!! bus_space_write_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer provided * to bus space described by tag/handle starting at `offset'. */ static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ outb %%al,%w0 \n\ incl %0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ outw %%ax,%w0 \n\ addl $2,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ outl %%eax,%w0 \n\ addl $4,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for 
bus_space_write_region_8 */ #define bus_space_write_region_8 \ !!! bus_space_write_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle/offset `count' times. */ static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_multi_8 */ #define bus_space_set_multi_8 !!! bus_space_set_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle starting at `offset'. 
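 *
 * A minimal sketch (the handle and sizes are hypothetical): clearing a
 * 4KB memory-mapped window one 32-bit word at a time could be written
 *
 *	bus_space_set_region_4(sc->sc_bt, sc->sc_bh, 0, 0, 4096 / 4);
 *
 * In contrast to the _set_multi_ functions above, which hit the same
 * address `count' times, the _set_region_ functions below step the
 * address by the access size on each iteration.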
*/ static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr++) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr++) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 2) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 2) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 4) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 4) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_region_8 */ #define bus_space_set_region_8 !!! bus_space_set_region_8 unimplemented !!! #endif /* * Copy `count' 1, 2, 4, or 8 byte values from bus space starting * at tag/bsh1/off1 to bus space starting at tag/bsh2/off2. 
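 *
 * For instance (the offsets are illustrative), scrolling an 80x25
 * text-mode screen up by one line within a single handle might be
 *
 *	bus_space_copy_region_2(tag, bsh, 80 * 2, bsh, 0, 80 * 24);
 *
 * The implementations below compare source and destination addresses
 * and pick a copy direction, so overlapping regions behave like
 * memmove(3) rather than memcpy(3).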
*/ static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) outb(addr2, inb(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; 
				count--, addr1 += 4, addr2 += 4)
				*(volatile u_int32_t *)(addr2) =
				    *(volatile u_int32_t *)(addr1);
		} else {
			/* dest after src: copy backwards */
			for (addr1 += 4 * (count - 1),
			    addr2 += 4 * (count - 1); count != 0;
			    count--, addr1 -= 4, addr2 -= 4)
				*(volatile u_int32_t *)(addr2) =
				    *(volatile u_int32_t *)(addr1);
		}
	}
#endif
}
#endif /* defined(_I386_BUS_PIO_H_) || defined(_I386_BUS_MEMIO_H_) */

#if 0	/* Cause a link error for bus_space_copy_region_8 */
#define bus_space_copy_region_8	!!! bus_space_copy_region_8 unimplemented !!!
#endif

/*
 * Bus read/write barrier methods.
 *
 *	void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh,
 *	    bus_size_t offset, bus_size_t len, int flags);
 *
 * Note that BUS_SPACE_BARRIER_WRITE doesn't do anything other than
 * prevent reordering by the compiler; all Intel x86 processors currently
 * retire operations outside the CPU in program order.
 */
#define BUS_SPACE_BARRIER_READ	0x01	/* force read barrier */
#define BUS_SPACE_BARRIER_WRITE	0x02	/* force write barrier */

static __inline void
bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused,
    bus_size_t offset __unused, bus_size_t len __unused, int flags)
{
#ifdef __GNUC__
	if (flags & BUS_SPACE_BARRIER_READ)
		__asm __volatile("lock; addl $0,0(%%esp)" : : : "memory");
	else
		__asm __volatile("" : : : "memory");
#endif
}

#endif /* _I386_BUS_AT386_H_ */
Index: head/sys/i386/include/param.h
===================================================================
--- head/sys/i386/include/param.h	(revision 112840)
+++ head/sys/i386/include/param.h	(revision 112841)
@@ -1,142 +1,147 @@
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
 *
 * from: @(#)param.h	5.8 (Berkeley) 6/28/91
 * $FreeBSD$
 */

/*
 * Machine dependent constants for Intel 386.
 */

/*
 * Round p (pointer or byte index) up to a correctly-aligned value
 * for all data types (int, long, ...).  The result is unsigned int
 * and must be cast to any desired pointer type.
 */
#ifndef _ALIGNBYTES
#define _ALIGNBYTES	(sizeof(int) - 1)
#endif
#ifndef _ALIGN
#define _ALIGN(p)	(((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES)
#endif

#ifndef _MACHINE
#define _MACHINE	i386
#endif
#ifndef _MACHINE_ARCH
#define _MACHINE_ARCH	i386
#endif

#ifndef _NO_NAMESPACE_POLLUTION

#ifndef _MACHINE_PARAM_H_
#define _MACHINE_PARAM_H_

#ifndef MACHINE
#define MACHINE		"i386"
#endif
#ifndef MACHINE_ARCH
#define MACHINE_ARCH	"i386"
#endif
#define MID_MACHINE	MID_I386

#ifdef SMP
#define MAXCPU		16
#else
#define MAXCPU		1
#endif /* SMP */

#define ALIGNBYTES	_ALIGNBYTES
#define ALIGN(p)	_ALIGN(p)

#define PAGE_SHIFT	12		/* LOG2(PAGE_SIZE) */
#define PAGE_SIZE	(1<<PAGE_SHIFT)	/* bytes/page */
#define PAGE_MASK	(PAGE_SIZE-1)

#define atop(x)		((x) >> PAGE_SHIFT)
#define ptoa(x)		((x) << PAGE_SHIFT)

#define i386_btop(x)	((x) >> PAGE_SHIFT)
#define i386_ptob(x)	((x) << PAGE_SHIFT)

#define pgtok(x)	((x) * (PAGE_SIZE / 1024))

#endif /* !_MACHINE_PARAM_H_ */

#endif /* !_NO_NAMESPACE_POLLUTION */
Index: head/sys/i386/include/pmap.h
===================================================================
--- head/sys/i386/include/pmap.h	(revision 112840)
+++ head/sys/i386/include/pmap.h	(revision 112841)
@@ -1,265 +1,317 @@
 /*
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
 *
 * Derived from hp300 version by Mike Hibler, this version by William
 * Jolitz uses a recursive map [a pde points to the page directory] to
 * map the page tables using the pagetables themselves. This is done to
 * reduce the impact on kernel virtual memory for lots of sparse address
 * space, and to reduce the cost of memory to each process.
 *
 * from: hp300: @(#)pmap.h	7.2 (Berkeley) 12/16/90
 * from: @(#)pmap.h	7.4 (Berkeley) 5/12/91
 * $FreeBSD$
 */

#ifndef _MACHINE_PMAP_H_
#define _MACHINE_PMAP_H_

/*
 * Page-directory and page-table entries follow this format, with a few
 * of the fields not present here and there, depending on a lot of things.
 */
				/* ---- Intel Nomenclature ---- */
#define PG_V		0x001	/* P	Valid			*/
#define PG_RW		0x002	/* R/W	Read/Write		*/
#define PG_U		0x004	/* U/S	User/Supervisor		*/
#define PG_NC_PWT	0x008	/* PWT	Write through		*/
#define PG_NC_PCD	0x010	/* PCD	Cache disable		*/
#define PG_A		0x020	/* A	Accessed		*/
#define PG_M		0x040	/* D	Dirty			*/
#define PG_PS		0x080	/* PS	Page size (0=4k,1=4M)	*/
#define PG_G		0x100	/* G	Global			*/
#define PG_AVAIL1	0x200	/*    /	Available for system	*/
#define PG_AVAIL2	0x400	/*   <	programmers use		*/
#define PG_AVAIL3	0x800	/*    \				*/

/* Our various interpretations of the above */
#define PG_W		PG_AVAIL1	/* "Wired" pseudoflag */
#define PG_MANAGED	PG_AVAIL2
#define PG_FRAME	(~((vm_paddr_t)PAGE_MASK))
#define PG_PROT		(PG_RW|PG_U)	/* all protection bits */
#define PG_N		(PG_NC_PWT|PG_NC_PCD)	/* Non-cacheable */

/*
 * Page Protection Exception bits
 */
#define PGEX_P		0x01	/* Protection violation vs. not present */
#define PGEX_W		0x02	/* during a Write cycle */
#define PGEX_U		0x04	/* access from User mode (UPL) */

/*
 * Size of Kernel address space.  This is the number of page table pages
 * (4MB each) to use for the kernel.  256 pages == 1 Gigabyte.
 * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
 */
#ifndef KVA_PAGES
+#ifdef PAE
+#define KVA_PAGES	512
+#else
#define KVA_PAGES	256
#endif
+#endif

/*
 * Pte related macros
 */
#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT)))

/*
 * The *PTDI values control the layout of virtual memory.
 * SMP_PRIVPAGES: the per-cpu address space is 0xff800000 -> 0xffbfffff.
 */
#define APTDPTDI	(NPDEPTD-NPGPTD) /* alt ptd entry that points to APTD */
#ifdef SMP
#define MPPTDI		(APTDPTDI-1)	/* per cpu ptd entry */
#define KPTDI		(MPPTDI-NKPDE)	/* start of kernel virtual pde's */
#else
#define KPTDI		(APTDPTDI-NKPDE) /* start of kernel virtual pde's */
#endif	/* SMP */
#define PTDPTDI		(KPTDI-NPGPTD)	/* ptd entry that points to ptd! */

/*
 * XXX doesn't really belong here I guess...
 */
#define ISA_HOLE_START		0xa0000
#define ISA_HOLE_LENGTH		(0x100000-ISA_HOLE_START)

#ifndef LOCORE

#include <sys/queue.h>

-typedef u_int32_t pd_entry_t;
-typedef u_int32_t pt_entry_t;
+#ifdef PAE
+typedef uint64_t pdpt_entry_t;
+typedef uint64_t pd_entry_t;
+typedef uint64_t pt_entry_t;
+
+#define PTESHIFT	(3)
+#define PDESHIFT	(3)
+
+#else
+
+typedef uint32_t pd_entry_t;
+typedef uint32_t pt_entry_t;
+
 #define PTESHIFT	(2)
 #define PDESHIFT	(2)
 
+#endif
+
/*
 * Address of current and alternate address space page table maps
 * and directories.
 */
#ifdef _KERNEL
extern pt_entry_t PTmap[], APTmap[];
extern pd_entry_t PTD[], APTD[];
extern pd_entry_t PTDpde[], APTDpde[];
+#ifdef PAE
+extern pdpt_entry_t *IdlePDPT;
+#endif
extern pd_entry_t *IdlePTD;	/* physical address of "Idle" state directory */
#endif

#ifdef _KERNEL
/*
 * virtual address to page table entry and
 * to physical address. Likewise for alternate address space.
 * Note: these work recursively, thus vtopte of a pte will give
 * the corresponding pde that in turn maps it.
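 *
 * As a concrete non-PAE example of that recursion: with 4-byte ptes,
 * PTmap is a 4MB virtual window through which every pte is visible,
 * and
 *
 *	vtopte(va) == PTmap + i386_btop(va)
 *
 * so handing vtopte() an address that itself lies inside PTmap yields
 * an entry in the page directory, which PTDpde maps as if it were just
 * another page table.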
 */
#define vtopte(va)	(PTmap + i386_btop(va))
#define avtopte(va)	(APTmap + i386_btop(va))

/*
 *	Routine:	pmap_kextract
 *	Function:
 *		Extract the physical page address associated with the
 *		given kernel virtual address.
 */
static __inline vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	vm_paddr_t pa;

	if ((pa = (vm_offset_t) PTD[va >> PDRSHIFT]) & PG_PS) {
		pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1));
	} else {
		pa = *vtopte(va);
		pa = (pa & PG_FRAME) | (va & PAGE_MASK);
	}
	return pa;
}

#define vtophys(va)	pmap_kextract(((vm_offset_t) (va)))
+
+#ifdef PAE
+
+static __inline pt_entry_t
+pte_load_clear(pt_entry_t *pte)
+{
+	pt_entry_t r;
+
+	r = *pte;
+	__asm __volatile(
+	    "1:\n"
+	    "\tcmpxchg8b %1\n"
+	    "\tjnz 1b"
+	    : "+A" (r)
+	    : "m" (*pte), "b" (0), "c" (0));
+	return (r);
+}
+
+#else
+
+#define pte_load_clear(pte)	atomic_readandclear_int(pte)
+
 #endif
+#endif
+
/*
 * Pmap stuff
 */
struct pv_entry;

struct md_page {
	int			pv_list_count;
	TAILQ_HEAD(,pv_entry)	pv_list;
};

struct pmap {
	pd_entry_t		*pm_pdir;	/* KVA of page directory */
	vm_object_t		pm_pteobj;	/* Container for pte's */
	TAILQ_HEAD(,pv_entry)	pm_pvlist;	/* list of mappings in pmap */
	int			pm_active;	/* active on cpus */
	struct pmap_statistics	pm_stats;	/* pmap statistics */
	LIST_ENTRY(pmap)	pm_list;	/* List of all pmaps */
+#ifdef PAE
+	pdpt_entry_t		*pm_pdpt;	/* KVA of page directory pointer
+						   table */
+#endif
};

#define pmap_page_is_mapped(m)	(!TAILQ_EMPTY(&(m)->md.pv_list))
#define pmap_resident_count(pmap) (pmap)->pm_stats.resident_count

typedef struct pmap	*pmap_t;

#ifdef _KERNEL
extern struct pmap	kernel_pmap_store;
#define kernel_pmap	(&kernel_pmap_store)
#endif

/*
 * For each vm_page_t, there is a list of all currently valid virtual
 * mappings of that page.  An entry is a pv_entry_t, the list is pv_table.
 */
typedef struct pv_entry {
	pmap_t		pv_pmap;	/* pmap where mapping lies */
	vm_offset_t	pv_va;		/* virtual address for mapping */
	TAILQ_ENTRY(pv_entry)	pv_list;
	TAILQ_ENTRY(pv_entry)	pv_plist;
	vm_page_t	pv_ptem;	/* VM page for pte */
} *pv_entry_t;

#ifdef _KERNEL

#define NPPROVMTRR		8
#define PPRO_VMTRRphysBase0	0x200
#define PPRO_VMTRRphysMask0	0x201
struct ppro_vmtrr {
	u_int64_t base, mask;
};
extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR];

extern caddr_t	CADDR1;
extern pt_entry_t *CMAP1;
extern vm_paddr_t avail_end;
extern vm_paddr_t avail_start;
extern vm_offset_t clean_eva;
extern vm_offset_t clean_sva;
extern vm_paddr_t phys_avail[];
extern char *ptvmmap;		/* poor name! */
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;

void	pmap_bootstrap(vm_paddr_t, vm_paddr_t);
void	pmap_kenter(vm_offset_t va, vm_paddr_t pa);
void	pmap_kremove(vm_offset_t);
void	*pmap_mapdev(vm_paddr_t, vm_size_t);
void	pmap_unmapdev(vm_offset_t, vm_size_t);
pt_entry_t *pmap_pte_quick(pmap_t, vm_offset_t) __pure2;
void	pmap_set_opt(void);
void	pmap_invalidate_page(pmap_t, vm_offset_t);
void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void	pmap_invalidate_all(pmap_t);

#endif /* _KERNEL */

#endif /* !LOCORE */

#endif /* !_MACHINE_PMAP_H_ */
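/*
 * Why pte_load_clear() needs cmpxchg8b under PAE: a 64-bit pte cannot
 * be read and cleared atomically with 32-bit loads and stores, so the
 * loop above retries until it swaps the full 8-byte entry for zero in
 * one shot.  The following stand-alone sketch of the same pattern is
 * illustrative only, not part of the header above; it compiles and runs
 * in user space on i386 (or with -m32).
 */
#include <stdint.h>
#include <stdio.h>

static inline uint64_t
pte_load_clear_demo(volatile uint64_t *pte)
{
	uint64_t r;

	r = *pte;			/* initial guess at the old value */
	__asm__ __volatile__(
	    "1:\n"
	    "\tcmpxchg8b %1\n"		/* if (*pte == edx:eax) *pte = ecx:ebx */
	    "\tjnz 1b"			/* else edx:eax = *pte; retry */
	    : "+A" (r), "+m" (*pte)
	    : "b" (0), "c" (0));
	return (r);			/* the pte as it was before the clear */
}

int
main(void)
{
	volatile uint64_t pte = 0x80000000003ULL;	/* arbitrary test value */
	uint64_t old;

	old = pte_load_clear_demo(&pte);
	printf("old pte %#llx, now %#llx\n",
	    (unsigned long long)old, (unsigned long long)pte);
	return (0);
}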