Page MenuHomeFreeBSD

D2362.id4983.diff
No OneTemporary

D2362.id4983.diff

This file is larger than 256 KB, so syntax highlighting was skipped.
Index: sys/conf/files.amd64
===================================================================
--- sys/conf/files.amd64
+++ sys/conf/files.amd64
@@ -576,7 +576,7 @@
x86/x86/tsc.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm
-x86/xen/xen_intr.c optional xen | xenhvm
+x86/xen/xen_intr.c optional xenhvm
x86/xen/pv.c optional xenhvm
x86/xen/pvcpu_enum.c optional xenhvm
x86/xen/xen_apic.c optional xenhvm
Index: sys/conf/files.i386
===================================================================
--- sys/conf/files.i386
+++ sys/conf/files.i386
@@ -427,16 +427,15 @@
i386/i386/atomic.c standard \
compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}"
i386/i386/autoconf.c standard
-i386/i386/bios.c optional native
-i386/i386/bioscall.s optional native
+i386/i386/bios.c standard
+i386/i386/bioscall.s standard
i386/i386/bpf_jit_machdep.c optional bpf_jitter
i386/i386/db_disasm.c optional ddb
i386/i386/db_interface.c optional ddb
i386/i386/db_trace.c optional ddb
i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris
i386/i386/elf_machdep.c standard
-i386/i386/exception.s optional native
-i386/xen/exception.s optional xen
+i386/i386/exception.s standard
i386/i386/gdb_machdep.c optional gdb
i386/i386/geode.c optional cpu_geode
i386/i386/i686_mem.c optional mem
@@ -444,22 +443,17 @@
i386/i386/initcpu.c standard
i386/i386/io.c optional io
i386/i386/k6_mem.c optional mem
-i386/i386/locore.s optional native no-obj
-i386/xen/locore.s optional xen no-obj
+i386/i386/locore.s standard no-obj
i386/i386/longrun.c optional cpu_enable_longrun
i386/i386/machdep.c standard
-i386/xen/xen_machdep.c optional xen
i386/i386/mem.c optional mem
i386/i386/minidump_machdep.c standard
i386/i386/mp_clock.c optional smp
-i386/i386/mp_machdep.c optional native smp
-i386/xen/mp_machdep.c optional xen smp
+i386/i386/mp_machdep.c optional smp
i386/i386/mp_watchdog.c optional mp_watchdog smp
i386/i386/mpboot.s optional smp native
-i386/xen/mptable.c optional apic xen
i386/i386/perfmon.c optional perfmon
-i386/i386/pmap.c optional native
-i386/xen/pmap.c optional xen
+i386/i386/pmap.c standard
i386/i386/ptrace_machdep.c standard
i386/i386/stack_machdep.c optional ddb | stack
i386/i386/support.s standard
@@ -488,7 +482,6 @@
i386/ibcs2/ibcs2_xenix.c optional ibcs2
i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2
i386/ibcs2/imgact_coff.c optional ibcs2
-i386/xen/clock.c optional xen
i386/isa/elink.c optional ep | ie
i386/isa/npx.c optional npx
i386/isa/pmtimer.c optional pmtimer
@@ -565,8 +558,8 @@
x86/iommu/intel_quirks.c optional acpi acpi_dmar pci
x86/iommu/intel_utils.c optional acpi acpi_dmar pci
x86/isa/atpic.c optional atpic
-x86/isa/atrtc.c optional native
-x86/isa/clock.c optional native
+x86/isa/atrtc.c standard
+x86/isa/clock.c standard
x86/isa/elcr.c optional atpic | apic native
x86/isa/isa.c optional isa
x86/isa/isa_dma.c optional isa
@@ -582,20 +575,20 @@
x86/x86/identcpu.c standard
x86/x86/intr_machdep.c standard
x86/x86/io_apic.c optional apic
-x86/x86/legacy.c optional native
+x86/x86/legacy.c standard
x86/x86/local_apic.c optional apic
x86/x86/mca.c standard
x86/x86/mptable.c optional apic native
x86/x86/mptable_pci.c optional apic native pci
-x86/x86/mp_x86.c optional native smp
+x86/x86/mp_x86.c optional smp
x86/x86/msi.c optional apic pci
x86/x86/nexus.c standard
x86/x86/tsc.c standard
x86/x86/pvclock.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm
-x86/xen/xen_intr.c optional xen | xenhvm
+x86/xen/xen_intr.c optional xenhvm
x86/xen/xen_apic.c optional xenhvm
-x86/xen/xenpv.c optional xen | xenhvm
-x86/xen/xen_nexus.c optional xen | xenhvm
-x86/xen/xen_msi.c optional xen | xenhvm
+x86/xen/xenpv.c optional xenhvm
+x86/xen/xen_nexus.c optional xenhvm
+x86/xen/xen_msi.c optional xenhvm
Index: sys/conf/options.i386
===================================================================
--- sys/conf/options.i386
+++ sys/conf/options.i386
@@ -121,8 +121,6 @@
# BPF just-in-time compiler
BPF_JITTER opt_bpf.h
-NATIVE opt_global.h
-XEN opt_global.h
XENHVM opt_global.h
# options for the Intel C600 SAS driver (isci)
Index: sys/i386/conf/DEFAULTS
===================================================================
--- sys/i386/conf/DEFAULTS
+++ sys/i386/conf/DEFAULTS
@@ -26,7 +26,6 @@
options GEOM_PART_MBR
# enable support for native hardware
-options NATIVE
device atpic
options NEW_PCIB
Index: sys/i386/conf/XEN
===================================================================
--- sys/i386/conf/XEN
+++ /dev/null
@@ -1,96 +0,0 @@
-#
-# XEN -- Kernel configuration for i386 XEN DomU
-#
-# $FreeBSD$
-
-cpu I686_CPU
-ident XEN
-
-makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
-
-# The following drivers don't build with PAE or XEN enabled.
-makeoptions WITHOUT_MODULES="ctl dpt drm drm2 hptmv ida"
-
-# The following drivers don't work with PAE enabled.
-makeoptions WITHOUT_MODULES+="ncr pst"
-
-options SCHED_ULE # ULE scheduler
-options PREEMPTION # Enable kernel thread preemption
-
-options INET # InterNETworking
-options INET6 # IPv6 communications protocols
-options SCTP # Stream Control Transmission Protocol
-options FFS # Berkeley Fast Filesystem
-options SOFTUPDATES # Enable FFS soft updates support
-options UFS_ACL # Support for access control lists
-options UFS_DIRHASH # Improve performance on big directories
-options UFS_GJOURNAL # Enable gjournal-based UFS journaling
-options NFSCL # Network Filesystem Client
-options NFSD # Network Filesystem Server
-options NFSLOCKD # Network Lock Manager
-options NFS_ROOT # NFS usable as /, requires NFSCL
-options MSDOSFS # MSDOS Filesystem
-options CD9660 # ISO 9660 Filesystem
-options PROCFS # Process filesystem (requires PSEUDOFS)
-options PSEUDOFS # Pseudo-filesystem framework
-options GEOM_PART_GPT # GUID Partition Tables.
-options GEOM_LABEL # Provides labelization
-options COMPAT_FREEBSD4 # Compatible with FreeBSD4
-options COMPAT_FREEBSD5 # Compatible with FreeBSD5
-options COMPAT_FREEBSD6 # Compatible with FreeBSD6
-options COMPAT_FREEBSD7 # Compatible with FreeBSD7
-options COMPAT_FREEBSD9 # Compatible with FreeBSD9
-options COMPAT_FREEBSD10 # Compatible with FreeBSD10
-options KTRACE # ktrace(1) support
-options STACK # stack(9) support
-options SYSVSHM # SYSV-style shared memory
-options SYSVMSG # SYSV-style message queues
-options SYSVSEM # SYSV-style semaphores
-options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
-options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options AUDIT # Security event auditing
-
-# Debugging for use in -current
-options KDB # Enable kernel debugger support.
-options DDB # Support DDB.
-options GDB # Support remote GDB.
-options DEADLKRES # Enable the deadlock resolver
-options INVARIANTS # Enable calls of extra sanity checking
-options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
-options WITNESS # Enable checks to detect deadlocks and cycles
-options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
-
-options PAE
-nooption NATIVE
-option XEN
-nodevice atpic
-nodevice isa
-options MCLSHIFT=12
-
-# To make an SMP kernel, the next two lines are needed
-options SMP # Symmetric MultiProcessor Kernel
-device apic # I/O APIC
-
-#device atkbdc # AT keyboard controller
-#device atkbd # AT keyboard
-device psm # PS/2 mouse
-device pci
-
-#device kbdmux # keyboard multiplexer
-
-# Pseudo devices.
-device loop # Network loopback
-device random # Entropy device
-device ether # Ethernet support
-device tun # Packet tunnel.
-device md # Memory "disks"
-device gif # IPv6 and IPv4 tunneling
-
-# Wireless cards
-options IEEE80211_SUPPORT_MESH
-options AH_SUPPORT_AR5416
-
-# The `bpf' device enables the Berkeley Packet Filter.
-# Be aware of the administrative consequences of enabling this!
-# Note that 'bpf' is required for DHCP.
-device bpf # Berkeley packet filter
Index: sys/i386/i386/apic_vector.s
===================================================================
--- sys/i386/i386/apic_vector.s
+++ sys/i386/i386/apic_vector.s
@@ -247,7 +247,6 @@
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
*/
-#ifndef XEN
.text
SUPERALIGN_TEXT
IDTVEC(ipi_intr_bitmap_handler)
@@ -262,7 +261,7 @@
call ipi_bitmap_handler
MEXITCOUNT
jmp doreti
-#endif
+
/*
* Executed by a CPU when it receives an IPI_STOP from another CPU.
*/
@@ -282,7 +281,6 @@
/*
* Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
*/
-#ifndef XEN
.text
SUPERALIGN_TEXT
IDTVEC(cpususpend)
@@ -295,7 +293,6 @@
POP_FRAME
jmp doreti_iret
-#endif
/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
Index: sys/i386/i386/genassym.c
===================================================================
--- sys/i386/i386/genassym.c
+++ sys/i386/i386/genassym.c
@@ -238,11 +238,6 @@
ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat));
#endif
-#ifdef XEN
-ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3));
-ASSYM(XEN_HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_START);
-#endif
-
#ifdef HWPMC_HOOKS
ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN);
#endif
Index: sys/i386/i386/machdep.c
===================================================================
--- sys/i386/i386/machdep.c
+++ sys/i386/i386/machdep.c
@@ -160,24 +160,6 @@
uint32_t arch_i386_xbox_memsize = 0;
#endif
-#ifdef XEN
-/* XEN includes */
-#include <xen/xen-os.h>
-#include <xen/hypervisor.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/xen_intr.h>
-
-void Xhypervisor_callback(void);
-void failsafe_callback(void);
-
-extern trap_info_t trap_table[];
-struct proc_ldt default_proc_ldt;
-extern int init_first;
-int running_xen = 1;
-extern unsigned long physfree;
-#endif /* XEN */
-
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
@@ -356,9 +338,7 @@
*/
bufinit();
vm_pager_bufferinit();
-#ifndef XEN
cpu_setregs();
-#endif
}
/*
@@ -1291,13 +1271,8 @@
int _default_ldt;
-#ifdef XEN
-union descriptor *gdt;
-union descriptor *ldt;
-#else
union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */
union descriptor ldt[NLDT]; /* local descriptor table */
-#endif
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
struct region_descriptor r_gdt, r_idt; /* table descriptors */
@@ -1397,7 +1372,6 @@
.ssd_xx = 0, .ssd_xx1 = 0,
.ssd_def32 = 1,
.ssd_gran = 1 },
-#ifndef XEN
/* GPROC0_SEL 9 Proc 0 Tss Descriptor */
{
.ssd_base = 0x0,
@@ -1489,7 +1463,6 @@
.ssd_xx = 0, .ssd_xx1 = 0,
.ssd_def32 = 0,
.ssd_gran = 0 },
-#endif /* !XEN */
};
static struct soft_segment_descriptor ldt_segs[] = {
@@ -1641,7 +1614,7 @@
ssd->ssd_gran = sd->sd_gran;
}
-#if !defined(PC98) && !defined(XEN)
+#if !defined(PC98)
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
int *physmap_idxp)
@@ -1748,9 +1721,8 @@
if (!add_smap_entry(smap, physmap, physmap_idxp))
break;
}
-#endif /* !PC98 && !XEN */
+#endif /* !PC98 */
-#ifndef XEN
static void
basemem_setup(void)
{
@@ -1798,7 +1770,6 @@
for (i = basemem / 4; i < 160; i++)
pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
}
-#endif /* !XEN */
/*
* Populate the (physmap) array with base/bound pairs describing the
@@ -2074,8 +2045,6 @@
for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
off);
-
- PT_UPDATES_FLUSH();
}
#else /* PC98 */
static void
@@ -2086,7 +2055,6 @@
vm_paddr_t physmap[PHYSMAP_SIZE];
pt_entry_t *pte;
quad_t dcons_addr, dcons_size, physmem_tunable;
-#ifndef XEN
int hasbrokenint12, i, res;
u_int extmem;
struct vm86frame vmf;
@@ -2094,17 +2062,8 @@
vm_paddr_t pa;
struct bios_smap *smap, *smapbase;
caddr_t kmdp;
-#endif
has_smap = 0;
-#if defined(XEN)
- Maxmem = xen_start_info->nr_pages - init_first;
- physmem = Maxmem;
- basemem = 0;
- physmap[0] = init_first << PAGE_SHIFT;
- physmap[1] = ptoa(Maxmem) - round_page(msgbufsize);
- physmap_idx = 0;
-#else
#ifdef XBOX
if (arch_i386_is_xbox) {
/*
@@ -2247,7 +2206,6 @@
physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
physmap_done:
-#endif
/*
* Now, physmap contains a map of physical memory.
*/
@@ -2321,7 +2279,6 @@
getenv_quad("dcons.size", &dcons_size) == 0)
dcons_addr = 0;
-#ifndef XEN
/*
* physmap is in bytes, so when converting to page boundaries,
* round up the start address and round down the end address.
@@ -2442,13 +2399,6 @@
}
*pte = 0;
invltlb();
-#else
- phys_avail[0] = physfree;
- phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
- dump_avail[0] = 0;
- dump_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
-
-#endif
/*
* XXX
@@ -2472,272 +2422,9 @@
for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
off);
-
- PT_UPDATES_FLUSH();
}
#endif /* PC98 */
-#ifdef XEN
-#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
-
-register_t
-init386(first)
- int first;
-{
- unsigned long gdtmachpfn;
- int error, gsel_tss, metadata_missing, x, pa;
- struct pcpu *pc;
-#ifdef CPU_ENABLE_SSE
- struct xstate_hdr *xhdr;
-#endif
- struct callback_register event = {
- .type = CALLBACKTYPE_event,
- .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
- };
- struct callback_register failsafe = {
- .type = CALLBACKTYPE_failsafe,
- .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback },
- };
-
- thread0.td_kstack = proc0kstack;
- thread0.td_kstack_pages = KSTACK_PAGES;
-
- /*
- * This may be done better later if it gets more high level
- * components in it. If so just link td->td_proc here.
- */
- proc_linkup0(&proc0, &thread0);
-
- metadata_missing = 0;
- if (xen_start_info->mod_start) {
- preload_metadata = (caddr_t)xen_start_info->mod_start;
- preload_bootstrap_relocate(KERNBASE);
- } else {
- metadata_missing = 1;
- }
- if (envmode == 1)
- kern_envp = static_env;
- else if ((caddr_t)xen_start_info->cmd_line)
- kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line);
-
- boothowto |= xen_boothowto(kern_envp);
-
- /* Init basic tunables, hz etc */
- init_param1();
-
- /*
- * XEN occupies a portion of the upper virtual address space
- * At its base it manages an array mapping machine page frames
- * to physical page frames - hence we need to be able to
- * access 4GB - (64MB - 4MB + 64k)
- */
- gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
-
- pc = &__pcpu[0];
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
-
- PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW);
- bzero(gdt, PAGE_SIZE);
- for (x = 0; x < NGDT; x++)
- ssdtosd(&gdt_segs[x], &gdt[x].sd);
-
- mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
-
- gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
- PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V);
- PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
- lgdt(&r_gdt);
- gdtset = 1;
-
- if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
- panic("set_trap_table failed - error %d\n", error);
- }
-
- error = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
- if (error == 0)
- error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
-#if CONFIG_XEN_COMPAT <= 0x030002
- if (error == -ENOXENSYS)
- HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL),
- (unsigned long)Xhypervisor_callback,
- GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
-#endif
- pcpu_init(pc, 0, sizeof(struct pcpu));
- for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
- pmap_kenter(pa + KERNBASE, pa);
- dpcpu_init((void *)(first + KERNBASE), 0);
- first += DPCPU_SIZE;
- physfree += DPCPU_SIZE;
- init_first += DPCPU_SIZE / PAGE_SIZE;
-
- PCPU_SET(prvspace, pc);
- PCPU_SET(curthread, &thread0);
-
- /*
- * Initialize mutexes.
- *
- * icu_lock: in order to allow an interrupt to occur in a critical
- * section, to set pcpu->ipending (etc...) properly, we
- * must be able to get the icu lock, so it can't be
- * under witness.
- */
- mutex_init();
- mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
-
- /* make ldt memory segments */
- PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW);
- bzero(ldt, PAGE_SIZE);
- ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
- ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
- for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
- ssdtosd(&ldt_segs[x], &ldt[x].sd);
-
- default_proc_ldt.ldt_base = (caddr_t)ldt;
- default_proc_ldt.ldt_len = 6;
- _default_ldt = (int)&default_proc_ldt;
- PCPU_SET(currentldt, _default_ldt);
- PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW);
- xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
-
-#if defined(XEN_PRIVILEGED)
- /*
- * Initialize the i8254 before the console so that console
- * initialization can use DELAY().
- */
- i8254_init();
-#endif
-
- /*
- * Initialize the console before we print anything out.
- */
- cninit();
-
- if (metadata_missing)
- printf("WARNING: loader(8) metadata is missing!\n");
-
-#ifdef DEV_ISA
-#ifdef DEV_ATPIC
- elcr_probe();
- atpic_startup();
-#else
- /* Reset and mask the atpics and leave them shut down. */
- atpic_reset();
-
- /*
- * Point the ICU spurious interrupt vectors at the APIC spurious
- * interrupt handler.
- */
- setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
-#endif
-#endif
-
-#ifdef DDB
- db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab);
-#endif
-
- kdb_init();
-
-#ifdef KDB
- if (boothowto & RB_KDB)
- kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
-#endif
-
- finishidentcpu(); /* Final stage of CPU initialization */
- setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- initializecpu(); /* Initialize CPU registers */
- initializecpucache();
-
- /* pointer to selector slot for %fs/%gs */
- PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
-
- dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
- dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
- dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
- dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-#if defined(PAE) || defined(PAE_TABLES)
- dblfault_tss.tss_cr3 = (int)IdlePDPT;
-#else
- dblfault_tss.tss_cr3 = (int)IdlePTD;
-#endif
- dblfault_tss.tss_eip = (int)dblfault_handler;
- dblfault_tss.tss_eflags = PSL_KERNEL;
- dblfault_tss.tss_ds = dblfault_tss.tss_es =
- dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
- dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
- dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
- dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
-
- vm86_initialize();
- getmemsize(first);
- init_param2(physmem);
-
- /* now running on new page tables, configured,and u/iom is accessible */
-
- msgbufinit(msgbufp, msgbufsize);
-#ifdef DEV_NPX
- npxinit(true);
-#endif
- /*
- * Set up thread0 pcb after npxinit calculated pcb + fpu save
- * area size. Zero out the extended state header in fpu save
- * area.
- */
- thread0.td_pcb = get_pcb_td(&thread0);
- bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
-#ifdef CPU_ENABLE_SSE
- if (use_xsave) {
- xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
- 1);
- xhdr->xstate_bv = xsave_mask;
- }
-#endif
- PCPU_SET(curpcb, thread0.td_pcb);
- /* make an initial tss so cpu can get interrupt stack on syscall! */
- /* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
- PCPU_GET(common_tss.tss_esp0));
-
- /* transfer to user mode */
-
- _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
- _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
-
- /* setup proc 0's pcb */
- thread0.td_pcb->pcb_flags = 0;
-#if defined(PAE) || defined(PAE_TABLES)
- thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
-#else
- thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
-#endif
- thread0.td_pcb->pcb_ext = 0;
- thread0.td_frame = &proc0_tf;
- thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
- thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
-
- cpu_probe_amdc1e();
-
- /* Location of kernel stack for locore */
- return ((register_t)thread0.td_pcb);
-}
-
-#else
register_t
init386(first)
int first;
@@ -3061,7 +2748,6 @@
/* Location of kernel stack for locore */
return ((register_t)thread0.td_pcb);
}
-#endif
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
Index: sys/i386/i386/minidump_machdep.c
===================================================================
--- sys/i386/i386/minidump_machdep.c
+++ sys/i386/i386/minidump_machdep.c
@@ -68,10 +68,6 @@
static uint64_t counter, progress;
CTASSERT(sizeof(*vm_page_dump) == 4);
-#ifndef XEN
-#define xpmap_mtop(x) (x)
-#define xpmap_ptom(x) (x)
-#endif
static int
@@ -205,7 +201,7 @@
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is an entire 2M page. */
- pa = xpmap_mtop(pd[j] & PG_PS_FRAME);
+ pa = pd[j] & PG_PS_FRAME;
for (k = 0; k < NPTEPG; k++) {
if (is_dumpable(pa))
dump_add_page(pa);
@@ -215,10 +211,10 @@
}
if ((pd[j] & PG_V) == PG_V) {
/* set bit for each valid page in this 2MB block */
- pt = pmap_kenter_temporary(xpmap_mtop(pd[j] & PG_FRAME), 0);
+ pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
for (k = 0; k < NPTEPG; k++) {
if ((pt[k] & PG_V) == PG_V) {
- pa = xpmap_mtop(pt[k] & PG_FRAME);
+ pa = pt[k] & PG_FRAME;
if (is_dumpable(pa))
dump_add_page(pa);
}
@@ -318,24 +314,8 @@
continue;
}
if ((pd[j] & PG_V) == PG_V) {
- pa = xpmap_mtop(pd[j] & PG_FRAME);
-#ifndef XEN
+ pa = pd[j] & PG_FRAME;
error = blk_write(di, 0, pa, PAGE_SIZE);
-#else
- pt = pmap_kenter_temporary(pa, 0);
- memcpy(fakept, pt, PAGE_SIZE);
- for (i = 0; i < NPTEPG; i++)
- fakept[i] = xpmap_mtop(fakept[i]);
- error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
- if (error)
- goto fail;
- /* flush, in case we reuse fakept in the same block */
- error = blk_flush(di);
- if (error)
- goto fail;
- bzero(fakept, sizeof(fakept));
-#endif
-
if (error)
goto fail;
} else {
Index: sys/i386/i386/support.s
===================================================================
--- sys/i386/i386/support.s
+++ sys/i386/i386/support.s
@@ -695,11 +695,9 @@
*/
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
-#ifndef XEN
/* reload the descriptor table */
movl 4(%esp),%eax
lgdt (%eax)
-#endif
/* flush the prefetch q */
jmp 1f
Index: sys/i386/i386/swtch.s
===================================================================
--- sys/i386/i386/swtch.s
+++ sys/i386/i386/swtch.s
@@ -204,18 +204,6 @@
SETOP %esi,TD_LOCK(%edi) /* Switchout td_lock */
sw1:
BLOCK_SPIN(%ecx)
-#ifdef XEN
- pushl %eax
- pushl %ecx
- pushl %edx
- call xen_handle_thread_switch
- popl %edx
- popl %ecx
- popl %eax
- /*
- * XXX set IOPL
- */
-#else
/*
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
@@ -264,7 +252,7 @@
movl 12(%esi), %ebx
movl %eax, 8(%edi)
movl %ebx, 12(%edi)
-#endif
+
/* Restore context. */
movl PCB_EBX(%edx),%ebx
movl PCB_ESP(%edx),%esp
Index: sys/i386/i386/sys_machdep.c
===================================================================
--- sys/i386/i386/sys_machdep.c
+++ sys/i386/i386/sys_machdep.c
@@ -59,20 +59,6 @@
#include <security/audit/audit.h>
-#ifdef XEN
-#include <machine/xen/xenfunc.h>
-
-void i386_reset_ldt(struct proc_ldt *pldt);
-
-void
-i386_reset_ldt(struct proc_ldt *pldt)
-{
- xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
-}
-#else
-#define i386_reset_ldt(x)
-#endif
-
#include <vm/vm_kern.h> /* for kernel_map */
#define MAX_LD 8192
@@ -211,12 +197,7 @@
*/
sd.sd_lobase = base & 0xffffff;
sd.sd_hibase = (base >> 24) & 0xff;
-#ifdef XEN
- /* need to do nosegneg like Linux */
- sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else
sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
-#endif
sd.sd_hilimit = 0xf;
sd.sd_type = SDT_MEMRWA;
sd.sd_dpl = SEL_UPL;
@@ -226,12 +207,7 @@
sd.sd_gran = 1;
critical_enter();
td->td_pcb->pcb_fsd = sd;
-#ifdef XEN
- HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]),
- *(uint64_t *)&sd);
-#else
PCPU_GET(fsgs_gdt)[0] = sd;
-#endif
critical_exit();
td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
}
@@ -252,12 +228,7 @@
sd.sd_lobase = base & 0xffffff;
sd.sd_hibase = (base >> 24) & 0xff;
-#ifdef XEN
- /* need to do nosegneg like Linux */
- sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else
sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
-#endif
sd.sd_hilimit = 0xf;
sd.sd_type = SDT_MEMRWA;
sd.sd_dpl = SEL_UPL;
@@ -267,12 +238,7 @@
sd.sd_gran = 1;
critical_enter();
td->td_pcb->pcb_gsd = sd;
-#ifdef XEN
- HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]),
- *(uint64_t *)&sd);
-#else
PCPU_GET(fsgs_gdt)[1] = sd;
-#endif
critical_exit();
load_gs(GSEL(GUGS_SEL, SEL_UPL));
}
@@ -434,10 +400,6 @@
}
pldt = mdp->md_ldt;
-#ifdef XEN
- i386_reset_ldt(pldt);
- PCPU_SET(currentldt, (int)pldt);
-#else
#ifdef SMP
gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
@@ -445,7 +407,6 @@
#endif
lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
-#endif /* XEN */
if (dtlocked)
mtx_unlock_spin(&dt_lock);
}
@@ -464,43 +425,6 @@
}
#endif
-#ifdef XEN
-
-/*
- * dt_lock must be held. Returns with dt_lock held.
- */
-struct proc_ldt *
-user_ldt_alloc(struct mdproc *mdp, int len)
-{
- struct proc_ldt *pldt, *new_ldt;
-
- mtx_assert(&dt_lock, MA_OWNED);
- mtx_unlock_spin(&dt_lock);
- new_ldt = malloc(sizeof(struct proc_ldt),
- M_SUBPROC, M_WAITOK);
-
- new_ldt->ldt_len = len = NEW_MAX_LD(len);
- new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
- round_page(len * sizeof(union descriptor)), M_WAITOK);
- new_ldt->ldt_refcnt = 1;
- new_ldt->ldt_active = 0;
-
- mtx_lock_spin(&dt_lock);
- if ((pldt = mdp->md_ldt)) {
- if (len > pldt->ldt_len)
- len = pldt->ldt_len;
- bcopy(pldt->ldt_base, new_ldt->ldt_base,
- len * sizeof(union descriptor));
- } else {
- bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
- }
- mtx_unlock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */
- pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
- new_ldt->ldt_len*sizeof(union descriptor));
- mtx_lock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */
- return (new_ldt);
-}
-#else
/*
* dt_lock must be held. Returns with dt_lock held.
*/
@@ -535,7 +459,6 @@
return (new_ldt);
}
-#endif /* !XEN */
/*
* Must be called with dt_lock held. Returns with dt_lock unheld.
@@ -553,13 +476,8 @@
}
if (td == curthread) {
-#ifdef XEN
- i386_reset_ldt(&default_proc_ldt);
- PCPU_SET(currentldt, (int)&default_proc_ldt);
-#else
lldt(_default_ldt);
PCPU_SET(currentldt, _default_ldt);
-#endif
}
mdp->md_ldt = NULL;
@@ -785,27 +703,7 @@
td->td_retval[0] = uap->start;
return (error);
}
-#ifdef XEN
-static int
-i386_set_ldt_data(struct thread *td, int start, int num,
- union descriptor *descs)
-{
- struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt = mdp->md_ldt;
- mtx_assert(&dt_lock, MA_OWNED);
-
- while (num) {
- xen_update_descriptor(
- &((union descriptor *)(pldt->ldt_base))[start],
- descs);
- num--;
- start++;
- descs++;
- }
- return (0);
-}
-#else
static int
i386_set_ldt_data(struct thread *td, int start, int num,
union descriptor *descs)
@@ -821,7 +719,6 @@
num * sizeof(union descriptor));
return (0);
}
-#endif /* !XEN */
static int
i386_ldt_grow(struct thread *td, int len)
Index: sys/i386/i386/vm_machdep.c
===================================================================
--- sys/i386/i386/vm_machdep.c
+++ sys/i386/i386/vm_machdep.c
@@ -89,9 +89,6 @@
#include <vm/vm_map.h>
#include <vm/vm_param.h>
-#ifdef XEN
-#include <xen/hypervisor.h>
-#endif
#ifdef PC98
#include <pc98/cbus/cbus.h>
#else
@@ -304,10 +301,8 @@
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
- /*
- * XXX XEN need to check on PSL_USER is handled
- */
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+
/*
* Now, cpu_switch() can schedule the new process.
* pcb_esp is loaded pointing to the cpu_switch() stack frame
@@ -698,12 +693,6 @@
#endif
disable_intr();
-#ifdef XEN
- if (smp_processor_id() == 0)
- HYPERVISOR_shutdown(SHUTDOWN_reboot);
- else
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-#endif
#ifdef CPU_ELAN
if (elan_mmcr != NULL)
elan_mmcr->RESCFG = 1;
@@ -797,13 +786,8 @@
*/
ptep = vtopte(sf->kva);
opte = *ptep;
-#ifdef XEN
- PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag
- | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0));
-#else
*ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
pmap_cache_bits(sf->m->md.pat_mode, 0);
-#endif
/*
* Avoid unnecessary TLB invalidations: If the sf_buf's old
@@ -854,15 +838,8 @@
int
sf_buf_unmap(struct sf_buf *sf)
{
-#ifdef XEN
- /*
- * Xen doesn't like having dangling R/W mappings
- */
- pmap_qremove(sf->kva, 1);
- return (1);
-#else
+
return (0);
-#endif
}
static void
Index: sys/i386/include/asmacros.h
===================================================================
--- sys/i386/include/asmacros.h
+++ sys/i386/include/asmacros.h
@@ -176,36 +176,12 @@
movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \
movl %eax, %fs
-#ifdef XEN
-#define LOAD_CR3(reg) \
- movl reg,PCPU(CR3); \
- pushl %ecx ; \
- pushl %edx ; \
- pushl %esi ; \
- pushl reg ; \
- call xen_load_cr3 ; \
- addl $4,%esp ; \
- popl %esi ; \
- popl %edx ; \
- popl %ecx ; \
-
-#define READ_CR3(reg) movl PCPU(CR3),reg;
-#define LLDT(arg) \
- pushl %edx ; \
- pushl %eax ; \
- xorl %eax,%eax ; \
- movl %eax,%gs ; \
- call i386_reset_ldt ; \
- popl %eax ; \
- popl %edx
-#define CLI call ni_cli
-#else
+#if 0
#define LOAD_CR3(reg) movl reg,%cr3;
#define READ_CR3(reg) movl %cr3,reg;
#define LLDT(arg) lldt arg;
#define CLI cli
-#endif /* !XEN */
-
+#endif
#endif /* LOCORE */
Index: sys/i386/include/cpufunc.h
===================================================================
--- sys/i386/include/cpufunc.h
+++ sys/i386/include/cpufunc.h
@@ -42,17 +42,6 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
-#ifdef XEN
-extern void xen_cli(void);
-extern void xen_sti(void);
-extern u_int xen_rcr2(void);
-extern void xen_load_cr3(u_int data);
-extern void xen_tlb_flush(void);
-extern void xen_invlpg(u_int addr);
-extern void write_eflags(u_int eflags);
-extern u_int read_eflags(void);
-#endif
-
struct region_descriptor;
#define readb(va) (*(volatile uint8_t *) (va))
@@ -106,11 +95,8 @@
static __inline void
disable_intr(void)
{
-#ifdef XEN
- xen_cli();
-#else
+
__asm __volatile("cli" : : : "memory");
-#endif
}
static __inline void
@@ -132,11 +118,8 @@
static __inline void
enable_intr(void)
{
-#ifdef XEN
- xen_sti();
-#else
+
__asm __volatile("sti");
-#endif
}
static __inline void
@@ -325,11 +308,7 @@
}
static __inline u_int
-#ifdef XEN
-_read_eflags(void)
-#else
read_eflags(void)
-#endif
{
u_int ef;
@@ -389,11 +368,7 @@
}
static __inline void
-#ifdef XEN
-_write_eflags(u_int ef)
-#else
write_eflags(u_int ef)
-#endif
{
__asm __volatile("pushl %0; popfl" : : "r" (ef));
}
@@ -425,9 +400,6 @@
{
u_int data;
-#ifdef XEN
- return (xen_rcr2());
-#endif
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
@@ -435,11 +407,8 @@
static __inline void
load_cr3(u_int data)
{
-#ifdef XEN
- xen_load_cr3(data);
-#else
+
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
-#endif
}
static __inline u_int
@@ -491,11 +460,8 @@
static __inline void
invltlb(void)
{
-#ifdef XEN
- xen_tlb_flush();
-#else
+
load_cr3(rcr3());
-#endif
}
/*
@@ -506,11 +472,7 @@
invlpg(u_int addr)
{
-#ifdef XEN
- xen_invlpg(addr);
-#else
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
-#endif
}
static __inline u_short
Index: sys/i386/include/intr_machdep.h
===================================================================
--- sys/i386/include/intr_machdep.h
+++ sys/i386/include/intr_machdep.h
@@ -58,13 +58,7 @@
(FIRST_MSI_INT + NUM_MSI_INTS)
#define LAST_EVTCHN_INT \
(FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
-#elif defined(XEN)
-#include <xen/xen-os.h>
-#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS
-#define FIRST_EVTCHN_INT 0
-#define LAST_EVTCHN_INT \
- (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
-#else /* !XEN && !XENHVM */
+#else /* !XENHVM */
#define NUM_EVTCHN_INTS 0
#endif
#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS)
Index: sys/i386/include/pcpu.h
===================================================================
--- sys/i386/include/pcpu.h
+++ sys/i386/include/pcpu.h
@@ -44,34 +44,6 @@
* other processors"
*/
-#if defined(XEN)
-
-/* These are peridically updated in shared_info, and then copied here. */
-struct shadow_time_info {
- uint64_t tsc_timestamp; /* TSC at last update of time vals. */
- uint64_t system_timestamp; /* Time, in nanosecs, since boot. */
- uint32_t tsc_to_nsec_mul;
- uint32_t tsc_to_usec_mul;
- int tsc_shift;
- uint32_t version;
-};
-
-#define PCPU_XEN_FIELDS \
- ; \
- u_int pc_cr3; /* track cr3 for R1/R3*/ \
- vm_paddr_t *pc_pdir_shadow; \
- uint64_t pc_processed_system_time; \
- struct shadow_time_info pc_shadow_time; \
- char __pad[185]
-
-#else /* !XEN */
-
-#define PCPU_XEN_FIELDS \
- ; \
- char __pad[233]
-
-#endif
-
#define PCPU_MD_FIELDS \
char pc_monitorbuf[128] __aligned(128); /* cache line */ \
struct pcpu *pc_prvspace; /* Self-reference */ \
@@ -85,8 +57,8 @@
u_int pc_apic_id; \
int pc_private_tss; /* Flag indicating private tss*/\
u_int pc_cmci_mask; /* MCx banks for CMCI */ \
- u_int pc_vcpu_id /* Xen vCPU ID */ \
- PCPU_XEN_FIELDS
+ u_int pc_vcpu_id; /* Xen vCPU ID */ \
+ char __pad[233]
#ifdef _KERNEL
Index: sys/i386/include/pmap.h
===================================================================
--- sys/i386/include/pmap.h
+++ sys/i386/include/pmap.h
@@ -219,76 +219,6 @@
*/
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
-#if defined(XEN)
-#include <sys/param.h>
-
-#include <xen/xen-os.h>
-
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenpmap.h>
-
-extern pt_entry_t pg_nx;
-
-#define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M)
-
-#define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma)))
-#define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m)))
-
-#define VTOM(va) xpmap_ptom(VTOP(va))
-
-static __inline vm_paddr_t
-pmap_kextract_ma(vm_offset_t va)
-{
- vm_paddr_t ma;
- if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) {
- ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1));
- } else {
- ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK);
- }
- return ma;
-}
-
-static __inline vm_paddr_t
-pmap_kextract(vm_offset_t va)
-{
- return xpmap_mtop(pmap_kextract_ma(va));
-}
-#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va)))
-
-vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va);
-
-void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa);
-void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len);
-void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len);
-
-static __inline pt_entry_t
-pte_load_store(pt_entry_t *ptep, pt_entry_t v)
-{
- pt_entry_t r;
-
- r = *ptep;
- PT_SET_VA(ptep, v, TRUE);
- return (r);
-}
-
-static __inline pt_entry_t
-pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v)
-{
- pt_entry_t r;
-
- r = *ptep;
- PT_SET_VA_MA(ptep, v, TRUE);
- return (r);
-}
-
-#define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL)
-
-#define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte)
-#define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte)
-#define pde_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte)
-
-#elif !defined(XEN)
-
/*
* KPTmap is a linear mapping of the kernel page table. It differs from the
* recursive mapping in two ways: (1) it only provides access to kernel page
@@ -328,13 +258,8 @@
}
return (pa);
}
-#endif
-
-#if !defined(XEN)
-#define PT_UPDATES_FLUSH()
-#endif
-#if (defined(PAE) || defined(PAE_TABLES)) && !defined(XEN)
+#if (defined(PAE) || defined(PAE_TABLES))
#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte)
@@ -343,7 +268,7 @@
extern pt_entry_t pg_nx;
-#elif !defined(PAE) && !defined(PAE_TABLES) && !defined(XEN)
+#else /* !(PAE || PAE_TABLES) */
#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte)
@@ -352,7 +277,7 @@
*(u_int *)(ptep) = (u_int)(pte); \
} while (0)
-#endif /* PAE */
+#endif /* !(PAE || PAE_TABLES) */
#define pte_clear(ptep) pte_store(ptep, 0)
Index: sys/i386/include/segments.h
===================================================================
--- sys/i386/include/segments.h
+++ sys/i386/include/segments.h
@@ -82,14 +82,8 @@
#ifdef _KERNEL
extern int _default_ldt;
-#ifdef XEN
-extern struct proc_ldt default_proc_ldt;
-extern union descriptor *gdt;
-extern union descriptor *ldt;
-#else
extern union descriptor gdt[];
extern union descriptor ldt[NLDT];
-#endif
extern struct soft_segment_descriptor gdt_segs[];
extern struct gate_descriptor *idt;
extern struct region_descriptor r_gdt, r_idt;
Index: sys/i386/include/smp.h
===================================================================
--- sys/i386/include/smp.h
+++ sys/i386/include/smp.h
@@ -90,9 +90,7 @@
void assign_cpu_ids(void);
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
-#ifndef XEN
void cpususpend_handler(void);
-#endif
void init_secondary_tail(void);
void invltlb_handler(void);
void invlpg_handler(void);
@@ -101,9 +99,7 @@
void init_secondary(void);
void ipi_startup(int apic_id, int vector);
void ipi_all_but_self(u_int ipi);
-#ifndef XEN
void ipi_bitmap_handler(struct trapframe frame);
-#endif
void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
void ipi_selected(cpuset_t cpus, u_int ipi);
@@ -121,9 +117,6 @@
void topo_probe(void);
void ipi_send_cpu(int cpu, u_int ipi);
-#ifdef XEN
-void ipi_to_irq_init(void);
-#endif
#endif /* !LOCORE */
#endif /* SMP */
Index: sys/i386/include/vmparam.h
===================================================================
--- sys/i386/include/vmparam.h
+++ sys/i386/include/vmparam.h
@@ -135,11 +135,7 @@
* Kernel physical load address.
*/
#ifndef KERNLOAD
-#if defined(XEN) && !defined(XEN_PRIVILEGED_GUEST)
-#define KERNLOAD 0
-#else
#define KERNLOAD (1 << PDRSHIFT)
-#endif
#endif /* !defined(KERNLOAD) */
/*
@@ -149,11 +145,7 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
-#ifdef XEN
-#define VM_MAX_KERNEL_ADDRESS HYPERVISOR_VIRT_START
-#else
#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1)
-#endif
#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI)
Index: sys/i386/isa/npx.c
===================================================================
--- sys/i386/isa/npx.c
+++ sys/i386/isa/npx.c
@@ -69,10 +69,6 @@
#include <machine/ucontext.h>
#include <machine/intr_machdep.h>
-#ifdef XEN
-#include <xen/xen-os.h>
-#include <xen/hypervisor.h>
-#endif
#ifdef DEV_ISA
#include <isa/isavar.h>
@@ -157,13 +153,8 @@
#endif /* __GNUCLIKE_ASM && !lint */
-#ifdef XEN
-#define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
-#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
-#else
#define start_emulating() load_cr0(rcr0() | CR0_TS)
#define stop_emulating() clts()
-#endif
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(thread) \
Index: sys/i386/pci/pci_cfgreg.c
===================================================================
--- sys/i386/pci/pci_cfgreg.c
+++ sys/i386/pci/pci_cfgreg.c
@@ -93,9 +93,7 @@
int bytes);
static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
-#ifndef XEN
static int pcireg_cfgopen(void);
-#endif
static int pciereg_cfgread(int bus, unsigned slot, unsigned func,
unsigned reg, unsigned bytes);
static void pciereg_cfgwrite(int bus, unsigned slot, unsigned func,
@@ -116,7 +114,6 @@
return (line);
}
-#ifndef XEN
static u_int16_t
pcibios_get_version(void)
{
@@ -137,7 +134,6 @@
}
return (args.ebx & 0xffff);
}
-#endif
/*
* Initialise access to PCI configuration space
@@ -145,9 +141,6 @@
int
pci_cfgregopen(void)
{
-#ifdef XEN
- return (0);
-#else
static int opened = 0;
uint64_t pciebar;
u_int16_t vid, did;
@@ -202,7 +195,6 @@
}
return(1);
-#endif
}
static uint32_t
@@ -390,7 +382,6 @@
mtx_unlock_spin(&pcicfg_mtx);
}
-#ifndef XEN
/* check whether the configuration mechanism has been correctly identified */
static int
pci_cfgcheck(int maxdev)
@@ -607,7 +598,6 @@
return (1);
}
-#endif /* !XEN */
#define PCIE_PADDR(base, reg, bus, slot, func) \
((base) + \
Index: sys/i386/pci/pci_pir.c
===================================================================
--- sys/i386/pci/pci_pir.c
+++ sys/i386/pci/pci_pir.c
@@ -137,9 +137,6 @@
int i;
uint8_t ck, *cv;
-#ifdef XEN
- return;
-#else
/* Don't try if we've already found a table. */
if (pci_route_table != NULL)
return;
@@ -150,7 +147,7 @@
sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0);
if (sigaddr == 0)
return;
-#endif
+
/* If we found something, check the checksum and length. */
/* XXX - Use pmap_mapdev()? */
pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr);
@@ -481,11 +478,7 @@
args.eax = PCIBIOS_ROUTE_INTERRUPT;
args.ebx = (bus << 8) | (device << 3) | func;
args.ecx = (irq << 8) | (0xa + pin);
-#ifdef XEN
- return (0);
-#else
return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL)));
-#endif
}
Index: sys/i386/xen/clock.c
===================================================================
--- sys/i386/xen/clock.c
+++ /dev/null
@@ -1,570 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz and Don Ahn.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)clock.c 7.2 (Berkeley) 5/12/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/* #define DELAYDEBUG */
-/*
- * Routines to handle clock hardware.
- */
-
-#include "opt_ddb.h"
-#include "opt_clock.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/clock.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/time.h>
-#include <sys/timeet.h>
-#include <sys/timetc.h>
-#include <sys/kernel.h>
-#include <sys/limits.h>
-#include <sys/sysctl.h>
-#include <sys/cons.h>
-#include <sys/power.h>
-
-#include <machine/clock.h>
-#include <machine/cputypes.h>
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/psl.h>
-#include <machine/pvclock.h>
-#if defined(SMP)
-#include <machine/smp.h>
-#endif
-#include <machine/specialreg.h>
-#include <machine/timerreg.h>
-
-#include <x86/isa/icu.h>
-#include <isa/isareg.h>
-#include <isa/rtc.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/pmap.h>
-#include <xen/hypervisor.h>
-#include <xen/xen-os.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/interface/vcpu.h>
-#include <machine/cpu.h>
-#include <xen/xen_intr.h>
-
-/*
- * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
- * can use a simple formula for leap years.
- */
-#define LEAPYEAR(y) (!((y) % 4))
-#define DAYSPERYEAR (28+30*4+31*7)
-
-#ifndef TIMER_FREQ
-#define TIMER_FREQ 1193182
-#endif
-
-#ifdef CYC2NS_SCALE_FACTOR
-#undef CYC2NS_SCALE_FACTOR
-#endif
-#define CYC2NS_SCALE_FACTOR 10
-
-/* Values for timerX_state: */
-#define RELEASED 0
-#define RELEASE_PENDING 1
-#define ACQUIRED 2
-#define ACQUIRE_PENDING 3
-
-struct mtx clock_lock;
-#define RTC_LOCK_INIT \
- mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_NOPROFILE)
-#define RTC_LOCK mtx_lock_spin(&clock_lock)
-#define RTC_UNLOCK mtx_unlock_spin(&clock_lock)
-#define NS_PER_TICK (1000000000ULL/hz)
-
-int adjkerntz; /* local offset from UTC in seconds */
-int clkintr_pending;
-int pscnt = 1;
-int psdiv = 1;
-int wall_cmos_clock;
-u_int timer_freq = TIMER_FREQ;
-static u_long cyc2ns_scale;
-static uint64_t processed_system_time; /* stime (ns) at last processing. */
-
-#define do_div(n,base) ({ \
- unsigned long __upper, __low, __high, __mod, __base; \
- __base = (base); \
- __asm("":"=a" (__low), "=d" (__high):"A" (n)); \
- __upper = __high; \
- if (__high) { \
- __upper = __high % (__base); \
- __high = __high / (__base); \
- } \
- __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
- __asm("":"=A" (n):"a" (__low),"d" (__high)); \
- __mod; \
-})
-
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
-{
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return ((cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
-}
-
-static uint32_t
-getit(void)
-{
- return (pvclock_get_last_cycles());
-}
-
-
-/*
- * XXX: timer needs more SMP work.
- */
-void
-i8254_init(void)
-{
-
- RTC_LOCK_INIT;
-}
-
-/*
- * Wait "n" microseconds.
- * Relies on timer 1 counting down from (timer_freq / hz)
- * Note: timer had better have been programmed before this is first used!
- */
-void
-i8254_delay(int n)
-{
- int delta, ticks_left;
- uint32_t tick, prev_tick;
-#ifdef DELAYDEBUG
- int getit_calls = 1;
- int n1;
- static int state = 0;
-
- if (state == 0) {
- state = 1;
- for (n1 = 1; n1 <= 10000000; n1 *= 10)
- DELAY(n1);
- state = 2;
- }
- if (state == 1)
- printf("DELAY(%d)...", n);
-#endif
- /*
- * Read the counter first, so that the rest of the setup overhead is
- * counted. Guess the initial overhead is 20 usec (on most systems it
- * takes about 1.5 usec for each of the i/o's in getit(). The loop
- * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The
- * multiplications and divisions to scale the count take a while).
- *
- * However, if ddb is active then use a fake counter since reading
- * the i8254 counter involves acquiring a lock. ddb must not go
- * locking for many reasons, but it calls here for at least atkbd
- * input.
- */
- prev_tick = getit();
-
- n -= 0; /* XXX actually guess no initial overhead */
- /*
- * Calculate (n * (timer_freq / 1e6)) without using floating point
- * and without any avoidable overflows.
- */
- if (n <= 0)
- ticks_left = 0;
- else if (n < 256)
- /*
- * Use fixed point to avoid a slow division by 1000000.
- * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
- * 2^15 is the first power of 2 that gives exact results
- * for n between 0 and 256.
- */
- ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
- else
- /*
- * Don't bother using fixed point, although gcc-2.7.2
- * generates particularly poor code for the long long
- * division, since even the slow way will complete long
- * before the delay is up (unless we're interrupted).
- */
- ticks_left = ((u_int)n * (long long)timer_freq + 999999)
- / 1000000;
-
- while (ticks_left > 0) {
- tick = getit();
-#ifdef DELAYDEBUG
- ++getit_calls;
-#endif
- delta = tick - prev_tick;
- prev_tick = tick;
- if (delta < 0) {
- /*
- * Guard against timer0_max_count being wrong.
- * This shouldn't happen in normal operation,
- * but it may happen if set_timer_freq() is
- * traced.
- */
- /* delta += timer0_max_count; ??? */
- if (delta < 0)
- delta = 0;
- }
- ticks_left -= delta;
- }
-#ifdef DELAYDEBUG
- if (state == 1)
- printf(" %d calls to getit() at %d usec each\n",
- getit_calls, (n + 5) / getit_calls);
-#endif
-}
-
-void
-startrtclock()
-{
- uint64_t __cpu_khz;
- uint32_t cpu_khz;
- struct vcpu_time_info *info;
-
- __cpu_khz = 1000000ULL << 32;
- info = &HYPERVISOR_shared_info->vcpu_info[0].time;
-
- (void)do_div(__cpu_khz, info->tsc_to_system_mul);
- if ( info->tsc_shift < 0 )
- cpu_khz = __cpu_khz << -info->tsc_shift;
- else
- cpu_khz = __cpu_khz >> info->tsc_shift;
-
- printf("Xen reported: %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
-
- /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
- (2^32 * 1 / (clocks/us)) */
-
- set_cyc2ns_scale(cpu_khz/1000);
- tsc_freq = cpu_khz * 1000;
-}
-
-/*
- * RTC support routines
- */
-
-
-static __inline int
-readrtc(int port)
-{
- return(bcd2bin(rtcin(port)));
-}
-
-
-#ifdef XEN_PRIVILEGED_GUEST
-
-/*
- * Initialize the time of day register, based on the time base which is, e.g.
- * from a filesystem.
- */
-static void
-domu_inittodr(time_t base)
-{
- unsigned long sec;
- int s, y;
- struct timespec ts;
-
- update_wallclock();
- add_uptime_to_wallclock();
-
- RTC_LOCK;
-
- if (base) {
- ts.tv_sec = base;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- }
-
- sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- y = time_second - shadow_tv.tv_sec;
- if (y <= -2 || y >= 2) {
- /* badly off, adjust it */
- tc_setclock(&shadow_tv);
- }
- RTC_UNLOCK;
-}
-
-/*
- * Write system time back to RTC.
- */
-static void
-domu_resettodr(void)
-{
- unsigned long tm;
- int s;
- dom0_op_t op;
- struct shadow_time_info *shadow;
- struct pcpu *pc;
-
- pc = pcpu_find(smp_processor_id());
- shadow = &pc->pc_shadow_time;
- if (xen_disable_rtc_set)
- return;
-
- s = splclock();
- tm = time_second;
- splx(s);
-
- tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- if ((xen_start_info->flags & SIF_INITDOMAIN) &&
- !independent_wallclock)
- {
- op.cmd = DOM0_SETTIME;
- op.u.settime.secs = tm;
- op.u.settime.nsecs = 0;
- op.u.settime.system_time = shadow->system_timestamp;
- HYPERVISOR_dom0_op(&op);
- update_wallclock();
- add_uptime_to_wallclock();
- } else if (independent_wallclock) {
- /* notyet */
- ;
- }
-}
-
-/*
- * Initialize the time of day register, based on the time base which is, e.g.
- * from a filesystem.
- */
-void
-inittodr(time_t base)
-{
- unsigned long sec, days;
- int year, month;
- int y, m, s;
- struct timespec ts;
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- domu_inittodr(base);
- return;
- }
-
- if (base) {
- s = splclock();
- ts.tv_sec = base;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- splx(s);
- }
-
- /* Look if we have a RTC present and the time is valid */
- if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
- goto wrong_time;
-
- /* wait for time update to complete */
- /* If RTCSA_TUP is zero, we have at least 244us before next update */
- s = splhigh();
- while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
- splx(s);
- s = splhigh();
- }
-
- days = 0;
-#ifdef USE_RTC_CENTURY
- year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
-#else
- year = readrtc(RTC_YEAR) + 1900;
- if (year < 1970)
- year += 100;
-#endif
- if (year < 1970) {
- splx(s);
- goto wrong_time;
- }
- month = readrtc(RTC_MONTH);
- for (m = 1; m < month; m++)
- days += daysinmonth[m-1];
- if ((month > 2) && LEAPYEAR(year))
- days ++;
- days += readrtc(RTC_DAY) - 1;
- for (y = 1970; y < year; y++)
- days += DAYSPERYEAR + LEAPYEAR(y);
- sec = ((( days * 24 +
- readrtc(RTC_HRS)) * 60 +
- readrtc(RTC_MIN)) * 60 +
- readrtc(RTC_SEC));
- /* sec now contains the number of seconds, since Jan 1 1970,
- in the local time zone */
-
- sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- y = time_second - sec;
- if (y <= -2 || y >= 2) {
- /* badly off, adjust it */
- ts.tv_sec = sec;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- }
- splx(s);
- return;
-
- wrong_time:
- printf("Invalid time in real time clock.\n");
- printf("Check and reset the date immediately!\n");
-}
-
-
-/*
- * Write system time back to RTC
- */
-void
-resettodr()
-{
- unsigned long tm;
- int y, m, s;
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- domu_resettodr();
- return;
- }
-
- if (xen_disable_rtc_set)
- return;
-
- s = splclock();
- tm = time_second;
- splx(s);
-
- /* Disable RTC updates and interrupts. */
- writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
-
- /* Calculate local time to put in RTC */
-
- tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */
- writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */
- writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */
-
- /* We have now the days since 01-01-1970 in tm */
- writertc(RTC_WDAY, (tm + 4) % 7 + 1); /* Write back Weekday */
- for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
- tm >= m;
- y++, m = DAYSPERYEAR + LEAPYEAR(y))
- tm -= m;
-
- /* Now we have the years in y and the day-of-the-year in tm */
- writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */
-#ifdef USE_RTC_CENTURY
- writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */
-#endif
- for (m = 0; ; m++) {
- int ml;
-
- ml = daysinmonth[m];
- if (m == 1 && LEAPYEAR(y))
- ml++;
- if (tm < ml)
- break;
- tm -= ml;
- }
-
- writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */
- writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */
-
- /* Reenable RTC updates and interrupts. */
- writertc(RTC_STATUSB, RTCSB_24HR);
- rtcin(RTC_INTR);
-}
-#endif
-
-/*
- * Start clocks running.
- */
-void
-cpu_initclocks(void)
-{
- cpu_initclocks_bsp();
-}
-
-/* Return system time offset by ticks */
-uint64_t
-get_system_time(int ticks)
-{
- return (processed_system_time + (ticks * NS_PER_TICK));
-}
-
-int
-timer_spkr_acquire(void)
-{
-
- return (0);
-}
-
-int
-timer_spkr_release(void)
-{
-
- return (0);
-}
-
-void
-timer_spkr_setfreq(int freq)
-{
-
-}
-
Index: sys/i386/xen/exception.s
===================================================================
--- sys/i386/xen/exception.s
+++ /dev/null
@@ -1,494 +0,0 @@
-/*-
- * Copyright (c) 1989, 1990 William F. Jolitz.
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include "opt_apic.h"
-#include "opt_npx.h"
-
-#include <machine/asmacros.h>
-#include <machine/psl.h>
-#include <machine/trap.h>
-
-#include "assym.s"
-
-#define SEL_RPL_MASK 0x0002
-#define __HYPERVISOR_iret 23
-
-/* Offsets into shared_info_t. */
-
-#define evtchn_upcall_pending /* 0 */
-#define evtchn_upcall_mask 1
-
-#define sizeof_vcpu_shift 6
-
-
-#ifdef SMP
-#define GET_VCPU_INFO(reg) movl PCPU(CPUID),reg ; \
- shl $sizeof_vcpu_shift,reg ; \
- addl HYPERVISOR_shared_info,reg
-#else
-#define GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg
-#endif
-
-#define __DISABLE_INTERRUPTS(reg) movb $1,evtchn_upcall_mask(reg)
-#define __ENABLE_INTERRUPTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define DISABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \
- __DISABLE_INTERRUPTS(reg)
-#define ENABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \
- __ENABLE_INTERRUPTS(reg)
-#define __TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
-
-#define POPA \
- popl %edi; \
- popl %esi; \
- popl %ebp; \
- popl %ebx; \
- popl %ebx; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
- .text
-
-/*****************************************************************************/
-/* Trap handling */
-/*****************************************************************************/
-/*
- * Trap and fault vector routines.
- *
- * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable
- * interrupts. A few of the traps we are use are interrupt gates,
- * SDT_SYS386IGT, which are nearly the same thing except interrupts are
- * disabled on entry.
- *
- * The cpu will push a certain amount of state onto the kernel stack for
- * the current process. The amount of state depends on the type of trap
- * and whether the trap crossed rings or not. See i386/include/frame.h.
- * At the very least the current EFLAGS (status register, which includes
- * the interrupt disable state prior to the trap), the code segment register,
- * and the return instruction pointer are pushed by the cpu. The cpu
- * will also push an 'error' code for certain traps. We push a dummy
- * error code for those traps where the cpu doesn't in order to maintain
- * a consistent frame. We also push a contrived 'trap number'.
- *
- * The cpu does not push the general registers, we must do that, and we
- * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
- * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
- * must load them with appropriate values for supervisor mode operation.
- */
-
-MCOUNT_LABEL(user)
-MCOUNT_LABEL(btrap)
-
-#define TRAP(a) pushl $(a) ; jmp alltraps
-
-IDTVEC(div)
- pushl $0; TRAP(T_DIVIDE)
-IDTVEC(dbg)
- pushl $0; TRAP(T_TRCTRAP)
-IDTVEC(nmi)
- pushl $0; TRAP(T_NMI)
-IDTVEC(bpt)
- pushl $0; TRAP(T_BPTFLT)
-IDTVEC(ofl)
- pushl $0; TRAP(T_OFLOW)
-IDTVEC(bnd)
- pushl $0; TRAP(T_BOUND)
-IDTVEC(ill)
- pushl $0; TRAP(T_PRIVINFLT)
-IDTVEC(dna)
- pushl $0; TRAP(T_DNA)
-IDTVEC(fpusegm)
- pushl $0; TRAP(T_FPOPFLT)
-IDTVEC(tss)
- TRAP(T_TSSFLT)
-IDTVEC(missing)
- TRAP(T_SEGNPFLT)
-IDTVEC(stk)
- TRAP(T_STKFLT)
-IDTVEC(prot)
- TRAP(T_PROTFLT)
-IDTVEC(page)
- TRAP(T_PAGEFLT)
-IDTVEC(mchk)
- pushl $0; TRAP(T_MCHK)
-IDTVEC(rsvd)
- pushl $0; TRAP(T_RESERVED)
-IDTVEC(fpu)
- pushl $0; TRAP(T_ARITHTRAP)
-IDTVEC(align)
- TRAP(T_ALIGNFLT)
-IDTVEC(xmm)
- pushl $0; TRAP(T_XMMFLT)
-
-IDTVEC(hypervisor_callback)
- pushl $0;
- pushl $0;
- pushal
- pushl %ds
- pushl %es
- pushl %fs
-upcall_with_regs_pushed:
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
-call_evtchn_upcall:
- movl TF_EIP(%esp),%eax
- cmpl $scrit,%eax
- jb 10f
- cmpl $ecrit,%eax
- jb critical_region_fixup
-
-10: pushl %esp
- call xen_intr_handle_upcall
- addl $4,%esp
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-
-hypervisor_callback_pending:
- DISABLE_INTERRUPTS(%esi) /* cli */
- jmp 10b
- /*
- * alltraps entry point. Interrupts are enabled if this was a trap
- * gate (TGT), else disabled if this was an interrupt gate (IGT).
- * Note that int0x80_syscall is a trap gate. Only page faults
- * use an interrupt gate.
- */
- SUPERALIGN_TEXT
- .globl alltraps
- .type alltraps,@function
-alltraps:
- pushal
- pushl %ds
- pushl %es
- pushl %fs
-
-alltraps_with_regs_pushed:
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
-
-calltrap:
- push %esp
- call trap
- add $4, %esp
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-/*
- * SYSCALL CALL GATE (old entry point for a.out binaries)
- *
- * The intersegment call has been set up to specify one dummy parameter.
- *
- * This leaves a place to put eflags so that the call frame can be
- * converted to a trap frame. Note that the eflags is (semi-)bogusly
- * pushed into (what will be) tf_err and then copied later into the
- * final spot. It has to be done this way because esp can't be just
- * temporarily altered for the pushfl - an interrupt might come in
- * and clobber the saved cs/eip.
- */
- SUPERALIGN_TEXT
-IDTVEC(lcall_syscall)
- pushfl /* save eflags */
- popl 8(%esp) /* shuffle into tf_eflags */
- pushl $7 /* sizeof "lcall 7,0" */
- subl $4,%esp /* skip over tf_trapno */
- pushal
- pushl %ds
- pushl %es
- pushl %fs
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
- pushl %esp
- call syscall
- add $4, %esp
- MEXITCOUNT
- jmp doreti
-
-/*
- * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
- *
- * Even though the name says 'int0x80', this is actually a TGT (trap gate)
- * rather then an IGT (interrupt gate). Thus interrupts are enabled on
- * entry just as they are for a normal syscall.
- */
- SUPERALIGN_TEXT
-IDTVEC(int0x80_syscall)
- pushl $2 /* sizeof "int 0x80" */
- pushl $0xBEEF /* for debug */
- pushal
- pushl %ds
- pushl %es
- pushl %fs
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
- pushl %esp
- call syscall
- add $4, %esp
- MEXITCOUNT
- jmp doreti
-
-ENTRY(fork_trampoline)
- pushl %esp /* trapframe pointer */
- pushl %ebx /* arg1 */
- pushl %esi /* function */
- call fork_exit
- addl $12,%esp
- /* cut from syscall */
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-
-/*
- * To efficiently implement classification of trap and interrupt handlers
- * for profiling, there must be only trap handlers between the labels btrap
- * and bintr, and only interrupt handlers between the labels bintr and
- * eintr. This is implemented (partly) by including files that contain
- * some of the handlers. Before including the files, set up a normal asm
- * environment so that the included files doen't need to know that they are
- * included.
- */
-
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-MCOUNT_LABEL(bintr)
-
-#ifdef DEV_APIC
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-
-#include <i386/i386/apic_vector.s>
-#endif
-
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-#include <i386/i386/vm86bios.s>
-
- .text
-MCOUNT_LABEL(eintr)
-
-/*
- * void doreti(struct trapframe)
- *
- * Handle return from interrupts, traps and syscalls.
- */
- .text
- SUPERALIGN_TEXT
- .type doreti,@function
-doreti:
- FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */
-doreti_next:
-#ifdef notyet
- /*
- * Check if ASTs can be handled now. PSL_VM must be checked first
- * since segment registers only have an RPL in non-VM86 mode.
- */
- testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */
- jz doreti_notvm86
- movl PCPU(CURPCB),%ecx
- testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */
- jz doreti_ast /* can handle ASTS now if not */
- jmp doreti_exit
-
-doreti_notvm86:
-#endif
- testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
- jz doreti_exit /* can't handle ASTs now if not */
-
-doreti_ast:
- /*
- * Check for ASTs atomically with returning. Disabling CPU
- * interrupts provides sufficient locking even in the SMP case,
- * since we will be informed of any new ASTs by an IPI.
- */
- DISABLE_INTERRUPTS(%esi) /* cli */
- movl PCPU(CURTHREAD),%eax
- testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
- je doreti_exit
- ENABLE_INTERRUPTS(%esi) /* sti */
- pushl %esp /* pass a pointer to the trapframe */
- call ast
- add $4,%esp
- jmp doreti_ast
-
- /*
- * doreti_exit: pop registers, iret.
- *
- * The segment register pop is a special case, since it may
- * fault if (for example) a sigreturn specifies bad segment
- * registers. The fault is handled in trap.c.
- */
-doreti_exit:
- ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti)
-
- .globl scrit
-scrit:
- __TEST_PENDING(%esi)
- jnz hypervisor_callback_pending /* More to go */
-
- MEXITCOUNT
-
- .globl doreti_popl_fs
-doreti_popl_fs:
- popl %fs
- .globl doreti_popl_es
-doreti_popl_es:
- popl %es
- .globl doreti_popl_ds
-doreti_popl_ds:
- popl %ds
-
- /*
- * This is important: as nothing is atomic over here (we can get
- * interrupted any time), we use the critical_region_fixup() in
- * order to figure out where out stack is. Therefore, do NOT use
- * 'popal' here without fixing up the table!
- */
- POPA
- addl $8,%esp
- .globl doreti_iret
-doreti_iret:
- jmp hypercall_page + (__HYPERVISOR_iret * 32)
- .globl ecrit
-ecrit:
- /*
- * doreti_iret_fault and friends. Alternative return code for
- * the case where we get a fault in the doreti_exit code
- * above. trap() (i386/i386/trap.c) catches this specific
- * case, sends the process a signal and continues in the
- * corresponding place in the code below.
- */
- ALIGN_TEXT
- .globl doreti_iret_fault
-doreti_iret_fault:
- subl $8,%esp
- pushal
- pushl %ds
- .globl doreti_popl_ds_fault
-doreti_popl_ds_fault:
- pushl %es
- .globl doreti_popl_es_fault
-doreti_popl_es_fault:
- pushl %fs
- .globl doreti_popl_fs_fault
-doreti_popl_fs_fault:
- movl $0,TF_ERR(%esp) /* XXX should be the error code */
- movl $T_PROTFLT,TF_TRAPNO(%esp)
- jmp alltraps_with_regs_pushed
-
- /*
-# [How we do the fixup]. We want to merge the current stack frame with the
-# just-interrupted frame. How we do this depends on where in the critical
-# region the interrupted handler was executing, and so how many saved
-# registers are in each frame. We do this quickly using the lookup table
-# 'critical_fixup_table'. For each byte offset in the critical region, it
-# provides the number of bytes which have already been popped from the
-# interrupted stack frame.
-*/
-
-.globl critical_region_fixup
-critical_region_fixup:
- addl $critical_fixup_table-scrit,%eax
- movzbl (%eax),%eax # %eax contains num bytes popped
- movl %esp,%esi
- add %eax,%esi # %esi points at end of src region
- movl %esp,%edi
- add $0x40,%edi # %edi points at end of dst region
- movl %eax,%ecx
- shr $2,%ecx # convert bytes to words
- je 16f # skip loop if nothing to copy
-15: subl $4,%esi # pre-decrementing copy loop
- subl $4,%edi
- movl (%esi),%eax
- movl %eax,(%edi)
- loop 15b
-16: movl %edi,%esp # final %edi is top of merged stack
- jmp hypervisor_callback_pending
-
-
-critical_fixup_table:
-.byte 0x0,0x0,0x0 #testb $0x1,(%esi)
-.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea
-.byte 0x0,0x0 #pop %fs
-.byte 0x04 #pop %es
-.byte 0x08 #pop %ds
-.byte 0x0c #pop %edi
-.byte 0x10 #pop %esi
-.byte 0x14 #pop %ebp
-.byte 0x18 #pop %ebx
-.byte 0x1c #pop %ebx
-.byte 0x20 #pop %edx
-.byte 0x24 #pop %ecx
-.byte 0x28 #pop %eax
-.byte 0x2c,0x2c,0x2c #add $0x8,%esp
-#if 0
- .byte 0x34 #iret
-#endif
-.byte 0x34,0x34,0x34,0x34,0x34 #HYPERVISOR_iret
-
-
-/* # Hypervisor uses this for application faults while it executes.*/
-ENTRY(failsafe_callback)
- pushal
- call xen_failsafe_handler
-/*# call install_safe_pf_handler */
- movl 28(%esp),%ebx
-1: movl %ebx,%ds
- movl 32(%esp),%ebx
-2: movl %ebx,%es
- movl 36(%esp),%ebx
-3: movl %ebx,%fs
- movl 40(%esp),%ebx
-4: movl %ebx,%gs
-/*# call install_normal_pf_handler */
- popal
- addl $12,%esp
- iret
-
-
Index: sys/i386/xen/locore.s
===================================================================
--- sys/i386/xen/locore.s
+++ /dev/null
@@ -1,360 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)locore.s 7.3 (Berkeley) 5/13/91
- * $FreeBSD$
- *
- * originally from: locore.s, by William F. Jolitz
- *
- * Substantially rewritten by David Greenman, Rod Grimes,
- * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
- * and many others.
- */
-
-#include "opt_bootp.h"
-#include "opt_compat.h"
-#include "opt_nfsroot.h"
-#include "opt_pmap.h"
-
-#include <sys/syscall.h>
-#include <sys/reboot.h>
-
-#include <machine/asmacros.h>
-#include <machine/cputypes.h>
-#include <machine/psl.h>
-#include <machine/pmap.h>
-#include <machine/specialreg.h>
-
-#define __ASSEMBLY__
-#include <xen/interface/elfnote.h>
-
-/* The defines below have been lifted out of <machine/xen-public/arch-x86_32.h> */
-#define FLAT_RING1_CS 0xe019 /* GDT index 259 */
-#define FLAT_RING1_DS 0xe021 /* GDT index 260 */
-#define KERNEL_CS FLAT_RING1_CS
-#define KERNEL_DS FLAT_RING1_DS
-
-#include "assym.s"
-
-.section __xen_guest
- .ascii "LOADER=generic,GUEST_OS=freebsd,GUEST_VER=7.0,XEN_VER=xen-3.0,BSD_SYMTAB,VIRT_BASE=0xc0000000"
- .byte 0
-
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "FreeBSD")
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "HEAD")
- ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
- ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, KERNBASE)
- ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, KERNBASE)
- ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, btext)
- ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, XEN_HYPERVISOR_VIRT_START)
-#if 0
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|supervisor_mode_kernel|writable_descriptor_tables")
-
-#ifdef PAE
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
- ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V)
-#else
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no")
- ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V)
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
- ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
-
-
-
-/*
- * XXX
- *
- * Note: This version greatly munged to avoid various assembler errors
- * that may be fixed in newer versions of gas. Perhaps newer versions
- * will have more pleasant appearance.
- */
-
-/*
- * PTmap is recursive pagemap at top of virtual address space.
- * Within PTmap, the page directory can be found (third indirection).
- */
- .globl PTmap,PTD,PTDpde
- .set PTmap,(PTDPTDI << PDRSHIFT)
- .set PTD,PTmap + (PTDPTDI * PAGE_SIZE)
- .set PTDpde,PTD + (PTDPTDI * PDESIZE)
-
-/*
- * Compiled KERNBASE location and the kernel load address
- */
- .globl kernbase
- .set kernbase,KERNBASE
- .globl kernload
- .set kernload,KERNLOAD
-
-/*
- * Globals
- */
- .data
- ALIGN_DATA /* just to be sure */
-
- .space 0x2000 /* space for tmpstk - temporary stack */
-tmpstk:
-
- .globl bootinfo
-bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
-
- .globl KERNend
-KERNend: .long 0 /* phys addr end of kernel (just after bss) */
- .globl physfree
-physfree: .long 0 /* phys addr of next free page */
-
- .globl IdlePTD
-IdlePTD: .long 0 /* phys addr of kernel PTD */
-
-#ifdef PAE
- .globl IdlePDPT
-IdlePDPT: .long 0 /* phys addr of kernel PDPT */
-#endif
-
-#ifdef SMP
- .globl KPTphys
-#endif
-KPTphys: .long 0 /* phys addr of kernel page tables */
- .globl gdtset
-gdtset: .long 0 /* GDT is valid */
-
- .globl proc0kstack
-proc0kstack: .long 0 /* address of proc 0 kstack space */
-p0kpa: .long 0 /* phys addr of proc0's STACK */
-
-vm86phystk: .long 0 /* PA of vm86/bios stack */
-
- .globl vm86paddr, vm86pa
-vm86paddr: .long 0 /* address of vm86 region */
-vm86pa: .long 0 /* phys addr of vm86 region */
-
-#ifdef PC98
- .globl pc98_system_parameter
-pc98_system_parameter:
- .space 0x240
-#endif
-
- .globl avail_space
-avail_space: .long 0
-
-/**********************************************************************
- *
- * Some handy macros
- *
- */
-
-/*
- * We're already in protected mode, so no remapping is needed.
- */
-#define R(foo) (foo)
-
-#define ALLOCPAGES(foo) \
- movl R(physfree), %esi ; \
- movl $((foo)*PAGE_SIZE), %eax ; \
- addl %esi, %eax ; \
- movl %eax, R(physfree) ; \
- movl %esi, %edi ; \
- movl $((foo)*PAGE_SIZE),%ecx ; \
- xorl %eax,%eax ; \
- cld ; \
- rep ; \
- stosb
-
-/*
- * fillkpt
- * eax = page frame address
- * ebx = index into page table
- * ecx = how many pages to map
- * base = base address of page dir/table
- * prot = protection bits
- */
-#define fillkpt(base, prot) \
- shll $PTESHIFT,%ebx ; \
- addl base,%ebx ; \
- orl $PG_V,%eax ; \
- orl prot,%eax ; \
-1: movl %eax,(%ebx) ; \
- addl $PAGE_SIZE,%eax ; /* increment physical address */ \
- addl $PTESIZE,%ebx ; /* next pte */ \
- loop 1b
-
-/*
- * fillkptphys(prot)
- * eax = physical address
- * ecx = how many pages to map
- * prot = protection bits
- */
-#define fillkptphys(prot) \
- movl %eax, %ebx ; \
- shrl $PAGE_SHIFT, %ebx ; \
- fillkpt(R(KPTphys), prot)
-
-/* Temporary stack */
-.space 8192
-tmpstack:
- .long tmpstack, KERNEL_DS
-
- .text
-
-.p2align 12, 0x90
-
-#define HYPERCALL_PAGE_OFFSET 0x1000
-.org HYPERCALL_PAGE_OFFSET
-ENTRY(hypercall_page)
- .cfi_startproc
- .skip 0x1000
- .cfi_endproc
-
-/**********************************************************************
- *
- * This is where the bootblocks start us, set the ball rolling...
- *
- */
-NON_GPROF_ENTRY(btext)
- /* At the end of our stack, we shall have free space - so store it */
- movl %esp,%ebx
- movl %ebx,R(avail_space)
-
- lss tmpstack,%esp
-
- pushl %esi
- call initvalues
- popl %esi
-
- /* Store the CPUID information */
- xorl %eax,%eax
- cpuid # cpuid 0
- movl %eax,R(cpu_high) # highest capability
- movl %ebx,R(cpu_vendor) # store vendor string
- movl %edx,R(cpu_vendor+4)
- movl %ecx,R(cpu_vendor+8)
- movb $0,R(cpu_vendor+12)
-
- movl $1,%eax
- cpuid # cpuid 1
- movl %eax,R(cpu_id) # store cpu_id
- movl %ebx,R(cpu_procinfo) # store cpu_procinfo
- movl %edx,R(cpu_feature) # store cpu_feature
- movl %ecx,R(cpu_feature2) # store cpu_feature2
- rorl $8,%eax # extract family type
- andl $15,%eax
- cmpl $5,%eax
- movl $CPU_686,R(cpu)
-
- movl proc0kstack,%eax
- leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
- xorl %ebp,%ebp /* mark end of frames */
-#ifdef PAE
- movl IdlePDPT,%esi
-#else
- movl IdlePTD,%esi
-#endif
- movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
- pushl physfree
- call init386
- addl $4, %esp
- call mi_startup
- /* NOTREACHED */
- int $3
-
-/*
- * Signal trampoline, copied to top of user stack
- */
-NON_GPROF_ENTRY(sigcode)
- calll *SIGF_HANDLER(%esp)
- leal SIGF_UC(%esp),%eax /* get ucontext */
- pushl %eax
- testl $PSL_VM,UC_EFLAGS(%eax)
- jne 1f
- mov UC_GS(%eax), %gs /* restore %gs */
-1:
- movl $SYS_sigreturn,%eax
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
- /* on stack */
-1:
- jmp 1b
-
-#ifdef COMPAT_FREEBSD4
- ALIGN_TEXT
-freebsd4_sigcode:
- calll *SIGF_HANDLER(%esp)
- leal SIGF_UC4(%esp),%eax /* get ucontext */
- pushl %eax
- testl $PSL_VM,UC4_EFLAGS(%eax)
- jne 1f
- mov UC4_GS(%eax),%gs /* restore %gs */
-1:
- movl $344,%eax /* 4.x SYS_sigreturn */
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
- /* on stack */
-1:
- jmp 1b
-#endif
-
-#ifdef COMPAT_43
- ALIGN_TEXT
-osigcode:
- call *SIGF_HANDLER(%esp) /* call signal handler */
- lea SIGF_SC(%esp),%eax /* get sigcontext */
- pushl %eax
- testl $PSL_VM,SC_PS(%eax)
- jne 9f
- movl SC_GS(%eax),%gs /* restore %gs */
-9:
- movl $103,%eax /* 3.x SYS_sigreturn */
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
-0: jmp 0b
-#endif /* COMPAT_43 */
-
- ALIGN_TEXT
-esigcode:
-
- .data
- .globl szsigcode
-szsigcode:
- .long esigcode-sigcode
-#ifdef COMPAT_FREEBSD4
- .globl szfreebsd4_sigcode
-szfreebsd4_sigcode:
- .long esigcode-freebsd4_sigcode
-#endif
-#ifdef COMPAT_43
- .globl szosigcode
-szosigcode:
- .long esigcode-osigcode
-#endif
Index: sys/i386/xen/mp_machdep.c
===================================================================
--- sys/i386/xen/mp_machdep.c
+++ /dev/null
@@ -1,1292 +0,0 @@
-/*-
- * Copyright (c) 1996, by Steve Passe
- * Copyright (c) 2008, by Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_apic.h"
-#include "opt_cpu.h"
-#include "opt_kstack_pages.h"
-#include "opt_mp_watchdog.h"
-#include "opt_pmap.h"
-#include "opt_sched.h"
-#include "opt_smp.h"
-
-#if !defined(lint)
-#if !defined(SMP)
-#error How did you get here?
-#endif
-
-#ifndef DEV_APIC
-#error The apic device is required for SMP, add "device apic" to your config file.
-#endif
-#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
-#error SMP not supported with CPU_DISABLE_CMPXCHG
-#endif
-#endif /* not lint */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/cons.h> /* cngetc() */
-#include <sys/cpuset.h>
-#ifdef GPROF
-#include <sys/gmon.h>
-#endif
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/memrange.h>
-#include <sys/mutex.h>
-#include <sys/pcpu.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/sched.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_page.h>
-
-#include <x86/apicreg.h>
-#include <machine/md_var.h>
-#include <machine/mp_watchdog.h>
-#include <machine/pcb.h>
-#include <machine/psl.h>
-#include <machine/smp.h>
-#include <machine/specialreg.h>
-#include <machine/pcpu.h>
-
-#include <xen/xen-os.h>
-#include <xen/evtchn.h>
-#include <xen/xen_intr.h>
-#include <xen/hypervisor.h>
-#include <xen/interface/vcpu.h>
-
-/*---------------------------- Extern Declarations ---------------------------*/
-extern struct pcpu __pcpu[];
-
-extern void Xhypervisor_callback(void);
-extern void failsafe_callback(void);
-
-/*--------------------------- Forward Declarations ---------------------------*/
-static driver_filter_t smp_reschedule_interrupt;
-static driver_filter_t smp_call_function_interrupt;
-static int start_all_aps(void);
-static int start_ap(int apic_id);
-static void release_aps(void *dummy);
-
-/*---------------------------------- Macros ----------------------------------*/
-#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
-
-/*-------------------------------- Local Types -------------------------------*/
-typedef void call_data_func_t(uintptr_t , uintptr_t);
-
-struct xen_ipi_handler
-{
- driver_filter_t *filter;
- const char *description;
-};
-
-enum {
- RESCHEDULE_VECTOR,
- CALL_FUNCTION_VECTOR,
-};
-
-/*-------------------------------- Global Data -------------------------------*/
-static u_int hyperthreading_cpus;
-static cpuset_t hyperthreading_cpus_mask;
-
-int mp_naps; /* # of Applications processors */
-int boot_cpu_id = -1; /* designated BSP */
-
-int bootAP;
-static union descriptor *bootAPgdt;
-
-/* Free these after use */
-void *bootstacks[MAXCPU];
-
-struct pcb stoppcbs[MAXCPU];
-
-/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
-vm_offset_t smp_tlb_addr2;
-volatile int smp_tlb_wait;
-
-static u_int logical_cpus;
-static volatile cpuset_t ipi_nmi_pending;
-
-/* used to hold the AP's until we are ready to release them */
-struct mtx ap_boot_mtx;
-
-/* Set to 1 once we're ready to let the APs out of the pen. */
-volatile int aps_ready = 0;
-
-/*
- * Store data from cpu_add() until later in the boot when we actually setup
- * the APs.
- */
-struct cpu_info cpu_info[MAX_APIC_ID + 1];
-int cpu_apic_ids[MAXCPU];
-int apic_cpuids[MAX_APIC_ID + 1];
-
-/* Holds pending bitmap based IPIs per CPU */
-volatile u_int cpu_ipi_pending[MAXCPU];
-
-int cpu_logical;
-int cpu_cores;
-
-static const struct xen_ipi_handler xen_ipis[] =
-{
- [RESCHEDULE_VECTOR] = { smp_reschedule_interrupt, "resched" },
- [CALL_FUNCTION_VECTOR] = { smp_call_function_interrupt,"callfunc" }
-};
-
-/*------------------------------- Per-CPU Data -------------------------------*/
-DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
-DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
-
-/*------------------------------ Implementation ------------------------------*/
-struct cpu_group *
-cpu_topo(void)
-{
- if (cpu_cores == 0)
- cpu_cores = 1;
- if (cpu_logical == 0)
- cpu_logical = 1;
- if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
- printf("WARNING: Non-uniform processors.\n");
- printf("WARNING: Using suboptimal topology.\n");
- return (smp_topo_none());
- }
- /*
- * No multi-core or hyper-threaded.
- */
- if (cpu_logical * cpu_cores == 1)
- return (smp_topo_none());
- /*
- * Only HTT no multi-core.
- */
- if (cpu_logical > 1 && cpu_cores == 1)
- return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
- /*
- * Only multi-core no HTT.
- */
- if (cpu_cores > 1 && cpu_logical == 1)
- return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
- /*
- * Both HTT and multi-core.
- */
- return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
- CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
-}
-
-/*
- * Calculate usable address in base memory for AP trampoline code.
- */
-u_int
-mp_bootaddress(u_int basemem)
-{
-
- return (basemem);
-}
-
-void
-cpu_add(u_int apic_id, char boot_cpu)
-{
-
- if (apic_id > MAX_APIC_ID) {
- panic("SMP: APIC ID %d too high", apic_id);
- return;
- }
- KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
- apic_id));
- cpu_info[apic_id].cpu_present = 1;
- if (boot_cpu) {
- KASSERT(boot_cpu_id == -1,
- ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
- boot_cpu_id));
- boot_cpu_id = apic_id;
- cpu_info[apic_id].cpu_bsp = 1;
- }
- if (mp_ncpus < MAXCPU)
- mp_ncpus++;
- if (bootverbose)
- printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
- "AP");
-}
-
-void
-cpu_mp_setmaxid(void)
-{
-
- mp_maxid = MAXCPU - 1;
-}
-
-int
-cpu_mp_probe(void)
-{
-
- /*
- * Always record BSP in CPU map so that the mbuf init code works
- * correctly.
- */
- CPU_SETOF(0, &all_cpus);
- if (mp_ncpus == 0) {
- /*
- * No CPUs were found, so this must be a UP system. Setup
- * the variables to represent a system with a single CPU
- * with an id of 0.
- */
- mp_ncpus = 1;
- return (0);
- }
-
- /* At least one CPU was found. */
- if (mp_ncpus == 1) {
- /*
- * One CPU was found, so this must be a UP system with
- * an I/O APIC.
- */
- return (0);
- }
-
- /* At least two CPUs were found. */
- return (1);
-}
-
-/*
- * Initialize the IPI handlers and start up the AP's.
- */
-void
-cpu_mp_start(void)
-{
- int i;
-
- /* Initialize the logical ID to APIC ID table. */
- for (i = 0; i < MAXCPU; i++) {
- cpu_apic_ids[i] = -1;
- cpu_ipi_pending[i] = 0;
- }
-
- /* Set boot_cpu_id if needed. */
- if (boot_cpu_id == -1) {
- boot_cpu_id = PCPU_GET(apic_id);
- cpu_info[boot_cpu_id].cpu_bsp = 1;
- } else
- KASSERT(boot_cpu_id == PCPU_GET(apic_id),
- ("BSP's APIC ID doesn't match boot_cpu_id"));
- cpu_apic_ids[0] = boot_cpu_id;
- apic_cpuids[boot_cpu_id] = 0;
-
- assign_cpu_ids();
-
- /* Start each Application Processor */
- start_all_aps();
-
- /* Setup the initial logical CPUs info. */
- logical_cpus = 0;
- CPU_ZERO(&logical_cpus_mask);
- if (cpu_feature & CPUID_HTT)
- logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
-
- set_interrupt_apic_ids();
-}
-
-
-static void
-iv_rendezvous(uintptr_t a, uintptr_t b)
-{
- smp_rendezvous_action();
-}
-
-static void
-iv_invltlb(uintptr_t a, uintptr_t b)
-{
- xen_tlb_flush();
-}
-
-static void
-iv_invlpg(uintptr_t a, uintptr_t b)
-{
- xen_invlpg(a);
-}
-
-static void
-iv_invlrng(uintptr_t a, uintptr_t b)
-{
- vm_offset_t start = (vm_offset_t)a;
- vm_offset_t end = (vm_offset_t)b;
-
- while (start < end) {
- xen_invlpg(start);
- start += PAGE_SIZE;
- }
-}
-
-
-static void
-iv_invlcache(uintptr_t a, uintptr_t b)
-{
-
- wbinvd();
- atomic_add_int(&smp_tlb_wait, 1);
-}
-
-/*
- * These start from "IPI offset" APIC_IPI_INTS
- */
-static call_data_func_t *ipi_vectors[5] =
-{
- iv_rendezvous,
- iv_invltlb,
- iv_invlpg,
- iv_invlrng,
- iv_invlcache,
-};
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-static int
-smp_reschedule_interrupt(void *unused)
-{
- int cpu = PCPU_GET(cpuid);
- u_int ipi_bitmap;
-
- ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
- if (ipi_bitmap & (1 << IPI_PREEMPT)) {
-#ifdef COUNT_IPIS
- (*ipi_preempt_counts[cpu])++;
-#endif
- sched_preempt(curthread);
- }
-
- if (ipi_bitmap & (1 << IPI_AST)) {
-#ifdef COUNT_IPIS
- (*ipi_ast_counts[cpu])++;
-#endif
- /* Nothing to do for AST */
- }
- return (FILTER_HANDLED);
-}
-
-struct _call_data {
- uint16_t func_id;
- uint16_t wait;
- uintptr_t arg1;
- uintptr_t arg2;
- atomic_t started;
- atomic_t finished;
-};
-
-static struct _call_data *call_data;
-
-static int
-smp_call_function_interrupt(void *unused)
-{
- call_data_func_t *func;
- uintptr_t arg1 = call_data->arg1;
- uintptr_t arg2 = call_data->arg2;
- int wait = call_data->wait;
- atomic_t *started = &call_data->started;
- atomic_t *finished = &call_data->finished;
-
- /* We only handle function IPIs, not bitmap IPIs */
- if (call_data->func_id < APIC_IPI_INTS ||
- call_data->func_id > IPI_BITMAP_VECTOR)
- panic("invalid function id %u", call_data->func_id);
-
- func = ipi_vectors[IPI_TO_IDX(call_data->func_id)];
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- (*func)(arg1, arg2);
-
- if (wait) {
- mb();
- atomic_inc(finished);
- }
- atomic_add_int(&smp_tlb_wait, 1);
- return (FILTER_HANDLED);
-}
-
-/*
- * Print various information about the SMP system hardware and setup.
- */
-void
-cpu_mp_announce(void)
-{
- int i, x;
-
- /* List CPUs */
- printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
- if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
- continue;
- if (cpu_info[x].cpu_disabled)
- printf(" cpu (AP): APIC ID: %2d (disabled)\n", x);
- else {
- KASSERT(i < mp_ncpus,
- ("mp_ncpus and actual cpus are out of whack"));
- printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
- }
- }
-}
-
-static int
-xen_smp_cpu_init(unsigned int cpu)
-{
- xen_intr_handle_t *ipi_handle;
- const struct xen_ipi_handler *ipi;
- int idx, rc;
-
- ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
- for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
-
- /*
- * The PCPU variable pc_device is not initialized on i386 PV,
- * so we have to use the root_bus device in order to setup
- * the IPIs.
- */
- rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu,
- ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]);
- if (rc != 0) {
- printf("Unable to allocate a XEN IPI port. "
- "Error %d\n", rc);
- break;
- }
- xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
- }
-
- for (;idx < nitems(xen_ipis); idx++)
- ipi_handle[idx] = NULL;
-
- if (rc == 0)
- return (0);
-
- /* Either all are successfully mapped, or none at all. */
- for (idx = 0; idx < nitems(xen_ipis); idx++) {
- if (ipi_handle[idx] == NULL)
- continue;
-
- xen_intr_unbind(ipi_handle[idx]);
- ipi_handle[idx] = NULL;
- }
-
- return (rc);
-}
-
-static void
-xen_smp_intr_init_cpus(void *unused)
-{
- int i;
-
- for (i = 0; i < mp_ncpus; i++)
- xen_smp_cpu_init(i);
-}
-
-static void
-xen_smp_intr_setup_cpus(void *unused)
-{
- int i;
-
- for (i = 0; i < mp_ncpus; i++)
- DPCPU_ID_SET(i, vcpu_info,
- &HYPERVISOR_shared_info->vcpu_info[i]);
-}
-
-#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
-
-/*
- * AP CPU's call this to initialize themselves.
- */
-void
-init_secondary(void)
-{
- vm_offset_t addr;
- u_int cpuid;
- int gsel_tss;
-
-
- /* bootAP is set in start_ap() to our ID. */
- PCPU_SET(currentldt, _default_ldt);
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-#if 0
- gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
-#endif
- PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
-#if 0
- PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);
-
- PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
-#endif
- PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
-
- /*
- * Set to a known state:
- * Set by mpboot.s: CR0_PG, CR0_PE
- * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
- */
- /*
- * signal our startup to the BSP.
- */
- mp_naps++;
-
- /* Spin until the BSP releases the AP's. */
- while (!aps_ready)
- ia32_pause();
-
- /* BSP may have changed PTD while we were waiting */
- invltlb();
- for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
- invlpg(addr);
-
-#if 0
- /* set up SSE/NX */
- initializecpu();
-#endif
-
- /* set up FPU state on the AP */
- npxinit(false);
-#if 0
- /* A quick check from sanity claus */
- if (PCPU_GET(apic_id) != lapic_id()) {
- printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
- printf("SMP: actual apic_id = %d\n", lapic_id());
- printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
- panic("cpuid mismatch! boom!!");
- }
-#endif
-
- /* Initialize curthread. */
- KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- mtx_lock_spin(&ap_boot_mtx);
-#if 0
-
- /* Init local apic for irq's */
- lapic_setup(1);
-#endif
- smp_cpus++;
-
- cpuid = PCPU_GET(cpuid);
- CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
- printf("SMP: AP CPU #%d Launched!\n", cpuid);
-
- /* Determine if we are a logical CPU. */
- if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
- CPU_SET(cpuid, &logical_cpus_mask);
-
- /* Determine if we are a hyperthread. */
- if (hyperthreading_cpus > 1 &&
- PCPU_GET(apic_id) % hyperthreading_cpus != 0)
- CPU_SET(cpuid, &hyperthreading_cpus_mask);
-#if 0
- if (bootverbose)
- lapic_dump("AP");
-#endif
- if (smp_cpus == mp_ncpus) {
- /* enable IPI's, tlb shootdown, freezes etc */
- atomic_store_rel_int(&smp_started, 1);
- }
-
- mtx_unlock_spin(&ap_boot_mtx);
-
- /* wait until all the AP's are up */
- while (smp_started == 0)
- ia32_pause();
-
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- /* Start per-CPU event timers. */
- cpu_initclocks_ap();
-
- /* enter the scheduler */
- sched_throw(NULL);
-
- panic("scheduler returned us to %s", __func__);
- /* NOTREACHED */
-}
-
-/*******************************************************************
- * local functions and data
- */
-
-/*
- * We tell the I/O APIC code about all the CPUs we want to receive
- * interrupts. If we don't want certain CPUs to receive IRQs we
- * can simply not tell the I/O APIC code about them in this function.
- * We also do not tell it about the BSP since it tells itself about
- * the BSP internally to work with UP kernels and on UP machines.
- */
-void
-set_interrupt_apic_ids(void)
-{
- u_int i, apic_id;
-
- for (i = 0; i < MAXCPU; i++) {
- apic_id = cpu_apic_ids[i];
- if (apic_id == -1)
- continue;
- if (cpu_info[apic_id].cpu_bsp)
- continue;
- if (cpu_info[apic_id].cpu_disabled)
- continue;
-
- /* Don't let hyperthreads service interrupts. */
- if (hyperthreading_cpus > 1 &&
- apic_id % hyperthreading_cpus != 0)
- continue;
-
- intr_add_cpu(i);
- }
-}
-
-/*
- * Assign logical CPU IDs to local APICs.
- */
-void
-assign_cpu_ids(void)
-{
- u_int i;
-
- /* Check for explicitly disabled CPUs. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
- continue;
-
- /* Don't use this CPU if it has been disabled by a tunable. */
- if (resource_disabled("lapic", i)) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- /*
- * Assign CPU IDs to local APIC IDs and disable any CPUs
- * beyond MAXCPU. CPU 0 has already been assigned to the BSP,
- * so we only have to assign IDs for APs.
- */
- mp_ncpus = 1;
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
- cpu_info[i].cpu_disabled)
- continue;
-
- if (mp_ncpus < MAXCPU) {
- cpu_apic_ids[mp_ncpus] = i;
- apic_cpuids[i] = mp_ncpus;
- mp_ncpus++;
- } else
- cpu_info[i].cpu_disabled = 1;
- }
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
- mp_ncpus));
-}
-
-/*
- * start each AP in our list
- */
-/* Lowest 1MB is already mapped: don't touch*/
-#define TMPMAP_START 1
-int
-start_all_aps(void)
-{
- int x,apic_id, cpu;
- struct pcpu *pc;
-
- mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
-
- /* set up temporary P==V mapping for AP boot */
- /* XXX this is a hack, we should boot the AP on its own stack/PTD */
-
- /* start each AP */
- for (cpu = 1; cpu < mp_ncpus; cpu++) {
- apic_id = cpu_apic_ids[cpu];
-
-
- bootAP = cpu;
- bootAPgdt = gdt + (512*cpu);
-
- /* Get per-cpu data */
- pc = &__pcpu[bootAP];
- pcpu_init(pc, bootAP, sizeof(struct pcpu));
- dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
- M_WAITOK | M_ZERO), bootAP);
- pc->pc_apic_id = cpu_apic_ids[bootAP];
- pc->pc_vcpu_id = cpu_apic_ids[bootAP];
- pc->pc_prvspace = pc;
- pc->pc_curthread = 0;
-
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
-
- PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW);
- bzero(bootAPgdt, PAGE_SIZE);
- for (x = 0; x < NGDT; x++)
- ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd);
- PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
-#ifdef notyet
-
- if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
- apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
- acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
-#ifdef CONFIG_ACPI
- if (acpiid != 0xff)
- x86_acpiid_to_apicid[acpiid] = apicid;
-#endif
- }
-#endif
-
- /* attempt to start the Application Processor */
- if (!start_ap(cpu)) {
- printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
- /* better panic as the AP may be running loose */
- printf("panic y/n? [y] ");
- if (cngetc() != 'n')
- panic("bye-bye");
- }
-
- CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
- }
-
-
- pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
-
- /* number of APs actually started */
- return (mp_naps);
-}
-
-extern uint8_t *pcpu_boot_stack;
-extern trap_info_t trap_table[];
-
-static void
-smp_trap_init(trap_info_t *trap_ctxt)
-{
- const trap_info_t *t = trap_table;
-
- for (t = trap_table; t->address; t++) {
- trap_ctxt[t->vector].flags = t->flags;
- trap_ctxt[t->vector].cs = t->cs;
- trap_ctxt[t->vector].address = t->address;
- }
-}
-
-extern struct rwlock pvh_global_lock;
-extern int nkpt;
-static void
-cpu_initialize_context(unsigned int cpu)
-{
- /* vcpu_guest_context_t is too large to allocate on the stack.
- * Hence we allocate statically and protect it with a lock */
- vm_page_t m[NPGPTD + 2];
- static vcpu_guest_context_t ctxt;
- vm_offset_t boot_stack;
- vm_offset_t newPTD;
- vm_paddr_t ma[NPGPTD];
- int i;
-
- /*
- * Page 0,[0-3] PTD
- * Page 1, [4] boot stack
- * Page [5] PDPT
- *
- */
- for (i = 0; i < NPGPTD + 2; i++) {
- m[i] = vm_page_alloc(NULL, 0,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
-
- pmap_zero_page(m[i]);
-
- }
- boot_stack = kva_alloc(PAGE_SIZE);
- newPTD = kva_alloc(NPGPTD * PAGE_SIZE);
- ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V;
-
-#ifdef PAE
- pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
- for (i = 0; i < NPGPTD; i++) {
- ((vm_paddr_t *)boot_stack)[i] =
- ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V;
- }
-#endif
-
- /*
- * Copy cpu0 IdlePTD to new IdlePTD - copying only
- * kernel mappings
- */
- pmap_qenter(newPTD, m, 4);
-
- memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
- (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
- nkpt*sizeof(vm_paddr_t));
-
- pmap_qremove(newPTD, 4);
- kva_free(newPTD, 4 * PAGE_SIZE);
- /*
- * map actual idle stack to boot_stack
- */
- pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));
-
-
- xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1]));
- rw_wlock(&pvh_global_lock);
- for (i = 0; i < 4; i++) {
- int pdir = (PTDPTDI + i) / NPDEPG;
- int curoffset = (PTDPTDI + i) % NPDEPG;
-
- xen_queue_pt_update((vm_paddr_t)
- ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
- ma[i]);
- }
- PT_UPDATES_FLUSH();
- rw_wunlock(&pvh_global_lock);
-
- memset(&ctxt, 0, sizeof(ctxt));
- ctxt.flags = VGCF_IN_KERNEL;
- ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
- ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.eip = (unsigned long)init_secondary;
- ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */
-
- memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
- smp_trap_init(ctxt.trap_ctxt);
-
- ctxt.ldt_ents = 0;
- ctxt.gdt_frames[0] =
- (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
- ctxt.gdt_ents = 512;
-
-#ifdef __i386__
- ctxt.user_regs.esp = boot_stack + PAGE_SIZE;
-
- ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.kernel_sp = boot_stack + PAGE_SIZE;
-
- ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback;
- ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
- ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]);
-#else /* __x86_64__ */
- ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
- ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.kernel_sp = idle->thread.rsp0;
-
- ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
- ctxt.syscall_callback_eip = (unsigned long)system_call;
-
- ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
-
- ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
-#endif
-
- printf("gdtpfn=%lx pdptpfn=%lx\n",
- ctxt.gdt_frames[0],
- ctxt.ctrlreg[3] >> PAGE_SHIFT);
-
- PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
- DELAY(3000);
- PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
-}
-
-/*
- * This function starts the AP (application processor) identified
- * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
- * to accomplish this. This is necessary because of the nuances
- * of the different hardware we might encounter. It isn't pretty,
- * but it seems to work.
- */
-
-int cpus;
-static int
-start_ap(int apic_id)
-{
- int ms;
-
- /* used as a watchpoint to signal AP startup */
- cpus = mp_naps;
-
- cpu_initialize_context(apic_id);
-
- /* Wait up to 5 seconds for it to start. */
- for (ms = 0; ms < 5000; ms++) {
- if (mp_naps > cpus)
- return (1); /* return SUCCESS */
- DELAY(1000);
- }
- return (0); /* return FAILURE */
-}
-
-static void
-ipi_pcpu(int cpu, u_int ipi)
-{
- KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI"));
- xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi]));
-}
-
-/*
- * send an IPI to a specific CPU.
- */
-void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (!old_pending)
- ipi_pcpu(cpu, RESCHEDULE_VECTOR);
- } else {
- KASSERT(call_data != NULL, ("call_data not set"));
- ipi_pcpu(cpu, CALL_FUNCTION_VECTOR);
- }
-}
-
-/*
- * Flush the TLB on all other CPU's
- */
-static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
-{
- u_int ncpu;
- struct _call_data data;
-
- ncpu = mp_ncpus - 1; /* does not shootdown self */
- if (ncpu < 1)
- return; /* no other cpus */
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- KASSERT(call_data == NULL, ("call_data isn't null?!"));
- call_data = &data;
- call_data->func_id = vector;
- call_data->arg1 = addr1;
- call_data->arg2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- ipi_all_but_self(vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- call_data = NULL;
- mtx_unlock_spin(&smp_ipi_mtx);
-}
-
-static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1,
- vm_offset_t addr2)
-{
- int cpu, ncpu, othercpus;
- struct _call_data data;
-
- othercpus = mp_ncpus - 1;
- if (CPU_ISFULLSET(&mask)) {
- if (othercpus < 1)
- return;
- } else {
- CPU_CLR(PCPU_GET(cpuid), &mask);
- if (CPU_EMPTY(&mask))
- return;
- }
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- KASSERT(call_data == NULL, ("call_data isn't null?!"));
- call_data = &data;
- call_data->func_id = vector;
- call_data->arg1 = addr1;
- call_data->arg2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- if (CPU_ISFULLSET(&mask)) {
- ncpu = othercpus;
- ipi_all_but_self(vector);
- } else {
- ncpu = 0;
- while ((cpu = CPU_FFS(&mask)) != 0) {
- cpu--;
- CPU_CLR(cpu, &mask);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
- vector);
- ipi_send_cpu(cpu, vector);
- ncpu++;
- }
- }
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- call_data = NULL;
- mtx_unlock_spin(&smp_ipi_mtx);
-}
-
-void
-smp_cache_flush(void)
-{
-
- if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
-}
-
-void
-smp_invltlb(void)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
- }
-}
-
-void
-smp_invlpg(vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLPG, addr, 0);
- }
-}
-
-void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
- }
-}
-
-void
-smp_masked_invltlb(cpuset_t mask)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
- }
-}
-
-void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
- }
-}
-
-void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
- }
-}
-
-/*
- * send an IPI to a set of cpus.
- */
-void
-ipi_selected(cpuset_t cpus, u_int ipi)
-{
- int cpu;
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
-
- while ((cpu = CPU_FFS(&cpus)) != 0) {
- cpu--;
- CPU_CLR(cpu, &cpus);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
- }
-}
-
-/*
- * send an IPI to a specific CPU.
- */
-void
-ipi_cpu(int cpu, u_int ipi)
-{
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
-
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
-}
-
-/*
- * send an IPI to all CPUs EXCEPT myself
- */
-void
-ipi_all_but_self(u_int ipi)
-{
- cpuset_t other_cpus;
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- other_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &other_cpus);
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- ipi_selected(other_cpus, ipi);
-}
-
-int
-ipi_nmi_handler()
-{
- u_int cpuid;
-
- /*
- * As long as there is not a simple way to know about a NMI's
- * source, if the bitmask for the current CPU is present in
- * the global pending bitword an IPI_STOP_HARD has been issued
- * and should be handled.
- */
- cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
- return (1);
-
- CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
- cpustop_handler();
- return (0);
-}
-
-/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpustop_handler(void)
-{
- int cpu;
-
- cpu = PCPU_GET(cpuid);
-
- savectx(&stoppcbs[cpu]);
-
- /* Indicate that we are stopped */
- CPU_SET_ATOMIC(cpu, &stopped_cpus);
-
- /* Wait for restart */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- CPU_CLR_ATOMIC(cpu, &stopped_cpus);
-
- if (cpu == 0 && cpustop_restartfunc != NULL) {
- cpustop_restartfunc();
- cpustop_restartfunc = NULL;
- }
-}
-
-/*
- * Handlers for TLB related IPIs
- *
- * On i386 Xen PV this are no-ops since this port doesn't support SMP.
- */
-void
-invltlb_handler(void)
-{
-}
-
-void
-invlpg_handler(void)
-{
-}
-
-void
-invlrng_handler(void)
-{
-}
-
-void
-invlcache_handler(void)
-{
-}
-
-/*
- * This is called once the rest of the system is up and running and we're
- * ready to let the AP's out of the pen.
- */
-static void
-release_aps(void *dummy __unused)
-{
-
- if (mp_ncpus == 1)
- return;
- atomic_store_rel_int(&aps_ready, 1);
- while (smp_started == 0)
- ia32_pause();
-}
-SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-SYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL);
-SYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL);
Index: sys/i386/xen/mptable.c
===================================================================
--- sys/i386/xen/mptable.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
- * Copyright (c) 1996, by Steve Passe
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/bus.h>
-#include <sys/kernel.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <x86/apicvar.h>
-
-#include <xen/hypervisor.h>
-#include <xen/xen-os.h>
-#include <machine/smp.h>
-#include <xen/interface/vcpu.h>
-
-
-static int mptable_probe(void);
-static int mptable_probe_cpus(void);
-static void mptable_register(void *dummy);
-static int mptable_setup_local(void);
-static int mptable_setup_io(void);
-
-static struct apic_enumerator mptable_enumerator = {
- "MPTable",
- mptable_probe,
- mptable_probe_cpus,
- mptable_setup_local,
- mptable_setup_io
-};
-
-static int
-mptable_probe(void)
-{
-
- return (-100);
-}
-
-static int
-mptable_probe_cpus(void)
-{
- int i, rc;
-
- for (i = 0; i < MAXCPU; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0)
- cpu_add(i, (i == 0));
- }
-
- return (0);
-}
-
-/*
- * Initialize the local APIC on the BSP.
- */
-static int
-mptable_setup_local(void)
-{
-
- PCPU_SET(apic_id, 0);
- PCPU_SET(vcpu_id, 0);
- return (0);
-}
-
-static int
-mptable_setup_io(void)
-{
-
- return (0);
-}
-
-static void
-mptable_register(void *dummy __unused)
-{
-
- apic_register_enumerator(&mptable_enumerator);
-}
-SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register,
- NULL);
Index: sys/i386/xen/pmap.c
===================================================================
--- sys/i386/xen/pmap.c
+++ /dev/null
@@ -1,4420 +0,0 @@
-/*-
- * Copyright (c) 1991 Regents of the University of California.
- * All rights reserved.
- * Copyright (c) 1994 John S. Dyson
- * All rights reserved.
- * Copyright (c) 1994 David Greenman
- * All rights reserved.
- * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department and William Jolitz of UUNET Technologies Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
- */
-/*-
- * Copyright (c) 2003 Networks Associates Technology, Inc.
- * All rights reserved.
- *
- * This software was developed for the FreeBSD Project by Jake Burkholder,
- * Safeport Network Services, and Network Associates Laboratories, the
- * Security Research Division of Network Associates, Inc. under
- * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
- * CHATS research program.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Manages physical address maps.
- *
- * Since the information managed by this module is
- * also stored by the logical address mapping module,
- * this module may throw away valid virtual-to-physical
- * mappings at almost any time. However, invalidations
- * of virtual-to-physical mappings must be done as
- * requested.
- *
- * In order to cope with hardware architectures which
- * make virtual-to-physical map invalidates expensive,
- * this module may delay invalidate or reduced protection
- * operations until such time as they are actually
- * necessary. This module is given full information as
- * to which processors are currently using which maps,
- * and to when physical maps must be made correct.
- */
-
-#include "opt_cpu.h"
-#include "opt_pmap.h"
-#include "opt_smp.h"
-#include "opt_xbox.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mman.h>
-#include <sys/msgbuf.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/sf_buf.h>
-#include <sys/sx.h>
-#include <sys/vmmeter.h>
-#include <sys/sched.h>
-#include <sys/sysctl.h>
-#ifdef SMP
-#include <sys/smp.h>
-#else
-#include <sys/cpuset.h>
-#endif
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_object.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
-#include <vm/uma.h>
-
-#include <machine/cpu.h>
-#include <machine/cputypes.h>
-#include <machine/md_var.h>
-#include <machine/pcb.h>
-#include <machine/specialreg.h>
-#ifdef SMP
-#include <machine/smp.h>
-#endif
-
-#ifdef XBOX
-#include <machine/xbox.h>
-#endif
-
-#include <xen/interface/xen.h>
-#include <xen/hypervisor.h>
-#include <machine/xen/hypercall.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-
-#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
-#define CPU_ENABLE_SSE
-#endif
-
-#ifndef PMAP_SHPGPERPROC
-#define PMAP_SHPGPERPROC 200
-#endif
-
-#define DIAGNOSTIC
-
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINE extern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
-
-#ifdef PV_STATS
-#define PV_STAT(x) do { x ; } while (0)
-#else
-#define PV_STAT(x) do { } while (0)
-#endif
-
-/*
- * Get PDEs and PTEs for user/kernel address space
- */
-#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
-#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
-
-#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
-#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
-#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
-#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
-#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
-
-#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
-
-#define HAMFISTED_LOCKING
-#ifdef HAMFISTED_LOCKING
-static struct mtx createdelete_lock;
-#endif
-
-struct pmap kernel_pmap_store;
-LIST_HEAD(pmaplist, pmap);
-static struct pmaplist allpmaps;
-static struct mtx allpmaps_lock;
-
-vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
-vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
-int pgeflag = 0; /* PG_G or-in */
-int pseflag = 0; /* PG_PS or-in */
-
-int nkpt;
-vm_offset_t kernel_vm_end;
-extern u_int32_t KERNend;
-
-#ifdef PAE
-pt_entry_t pg_nx;
-#endif
-
-static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-
-static int pat_works; /* Is page attribute table sane? */
-
-/*
- * This lock is defined as static in other pmap implementations. It cannot,
- * however, be defined as static here, because it is (ab)used to serialize
- * queued page table changes in other sources files.
- */
-struct rwlock pvh_global_lock;
-
-/*
- * Data for the pv entry allocation mechanism
- */
-static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
-static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
-static int shpgperproc = PMAP_SHPGPERPROC;
-
-struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
-int pv_maxchunks; /* How many chunks we have KVA for */
-vm_offset_t pv_vafree; /* freelist stored in the PTE */
-
-/*
- * All those kernel PT submaps that BSD is so fond of
- */
-struct sysmaps {
- struct mtx lock;
- pt_entry_t *CMAP1;
- pt_entry_t *CMAP2;
- caddr_t CADDR1;
- caddr_t CADDR2;
-};
-static struct sysmaps sysmaps_pcpu[MAXCPU];
-pt_entry_t *CMAP3;
-caddr_t ptvmmap = 0;
-caddr_t CADDR3;
-struct msgbuf *msgbufp = 0;
-
-/*
- * Crashdump maps.
- */
-static caddr_t crashdumpmap;
-
-static pt_entry_t *PMAP1 = 0, *PMAP2;
-static pt_entry_t *PADDR1 = 0, *PADDR2;
-#ifdef SMP
-static int PMAP1cpu;
-static int PMAP1changedcpu;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
- &PMAP1changedcpu, 0,
- "Number of times pmap_pte_quick changed CPU with same PMAP1");
-#endif
-static int PMAP1changed;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
- &PMAP1changed, 0,
- "Number of times pmap_pte_quick changed PMAP1");
-static int PMAP1unchanged;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
- &PMAP1unchanged, 0,
- "Number of times pmap_pte_quick didn't change PMAP1");
-static struct mtx PMAP2mutex;
-
-static void free_pv_chunk(struct pv_chunk *pc);
-static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
-static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
-static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
- vm_offset_t va);
-
-static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
- vm_page_t m, vm_prot_t prot, vm_page_t mpte);
-static void pmap_flush_page(vm_page_t m);
-static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
-static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
- vm_page_t *free);
-static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
- vm_page_t *free);
-static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
- vm_offset_t va);
-static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
- vm_page_t m);
-
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);
-
-static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags);
-static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free);
-static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
-static void pmap_pte_release(pt_entry_t *pte);
-static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
-
-static __inline void pagezero(void *page);
-
-CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
-CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
-
-/*
- * If you get an error here, then you set KVA_PAGES wrong! See the
- * description of KVA_PAGES in sys/i386/include/pmap.h. It must be
- * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE.
- */
-CTASSERT(KERNBASE % (1 << 24) == 0);
-
-void
-pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
-{
- vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);
-
- switch (type) {
- case SH_PD_SET_VA:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma,
- xpmap_ptom(val & ~(PG_RW)));
-#endif
- xen_queue_pt_update(pdir_ma,
- xpmap_ptom(val));
- break;
- case SH_PD_SET_VA_MA:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma,
- val & ~(PG_RW));
-#endif
- xen_queue_pt_update(pdir_ma, val);
- break;
- case SH_PD_SET_VA_CLEAR:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma, 0);
-#endif
- xen_queue_pt_update(pdir_ma, 0);
- break;
- }
-}
-
-/*
- * Bootstrap the system enough to run with virtual memory.
- *
- * On the i386 this is called after mapping has already been enabled
- * and just syncs the pmap module with what has already been done.
- * [We can't call it easily with mapping off since the kernel is not
- * mapped with PA == VA, hence we would have to relocate every address
- * from the linked base (virtual) address "KERNBASE" to the actual
- * (physical) address starting relative to 0]
- */
-void
-pmap_bootstrap(vm_paddr_t firstaddr)
-{
- vm_offset_t va;
- pt_entry_t *pte, *unused;
- struct sysmaps *sysmaps;
- int i;
-
- /*
- * Initialize the first available kernel virtual address. However,
- * using "firstaddr" may waste a few pages of the kernel virtual
- * address space, because locore may not have mapped every physical
- * page that it allocated. Preferably, locore would provide a first
- * unused virtual address in addition to "firstaddr".
- */
- virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
-
- virtual_end = VM_MAX_KERNEL_ADDRESS;
-
- /*
- * Initialize the kernel pmap (which is statically allocated).
- */
- PMAP_LOCK_INIT(kernel_pmap);
- kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
-#ifdef PAE
- kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
-#endif
- CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
- TAILQ_INIT(&kernel_pmap->pm_pvchunk);
-
- /*
- * Initialize the global pv list lock.
- */
- rw_init_flags(&pvh_global_lock, "pmap pv global", RW_RECURSE);
-
- LIST_INIT(&allpmaps);
- mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
- if (nkpt == 0)
- nkpt = NKPT;
-
- /*
- * Reserve some special page table entries/VA space for temporary
- * mapping of pages.
- */
-#define SYSMAP(c, p, v, n) \
- v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
-
- va = virtual_avail;
- pte = vtopte(va);
-
- /*
- * CMAP1/CMAP2 are used for zeroing and copying pages.
- * CMAP3 is used for the idle process page zeroing.
- */
- for (i = 0; i < MAXCPU; i++) {
- sysmaps = &sysmaps_pcpu[i];
- mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
- SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
- SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- }
- SYSMAP(caddr_t, CMAP3, CADDR3, 1)
- PT_SET_MA(CADDR3, 0);
-
- /*
- * Crashdump maps.
- */
- SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
-
- /*
- * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
- */
- SYSMAP(caddr_t, unused, ptvmmap, 1)
-
- /*
- * msgbufp is used to map the system message buffer.
- */
- SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize)))
-
- /*
- * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(),
- * respectively.
- */
- SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
- SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
-
- mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
-
- virtual_avail = va;
-
- /*
- * Leave in place an identity mapping (virt == phys) for the low 1 MB
- * physical memory region that is used by the ACPI wakeup code. This
- * mapping must not have PG_G set.
- */
-#ifndef XEN
- /*
- * leave here deliberately to show that this is not supported
- */
-#ifdef XBOX
- /* FIXME: This is gross, but needed for the XBOX. Since we are in such
- * an early stadium, we cannot yet neatly map video memory ... :-(
- * Better fixes are very welcome! */
- if (!arch_i386_is_xbox)
-#endif
- for (i = 1; i < NKPT; i++)
- PTD[i] = 0;
-
- /* Initialize the PAT MSR if present. */
- pmap_init_pat();
-
- /* Turn on PG_G on kernel page(s) */
- pmap_set_pg();
-#endif
-
-#ifdef HAMFISTED_LOCKING
- mtx_init(&createdelete_lock, "pmap create/delete", NULL, MTX_DEF);
-#endif
-}
-
-/*
- * Setup the PAT MSR.
- */
-void
-pmap_init_pat(void)
-{
- uint64_t pat_msr;
-
- /* Bail if this CPU doesn't implement PAT. */
- if (!(cpu_feature & CPUID_PAT))
- return;
-
- if (cpu_vendor_id != CPU_VENDOR_INTEL ||
- (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
- /*
- * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
- * Program 4 and 5 as WP and WC.
- * Leave 6 and 7 as UC and UC-.
- */
- pat_msr = rdmsr(MSR_PAT);
- pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
- pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
- PAT_VALUE(5, PAT_WRITE_COMBINING);
- pat_works = 1;
- } else {
- /*
- * Due to some Intel errata, we can only safely use the lower 4
- * PAT entries. Thus, just replace PAT Index 2 with WC instead
- * of UC-.
- *
- * Intel Pentium III Processor Specification Update
- * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
- * or Mode C Paging)
- *
- * Intel Pentium IV Processor Specification Update
- * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
- */
- pat_msr = rdmsr(MSR_PAT);
- pat_msr &= ~PAT_MASK(2);
- pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
- pat_works = 0;
- }
- wrmsr(MSR_PAT, pat_msr);
-}
-
-/*
- * Initialize a vm_page's machine-dependent fields.
- */
-void
-pmap_page_init(vm_page_t m)
-{
-
- TAILQ_INIT(&m->md.pv_list);
- m->md.pat_mode = PAT_WRITE_BACK;
-}
-
-/*
- * ABuse the pte nodes for unmapped kva to thread a kva freelist through.
- * Requirements:
- * - Must deal with pages in order to ensure that none of the PG_* bits
- * are ever set, PG_V in particular.
- * - Assumes we can write to ptes without pte_store() atomic ops, even
- * on PAE systems. This should be ok.
- * - Assumes nothing will ever test these addresses for 0 to indicate
- * no mapping instead of correctly checking PG_V.
- * - Assumes a vm_offset_t will fit in a pte (true for i386).
- * Because PG_V is never set, there can be no mappings to invalidate.
- */
-static int ptelist_count = 0;
-static vm_offset_t
-pmap_ptelist_alloc(vm_offset_t *head)
-{
- vm_offset_t va;
- vm_offset_t *phead = (vm_offset_t *)*head;
-
- if (ptelist_count == 0) {
- printf("out of memory!!!!!!\n");
- return (0); /* Out of memory */
- }
- ptelist_count--;
- va = phead[ptelist_count];
- return (va);
-}
-
-static void
-pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
-{
- vm_offset_t *phead = (vm_offset_t *)*head;
-
- phead[ptelist_count++] = va;
-}
-
-static void
-pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
-{
- int i, nstackpages;
- vm_offset_t va;
- vm_page_t m;
-
- nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1)/ (PAGE_SIZE/sizeof(vm_offset_t));
- for (i = 0; i < nstackpages; i++) {
- va = (vm_offset_t)base + i * PAGE_SIZE;
- m = vm_page_alloc(NULL, i,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- pmap_qenter(va, &m, 1);
- }
-
- *head = (vm_offset_t)base;
- for (i = npages - 1; i >= nstackpages; i--) {
- va = (vm_offset_t)base + i * PAGE_SIZE;
- pmap_ptelist_free(head, va);
- }
-}
-
-
-/*
- * Initialize the pmap module.
- * Called by vm_init, to initialize any structures that the pmap
- * system needs to map virtual memory.
- */
-void
-pmap_init(void)
-{
-
- /*
- * Initialize the address space (zone) for the pv entries. Set a
- * high water mark so that the system can recover from excessive
- * numbers of pv entries.
- */
- TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
- pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
- TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
- pv_entry_max = roundup(pv_entry_max, _NPCPV);
- pv_entry_high_water = 9 * (pv_entry_max / 10);
-
- pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
- pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
- if (pv_chunkbase == NULL)
- panic("pmap_init: not enough kvm for pv chunks");
- pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
-}
-
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
- "Max number of PV entries");
-SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
- "Page share factor per proc");
-
-static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
- "2/4MB page mapping counters");
-
-static u_long pmap_pde_mappings;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
- &pmap_pde_mappings, 0, "2/4MB page mappings");
-
-/***************************************************
- * Low level helper routines.....
- ***************************************************/
-
-/*
- * Determine the appropriate bits to set in a PTE or PDE for a specified
- * caching mode.
- */
-int
-pmap_cache_bits(int mode, boolean_t is_pde)
-{
- int pat_flag, pat_index, cache_bits;
-
- /* The PAT bit is different for PTE's and PDE's. */
- pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
-
- /* If we don't support PAT, map extended modes to older ones. */
- if (!(cpu_feature & CPUID_PAT)) {
- switch (mode) {
- case PAT_UNCACHEABLE:
- case PAT_WRITE_THROUGH:
- case PAT_WRITE_BACK:
- break;
- case PAT_UNCACHED:
- case PAT_WRITE_COMBINING:
- case PAT_WRITE_PROTECTED:
- mode = PAT_UNCACHEABLE;
- break;
- }
- }
-
- /* Map the caching mode to a PAT index. */
- if (pat_works) {
- switch (mode) {
- case PAT_UNCACHEABLE:
- pat_index = 3;
- break;
- case PAT_WRITE_THROUGH:
- pat_index = 1;
- break;
- case PAT_WRITE_BACK:
- pat_index = 0;
- break;
- case PAT_UNCACHED:
- pat_index = 2;
- break;
- case PAT_WRITE_COMBINING:
- pat_index = 5;
- break;
- case PAT_WRITE_PROTECTED:
- pat_index = 4;
- break;
- default:
- panic("Unknown caching mode %d\n", mode);
- }
- } else {
- switch (mode) {
- case PAT_UNCACHED:
- case PAT_UNCACHEABLE:
- case PAT_WRITE_PROTECTED:
- pat_index = 3;
- break;
- case PAT_WRITE_THROUGH:
- pat_index = 1;
- break;
- case PAT_WRITE_BACK:
- pat_index = 0;
- break;
- case PAT_WRITE_COMBINING:
- pat_index = 2;
- break;
- default:
- panic("Unknown caching mode %d\n", mode);
- }
- }
-
- /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
- cache_bits = 0;
- if (pat_index & 0x4)
- cache_bits |= pat_flag;
- if (pat_index & 0x2)
- cache_bits |= PG_NC_PCD;
- if (pat_index & 0x1)
- cache_bits |= PG_NC_PWT;
- return (cache_bits);
-}
-#ifdef SMP
-/*
- * For SMP, these functions have to use the IPI mechanism for coherence.
- *
- * N.B.: Before calling any of the following TLB invalidation functions,
- * the calling processor must ensure that all stores updating a non-
- * kernel page table are globally performed. Otherwise, another
- * processor could cache an old, pre-update entry without being
- * invalidated. This can happen one of two ways: (1) The pmap becomes
- * active on another processor after its pm_active field is checked by
- * one of the following functions but before a store updating the page
- * table is globally performed. (2) The pmap becomes active on another
- * processor before its pm_active field is checked but due to
- * speculative loads one of the following functions stills reads the
- * pmap as inactive on the other processor.
- *
- * The kernel page table is exempt because its pm_active field is
- * immutable. The kernel page table is always active on every
- * processor.
- */
-void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
- cpuset_t other_cpus;
- u_int cpuid;
-
- CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
- pmap, va);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invlpg(va);
- smp_invlpg(va);
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- invlpg(va);
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg(other_cpus, va);
- }
- sched_unpin();
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- cpuset_t other_cpus;
- vm_offset_t addr;
- u_int cpuid;
-
- CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x",
- pmap, sva, eva);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- smp_invlpg_range(sva, eva);
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg_range(other_cpus, sva, eva);
- }
- sched_unpin();
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_all(pmap_t pmap)
-{
- cpuset_t other_cpus;
- u_int cpuid;
-
- CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invltlb();
- smp_invltlb();
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- invltlb();
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invltlb(other_cpus);
- }
- sched_unpin();
-}
-
-void
-pmap_invalidate_cache(void)
-{
-
- sched_pin();
- wbinvd();
- smp_cache_flush();
- sched_unpin();
-}
-#else /* !SMP */
-/*
- * Normal, non-SMP, 486+ invalidation functions.
- * We inline these within pmap.c for speed.
- */
-PMAP_INLINE void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
- CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
- pmap, va);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- invlpg(va);
- PT_UPDATES_FLUSH();
-}
-
-PMAP_INLINE void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t addr;
-
- if (eva - sva > PAGE_SIZE)
- CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
- pmap, sva, eva);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- PT_UPDATES_FLUSH();
-}
-
-PMAP_INLINE void
-pmap_invalidate_all(pmap_t pmap)
-{
-
- CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- invltlb();
-}
-
-PMAP_INLINE void
-pmap_invalidate_cache(void)
-{
-
- wbinvd();
-}
-#endif /* !SMP */
-
-#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
-
-void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
-{
-
- if (force) {
- sva &= ~(vm_offset_t)cpu_clflush_line_size;
- } else {
- KASSERT((sva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: sva not page-aligned"));
- KASSERT((eva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: eva not page-aligned"));
- }
-
- if ((cpu_feature & CPUID_SS) != 0 && !force)
- ; /* If "Self Snoop" is supported, do nothing. */
- else if ((cpu_feature & CPUID_CLFSH) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
- /*
- * Otherwise, do per-cache line flush. Use the mfence
- * instruction to insure that previous stores are
- * included in the write-back. The processor
- * propagates flush to other processors in the cache
- * coherence domain.
- */
- mfence();
- for (; sva < eva; sva += cpu_clflush_line_size)
- clflush(sva);
- mfence();
- } else {
-
- /*
- * No targeted cache flush methods are supported by CPU,
- * or the supplied range is bigger than 2MB.
- * Globally invalidate cache.
- */
- pmap_invalidate_cache();
- }
-}
-
-void
-pmap_invalidate_cache_pages(vm_page_t *pages, int count)
-{
- int i;
-
- if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
- (cpu_feature & CPUID_CLFSH) == 0) {
- pmap_invalidate_cache();
- } else {
- for (i = 0; i < count; i++)
- pmap_flush_page(pages[i]);
- }
-}
-
-/*
- * Are we current address space or kernel? N.B. We return FALSE when
- * a pmap's page table is in use because a kernel thread is borrowing
- * it. The borrowed page table can change spontaneously, making any
- * dependence on its continued use subject to a race condition.
- */
-static __inline int
-pmap_is_current(pmap_t pmap)
-{
-
- return (pmap == kernel_pmap ||
- (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
- (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
-}
-
-/*
- * If the given pmap is not the current or kernel pmap, the returned pte must
- * be released by passing it to pmap_pte_release().
- */
-pt_entry_t *
-pmap_pte(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return (vtopte(va));
- mtx_lock(&PMAP2mutex);
- newpf = *pde & PG_FRAME;
- if ((*PMAP2 & PG_FRAME) != newpf) {
- PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
- CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
- pmap, va, (*PMAP2 & 0xffffffff));
- }
- return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
- }
- return (NULL);
-}
-
-/*
- * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte
- * being NULL.
- */
-static __inline void
-pmap_pte_release(pt_entry_t *pte)
-{
-
- if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
- CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
- *PMAP2);
- rw_wlock(&pvh_global_lock);
- PT_SET_VA(PMAP2, 0, TRUE);
- rw_wunlock(&pvh_global_lock);
- mtx_unlock(&PMAP2mutex);
- }
-}
-
-static __inline void
-invlcaddr(void *caddr)
-{
-
- invlpg((u_int)caddr);
- PT_UPDATES_FLUSH();
-}
-
-/*
- * Super fast pmap_pte routine best used when scanning
- * the pv lists. This eliminates many coarse-grained
- * invltlb calls. Note that many of the pv list
- * scans are across different pmaps. It is very wasteful
- * to do an entire invltlb for checking a single mapping.
- *
- * If the given pmap is not the current pmap, pvh_global_lock
- * must be held and curthread pinned to a CPU.
- */
-static pt_entry_t *
-pmap_pte_quick(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return (vtopte(va));
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
- newpf = *pde & PG_FRAME;
- if ((*PMAP1 & PG_FRAME) != newpf) {
- PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
- CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
- pmap, va, (u_long)*PMAP1);
-
-#ifdef SMP
- PMAP1cpu = PCPU_GET(cpuid);
-#endif
- PMAP1changed++;
- } else
-#ifdef SMP
- if (PMAP1cpu != PCPU_GET(cpuid)) {
- PMAP1cpu = PCPU_GET(cpuid);
- invlcaddr(PADDR1);
- PMAP1changedcpu++;
- } else
-#endif
- PMAP1unchanged++;
- return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
- }
- return (0);
-}
-
-/*
- * Routine: pmap_extract
- * Function:
- * Extract the physical page address associated
- * with the given map/virtual_address pair.
- */
-vm_paddr_t
-pmap_extract(pmap_t pmap, vm_offset_t va)
-{
- vm_paddr_t rtval;
- pt_entry_t *pte;
- pd_entry_t pde;
- pt_entry_t pteval;
-
- rtval = 0;
- PMAP_LOCK(pmap);
- pde = pmap->pm_pdir[va >> PDRSHIFT];
- if (pde != 0) {
- if ((pde & PG_PS) != 0) {
- rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
- PMAP_UNLOCK(pmap);
- return rtval;
- }
- pte = pmap_pte(pmap, va);
- pteval = *pte ? xpmap_mtop(*pte) : 0;
- rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
- pmap_pte_release(pte);
- }
- PMAP_UNLOCK(pmap);
- return (rtval);
-}
-
-/*
- * Routine: pmap_extract_ma
- * Function:
- * Like pmap_extract, but returns machine address
- */
-vm_paddr_t
-pmap_extract_ma(pmap_t pmap, vm_offset_t va)
-{
- vm_paddr_t rtval;
- pt_entry_t *pte;
- pd_entry_t pde;
-
- rtval = 0;
- PMAP_LOCK(pmap);
- pde = pmap->pm_pdir[va >> PDRSHIFT];
- if (pde != 0) {
- if ((pde & PG_PS) != 0) {
- rtval = (pde & ~PDRMASK) | (va & PDRMASK);
- PMAP_UNLOCK(pmap);
- return rtval;
- }
- pte = pmap_pte(pmap, va);
- rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
- pmap_pte_release(pte);
- }
- PMAP_UNLOCK(pmap);
- return (rtval);
-}
-
-/*
- * Routine: pmap_extract_and_hold
- * Function:
- * Atomically extract and hold the physical page
- * with the given pmap and virtual address pair
- * if that mapping permits the given protection.
- */
-vm_page_t
-pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
-{
- pd_entry_t pde;
- pt_entry_t pte, *ptep;
- vm_page_t m;
- vm_paddr_t pa;
-
- pa = 0;
- m = NULL;
- PMAP_LOCK(pmap);
-retry:
- pde = PT_GET(pmap_pde(pmap, va));
- if (pde != 0) {
- if (pde & PG_PS) {
- if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde &
- PG_PS_FRAME) | (va & PDRMASK), &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
- (va & PDRMASK));
- vm_page_hold(m);
- }
- } else {
- ptep = pmap_pte(pmap, va);
- pte = PT_GET(ptep);
- pmap_pte_release(ptep);
- if (pte != 0 &&
- ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
- &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
- vm_page_hold(m);
- }
- }
- }
- PA_UNLOCK_COND(pa);
- PMAP_UNLOCK(pmap);
- return (m);
-}
-
-/***************************************************
- * Low level mapping routines.....
- ***************************************************/
-
-/*
- * Add a wired page to the kva.
- * Note: not SMP coherent.
- *
- * This function may be used before pmap_bootstrap() is called.
- */
-void
-pmap_kenter(vm_offset_t va, vm_paddr_t pa)
-{
-
- PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
-}
-
-void
-pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
-}
-
-static __inline void
-pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
-{
-
- PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
-}
-
-/*
- * Remove a page from the kernel pagetables.
- * Note: not SMP coherent.
- *
- * This function may be used before pmap_bootstrap() is called.
- */
-PMAP_INLINE void
-pmap_kremove(vm_offset_t va)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- PT_CLEAR_VA(pte, FALSE);
-}
-
-/*
- * Used to map a range of physical addresses into kernel
- * virtual address space.
- *
- * The value passed in '*virt' is a suggested virtual address for
- * the mapping. Architectures which can support a direct-mapped
- * physical to virtual region can return the appropriate address
- * within that region, leaving '*virt' unchanged. Other
- * architectures should map the pages starting at '*virt' and
- * update '*virt' with the first usable address after the mapped
- * region.
- */
-vm_offset_t
-pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
-{
- vm_offset_t va, sva;
-
- va = sva = *virt;
- CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
- va, start, end, prot);
- while (start < end) {
- pmap_kenter(va, start);
- va += PAGE_SIZE;
- start += PAGE_SIZE;
- }
- pmap_invalidate_range(kernel_pmap, sva, va);
- *virt = va;
- return (sva);
-}
-
-
-/*
- * Add a list of wired pages to the kva
- * this routine is only used for temporary
- * kernel mappings that do not need to have
- * page modification or references recorded.
- * Note that old mappings are simply written
- * over. The page *must* be wired.
- * Note: SMP coherent. Uses a ranged shootdown IPI.
- */
-void
-pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
-{
- pt_entry_t *endpte, *pte;
- vm_paddr_t pa;
- vm_offset_t va = sva;
- int mclcount = 0;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
- int error;
-
- CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
- pte = vtopte(sva);
- endpte = pte + count;
- while (pte < endpte) {
- pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A;
-
- mclp->op = __HYPERVISOR_update_va_mapping;
- mclp->args[0] = va;
- mclp->args[1] = (uint32_t)(pa & 0xffffffff);
- mclp->args[2] = (uint32_t)(pa >> 32);
- mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;
-
- va += PAGE_SIZE;
- pte++;
- ma++;
- mclp++;
- mclcount++;
- if (mclcount == 16) {
- error = HYPERVISOR_multicall(mcl, mclcount);
- mclp = mcl;
- mclcount = 0;
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- }
- if (mclcount) {
- error = HYPERVISOR_multicall(mcl, mclcount);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
-
-#ifdef INVARIANTS
- for (pte = vtopte(sva), mclcount = 0; mclcount < count; mclcount++, pte++)
- KASSERT(*pte, ("pte not set for va=0x%x", sva + mclcount*PAGE_SIZE));
-#endif
-}
-
-/*
- * This routine tears out page mappings from the
- * kernel -- it is meant only for temporary mappings.
- * Note: SMP coherent. Uses a ranged shootdown IPI.
- */
-void
-pmap_qremove(vm_offset_t sva, int count)
-{
- vm_offset_t va;
-
- CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
- va = sva;
- rw_wlock(&pvh_global_lock);
- critical_enter();
- while (count-- > 0) {
- pmap_kremove(va);
- va += PAGE_SIZE;
- }
- PT_UPDATES_FLUSH();
- pmap_invalidate_range(kernel_pmap, sva, va);
- critical_exit();
- rw_wunlock(&pvh_global_lock);
-}
-
-/***************************************************
- * Page table page management routines.....
- ***************************************************/
-static __inline void
-pmap_free_zero_pages(vm_page_t free)
-{
- vm_page_t m;
-
- while (free != NULL) {
- m = free;
- free = (void *)m->object;
- m->object = NULL;
- vm_page_free_zero(m);
- }
-}
-
-/*
- * Decrements a page table page's wire count, which is used to record the
- * number of valid page table entries within the page. If the wire count
- * drops to zero, then the page table page is unmapped. Returns TRUE if the
- * page table page was unmapped and FALSE otherwise.
- */
-static inline boolean_t
-pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
-{
-
- --m->wire_count;
- if (m->wire_count == 0) {
- _pmap_unwire_ptp(pmap, m, free);
- return (TRUE);
- } else
- return (FALSE);
-}
-
-static void
-_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
-{
- vm_offset_t pteva;
-
- PT_UPDATES_FLUSH();
- /*
- * unmap the page table page
- */
- xen_pt_unpin(pmap->pm_pdir[m->pindex]);
- /*
- * page *might* contain residual mapping :-/
- */
- PD_CLEAR_VA(pmap, m->pindex, TRUE);
- pmap_zero_page(m);
- --pmap->pm_stats.resident_count;
-
- /*
- * This is a release store so that the ordinary store unmapping
- * the page table page is globally performed before TLB shoot-
- * down is begun.
- */
- atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
-
- /*
- * Do an invltlb to make the invalidated mapping
- * take effect immediately.
- */
- pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
- pmap_invalidate_page(pmap, pteva);
-
- /*
- * Put page on a list so that it is released after
- * *ALL* TLB shootdown is done
- */
- m->object = (void *)*free;
- *free = m;
-}
-
-/*
- * After removing a page table entry, this routine is used to
- * conditionally free the page, and manage the hold/wire counts.
- */
-static int
-pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
-{
- pd_entry_t ptepde;
- vm_page_t mpte;
-
- if (va >= VM_MAXUSER_ADDRESS)
- return (0);
- ptepde = PT_GET(pmap_pde(pmap, va));
- mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
- return (pmap_unwire_ptp(pmap, mpte, free));
-}
-
-/*
- * Initialize the pmap for the swapper process.
- */
-void
-pmap_pinit0(pmap_t pmap)
-{
-
- PMAP_LOCK_INIT(pmap);
- /*
- * Since the page table directory is shared with the kernel pmap,
- * which is already included in the list "allpmaps", this pmap does
- * not need to be inserted into that list.
- */
- pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
-#ifdef PAE
- pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
-#endif
- CPU_ZERO(&pmap->pm_active);
- PCPU_SET(curpmap, pmap);
- TAILQ_INIT(&pmap->pm_pvchunk);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-}
-
-/*
- * Initialize a preallocated and zeroed pmap structure,
- * such as one in a vmspace structure.
- */
-int
-pmap_pinit(pmap_t pmap)
-{
- vm_page_t m, ptdpg[NPGPTD + 1];
- int npgptd = NPGPTD + 1;
- int i;
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- /*
- * No need to allocate page table space yet but we do need a valid
- * page directory table.
- */
- if (pmap->pm_pdir == NULL) {
- pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD);
- if (pmap->pm_pdir == NULL) {
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
- return (0);
- }
-#ifdef PAE
- pmap->pm_pdpt = (pd_entry_t *)kva_alloc(1);
-#endif
- }
-
- /*
- * allocate the page directory page(s)
- */
- for (i = 0; i < npgptd;) {
- m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO);
- if (m == NULL)
- VM_WAIT;
- else {
- ptdpg[i++] = m;
- }
- }
-
- pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
-
- for (i = 0; i < NPGPTD; i++)
- if ((ptdpg[i]->flags & PG_ZERO) == 0)
- pagezero(pmap->pm_pdir + (i * NPDEPG));
-
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
- /* Copy the kernel page table directory entries. */
- bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
- mtx_unlock_spin(&allpmaps_lock);
-
-#ifdef PAE
- pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
- if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdpt, PAGE_SIZE);
- for (i = 0; i < NPGPTD; i++) {
- vm_paddr_t ma;
-
- ma = VM_PAGE_TO_MACH(ptdpg[i]);
- pmap->pm_pdpt[i] = ma | PG_V;
-
- }
-#endif
- for (i = 0; i < NPGPTD; i++) {
- pt_entry_t *pd;
- vm_paddr_t ma;
-
- ma = VM_PAGE_TO_MACH(ptdpg[i]);
- pd = pmap->pm_pdir + (i * NPDEPG);
- PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
-#if 0
- xen_pgd_pin(ma);
-#endif
- }
-
-#ifdef PAE
- PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
-#endif
- rw_wlock(&pvh_global_lock);
- xen_flush_queue();
- xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD]));
- for (i = 0; i < NPGPTD; i++) {
- vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]);
- PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
- }
- xen_flush_queue();
- rw_wunlock(&pvh_global_lock);
- CPU_ZERO(&pmap->pm_active);
- TAILQ_INIT(&pmap->pm_pvchunk);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
- return (1);
-}
-
-/*
- * this routine is called if the page table page is not
- * mapped correctly.
- */
-static vm_page_t
-_pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags)
-{
- vm_paddr_t ptema;
- vm_page_t m;
-
- /*
- * Allocate a page table page.
- */
- if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
- if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
- PMAP_UNLOCK(pmap);
- rw_wunlock(&pvh_global_lock);
- VM_WAIT;
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- }
-
- /*
- * Indicate the need to retry. While waiting, the page table
- * page may have been allocated.
- */
- return (NULL);
- }
- if ((m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
-
- /*
- * Map the pagetable page into the process address space, if
- * it isn't already there.
- */
-
- pmap->pm_stats.resident_count++;
-
- ptema = VM_PAGE_TO_MACH(m);
- xen_pt_pin(ptema);
- PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
- (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
-
- KASSERT(pmap->pm_pdir[ptepindex],
- ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
- return (m);
-}
-
-static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
-{
- u_int ptepindex;
- pd_entry_t ptema;
- vm_page_t m;
-
- /*
- * Calculate pagetable page index
- */
- ptepindex = va >> PDRSHIFT;
-retry:
- /*
- * Get the page directory entry
- */
- ptema = pmap->pm_pdir[ptepindex];
-
- /*
- * This supports switching from a 4MB page to a
- * normal 4K page.
- */
- if (ptema & PG_PS) {
- /*
- * XXX
- */
- pmap->pm_pdir[ptepindex] = 0;
- ptema = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- pmap_invalidate_all(kernel_pmap);
- }
-
- /*
- * If the page table page is mapped, we just increment the
- * hold count, and activate it.
- */
- if (ptema & PG_V) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
- m->wire_count++;
- } else {
- /*
- * Here if the pte page isn't mapped, or if it has
- * been deallocated.
- */
- CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
- pmap, va, flags);
- m = _pmap_allocpte(pmap, ptepindex, flags);
- if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
- goto retry;
-
- KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex));
- }
- return (m);
-}
-
-
-/***************************************************
-* Pmap allocation/deallocation routines.
- ***************************************************/
-
-
-/*
- * Release any resources held by the given physical map.
- * Called when a pmap initialized by pmap_pinit is being released.
- * Should only be called if the map contains no valid mappings.
- */
-void
-pmap_release(pmap_t pmap)
-{
- vm_page_t m, ptdpg[2*NPGPTD+1];
- vm_paddr_t ma;
- int i;
-#ifdef PAE
- int npgptd = NPGPTD + 1;
-#else
- int npgptd = NPGPTD;
-#endif
-
- KASSERT(pmap->pm_stats.resident_count == 0,
- ("pmap_release: pmap resident count %ld != 0",
- pmap->pm_stats.resident_count));
- PT_UPDATES_FLUSH();
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- KASSERT(CPU_EMPTY(&pmap->pm_active),
- ("releasing active pmap %p", pmap));
- mtx_lock_spin(&allpmaps_lock);
- LIST_REMOVE(pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
-
- for (i = 0; i < NPGPTD; i++)
- ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME);
- pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
-#ifdef PAE
- ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
-#endif
-
- for (i = 0; i < npgptd; i++) {
- m = ptdpg[i];
- ma = VM_PAGE_TO_MACH(m);
- /* unpinning L1 and L2 treated the same */
-#if 0
- xen_pgd_unpin(ma);
-#else
- if (i == NPGPTD)
- xen_pgd_unpin(ma);
-#endif
-#ifdef PAE
- if (i < NPGPTD)
- KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME),
- ("pmap_release: got wrong ptd page"));
-#endif
- m->wire_count--;
- atomic_subtract_int(&vm_cnt.v_wire_count, 1);
- vm_page_free(m);
- }
-#ifdef PAE
- pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1);
-#endif
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
-}
-
-static int
-kvm_size(SYSCTL_HANDLER_ARGS)
-{
- unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
-
- return (sysctl_handle_long(oidp, &ksize, 0, req));
-}
-SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
- 0, 0, kvm_size, "IU", "Size of KVM");
-
-static int
-kvm_free(SYSCTL_HANDLER_ARGS)
-{
- unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
-
- return (sysctl_handle_long(oidp, &kfree, 0, req));
-}
-SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
- 0, 0, kvm_free, "IU", "Amount of KVM free");
-
-/*
- * grow the number of kernel page table entries, if needed
- */
-void
-pmap_growkernel(vm_offset_t addr)
-{
- struct pmap *pmap;
- vm_paddr_t ptppaddr;
- vm_page_t nkpg;
- pd_entry_t newpdir;
-
- mtx_assert(&kernel_map->system_mtx, MA_OWNED);
- if (kernel_vm_end == 0) {
- kernel_vm_end = KERNBASE;
- nkpt = 0;
- while (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
- nkpt++;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- }
- }
- addr = roundup2(addr, NBPDR);
- if (addr - 1 >= kernel_map->max_offset)
- addr = kernel_map->max_offset;
- while (kernel_vm_end < addr) {
- if (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- continue;
- }
-
- nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
- VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- if (nkpg == NULL)
- panic("pmap_growkernel: no memory to grow kernel");
-
- nkpt++;
-
- if ((nkpg->flags & PG_ZERO) == 0)
- pmap_zero_page(nkpg);
- ptppaddr = VM_PAGE_TO_PHYS(nkpg);
- newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
- rw_wlock(&pvh_global_lock);
- PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(pmap, &allpmaps, pm_list)
- PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
-
- mtx_unlock_spin(&allpmaps_lock);
- rw_wunlock(&pvh_global_lock);
-
- kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- }
-}
-
-
-/***************************************************
- * page management routines.
- ***************************************************/
-
-CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
-CTASSERT(_NPCM == 11);
-CTASSERT(_NPCPV == 336);
-
-static __inline struct pv_chunk *
-pv_to_chunk(pv_entry_t pv)
-{
-
- return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
-}
-
-#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
-
-#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */
-#define PC_FREE10 0x0000fffful /* Free values for index 10 */
-
-static const uint32_t pc_freemask[_NPCM] = {
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE10
-};
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
- "Current number of pv entries");
-
-#ifdef PV_STATS
-static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
- "Current number of pv entry chunks");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
- "Current number of pv entry chunks allocated");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
- "Current number of pv entry chunks frees");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
- "Number of times tried to get a chunk page but failed.");
-
-static long pv_entry_frees, pv_entry_allocs;
-static int pv_entry_spare;
-
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
- "Current number of pv entry frees");
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
- "Current number of pv entry allocs");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
- "Current number of spare pv entries");
-#endif
-
-/*
- * We are in a serious low memory condition. Resort to
- * drastic measures to free some pages so we can allocate
- * another pv entry chunk.
- */
-static vm_page_t
-pmap_pv_reclaim(pmap_t locked_pmap)
-{
- struct pch newtail;
- struct pv_chunk *pc;
- pmap_t pmap;
- pt_entry_t *pte, tpte;
- pv_entry_t pv;
- vm_offset_t va;
- vm_page_t free, m, m_pc;
- uint32_t inuse;
- int bit, field, freed;
-
- PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
- pmap = NULL;
- free = m_pc = NULL;
- TAILQ_INIT(&newtail);
- while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
- free == NULL)) {
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
- if (pmap != pc->pc_pmap) {
- if (pmap != NULL) {
- pmap_invalidate_all(pmap);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- }
- pmap = pc->pc_pmap;
- /* Avoid deadlock and lock recursion. */
- if (pmap > locked_pmap)
- PMAP_LOCK(pmap);
- else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
- pmap = NULL;
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
- continue;
- }
- }
-
- /*
- * Destroy every non-wired, 4 KB page mapping in the chunk.
- */
- freed = 0;
- for (field = 0; field < _NPCM; field++) {
- for (inuse = ~pc->pc_map[field] & pc_freemask[field];
- inuse != 0; inuse &= ~(1UL << bit)) {
- bit = bsfl(inuse);
- pv = &pc->pc_pventry[field * 32 + bit];
- va = pv->pv_va;
- pte = pmap_pte(pmap, va);
- tpte = *pte;
- if ((tpte & PG_W) == 0)
- tpte = pte_load_clear(pte);
- pmap_pte_release(pte);
- if ((tpte & PG_W) != 0)
- continue;
- KASSERT(tpte != 0,
- ("pmap_pv_reclaim: pmap %p va %x zero pte",
- pmap, va));
- if ((tpte & PG_G) != 0)
- pmap_invalidate_page(pmap, va);
- m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- if ((tpte & PG_A) != 0)
- vm_page_aflag_set(m, PGA_REFERENCED);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- pc->pc_map[field] |= 1UL << bit;
- pmap_unuse_pt(pmap, va, &free);
- freed++;
- }
- }
- if (freed == 0) {
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
- continue;
- }
- /* Every freed mapping is for a 4 KB page. */
- pmap->pm_stats.resident_count -= freed;
- PV_STAT(pv_entry_frees += freed);
- PV_STAT(pv_entry_spare += freed);
- pv_entry_count -= freed;
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- for (field = 0; field < _NPCM; field++)
- if (pc->pc_map[field] != pc_freemask[field]) {
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
- pc_list);
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
-
- /*
- * One freed pv entry in locked_pmap is
- * sufficient.
- */
- if (pmap == locked_pmap)
- goto out;
- break;
- }
- if (field == _NPCM) {
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
- /* Entire chunk is free; return it. */
- m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
- pmap_qremove((vm_offset_t)pc, 1);
- pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
- break;
- }
- }
-out:
- TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
- if (pmap != NULL) {
- pmap_invalidate_all(pmap);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- }
- if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
- m_pc = free;
- free = (void *)m_pc->object;
- /* Recycle a freed page table page. */
- m_pc->wire_count = 1;
- atomic_add_int(&vm_cnt.v_wire_count, 1);
- }
- pmap_free_zero_pages(free);
- return (m_pc);
-}
-
-/*
- * free the pv_entry back to the free list
- */
-static void
-free_pv_entry(pmap_t pmap, pv_entry_t pv)
-{
- struct pv_chunk *pc;
- int idx, field, bit;
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- PV_STAT(pv_entry_frees++);
- PV_STAT(pv_entry_spare++);
- pv_entry_count--;
- pc = pv_to_chunk(pv);
- idx = pv - &pc->pc_pventry[0];
- field = idx / 32;
- bit = idx % 32;
- pc->pc_map[field] |= 1ul << bit;
- for (idx = 0; idx < _NPCM; idx++)
- if (pc->pc_map[idx] != pc_freemask[idx]) {
- /*
- * 98% of the time, pc is already at the head of the
- * list. If it isn't already, move it to the head.
- */
- if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
- pc)) {
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
- pc_list);
- }
- return;
- }
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- free_pv_chunk(pc);
-}
-
-static void
-free_pv_chunk(struct pv_chunk *pc)
-{
- vm_page_t m;
-
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
- /* entire chunk is free, return it */
- m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
- pmap_qremove((vm_offset_t)pc, 1);
- vm_page_unwire(m, PQ_INACTIVE);
- vm_page_free(m);
- pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
-}
-
-/*
- * get a new pv_entry, allocating a block from the system
- * when needed.
- */
-static pv_entry_t
-get_pv_entry(pmap_t pmap, boolean_t try)
-{
- static const struct timeval printinterval = { 60, 0 };
- static struct timeval lastprint;
- int bit, field;
- pv_entry_t pv;
- struct pv_chunk *pc;
- vm_page_t m;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PV_STAT(pv_entry_allocs++);
- pv_entry_count++;
- if (pv_entry_count > pv_entry_high_water)
- if (ratecheck(&lastprint, &printinterval))
- printf("Approaching the limit on PV entries, consider "
- "increasing either the vm.pmap.shpgperproc or the "
- "vm.pmap.pv_entry_max tunable.\n");
-retry:
- pc = TAILQ_FIRST(&pmap->pm_pvchunk);
- if (pc != NULL) {
- for (field = 0; field < _NPCM; field++) {
- if (pc->pc_map[field]) {
- bit = bsfl(pc->pc_map[field]);
- break;
- }
- }
- if (field < _NPCM) {
- pv = &pc->pc_pventry[field * 32 + bit];
- pc->pc_map[field] &= ~(1ul << bit);
- /* If this was the last item, move it to tail */
- for (field = 0; field < _NPCM; field++)
- if (pc->pc_map[field] != 0) {
- PV_STAT(pv_entry_spare--);
- return (pv); /* not full, return */
- }
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
- PV_STAT(pv_entry_spare--);
- return (pv);
- }
- }
- /*
- * Access to the ptelist "pv_vafree" is synchronized by the page
- * queues lock. If "pv_vafree" is currently non-empty, it will
- * remain non-empty until pmap_ptelist_alloc() completes.
- */
- if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
- VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
- if (try) {
- pv_entry_count--;
- PV_STAT(pc_chunk_tryfail++);
- return (NULL);
- }
- m = pmap_pv_reclaim(pmap);
- if (m == NULL)
- goto retry;
- }
- PV_STAT(pc_chunk_count++);
- PV_STAT(pc_chunk_allocs++);
- pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
- pmap_qenter((vm_offset_t)pc, &m, 1);
- if ((m->flags & PG_ZERO) == 0)
- pagezero(pc);
- pc->pc_pmap = pmap;
- pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
- for (field = 1; field < _NPCM; field++)
- pc->pc_map[field] = pc_freemask[field];
- TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
- pv = &pc->pc_pventry[0];
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- PV_STAT(pv_entry_spare += _NPCPV - 1);
- return (pv);
-}
-
-static __inline pv_entry_t
-pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
-{
- pv_entry_t pv;
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
- if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
- TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
- break;
- }
- }
- return (pv);
-}
-
-static void
-pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
-{
- pv_entry_t pv;
-
- pv = pmap_pvh_remove(pvh, pmap, va);
- KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
- free_pv_entry(pmap, pv);
-}
-
-static void
-pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
-{
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- pmap_pvh_free(&m->md, pmap, va);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
-}
-
-/*
- * Conditionally create a pv entry.
- */
-static boolean_t
-pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
-{
- pv_entry_t pv;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- if (pv_entry_count < pv_entry_high_water &&
- (pv = get_pv_entry(pmap, TRUE)) != NULL) {
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- return (TRUE);
- } else
- return (FALSE);
-}
-
-/*
- * pmap_remove_pte: do the things to unmap a page in a process
- */
-static int
-pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
-{
- pt_entry_t oldpte;
- vm_page_t m;
-
- CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
- pmap, (u_long)*ptq, va);
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- oldpte = *ptq;
- PT_SET_VA_MA(ptq, 0, TRUE);
- KASSERT(oldpte != 0,
- ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va));
- if (oldpte & PG_W)
- pmap->pm_stats.wired_count -= 1;
- /*
- * Machines that don't support invlpg, also don't support
- * PG_G.
- */
- if (oldpte & PG_G)
- pmap_invalidate_page(kernel_pmap, va);
- pmap->pm_stats.resident_count -= 1;
- if (oldpte & PG_MANAGED) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME);
- if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- if (oldpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
- pmap_remove_entry(pmap, m, va);
- }
- return (pmap_unuse_pt(pmap, va, free));
-}
-
-/*
- * Remove a single page from a process address space
- */
-static void
-pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
-{
- pt_entry_t *pte;
-
- CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x",
- pmap, va);
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0)
- return;
- pmap_remove_pte(pmap, pte, va, free);
- pmap_invalidate_page(pmap, va);
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
-
-}
-
-/*
- * Remove the given range of addresses from the specified map.
- *
- * It is assumed that the start and end are properly
- * rounded to the page size.
- */
-void
-pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
- vm_page_t free = NULL;
- int anyvalid;
-
- CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
- pmap, sva, eva);
-
- /*
- * Perform an unsynchronized read. This is, however, safe.
- */
- if (pmap->pm_stats.resident_count == 0)
- return;
-
- anyvalid = 0;
-
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
-
- /*
- * special handling of removing one page. a very
- * common operation and easy to short circuit some
- * code.
- */
- if ((sva + PAGE_SIZE == eva) &&
- ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
- pmap_remove_page(pmap, sva, &free);
- goto out;
- }
-
- for (; sva < eva; sva = pdnxt) {
- u_int pdirindex;
-
- /*
- * Calculate index for next page table.
- */
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- if (pmap->pm_stats.resident_count == 0)
- break;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Check for large page.
- */
- if ((ptpaddr & PG_PS) != 0) {
- PD_CLEAR_VA(pmap, pdirindex, TRUE);
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- anyvalid = 1;
- continue;
- }
-
- /*
- * Limit our scan to either the end of the va represented
- * by the current page table page, or to the end of the
- * range being removed.
- */
- if (pdnxt > eva)
- pdnxt = eva;
-
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & PG_V) == 0)
- continue;
-
- /*
- * The TLB entry for a PG_G mapping is invalidated
- * by pmap_remove_pte().
- */
- if ((*pte & PG_G) == 0)
- anyvalid = 1;
- if (pmap_remove_pte(pmap, pte, sva, &free))
- break;
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
-out:
- if (anyvalid)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- pmap_free_zero_pages(free);
-}
-
-/*
- * Routine: pmap_remove_all
- * Function:
- * Removes this physical page from
- * all physical maps in which it resides.
- * Reflects back modify bits to the pager.
- *
- * Notes:
- * Original versions of this routine were very
- * inefficient because they iteratively called
- * pmap_remove (slow...)
- */
-
-void
-pmap_remove_all(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t *pte, tpte;
- vm_page_t free;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_remove_all: page %p is not managed", m));
- free = NULL;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pmap->pm_stats.resident_count--;
- pte = pmap_pte_quick(pmap, pv->pv_va);
- tpte = *pte;
- PT_SET_VA_MA(pte, 0, TRUE);
- KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte",
- pmap, pv->pv_va));
- if (tpte & PG_W)
- pmap->pm_stats.wired_count--;
- if (tpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
-
- /*
- * Update the vm_page_t clean and reference bits.
- */
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- pmap_unuse_pt(pmap, pv->pv_va, &free);
- pmap_invalidate_page(pmap, pv->pv_va);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- free_pv_entry(pmap, pv);
- PMAP_UNLOCK(pmap);
- }
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- pmap_free_zero_pages(free);
-}
-
-/*
- * Set the physical protection on the
- * specified range of this map as requested.
- */
-void
-pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
-{
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
- int anychanged;
-
- CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
- pmap, sva, eva, prot);
-
- if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
- pmap_remove(pmap, sva, eva);
- return;
- }
-
-#ifdef PAE
- if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
- (VM_PROT_WRITE|VM_PROT_EXECUTE))
- return;
-#else
- if (prot & VM_PROT_WRITE)
- return;
-#endif
-
- anychanged = 0;
-
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pt_entry_t obits, pbits;
- u_int pdirindex;
-
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Check for large page.
- */
- if ((ptpaddr & PG_PS) != 0) {
- if ((prot & VM_PROT_WRITE) == 0)
- pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pmap->pm_pdir[pdirindex] |= pg_nx;
-#endif
- anychanged = 1;
- continue;
- }
-
- if (pdnxt > eva)
- pdnxt = eva;
-
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- vm_page_t m;
-
-retry:
- /*
- * Regardless of whether a pte is 32 or 64 bits in
- * size, PG_RW, PG_A, and PG_M are among the least
- * significant 32 bits.
- */
- obits = pbits = *pte;
- if ((pbits & PG_V) == 0)
- continue;
-
- if ((prot & VM_PROT_WRITE) == 0) {
- if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
- (PG_MANAGED | PG_M | PG_RW)) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) &
- PG_FRAME);
- vm_page_dirty(m);
- }
- pbits &= ~(PG_RW | PG_M);
- }
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pbits |= pg_nx;
-#endif
-
- if (pbits != obits) {
- obits = *pte;
- PT_SET_VA_MA(pte, pbits, TRUE);
- if (*pte != pbits)
- goto retry;
- if (obits & PG_G)
- pmap_invalidate_page(pmap, sva);
- else
- anychanged = 1;
- }
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- if (anychanged)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Insert the given physical page (p) at
- * the specified virtual address (v) in the
- * target physical map with the protection requested.
- *
- * If specified, the page will be wired down, meaning
- * that the related pte can not be reclaimed.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- * or lose information. That is, this routine must actually
- * insert this page into the given map NOW.
- */
-int
-pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
- u_int flags, int8_t psind __unused)
-{
- pd_entry_t *pde;
- pt_entry_t *pte;
- pt_entry_t newpte, origpte;
- pv_entry_t pv;
- vm_paddr_t opa, pa;
- vm_page_t mpte, om;
- boolean_t invlva, wired;
-
- CTR5(KTR_PMAP,
- "pmap_enter: pmap=%08p va=0x%08x ma=0x%08x prot=0x%x flags=0x%x",
- pmap, va, VM_PAGE_TO_MACH(m), prot, flags);
- va = trunc_page(va);
- KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
- KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
- ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
- va));
- if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
- VM_OBJECT_ASSERT_LOCKED(m->object);
-
- mpte = NULL;
- wired = (flags & PMAP_ENTER_WIRED) != 0;
-
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- sched_pin();
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS) {
- mpte = pmap_allocpte(pmap, va, flags);
- if (mpte == NULL) {
- KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
- ("pmap_allocpte failed with sleep allowed"));
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- return (KERN_RESOURCE_SHORTAGE);
- }
- }
-
- pde = pmap_pde(pmap, va);
- if ((*pde & PG_PS) != 0)
- panic("pmap_enter: attempted pmap_enter on 4MB page");
- pte = pmap_pte_quick(pmap, va);
-
- /*
- * Page Directory table entry not valid, we need a new PT page
- */
- if (pte == NULL) {
- panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
- (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va);
- }
-
- pa = VM_PAGE_TO_PHYS(m);
- om = NULL;
- opa = origpte = 0;
-
-#if 0
- KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx",
- pte, *pte));
-#endif
- origpte = *pte;
- if (origpte)
- origpte = xpmap_mtop(origpte);
- opa = origpte & PG_FRAME;
-
- /*
- * Mapping has not changed, must be protection or wiring change.
- */
- if (origpte && (opa == pa)) {
- /*
- * Wiring change, just update stats. We don't worry about
- * wiring PT pages as they remain resident as long as there
- * are valid mappings in them. Hence, if a user page is wired,
- * the PT page will be also.
- */
- if (wired && ((origpte & PG_W) == 0))
- pmap->pm_stats.wired_count++;
- else if (!wired && (origpte & PG_W))
- pmap->pm_stats.wired_count--;
-
- /*
- * Remove extra pte reference
- */
- if (mpte)
- mpte->wire_count--;
-
- if (origpte & PG_MANAGED) {
- om = m;
- pa |= PG_MANAGED;
- }
- goto validate;
- }
-
- pv = NULL;
-
- /*
- * Mapping has changed, invalidate old range and fall through to
- * handle validating new mapping.
- */
- if (opa) {
- if (origpte & PG_W)
- pmap->pm_stats.wired_count--;
- if (origpte & PG_MANAGED) {
- om = PHYS_TO_VM_PAGE(opa);
- pv = pmap_pvh_remove(&om->md, pmap, va);
- } else if (va < VM_MAXUSER_ADDRESS)
- printf("va=0x%x is unmanaged :-( \n", va);
-
- if (mpte != NULL) {
- mpte->wire_count--;
- KASSERT(mpte->wire_count > 0,
- ("pmap_enter: missing reference to page table page,"
- " va: 0x%x", va));
- }
- } else
- pmap->pm_stats.resident_count++;
-
- /*
- * Enter on the PV list if part of our managed memory.
- */
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
- ("pmap_enter: managed mapping within the clean submap"));
- if (pv == NULL)
- pv = get_pv_entry(pmap, FALSE);
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- pa |= PG_MANAGED;
- } else if (pv != NULL)
- free_pv_entry(pmap, pv);
-
- /*
- * Increment counters
- */
- if (wired)
- pmap->pm_stats.wired_count++;
-
-validate:
- /*
- * Now validate mapping with desired protection/wiring.
- */
- newpte = (pt_entry_t)(pa | PG_V);
- if ((prot & VM_PROT_WRITE) != 0) {
- newpte |= PG_RW;
- if ((newpte & PG_MANAGED) != 0)
- vm_page_aflag_set(m, PGA_WRITEABLE);
- }
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- newpte |= pg_nx;
-#endif
- if (wired)
- newpte |= PG_W;
- if (va < VM_MAXUSER_ADDRESS)
- newpte |= PG_U;
- if (pmap == kernel_pmap)
- newpte |= pgeflag;
-
- critical_enter();
- /*
- * if the mapping or permission bits are different, we need
- * to update the pte.
- */
- if ((origpte & ~(PG_M|PG_A)) != newpte) {
- if (origpte) {
- invlva = FALSE;
- origpte = *pte;
- PT_SET_VA(pte, newpte | PG_A, FALSE);
- if (origpte & PG_A) {
- if (origpte & PG_MANAGED)
- vm_page_aflag_set(om, PGA_REFERENCED);
- if (opa != VM_PAGE_TO_PHYS(m))
- invlva = TRUE;
-#ifdef PAE
- if ((origpte & PG_NX) == 0 &&
- (newpte & PG_NX) != 0)
- invlva = TRUE;
-#endif
- }
- if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if ((origpte & PG_MANAGED) != 0)
- vm_page_dirty(om);
- if ((prot & VM_PROT_WRITE) == 0)
- invlva = TRUE;
- }
- if ((origpte & PG_MANAGED) != 0 &&
- TAILQ_EMPTY(&om->md.pv_list))
- vm_page_aflag_clear(om, PGA_WRITEABLE);
- if (invlva)
- pmap_invalidate_page(pmap, va);
- } else{
- PT_SET_VA(pte, newpte | PG_A, FALSE);
- }
-
- }
- PT_UPDATES_FLUSH();
- critical_exit();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- return (KERN_SUCCESS);
-}
-
-/*
- * Maps a sequence of resident pages belonging to the same object.
- * The sequence begins with the given page m_start. This page is
- * mapped at the given virtual address start. Each subsequent page is
- * mapped at a virtual address that is offset from start by the same
- * amount as the page is offset from m_start within the object. The
- * last page in the sequence is the page with the largest offset from
- * m_start that can be mapped at a virtual address less than the given
- * virtual address end. Not every virtual page between start and end
- * is mapped; only those for which a resident page exists with the
- * corresponding offset from m_start are mapped.
- */
-void
-pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
- vm_page_t m_start, vm_prot_t prot)
-{
- vm_page_t m, mpte;
- vm_pindex_t diff, psize;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
- int error, count = 0;
-
- VM_OBJECT_ASSERT_LOCKED(m_start->object);
-
- psize = atop(end - start);
- mpte = NULL;
- m = m_start;
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
- mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m,
- prot, mpte);
- m = TAILQ_NEXT(m, listq);
- if (count == 16) {
- error = HYPERVISOR_multicall(mcl, count);
- KASSERT(error == 0, ("bad multicall %d", error));
- mclp = mcl;
- count = 0;
- }
- }
- if (count) {
- error = HYPERVISOR_multicall(mcl, count);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * this code makes some *MAJOR* assumptions:
- * 1. Current pmap & pmap exists.
- * 2. Not wired.
- * 3. Read access.
- * 4. No page table pages.
- * but is *MUCH* faster than pmap_enter...
- */
-
-void
-pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
-{
- multicall_entry_t mcl, *mclp;
- int count = 0;
- mclp = &mcl;
-
- CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
- pmap, va, m, prot);
-
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL);
- if (count)
- HYPERVISOR_multicall(&mcl, count);
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-#ifdef notyet
-void
-pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count)
-{
- int i, error, index = 0;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
-
- PMAP_LOCK(pmap);
- for (i = 0; i < count; i++, addrs++, pages++, prots++) {
- if (!pmap_is_prefaultable_locked(pmap, *addrs))
- continue;
-
- (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL);
- if (index == 16) {
- error = HYPERVISOR_multicall(mcl, index);
- mclp = mcl;
- index = 0;
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- }
- if (index) {
- error = HYPERVISOR_multicall(mcl, index);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
-
- PMAP_UNLOCK(pmap);
-}
-#endif
-
-static vm_page_t
-pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, vm_page_t mpte)
-{
- pt_entry_t *pte;
- vm_paddr_t pa;
- vm_page_t free;
- multicall_entry_t *mcl = *mclpp;
-
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
- (m->oflags & VPO_UNMANAGED) != 0,
- ("pmap_enter_quick_locked: managed mapping within the clean submap"));
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS) {
- u_int ptepindex;
- pd_entry_t ptema;
-
- /*
- * Calculate pagetable page index
- */
- ptepindex = va >> PDRSHIFT;
- if (mpte && (mpte->pindex == ptepindex)) {
- mpte->wire_count++;
- } else {
- /*
- * Get the page directory entry
- */
- ptema = pmap->pm_pdir[ptepindex];
-
- /*
- * If the page table page is mapped, we just increment
- * the hold count, and activate it.
- */
- if (ptema & PG_V) {
- if (ptema & PG_PS)
- panic("pmap_enter_quick: unexpected mapping into 4MB page");
- mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
- mpte->wire_count++;
- } else {
- mpte = _pmap_allocpte(pmap, ptepindex,
- PMAP_ENTER_NOSLEEP);
- if (mpte == NULL)
- return (mpte);
- }
- }
- } else {
- mpte = NULL;
- }
-
- /*
- * This call to vtopte makes the assumption that we are
- * entering the page into the current pmap. In order to support
- * quick entry into any pmap, one would likely use pmap_pte_quick.
- * But that isn't as quick as vtopte.
- */
- KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap"));
- pte = vtopte(va);
- if (*pte & PG_V) {
- if (mpte != NULL) {
- mpte->wire_count--;
- mpte = NULL;
- }
- return (mpte);
- }
-
- /*
- * Enter on the PV list if part of our managed memory.
- */
- if ((m->oflags & VPO_UNMANAGED) == 0 &&
- !pmap_try_insert_pv_entry(pmap, va, m)) {
- if (mpte != NULL) {
- free = NULL;
- if (pmap_unwire_ptp(pmap, mpte, &free)) {
- pmap_invalidate_page(pmap, va);
- pmap_free_zero_pages(free);
- }
-
- mpte = NULL;
- }
- return (mpte);
- }
-
- /*
- * Increment counters
- */
- pmap->pm_stats.resident_count++;
-
- pa = VM_PAGE_TO_PHYS(m);
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pa |= pg_nx;
-#endif
-
-#if 0
- /*
- * Now validate mapping with RO protection
- */
- if ((m->oflags & VPO_UNMANAGED) != 0)
- pte_store(pte, pa | PG_V | PG_U);
- else
- pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
-#else
- /*
- * Now validate mapping with RO protection
- */
- if ((m->oflags & VPO_UNMANAGED) != 0)
- pa = xpmap_ptom(pa | PG_V | PG_U);
- else
- pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED);
-
- mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = va;
- mcl->args[1] = (uint32_t)(pa & 0xffffffff);
- mcl->args[2] = (uint32_t)(pa >> 32);
- mcl->args[3] = 0;
- *mclpp = mcl + 1;
- *count = *count + 1;
-#endif
- return (mpte);
-}
-
-/*
- * Make a temporary mapping for a physical address. This is only intended
- * to be used for panic dumps.
- */
-void *
-pmap_kenter_temporary(vm_paddr_t pa, int i)
-{
- vm_offset_t va;
- vm_paddr_t ma = xpmap_ptom(pa);
-
- va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
- PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag);
- invlpg(va);
- return ((void *)crashdumpmap);
-}
-
-/*
- * This code maps large physical mmap regions into the
- * processor address space. Note that some shortcuts
- * are taken, but the code works.
- */
-void
-pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
- vm_pindex_t pindex, vm_size_t size)
-{
- pd_entry_t *pde;
- vm_paddr_t pa, ptepa;
- vm_page_t p;
- int pat_mode;
-
- VM_OBJECT_ASSERT_WLOCKED(object);
- KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
- ("pmap_object_init_pt: non-device object"));
- if (pseflag &&
- (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
- if (!vm_object_populate(object, pindex, pindex + atop(size)))
- return;
- p = vm_page_lookup(object, pindex);
- KASSERT(p->valid == VM_PAGE_BITS_ALL,
- ("pmap_object_init_pt: invalid page %p", p));
- pat_mode = p->md.pat_mode;
-
- /*
- * Abort the mapping if the first page is not physically
- * aligned to a 2/4MB page boundary.
- */
- ptepa = VM_PAGE_TO_PHYS(p);
- if (ptepa & (NBPDR - 1))
- return;
-
- /*
- * Skip the first page. Abort the mapping if the rest of
- * the pages are not physically contiguous or have differing
- * memory attributes.
- */
- p = TAILQ_NEXT(p, listq);
- for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
- pa += PAGE_SIZE) {
- KASSERT(p->valid == VM_PAGE_BITS_ALL,
- ("pmap_object_init_pt: invalid page %p", p));
- if (pa != VM_PAGE_TO_PHYS(p) ||
- pat_mode != p->md.pat_mode)
- return;
- p = TAILQ_NEXT(p, listq);
- }
-
- /*
- * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and
- * "size" is a multiple of 2/4M, adding the PAT setting to
- * "pa" will not affect the termination of this loop.
- */
- PMAP_LOCK(pmap);
- for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
- size; pa += NBPDR) {
- pde = pmap_pde(pmap, addr);
- if (*pde == 0) {
- pde_store(pde, pa | PG_PS | PG_M | PG_A |
- PG_U | PG_RW | PG_V);
- pmap->pm_stats.resident_count += NBPDR /
- PAGE_SIZE;
- pmap_pde_mappings++;
- }
- /* Else continue on if the PDE is already valid. */
- addr += NBPDR;
- }
- PMAP_UNLOCK(pmap);
- }
-}
-
-/*
- * Clear the wired attribute from the mappings for the specified range of
- * addresses in the given pmap. Every valid mapping within that range
- * must have the wired attribute set. In contrast, invalid mappings
- * cannot have the wired attribute set, so they are ignored.
- *
- * The wired attribute of the page table entry is not a hardware feature,
- * so there is no need to invalidate any TLB entries.
- */
-void
-pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t pdnxt;
- pd_entry_t *pde;
- pt_entry_t *pte;
-
- CTR3(KTR_PMAP, "pmap_unwire: pmap=%p sva=0x%x eva=0x%x", pmap, sva,
- eva);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- pde = pmap_pde(pmap, sva);
- if ((*pde & PG_V) == 0)
- continue;
- if ((*pde & PG_PS) != 0)
- panic("pmap_unwire: unexpected PG_PS in pde %#jx",
- (uintmax_t)*pde);
- if (pdnxt > eva)
- pdnxt = eva;
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & PG_V) == 0)
- continue;
- if ((*pte & PG_W) == 0)
- panic("pmap_unwire: pte %#jx is missing PG_W",
- (uintmax_t)*pte);
- PT_SET_VA_MA(pte, *pte & ~PG_W, FALSE);
- pmap->pm_stats.wired_count--;
- }
- }
- if (*PMAP1)
- PT_CLEAR_VA(PMAP1, FALSE);
- PT_UPDATES_FLUSH();
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-
-/*
- * Copy the range specified by src_addr/len
- * from the source map to the range dst_addr/len
- * in the destination map.
- *
- * This routine is only advisory and need not do anything.
- */
-
-void
-pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
- vm_offset_t src_addr)
-{
- vm_page_t free;
- vm_offset_t addr;
- vm_offset_t end_addr = src_addr + len;
- vm_offset_t pdnxt;
-
- if (dst_addr != src_addr)
- return;
-
- if (!pmap_is_current(src_pmap)) {
- CTR2(KTR_PMAP,
- "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx",
- (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME));
-
- return;
- }
- CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x",
- dst_pmap, src_pmap, dst_addr, len, src_addr);
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- rw_wlock(&pvh_global_lock);
- if (dst_pmap < src_pmap) {
- PMAP_LOCK(dst_pmap);
- PMAP_LOCK(src_pmap);
- } else {
- PMAP_LOCK(src_pmap);
- PMAP_LOCK(dst_pmap);
- }
- sched_pin();
- for (addr = src_addr; addr < end_addr; addr = pdnxt) {
- pt_entry_t *src_pte, *dst_pte;
- vm_page_t dstmpte, srcmpte;
- pd_entry_t srcptepaddr;
- u_int ptepindex;
-
- KASSERT(addr < UPT_MIN_ADDRESS,
- ("pmap_copy: invalid to pmap_copy page tables"));
-
- pdnxt = (addr + NBPDR) & ~PDRMASK;
- if (pdnxt < addr)
- pdnxt = end_addr;
- ptepindex = addr >> PDRSHIFT;
-
- srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
- if (srcptepaddr == 0)
- continue;
-
- if (srcptepaddr & PG_PS) {
- if (dst_pmap->pm_pdir[ptepindex] == 0) {
- PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE);
- dst_pmap->pm_stats.resident_count +=
- NBPDR / PAGE_SIZE;
- }
- continue;
- }
-
- srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
- KASSERT(srcmpte->wire_count > 0,
- ("pmap_copy: source page table page is unused"));
-
- if (pdnxt > end_addr)
- pdnxt = end_addr;
-
- src_pte = vtopte(addr);
- while (addr < pdnxt) {
- pt_entry_t ptetemp;
- ptetemp = *src_pte;
- /*
- * we only virtual copy managed pages
- */
- if ((ptetemp & PG_MANAGED) != 0) {
- dstmpte = pmap_allocpte(dst_pmap, addr,
- PMAP_ENTER_NOSLEEP);
- if (dstmpte == NULL)
- goto out;
- dst_pte = pmap_pte_quick(dst_pmap, addr);
- if (*dst_pte == 0 &&
- pmap_try_insert_pv_entry(dst_pmap, addr,
- PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) {
- /*
- * Clear the wired, modified, and
- * accessed (referenced) bits
- * during the copy.
- */
- KASSERT(ptetemp != 0, ("src_pte not set"));
- PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */);
- KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)),
- ("no pmap copy expected: 0x%jx saw: 0x%jx",
- ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte));
- dst_pmap->pm_stats.resident_count++;
- } else {
- free = NULL;
- if (pmap_unwire_ptp(dst_pmap, dstmpte,
- &free)) {
- pmap_invalidate_page(dst_pmap,
- addr);
- pmap_free_zero_pages(free);
- }
- goto out;
- }
- if (dstmpte->wire_count >= srcmpte->wire_count)
- break;
- }
- addr += PAGE_SIZE;
- src_pte++;
- }
- }
-out:
- PT_UPDATES_FLUSH();
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(src_pmap);
- PMAP_UNLOCK(dst_pmap);
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
-}
-
-static __inline void
-pagezero(void *page)
-{
-#if defined(I686_CPU)
- if (cpu_class == CPUCLASS_686) {
-#if defined(CPU_ENABLE_SSE)
- if (cpu_feature & CPUID_SSE2)
- sse2_pagezero(page);
- else
-#endif
- i686_pagezero(page);
- } else
-#endif
- bzero(page, PAGE_SIZE);
-}
-
-/*
- * pmap_zero_page zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents.
- */
-void
-pmap_zero_page(vm_page_t m)
-{
- struct sysmaps *sysmaps;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
- pagezero(sysmaps->CADDR2);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-/*
- * pmap_zero_page_area zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents.
- *
- * off and size may not cover an area beyond a single hardware page.
- */
-void
-pmap_zero_page_area(vm_page_t m, int off, int size)
-{
- struct sysmaps *sysmaps;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_zero_page_area: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
-
- if (off == 0 && size == PAGE_SIZE)
- pagezero(sysmaps->CADDR2);
- else
- bzero((char *)sysmaps->CADDR2 + off, size);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-/*
- * pmap_zero_page_idle zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents. This
- * is intended to be called from the vm_pagezero process only and
- * outside of Giant.
- */
-void
-pmap_zero_page_idle(vm_page_t m)
-{
-
- if (*CMAP3)
- panic("pmap_zero_page_idle: CMAP3 busy");
- sched_pin();
- PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
- pagezero(CADDR3);
- PT_SET_MA(CADDR3, 0);
- sched_unpin();
-}
-
-/*
- * pmap_copy_page copies the specified (machine independent)
- * page by mapping the page into virtual memory and using
- * bcopy to copy the page, one machine dependent page at a
- * time.
- */
-void
-pmap_copy_page(vm_page_t src, vm_page_t dst)
-{
- struct sysmaps *sysmaps;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP1)
- panic("pmap_copy_page: CMAP1 busy");
- if (*sysmaps->CMAP2)
- panic("pmap_copy_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A);
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M);
- bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-int unmapped_buf_allowed = 1;
-
-void
-pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
- vm_offset_t b_offset, int xfersize)
-{
- struct sysmaps *sysmaps;
- vm_page_t a_pg, b_pg;
- char *a_cp, *b_cp;
- vm_offset_t a_pg_offset, b_pg_offset;
- int cnt;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP1 != 0)
- panic("pmap_copy_pages: CMAP1 busy");
- if (*sysmaps->CMAP2 != 0)
- panic("pmap_copy_pages: CMAP2 busy");
- sched_pin();
- while (xfersize > 0) {
- a_pg = ma[a_offset >> PAGE_SHIFT];
- a_pg_offset = a_offset & PAGE_MASK;
- cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
- b_pg = mb[b_offset >> PAGE_SHIFT];
- b_pg_offset = b_offset & PAGE_MASK;
- cnt = min(cnt, PAGE_SIZE - b_pg_offset);
- PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(a_pg) | PG_A);
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
- VM_PAGE_TO_MACH(b_pg) | PG_A | PG_M);
- a_cp = sysmaps->CADDR1 + a_pg_offset;
- b_cp = sysmaps->CADDR2 + b_pg_offset;
- bcopy(a_cp, b_cp, cnt);
- a_offset += cnt;
- b_offset += cnt;
- xfersize -= cnt;
- }
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-/*
- * Returns true if the pmap's pv is one of the first
- * 16 pvs linked to from this page. This count may
- * be changed upwards or downwards in the future; it
- * is only necessary that true be returned for a small
- * subset of pmaps for proper page aging.
- */
-boolean_t
-pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
-{
- pv_entry_t pv;
- int loops = 0;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_page_exists_quick: page %p is not managed", m));
- rv = FALSE;
- rw_wlock(&pvh_global_lock);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- if (PV_PMAP(pv) == pmap) {
- rv = TRUE;
- break;
- }
- loops++;
- if (loops >= 16)
- break;
- }
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-/*
- * pmap_page_wired_mappings:
- *
- * Return the number of managed mappings to the given physical page
- * that are wired.
- */
-int
-pmap_page_wired_mappings(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- int count;
-
- count = 0;
- if ((m->oflags & VPO_UNMANAGED) != 0)
- return (count);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_W) != 0)
- count++;
- PMAP_UNLOCK(pmap);
- }
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (count);
-}
-
-/*
- * Returns TRUE if the given page is mapped. Otherwise, returns FALSE.
- */
-boolean_t
-pmap_page_is_mapped(vm_page_t m)
-{
-
- if ((m->oflags & VPO_UNMANAGED) != 0)
- return (FALSE);
- return (!TAILQ_EMPTY(&m->md.pv_list));
-}
-
-/*
- * Remove all pages from specified address space
- * this aids process exit speeds. Also, this code
- * is special cased for current process only, but
- * can have the more generic (and slightly slower)
- * mode enabled. This is much faster than pmap_remove
- * in the case of running down an entire address space.
- */
-void
-pmap_remove_pages(pmap_t pmap)
-{
- pt_entry_t *pte, tpte;
- vm_page_t m, free = NULL;
- pv_entry_t pv;
- struct pv_chunk *pc, *npc;
- int field, idx;
- int32_t bit;
- uint32_t inuse, bitmask;
- int allfree;
-
- CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap);
-
- if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
- printf("warning: pmap_remove_pages called with non-current pmap\n");
- return;
- }
- rw_wlock(&pvh_global_lock);
- KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap"));
- PMAP_LOCK(pmap);
- sched_pin();
- TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
- KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap,
- pc->pc_pmap));
- allfree = 1;
- for (field = 0; field < _NPCM; field++) {
- inuse = ~pc->pc_map[field] & pc_freemask[field];
- while (inuse != 0) {
- bit = bsfl(inuse);
- bitmask = 1UL << bit;
- idx = field * 32 + bit;
- pv = &pc->pc_pventry[idx];
- inuse &= ~bitmask;
-
- pte = vtopte(pv->pv_va);
- tpte = *pte ? xpmap_mtop(*pte) : 0;
-
- if (tpte == 0) {
- printf(
- "TPTE at %p IS ZERO @ VA %08x\n",
- pte, pv->pv_va);
- panic("bad pte");
- }
-
-/*
- * We cannot remove wired pages from a process' mapping at this time
- */
- if (tpte & PG_W) {
- allfree = 0;
- continue;
- }
-
- m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
- KASSERT(m->phys_addr == (tpte & PG_FRAME),
- ("vm_page_t %p phys_addr mismatch %016jx %016jx",
- m, (uintmax_t)m->phys_addr,
- (uintmax_t)tpte));
-
- KASSERT(m < &vm_page_array[vm_page_array_size],
- ("pmap_remove_pages: bad tpte %#jx",
- (uintmax_t)tpte));
-
-
- PT_CLEAR_VA(pte, FALSE);
-
- /*
- * Update the vm_page_t clean/reference bits.
- */
- if (tpte & PG_M)
- vm_page_dirty(m);
-
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
-
- pmap_unuse_pt(pmap, pv->pv_va, &free);
-
- /* Mark free */
- PV_STAT(pv_entry_frees++);
- PV_STAT(pv_entry_spare++);
- pv_entry_count--;
- pc->pc_map[field] |= bitmask;
- pmap->pm_stats.resident_count--;
- }
- }
- PT_UPDATES_FLUSH();
- if (allfree) {
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- free_pv_chunk(pc);
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
-
- sched_unpin();
- pmap_invalidate_all(pmap);
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- pmap_free_zero_pages(free);
-}
-
-/*
- * pmap_is_modified:
- *
- * Return whether or not the specified physical page was modified
- * in any physical maps.
- */
-boolean_t
-pmap_is_modified(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_is_modified: page %p is not managed", m));
- rv = FALSE;
-
- /*
- * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
- * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
- * is clear, no PTEs can have PG_M set.
- */
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
- return (rv);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- rv = (*pte & PG_M) != 0;
- PMAP_UNLOCK(pmap);
- if (rv)
- break;
- }
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-/*
- * pmap_is_prefaultable:
- *
- * Return whether or not the specified virtual address is elgible
- * for prefault.
- */
-static boolean_t
-pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr)
-{
- pt_entry_t *pte;
- boolean_t rv = FALSE;
-
- return (rv);
-
- if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) {
- pte = vtopte(addr);
- rv = (*pte == 0);
- }
- return (rv);
-}
-
-boolean_t
-pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
-{
- boolean_t rv;
-
- PMAP_LOCK(pmap);
- rv = pmap_is_prefaultable_locked(pmap, addr);
- PMAP_UNLOCK(pmap);
- return (rv);
-}
-
-boolean_t
-pmap_is_referenced(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_is_referenced: page %p is not managed", m));
- rv = FALSE;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V);
- PMAP_UNLOCK(pmap);
- if (rv)
- break;
- }
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-void
-pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
-{
- int i, npages = round_page(len) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- pt_entry_t *pte;
- pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
- rw_wlock(&pvh_global_lock);
- pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M)));
- rw_wunlock(&pvh_global_lock);
- PMAP_MARK_PRIV(xpmap_mtop(*pte));
- pmap_pte_release(pte);
- }
-}
-
-void
-pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
-{
- int i, npages = round_page(len) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- pt_entry_t *pte;
- pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
- PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
- rw_wlock(&pvh_global_lock);
- pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M));
- rw_wunlock(&pvh_global_lock);
- pmap_pte_release(pte);
- }
-}
-
-/*
- * Clear the write and modified bits in each of the given page's mappings.
- */
-void
-pmap_remove_write(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t oldpte, *pte;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_remove_write: page %p is not managed", m));
-
- /*
- * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
- * set by another thread while the object is locked. Thus,
- * if PGA_WRITEABLE is clear, no page table entries need updating.
- */
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
- return;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
-retry:
- oldpte = *pte;
- if ((oldpte & PG_RW) != 0) {
- vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M);
-
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_RW and PG_M are among the least
- * significant 32 bits.
- */
- PT_SET_VA_MA(pte, newpte, TRUE);
- if (*pte != newpte)
- goto retry;
-
- if ((oldpte & PG_M) != 0)
- vm_page_dirty(m);
- pmap_invalidate_page(pmap, pv->pv_va);
- }
- PMAP_UNLOCK(pmap);
- }
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
-}
-
-/*
- * pmap_ts_referenced:
- *
- * Return a count of reference bits for a page, clearing those bits.
- * It is not necessary for every reference bit to be cleared, but it
- * is necessary that 0 only be returned when there are truly no
- * reference bits set.
- *
- * XXX: The exact number of bits to check and clear is a matter that
- * should be tested and standardized at some point in the future for
- * optimal aging of shared pages.
- */
-int
-pmap_ts_referenced(vm_page_t m)
-{
- pv_entry_t pv, pvf, pvn;
- pmap_t pmap;
- pt_entry_t *pte;
- int rtval = 0;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_ts_referenced: page %p is not managed", m));
- rw_wlock(&pvh_global_lock);
- sched_pin();
- if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- pvf = pv;
- do {
- pvn = TAILQ_NEXT(pv, pv_next);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_A) != 0) {
- PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
- pmap_invalidate_page(pmap, pv->pv_va);
- rtval++;
- if (rtval > 4)
- pvn = NULL;
- }
- PMAP_UNLOCK(pmap);
- } while ((pv = pvn) != NULL && pv != pvf);
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rtval);
-}
-
-/*
- * Apply the given advice to the specified range of addresses within the
- * given pmap. Depending on the advice, clear the referenced and/or
- * modified flags in each mapping and set the mapped page's dirty field.
- */
-void
-pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
-{
- pd_entry_t oldpde;
- pt_entry_t *pte;
- vm_offset_t pdnxt;
- vm_page_t m;
- boolean_t anychanged;
-
- if (advice != MADV_DONTNEED && advice != MADV_FREE)
- return;
- anychanged = FALSE;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- oldpde = pmap->pm_pdir[sva >> PDRSHIFT];
- if ((oldpde & (PG_PS | PG_V)) != PG_V)
- continue;
- if (pdnxt > eva)
- pdnxt = eva;
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
- PG_V))
- continue;
- else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if (advice == MADV_DONTNEED) {
- /*
- * Future calls to pmap_is_modified()
- * can be avoided by making the page
- * dirty now.
- */
- m = PHYS_TO_VM_PAGE(xpmap_mtop(*pte) &
- PG_FRAME);
- vm_page_dirty(m);
- }
- PT_SET_VA_MA(pte, *pte & ~(PG_M | PG_A), TRUE);
- } else if ((*pte & PG_A) != 0)
- PT_SET_VA_MA(pte, *pte & ~PG_A, TRUE);
- else
- continue;
- if ((*pte & PG_G) != 0)
- pmap_invalidate_page(pmap, sva);
- else
- anychanged = TRUE;
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- if (anychanged)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Clear the modify bits on the specified physical page.
- */
-void
-pmap_clear_modify(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t *pte;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_clear_modify: page %p is not managed", m));
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT(!vm_page_xbusied(m),
- ("pmap_clear_modify: page %p is exclusive busied", m));
-
- /*
- * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
- * If the object containing the page is locked and the page is not
- * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
- */
- if ((m->aflags & PGA_WRITEABLE) == 0)
- return;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_M is among the least significant
- * 32 bits.
- */
- PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE);
- pmap_invalidate_page(pmap, pv->pv_va);
- }
- PMAP_UNLOCK(pmap);
- }
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
-}
-
-/*
- * Miscellaneous support routines follow
- */
-
-/*
- * Map a set of physical memory pages into the kernel virtual
- * address space. Return a pointer to where it is mapped. This
- * routine is intended to be used for mapping device memory,
- * NOT real memory.
- */
-void *
-pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
-{
- vm_offset_t va, offset;
- vm_size_t tmpsize;
-
- offset = pa & PAGE_MASK;
- size = round_page(offset + size);
- pa = pa & PG_FRAME;
-
- if (pa < KERNLOAD && pa + size <= KERNLOAD)
- va = KERNBASE + pa;
- else
- va = kva_alloc(size);
- if (!va)
- panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
-
- for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
- pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
- pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
- pmap_invalidate_cache_range(va, va + size, FALSE);
- return ((void *)(va + offset));
-}
-
-void *
-pmap_mapdev(vm_paddr_t pa, vm_size_t size)
-{
-
- return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
-}
-
-void *
-pmap_mapbios(vm_paddr_t pa, vm_size_t size)
-{
-
- return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
-}
-
-void
-pmap_unmapdev(vm_offset_t va, vm_size_t size)
-{
- vm_offset_t base, offset;
-
- if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
- return;
- base = trunc_page(va);
- offset = va & PAGE_MASK;
- size = round_page(offset + size);
- kva_free(base, size);
-}
-
-/*
- * Sets the memory attribute for the specified page.
- */
-void
-pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
-{
-
- m->md.pat_mode = ma;
- if ((m->flags & PG_FICTITIOUS) != 0)
- return;
-
- /*
- * If "m" is a normal page, flush it from the cache.
- * See pmap_invalidate_cache_range().
- *
- * First, try to find an existing mapping of the page by sf
- * buffer. sf_buf_invalidate_cache() modifies mapping and
- * flushes the cache.
- */
- if (sf_buf_invalidate_cache(m))
- return;
-
- /*
- * If page is not mapped by sf buffer, but CPU does not
- * support self snoop, map the page transient and do
- * invalidation. In the worst case, whole cache is flushed by
- * pmap_invalidate_cache_range().
- */
- if ((cpu_feature & CPUID_SS) == 0)
- pmap_flush_page(m);
-}
-
-static void
-pmap_flush_page(vm_page_t m)
-{
- struct sysmaps *sysmaps;
- vm_offset_t sva, eva;
-
- if ((cpu_feature & CPUID_CLFSH) != 0) {
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_flush_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
- VM_PAGE_TO_MACH(m) | PG_A | PG_M |
- pmap_cache_bits(m->md.pat_mode, 0));
- invlcaddr(sysmaps->CADDR2);
- sva = (vm_offset_t)sysmaps->CADDR2;
- eva = sva + PAGE_SIZE;
-
- /*
- * Use mfence despite the ordering implied by
- * mtx_{un,}lock() because clflush is not guaranteed
- * to be ordered by any other instruction.
- */
- mfence();
- for (; sva < eva; sva += cpu_clflush_line_size)
- clflush(sva);
- mfence();
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
- } else
- pmap_invalidate_cache();
-}
-
-/*
- * Changes the specified virtual address range's memory type to that given by
- * the parameter "mode". The specified virtual address range must be
- * completely contained within either the kernel map.
- *
- * Returns zero if the change completed successfully, and either EINVAL or
- * ENOMEM if the change failed. Specifically, EINVAL is returned if some part
- * of the virtual address range was not mapped, and ENOMEM is returned if
- * there was insufficient memory available to complete the change.
- */
-int
-pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
-{
- vm_offset_t base, offset, tmpva;
- pt_entry_t *pte;
- u_int opte, npte;
- pd_entry_t *pde;
- boolean_t changed;
-
- base = trunc_page(va);
- offset = va & PAGE_MASK;
- size = round_page(offset + size);
-
- /* Only supported on kernel virtual addresses. */
- if (base <= VM_MAXUSER_ADDRESS)
- return (EINVAL);
-
- /* 4MB pages and pages that aren't mapped aren't supported. */
- for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
- pde = pmap_pde(kernel_pmap, tmpva);
- if (*pde & PG_PS)
- return (EINVAL);
- if ((*pde & PG_V) == 0)
- return (EINVAL);
- pte = vtopte(va);
- if ((*pte & PG_V) == 0)
- return (EINVAL);
- }
-
- changed = FALSE;
-
- /*
- * Ok, all the pages exist and are 4k, so run through them updating
- * their cache mode.
- */
- for (tmpva = base; size > 0; ) {
- pte = vtopte(tmpva);
-
- /*
- * The cache mode bits are all in the low 32-bits of the
- * PTE, so we can just spin on updating the low 32-bits.
- */
- do {
- opte = *(u_int *)pte;
- npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
- npte |= pmap_cache_bits(mode, 0);
- PT_SET_VA_MA(pte, npte, TRUE);
- } while (npte != opte && (*pte != npte));
- if (npte != opte)
- changed = TRUE;
- tmpva += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
-
- /*
- * Flush CPU caches to make sure any data isn't cached that
- * shouldn't be, etc.
- */
- if (changed) {
- pmap_invalidate_range(kernel_pmap, base, tmpva);
- pmap_invalidate_cache_range(base, tmpva, FALSE);
- }
- return (0);
-}
-
-/*
- * perform the pmap work for mincore
- */
-int
-pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
-{
- pt_entry_t *ptep, pte;
- vm_paddr_t pa;
- int val;
-
- PMAP_LOCK(pmap);
-retry:
- ptep = pmap_pte(pmap, addr);
- pte = (ptep != NULL) ? PT_GET(ptep) : 0;
- pmap_pte_release(ptep);
- val = 0;
- if ((pte & PG_V) != 0) {
- val |= MINCORE_INCORE;
- if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
- if ((pte & PG_A) != 0)
- val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
- }
- if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
- (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
- (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
- pa = pte & PG_FRAME;
- /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
- if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
- goto retry;
- } else
- PA_UNLOCK_COND(*locked_pa);
- PMAP_UNLOCK(pmap);
- return (val);
-}
-
-void
-pmap_activate(struct thread *td)
-{
- pmap_t pmap, oldpmap;
- u_int cpuid;
- u_int32_t cr3;
-
- critical_enter();
- pmap = vmspace_pmap(td->td_proc->p_vmspace);
- oldpmap = PCPU_GET(curpmap);
- cpuid = PCPU_GET(cpuid);
-#if defined(SMP)
- CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
- CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
-#else
- CPU_CLR(cpuid, &oldpmap->pm_active);
- CPU_SET(cpuid, &pmap->pm_active);
-#endif
-#ifdef PAE
- cr3 = vtophys(pmap->pm_pdpt);
-#else
- cr3 = vtophys(pmap->pm_pdir);
-#endif
- /*
- * pmap_activate is for the current thread on the current cpu
- */
- td->td_pcb->pcb_cr3 = cr3;
- PT_UPDATES_FLUSH();
- load_cr3(cr3);
- PCPU_SET(curpmap, pmap);
- critical_exit();
-}
-
-void
-pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
-{
-}
-
-/*
- * Increase the starting virtual address of the given mapping if a
- * different alignment might result in more superpage mappings.
- */
-void
-pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
- vm_offset_t *addr, vm_size_t size)
-{
- vm_offset_t superpage_offset;
-
- if (size < NBPDR)
- return;
- if (object != NULL && (object->flags & OBJ_COLORED) != 0)
- offset += ptoa(object->pg_color);
- superpage_offset = offset & PDRMASK;
- if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
- (*addr & PDRMASK) == superpage_offset)
- return;
- if ((*addr & PDRMASK) < superpage_offset)
- *addr = (*addr & ~PDRMASK) + superpage_offset;
- else
- *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
-}
-
-void
-pmap_suspend()
-{
- pmap_t pmap;
- int i, pdir, offset;
- vm_paddr_t pdirma;
- mmu_update_t mu[4];
-
- /*
- * We need to remove the recursive mapping structure from all
- * our pmaps so that Xen doesn't get confused when it restores
- * the page tables. The recursive map lives at page directory
- * index PTDPTDI. We assume that the suspend code has stopped
- * the other vcpus (if any).
- */
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- for (i = 0; i < 4; i++) {
- /*
- * Figure out which page directory (L2) page
- * contains this bit of the recursive map and
- * the offset within that page of the map
- * entry
- */
- pdir = (PTDPTDI + i) / NPDEPG;
- offset = (PTDPTDI + i) % NPDEPG;
- pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
- mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
- mu[i].val = 0;
- }
- HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
- }
-}
-
-void
-pmap_resume()
-{
- pmap_t pmap;
- int i, pdir, offset;
- vm_paddr_t pdirma;
- mmu_update_t mu[4];
-
- /*
- * Restore the recursive map that we removed on suspend.
- */
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- for (i = 0; i < 4; i++) {
- /*
- * Figure out which page directory (L2) page
- * contains this bit of the recursive map and
- * the offset within that page of the map
- * entry
- */
- pdir = (PTDPTDI + i) / NPDEPG;
- offset = (PTDPTDI + i) % NPDEPG;
- pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
- mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
- mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;
- }
- HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
- }
-}
-
-#if defined(PMAP_DEBUG)
-pmap_pid_dump(int pid)
-{
- pmap_t pmap;
- struct proc *p;
- int npte = 0;
- int index;
-
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- if (p->p_pid != pid)
- continue;
-
- if (p->p_vmspace) {
- int i,j;
- index = 0;
- pmap = vmspace_pmap(p->p_vmspace);
- for (i = 0; i < NPDEPTD; i++) {
- pd_entry_t *pde;
- pt_entry_t *pte;
- vm_offset_t base = i << PDRSHIFT;
-
- pde = &pmap->pm_pdir[i];
- if (pde && pmap_pde_v(pde)) {
- for (j = 0; j < NPTEPG; j++) {
- vm_offset_t va = base + (j << PAGE_SHIFT);
- if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
- if (index) {
- index = 0;
- printf("\n");
- }
- sx_sunlock(&allproc_lock);
- return (npte);
- }
- pte = pmap_pte(pmap, va);
- if (pte && pmap_pte_v(pte)) {
- pt_entry_t pa;
- vm_page_t m;
- pa = PT_GET(pte);
- m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
- printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
- va, pa, m->hold_count, m->wire_count, m->flags);
- npte++;
- index++;
- if (index >= 2) {
- index = 0;
- printf("\n");
- } else {
- printf(" ");
- }
- }
- }
- }
- }
- }
- }
- sx_sunlock(&allproc_lock);
- return (npte);
-}
-#endif
-
-#if defined(DEBUG)
-
-static void pads(pmap_t pm);
-void pmap_pvdump(vm_paddr_t pa);
-
-/* print address space of pmap*/
-static void
-pads(pmap_t pm)
-{
- int i, j;
- vm_paddr_t va;
- pt_entry_t *ptep;
-
- if (pm == kernel_pmap)
- return;
- for (i = 0; i < NPDEPTD; i++)
- if (pm->pm_pdir[i])
- for (j = 0; j < NPTEPG; j++) {
- va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
- if (pm == kernel_pmap && va < KERNBASE)
- continue;
- if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
- continue;
- ptep = pmap_pte(pm, va);
- if (pmap_pte_v(ptep))
- printf("%x:%x ", va, *ptep);
- };
-
-}
-
-void
-pmap_pvdump(vm_paddr_t pa)
-{
- pv_entry_t pv;
- pmap_t pmap;
- vm_page_t m;
-
- printf("pa %x", pa);
- m = PHYS_TO_VM_PAGE(pa);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
- pads(pmap);
- }
- printf(" ");
-}
-#endif
Index: sys/i386/xen/xen_machdep.c
===================================================================
--- sys/i386/xen/xen_machdep.c
+++ /dev/null
@@ -1,1236 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004-2006,2008 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/mount.h>
-#include <sys/malloc.h>
-#include <sys/mutex.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/reboot.h>
-#include <sys/rwlock.h>
-#include <sys/sysproto.h>
-#include <sys/boot.h>
-
-#include <xen/xen-os.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/segments.h>
-#include <machine/pcb.h>
-#include <machine/stdarg.h>
-#include <machine/vmparam.h>
-#include <machine/cpu.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/asmacros.h>
-
-
-
-#include <xen/hypervisor.h>
-#include <xen/xenstore/xenstorevar.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-#include <machine/xen/xenpmap.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/interface/memory.h>
-#include <machine/xen/features.h>
-#ifdef SMP
-#include <machine/privatespace.h>
-#endif
-
-
-#include <vm/vm_page.h>
-
-
-#define IDTVEC(name) __CONCAT(X,name)
-
-extern inthand_t
-IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
- IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
- IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
- IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
- IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
-
-
-int xendebug_flags;
-start_info_t *xen_start_info;
-start_info_t *HYPERVISOR_start_info;
-shared_info_t *HYPERVISOR_shared_info;
-xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
-xen_pfn_t *xen_phys_machine;
-xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
-xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
-int preemptable, init_first;
-extern unsigned int avail_space;
-int xen_vector_callback_enabled = 0;
-enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;
-
-void ni_cli(void);
-void ni_sti(void);
-
-
-void
-ni_cli(void)
-{
- CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
- __asm__("pushl %edx;"
- "pushl %eax;"
- );
- __cli();
- __asm__("popl %eax;"
- "popl %edx;"
- );
-}
-
-
-void
-ni_sti(void)
-{
- __asm__("pushl %edx;"
- "pushl %esi;"
- "pushl %eax;"
- );
- __sti();
- __asm__("popl %eax;"
- "popl %esi;"
- "popl %edx;"
- );
-}
-
-void
-force_evtchn_callback(void)
-{
- (void)HYPERVISOR_xen_version(0, NULL);
-}
-
-/*
- * Modify the cmd_line by converting ',' to NULLs so that it is in a format
- * suitable for the static env vars.
- */
-char *
-xen_setbootenv(char *cmd_line)
-{
- char *cmd_line_next;
-
- /* Skip leading spaces */
- for (; *cmd_line == ' '; cmd_line++);
-
- xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line);
-
- for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
- return cmd_line;
-}
-
-int
-xen_boothowto(char *envp)
-{
- int i, howto = 0;
-
- /* get equivalents from the environment */
- for (i = 0; howto_names[i].ev != NULL; i++)
- if (kern_getenv(howto_names[i].ev) != NULL)
- howto |= howto_names[i].mask;
- return howto;
-}
-
-
-#define XPQUEUE_SIZE 128
-
-struct mmu_log {
- char *file;
- int line;
-};
-
-#ifdef SMP
-/* per-cpu queues and indices */
-#ifdef INVARIANTS
-static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
-#endif
-
-static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
-static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
-
-#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
-#define XPQ_QUEUE xpq_queue[vcpu]
-#define XPQ_IDX xpq_idx[vcpu]
-#define SET_VCPU() int vcpu = smp_processor_id()
-#else
-
-static mmu_update_t xpq_queue[XPQUEUE_SIZE];
-#ifdef INVARIANTS
-static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
-#endif
-static int xpq_idx = 0;
-
-#define XPQ_QUEUE_LOG xpq_queue_log
-#define XPQ_QUEUE xpq_queue
-#define XPQ_IDX xpq_idx
-#define SET_VCPU()
-#endif /* !SMP */
-
-#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
-
-#if 0
-static void
-xen_dump_queue(void)
-{
- int _xpq_idx = XPQ_IDX;
- int i;
-
- if (_xpq_idx <= 1)
- return;
-
- xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx);
- for (i = 0; i < _xpq_idx; i++) {
- xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val,
- XPQ_QUEUE[i].ptr);
- }
-}
-#endif
-
-
-static __inline void
-_xen_flush_queue(void)
-{
- SET_VCPU();
- int _xpq_idx = XPQ_IDX;
- int error, i;
-
-#ifdef INVARIANTS
- if (__predict_true(gdtset))
- CRITICAL_ASSERT(curthread);
-#endif
-
- XPQ_IDX = 0;
- /* Make sure index is cleared first to avoid double updates. */
- error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
- _xpq_idx, NULL, DOMID_SELF);
-
-#if 0
- if (__predict_true(gdtset))
- for (i = _xpq_idx; i > 0;) {
- if (i >= 3) {
- CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
- "ptr: %lx val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff),
- (XPQ_QUEUE[i-2].val & 0xffffffff),
- (XPQ_QUEUE[i-2].ptr & 0xffffffff),
- (XPQ_QUEUE[i-3].val & 0xffffffff),
- (XPQ_QUEUE[i-3].ptr & 0xffffffff));
- i -= 3;
- } else if (i == 2) {
- CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff),
- (XPQ_QUEUE[i-2].val & 0xffffffff),
- (XPQ_QUEUE[i-2].ptr & 0xffffffff));
- i = 0;
- } else {
- CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff));
- i = 0;
- }
- }
-#endif
- if (__predict_false(error < 0)) {
- for (i = 0; i < _xpq_idx; i++)
- printf("val: %llx ptr: %llx\n",
- XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
- panic("Failed to execute MMU updates: %d", error);
- }
-
-}
-
-void
-xen_flush_queue(void)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- critical_enter();
- if (XPQ_IDX != 0) _xen_flush_queue();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
-static __inline void
-xen_increment_idx(void)
-{
- SET_VCPU();
-
- XPQ_IDX++;
- if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
- xen_flush_queue();
-}
-
-void
-xen_check_queue(void)
-{
-#ifdef INVARIANTS
- SET_VCPU();
-
- KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
-#endif
-}
-
-void
-xen_invlpg(vm_offset_t va)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_INVLPG_ALL;
- op.arg1.linear_addr = va & ~PAGE_MASK;
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_load_cr3(u_int val)
-{
- struct mmuext_op op;
-#ifdef INVARIANTS
- SET_VCPU();
-
- KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
-#endif
- op.cmd = MMUEXT_NEW_BASEPTR;
- op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-#ifdef KTR
-static __inline u_int
-rebp(void)
-{
- u_int data;
-
- __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));
- return (data);
-}
-#endif
-
-u_int
-read_eflags(void)
-{
- vcpu_info_t *_vcpu;
- u_int eflags;
-
- eflags = _read_eflags();
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
- if (_vcpu->evtchn_upcall_mask)
- eflags &= ~PSL_I;
-
- return (eflags);
-}
-
-void
-write_eflags(u_int eflags)
-{
- u_int intr;
-
- CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
- intr = ((eflags & PSL_I) == 0);
- __restore_flags(intr);
- _write_eflags(eflags);
-}
-
-void
-xen_cli(void)
-{
- CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
- __cli();
-}
-
-void
-xen_sti(void)
-{
- CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
- __sti();
-}
-
-u_int
-xen_rcr2(void)
-{
-
- return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
-}
-
-void
-_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- critical_enter();
- XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- XPQ_QUEUE[XPQ_IDX].val = pfn;
-#ifdef INVARIANTS
- XPQ_QUEUE_LOG[XPQ_IDX].file = file;
- XPQ_QUEUE_LOG[XPQ_IDX].line = line;
-#endif
- xen_increment_idx();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
-extern struct rwlock pvh_global_lock;
-
-void
-_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- rw_assert(&pvh_global_lock, RA_WLOCKED);
-
- KASSERT((ptr & 7) == 0, ("misaligned update"));
-
- if (__predict_true(gdtset))
- critical_enter();
-
- XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
- XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
-#ifdef INVARIANTS
- XPQ_QUEUE_LOG[XPQ_IDX].file = file;
- XPQ_QUEUE_LOG[XPQ_IDX].line = line;
-#endif
- xen_increment_idx();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
-void
-xen_pgdpt_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L3_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pgd_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L2_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pgd_unpin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pt_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L1_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pt_unpin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_set_ldt(vm_paddr_t ptr, unsigned long len)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_SET_LDT;
- op.arg1.linear_addr = ptr;
- op.arg2.nr_ents = len;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_tlb_flush(void)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_update_descriptor(union descriptor *table, union descriptor *entry)
-{
- vm_paddr_t pa;
- pt_entry_t *ptp;
-
- ptp = vtopte((vm_offset_t)table);
- pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
- if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
- panic("HYPERVISOR_update_descriptor failed\n");
-}
-
-
-#if 0
-/*
- * Bitmap is indexed by page number. If bit is set, the page is part of a
- * xen_create_contiguous_region() area of memory.
- */
-unsigned long *contiguous_bitmap;
-
-static void
-contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- contiguous_bitmap[curr_idx] |=
- ((1UL<<end_off)-1) & -(1UL<<start_off);
- } else {
- contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
- while ( ++curr_idx < end_idx )
- contiguous_bitmap[curr_idx] = ~0UL;
- contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
- }
-}
-
-static void
-contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- contiguous_bitmap[curr_idx] &=
- -(1UL<<end_off) | ((1UL<<start_off)-1);
- } else {
- contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
- while ( ++curr_idx != end_idx )
- contiguous_bitmap[curr_idx] = 0;
- contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
- }
-}
-#endif
-
-/* Ensure multi-page extents are contiguous in machine memory. */
-int
-xen_create_contiguous_region(vm_page_t pages, int npages)
-{
- unsigned long mfn, i, flags;
- int order;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &mfn);
-
- balloon_lock(flags);
-
- /* can currently only handle power of two allocation */
- PANIC_IF(ffs(npages) != fls(npages));
-
- /* 0. determine order */
- order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
-
- /* 1. give away machine pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- mfn = PFNTOMFN(pfn);
- PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
- PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
- }
-
-
- /* 2. Get a new contiguous memory extent. */
- reservation.extent_order = order;
- /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not
- * running with a broxen driver XXXEN
- */
- reservation.address_bits = 31;
- if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
- goto fail;
-
- /* 3. Map the new extent in place of old pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- xen_machphys_update(mfn+i, pfn);
- PFNTOMFN(pfn) = mfn+i;
- }
-
- xen_tlb_flush();
-
-#if 0
- contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
-#endif
-
- balloon_unlock(flags);
-
- return 0;
-
- fail:
- reservation.extent_order = 0;
- reservation.address_bits = 0;
-
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation) != 1);
- xen_machphys_update(mfn, pfn);
- PFNTOMFN(pfn) = mfn;
- }
-
- xen_tlb_flush();
-
- balloon_unlock(flags);
-
- return ENOMEM;
-}
-
-void
-xen_destroy_contiguous_region(void *addr, int npages)
-{
- unsigned long mfn, i, flags, order, pfn0;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &mfn);
-
- pfn0 = vtophys(addr) >> PAGE_SHIFT;
-#if 0
- scrub_pages(vstart, 1 << order);
-#endif
- /* can currently only handle power of two allocation */
- PANIC_IF(ffs(npages) != fls(npages));
-
- /* 0. determine order */
- order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
-
- balloon_lock(flags);
-
-#if 0
- contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
-#endif
-
- /* 1. Zap current PTEs, giving away the underlying pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- uint64_t new_val = 0;
- pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
-
- PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
- PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
- PANIC_IF(HYPERVISOR_memory_op(
- XENMEM_decrease_reservation, &reservation) != 1);
- }
-
- /* 2. Map new pages in place of old pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- uint64_t new_val;
- pfn = pfn0 + i;
- PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
-
- new_val = mfn << PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
- new_val, PG_KERNEL));
- xen_machphys_update(mfn, pfn);
- PFNTOMFN(pfn) = mfn;
- }
-
- xen_tlb_flush();
-
- balloon_unlock(flags);
-}
-
-extern vm_offset_t proc0kstack;
-extern int vm86paddr, vm86phystk;
-char *bootmem_start, *bootmem_current, *bootmem_end;
-
-pteinfo_t *pteinfo_list;
-void initvalues(start_info_t *startinfo);
-
-void *
-bootmem_alloc(unsigned int size)
-{
- char *retptr;
-
- retptr = bootmem_current;
- PANIC_IF(retptr + size > bootmem_end);
- bootmem_current += size;
-
- return retptr;
-}
-
-void
-bootmem_free(void *ptr, unsigned int size)
-{
- char *tptr;
-
- tptr = ptr;
- PANIC_IF(tptr != bootmem_current - size ||
- bootmem_current - size < bootmem_start);
-
- bootmem_current -= size;
-}
-
-#if 0
-static vm_paddr_t
-xpmap_mtop2(vm_paddr_t mpa)
-{
- return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
- ) | (mpa & ~PG_FRAME);
-}
-
-static pd_entry_t
-xpmap_get_bootpde(vm_paddr_t va)
-{
-
- return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
-}
-
-static pd_entry_t
-xpmap_get_vbootpde(vm_paddr_t va)
-{
- pd_entry_t pde;
-
- pde = xpmap_get_bootpde(va);
- if ((pde & PG_V) == 0)
- return (pde & ~PG_FRAME);
- return (pde & ~PG_FRAME) |
- (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
-}
-
-static pt_entry_t 8*
-xpmap_get_bootptep(vm_paddr_t va)
-{
- pd_entry_t pde;
-
- pde = xpmap_get_vbootpde(va);
- if ((pde & PG_V) == 0)
- return (void *)-1;
-#define PT_MASK 0x003ff000 /* page table address bits */
- return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
-}
-
-static pt_entry_t
-xpmap_get_bootpte(vm_paddr_t va)
-{
-
- return xpmap_get_bootptep(va)[0];
-}
-#endif
-
-
-#ifdef ADD_ISA_HOLE
-static void
-shift_phys_machine(unsigned long *phys_machine, int nr_pages)
-{
-
- unsigned long *tmp_page, *current_page, *next_page;
- int i;
-
- tmp_page = bootmem_alloc(PAGE_SIZE);
- current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));
- next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
- bcopy(phys_machine, tmp_page, PAGE_SIZE);
-
- while (current_page > phys_machine) {
- /* save next page */
- bcopy(next_page, tmp_page, PAGE_SIZE);
- /* shift down page */
- bcopy(current_page, next_page, PAGE_SIZE);
- /* finish swap */
- bcopy(tmp_page, current_page, PAGE_SIZE);
-
- current_page -= (PAGE_SIZE/sizeof(unsigned long));
- next_page -= (PAGE_SIZE/sizeof(unsigned long));
- }
- bootmem_free(tmp_page, PAGE_SIZE);
-
- for (i = 0; i < nr_pages; i++) {
- xen_machphys_update(phys_machine[i], i);
- }
- memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
-
-}
-#endif /* ADD_ISA_HOLE */
-
-/*
- * Build a directory of the pages that make up our Physical to Machine
- * mapping table. The Xen suspend/restore code uses this to find our
- * mapping table.
- */
-static void
-init_frame_list_list(void *arg)
-{
- unsigned long nr_pages = xen_start_info->nr_pages;
-#define FPP (PAGE_SIZE/sizeof(xen_pfn_t))
- int i, j, k;
-
- xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- for (i = 0, j = 0, k = -1; i < nr_pages;
- i += FPP, j++) {
- if ((j & (FPP - 1)) == 0) {
- k++;
- xen_pfn_to_mfn_frame_list[k] =
- malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- xen_pfn_to_mfn_frame_list_list[k] =
- VTOMFN(xen_pfn_to_mfn_frame_list[k]);
- j = 0;
- }
- xen_pfn_to_mfn_frame_list[k][j] =
- VTOMFN(&xen_phys_machine[i]);
- }
-
- HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
- = VTOMFN(xen_pfn_to_mfn_frame_list_list);
-}
-SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
-
-extern unsigned long physfree;
-
-int pdir, curoffset;
-extern int nkpt;
-
-extern uint32_t kernbase;
-
-void
-initvalues(start_info_t *startinfo)
-{
- vm_offset_t cur_space, cur_space_pt;
- struct physdev_set_iopl set_iopl;
-
- int l3_pages, l2_pages, l1_pages, offset;
- vm_paddr_t console_page_ma, xen_store_ma;
- vm_offset_t tmpva;
- vm_paddr_t shinfo;
-#ifdef PAE
- vm_paddr_t IdlePDPTma, IdlePDPTnewma;
- vm_paddr_t IdlePTDnewma[4];
- pd_entry_t *IdlePDPTnew, *IdlePTDnew;
- vm_paddr_t IdlePTDma[4];
-#else
- vm_paddr_t IdlePTDma[1];
-#endif
- unsigned long i;
- int ncpus = MAXCPU;
-
- nkpt = min(
- min(
- max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
- NPGPTD*NPDEPG - KPTDI),
- (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
-
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
-#ifdef notyet
- /*
- * need to install handler
- */
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
-#endif
- xen_start_info = startinfo;
- HYPERVISOR_start_info = startinfo;
- xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
-
- IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
- l1_pages = 0;
-
-#ifdef PAE
- l3_pages = 1;
- l2_pages = 0;
- IdlePDPT = (pd_entry_t *)startinfo->pt_base;
- IdlePDPTma = VTOM(startinfo->pt_base);
- for (i = (KERNBASE >> 30);
- (i < 4) && (IdlePDPT[i] != 0); i++)
- l2_pages++;
- /*
- * Note that only one page directory has been allocated at this point.
- * Thus, if KERNBASE
- */
- for (i = 0; i < l2_pages; i++)
- IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);
-
- l2_pages = (l2_pages == 0) ? 1 : l2_pages;
-#else
- l3_pages = 0;
- l2_pages = 1;
-#endif
- for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
- (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
-
- if (IdlePTD[i] == 0)
- break;
- l1_pages++;
- }
-
- /* number of pages allocated after the pts + 1*/;
- cur_space = xen_start_info->pt_base +
- (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;
-
- xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space,
- cur_space);
-
- xc_printf("KERNBASE=%x,pt_base=%lx, VTOPFN(base)=%x, nr_pt_frames=%lx\n",
- KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
- xen_start_info->nr_pt_frames);
- xendebug_flags = 0; /* 0xffffffff; */
-
-#ifdef ADD_ISA_HOLE
- shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
-#endif
- XENPRINTF("IdlePTD %p\n", IdlePTD);
- XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%x pt_base: 0x%lx "
- "mod_start: 0x%lx mod_len: 0x%lx\n",
- xen_start_info->nr_pages, xen_start_info->shared_info,
- xen_start_info->flags, xen_start_info->pt_base,
- xen_start_info->mod_start, xen_start_info->mod_len);
-
-#ifdef PAE
- IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
- bzero(IdlePDPTnew, PAGE_SIZE);
-
- IdlePDPTnewma = VTOM(IdlePDPTnew);
- IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
- bzero(IdlePTDnew, 4*PAGE_SIZE);
-
- for (i = 0; i < 4; i++)
- IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
- /*
- * L3
- *
- * Copy the 4 machine addresses of the new PTDs in to the PDPT
- *
- */
- for (i = 0; i < 4; i++)
- IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
-
- __asm__("nop;");
- /*
- *
- * re-map the new PDPT read-only
- */
- PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
- /*
- *
- * Unpin the current PDPT
- */
- xen_pt_unpin(IdlePDPTma);
-
-#endif /* PAE */
-
- /* Map proc0's KSTACK */
- proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
- xc_printf("proc0kstack=%u\n", proc0kstack);
-
- /* vm86/bios stack */
- cur_space += PAGE_SIZE;
-
- /* Map space for the vm86 region */
- vm86paddr = (vm_offset_t)cur_space;
- cur_space += (PAGE_SIZE * 3);
-
- /* allocate 4 pages for bootmem allocator */
- bootmem_start = bootmem_current = (char *)cur_space;
- cur_space += (4 * PAGE_SIZE);
- bootmem_end = (char *)cur_space;
-
- /* allocate pages for gdt */
- gdt = (union descriptor *)cur_space;
- cur_space += PAGE_SIZE*ncpus;
-
- /* allocate page for ldt */
- ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
- cur_space += PAGE_SIZE;
-
- /* unmap remaining pages from initial chunk
- *
- */
- for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
- tmpva += PAGE_SIZE) {
- bzero((char *)tmpva, PAGE_SIZE);
- PT_SET_MA(tmpva, (vm_paddr_t)0);
- }
-
- PT_UPDATES_FLUSH();
-
- memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
- ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
- l1_pages*sizeof(pt_entry_t));
-
- for (i = 0; i < 4; i++) {
- PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
- IdlePTDnewma[i] | PG_V);
- }
- xen_load_cr3(VTOP(IdlePDPTnew));
- xen_pgdpt_pin(VTOM(IdlePDPTnew));
-
- /* allocate remainder of nkpt pages */
- cur_space_pt = cur_space;
- for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
- i++, cur_space += PAGE_SIZE) {
- pdir = (offset + i) / NPDEPG;
- curoffset = ((offset + i) % NPDEPG);
- if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
- break;
-
- /*
- * make sure that all the initial page table pages
- * have been zeroed
- */
- PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
- bzero((char *)cur_space, PAGE_SIZE);
- PT_SET_MA(cur_space, (vm_paddr_t)0);
- xen_pt_pin(VTOM(cur_space));
- xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
- curoffset*sizeof(vm_paddr_t)),
- VTOM(cur_space) | PG_KERNEL);
- PT_UPDATES_FLUSH();
- }
-
- for (i = 0; i < 4; i++) {
- pdir = (PTDPTDI + i) / NPDEPG;
- curoffset = (PTDPTDI + i) % NPDEPG;
-
- xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
- curoffset*sizeof(vm_paddr_t)),
- IdlePTDnewma[i] | PG_V);
- }
-
- PT_UPDATES_FLUSH();
-
- IdlePTD = IdlePTDnew;
- IdlePDPT = IdlePDPTnew;
- IdlePDPTma = IdlePDPTnewma;
-
- HYPERVISOR_shared_info = (shared_info_t *)cur_space;
- cur_space += PAGE_SIZE;
-
- xen_store = (struct xenstore_domain_interface *)cur_space;
- cur_space += PAGE_SIZE;
-
- console_page = (char *)cur_space;
- cur_space += PAGE_SIZE;
-
- /*
- * shared_info is an unsigned long so this will randomly break if
- * it is allocated above 4GB - I guess people are used to that
- * sort of thing with Xen ... sigh
- */
- shinfo = xen_start_info->shared_info;
- PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
-
- xc_printf("#4\n");
-
- xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
- PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
- console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
- PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
-
- xc_printf("#5\n");
-
- set_iopl.iopl = 1;
- PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
- xc_printf("#6\n");
-#if 0
- /* add page table for KERNBASE */
- xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_KERNEL);
- xen_flush_queue();
-#ifdef PAE
- xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_V | PG_A);
-#else
- xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_V | PG_A);
-#endif
- xen_flush_queue();
- cur_space += PAGE_SIZE;
- xc_printf("#6\n");
-#endif /* 0 */
-#ifdef notyet
- if (xen_start_info->flags & SIF_INITDOMAIN) {
- /* Map first megabyte */
- for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
- PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
- xen_flush_queue();
- }
-#endif
- /*
- * re-map kernel text read-only
- *
- */
- for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
- i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
- PT_SET_MA(i, VTOM(i) | PG_V | PG_A);
-
- xc_printf("#7\n");
- physfree = VTOP(cur_space);
- init_first = physfree >> PAGE_SHIFT;
- IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
- IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
- setup_xen_features();
- xc_printf("#8, proc0kstack=%u\n", proc0kstack);
-}
-
-
-trap_info_t trap_table[] = {
- { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
- { 1, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
- { 3, 3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
- { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
- /* This is UPL on Linux and KPL on BSD */
- { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
- { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
- { 7, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
- /*
- * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
- * no handler for double fault
- */
- { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
- {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
- {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
- {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
- {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
- {14, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
- {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
- {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
- {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
- {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
- {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
- {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
- { 0, 0, 0, 0 }
-};
-
-/* Perform a multicall and check that individual calls succeeded. */
-int
-HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls)
-{
- int ret = 0;
- int i;
-
- /* Perform the multicall. */
- PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));
-
- /* Check the results of individual hypercalls. */
- for (i = 0; i < nr_calls; i++)
- if (__predict_false(call_list[i].result < 0))
- ret++;
- if (__predict_false(ret > 0))
- panic("%d multicall(s) failed: cpu %d\n",
- ret, smp_processor_id());
-
- /* If we didn't panic already, everything succeeded. */
- return (0);
-}
-
-/********** CODE WORTH KEEPING ABOVE HERE *****************/
-
-void xen_failsafe_handler(void);
-
-void
-xen_failsafe_handler(void)
-{
-
- panic("xen_failsafe_handler called!\n");
-}
-
-void xen_handle_thread_switch(struct pcb *pcb);
-
-/* This is called by cpu_switch() when switching threads. */
-/* The pcb arg refers to the process control block of the */
-/* next thread which is to run */
-void
-xen_handle_thread_switch(struct pcb *pcb)
-{
- uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
- uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
- multicall_entry_t mcl[3];
- int i = 0;
-
- /* Notify Xen of task switch */
- mcl[i].op = __HYPERVISOR_stack_switch;
- mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
- mcl[i++].args[1] = (unsigned long)pcb;
-
- /* Check for update of fsd */
- if (*a != *b || *(a+1) != *(b+1)) {
- mcl[i].op = __HYPERVISOR_update_descriptor;
- *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
- *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
- }
-
- a += 2;
- b += 2;
-
- /* Check for update of gsd */
- if (*a != *b || *(a+1) != *(b+1)) {
- mcl[i].op = __HYPERVISOR_update_descriptor;
- *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
- *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
- }
-
- (void)HYPERVISOR_multicall(mcl, i);
-}

File Metadata

Mime Type
text/plain
Expires
Sat, Jan 18, 2:51 PM (17 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15867025
Default Alt Text
D2362.id4983.diff (261 KB)

Event Timeline