Index: head/lib/libkvm/kvm_i386.h =================================================================== --- head/lib/libkvm/kvm_i386.h (revision 343566) +++ head/lib/libkvm/kvm_i386.h (revision 343567) @@ -1,82 +1,84 @@ /*- * Copyright (c) 2015 John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __KVM_I386_H__ #define __KVM_I386_H__ #ifdef __i386__ #include #include #endif typedef uint32_t i386_physaddr_t; typedef uint32_t i386_pte_t; typedef uint32_t i386_pde_t; typedef uint64_t i386_physaddr_pae_t; typedef uint64_t i386_pte_pae_t; typedef uint64_t i386_pde_pae_t; #define I386_PAGE_SHIFT 12 #define I386_PAGE_SIZE (1 << I386_PAGE_SHIFT) #define I386_PAGE_MASK (I386_PAGE_SIZE - 1) #define I386_NPTEPG (I386_PAGE_SIZE / sizeof(i386_pte_t)) #define I386_PDRSHIFT 22 #define I386_NBPDR (1 << I386_PDRSHIFT) #define I386_PAGE_PS_MASK (I386_NBPDR - 1) #define I386_NPTEPG_PAE (I386_PAGE_SIZE / sizeof(i386_pte_pae_t)) #define I386_PDRSHIFT_PAE 21 #define I386_NBPDR_PAE (1 << I386_PDRSHIFT_PAE) #define I386_PAGE_PS_MASK_PAE (I386_NBPDR_PAE - 1) /* Source: i386/include/pmap.h */ #define I386_PG_V 0x001 #define I386_PG_RW 0x002 #define I386_PG_PS 0x080 #define I386_PG_NX (1ULL << 63) #define I386_PG_FRAME_PAE (0x000ffffffffff000ull) #define I386_PG_PS_FRAME_PAE (0x000fffffffe00000ull) #define I386_PG_FRAME (0xfffff000) #define I386_PG_PS_FRAME (0xffc00000) #ifdef __i386__ _Static_assert(PAGE_SHIFT == I386_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == I386_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == I386_PAGE_MASK, "PAGE_MASK mismatch"); +#if 0 _Static_assert(NPTEPG == I386_NPTEPG, "NPTEPG mismatch"); -_Static_assert(PDRSHIFT == I386_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(NBPDR == I386_NBPDR, "NBPDR mismatch"); +#endif +_Static_assert(PDRSHIFT_NOPAE == I386_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(PG_V == I386_PG_V, "PG_V mismatch"); _Static_assert(PG_PS == I386_PG_PS, "PG_PS mismatch"); -_Static_assert((u_int)PG_FRAME == I386_PG_FRAME, "PG_FRAME mismatch"); -_Static_assert(PG_PS_FRAME == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch"); +_Static_assert((u_int)PG_FRAME_NOPAE == I386_PG_FRAME, "PG_FRAME mismatch"); +_Static_assert(PG_PS_FRAME_NOPAE == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch"); #endif int _i386_native(kvm_t *); #endif /* !__KVM_I386_H__ */ Index: head/sys/conf/files.i386 =================================================================== --- head/sys/conf/files.i386 (revision 343566) +++ head/sys/conf/files.i386 (revision 343567) @@ -1,611 +1,615 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # cloudabi32_vdso.o optional compat_cloudabi32 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_i686.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_i686.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi32_vdso.o" # cloudabi32_vdso_blob.o optional compat_cloudabi32 \ dependency "cloudabi32_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 cloudabi32_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi32_vdso_blob.o" # linux_genassym.o optional compat_linux \ dependency "$S/i386/linux/linux_genassym.c offset.inc" \ compile-with "${CC} ${CFLAGS:N-flto:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux_genassym.o" # linux_assym.h optional compat_linux \ dependency "$S/kern/genassym.sh linux_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux_assym.h" # linux_locore.o optional compat_linux \ dependency "linux_assym.h $S/i386/linux/linux_locore.s" \ compile-with "${CC} -x assembler-with-cpp -DLOCORE -shared -s -pipe -I. -I$S -Werror -Wall -fPIC -fno-common -nostdinc -nostdlib -Wl,-T$S/i386/linux/linux_vdso.lds.s -Wl,-soname=linux_vdso.so,--eh-frame-hdr,-warn-common ${.IMPSRC} -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "linux_locore.o" # linux_vdso.so optional compat_linux \ dependency "linux_locore.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 linux_locore.o ${.TARGET}" \ no-implicit-rule \ clean "linux_vdso.so" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/i386-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/i386-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ no-implicit-rule # cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs | dtrace compile-with "${ZFS_S}" cddl/dev/dtrace/i386/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/i386/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/x86/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" cddl/dev/dtrace/x86/dis_tables.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" cddl/dev/dtrace/x86/instr_size.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs compat/linux/linux_event.c optional compat_linux compat/linux/linux_emul.c optional compat_linux compat/linux/linux_errno.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux compat/linux/linux_futex.c optional compat_linux compat/linux/linux_getcwd.c optional compat_linux compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux compat/linux/linux_mmap.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux compat/linux/linux_sysctl.c optional compat_linux compat/linux/linux_time.c optional compat_linux compat/linux/linux_timer.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux compat/linux/linux_vdso.c optional compat_linux compat/linux/linux.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci compat/ndis/subr_ndis.c optional ndisapi pci compat/ndis/subr_ntoskrnl.c optional ndisapi pci compat/ndis/subr_pe.c optional ndisapi pci compat/ndis/subr_usbd.c optional ndisapi pci compat/ndis/winx32_wrap.S optional ndisapi pci bf_enc.o optional crypto | ipsec | ipsec_support \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni aesni_ghash.o optional aesni \ dependency "$S/crypto/aesni/aesni_ghash.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/des/arch/i386/des_enc.S optional crypto | ipsec | ipsec_support | netsmb intel_sha1.o optional aesni \ dependency "$S/crypto/aesni/intel_sha1.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -msha ${.IMPSRC}" \ no-implicit-rule \ clean "intel_sha1.o" intel_sha256.o optional aesni \ dependency "$S/crypto/aesni/intel_sha256.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -msha ${.IMPSRC}" \ no-implicit-rule \ clean "intel_sha256.o" crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/acpica/acpi_pci.c optional acpi pci dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/acpica/acpi_pcib_acpi.c optional acpi pci dev/acpica/acpi_pcib_pci.c optional acpi pci dev/agp/agp_ali.c optional agp dev/agp/agp_amd.c optional agp dev/agp/agp_amd64.c optional agp dev/agp/agp_ati.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_nvidia.c optional agp dev/agp/agp_sis.c optional agp dev/agp/agp_via.c optional agp dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdsmn/amdsmn.c optional amdsmn | amdtemp dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/bxe/bxe.c optional bxe pci dev/bxe/bxe_stats.c optional bxe pci dev/bxe/bxe_debug.c optional bxe pci dev/bxe/ecore_sp.c optional bxe pci dev/bxe/bxe_elink.c optional bxe pci dev/bxe/57710_init_values.c optional bxe pci dev/bxe/57711_init_values.c optional bxe pci dev/bxe/57712_init_values.c optional bxe pci dev/ce/ceddk.c optional ce dev/ce/if_ce.c optional ce dev/ce/tau32-ddk.c optional ce \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/coretemp/coretemp.c optional coretemp dev/cp/cpddk.c optional cp dev/cp/if_cp.c optional cp dev/cpuctl/cpuctl.c optional cpuctl dev/ctau/ctau.c optional ctau dev/ctau/ctddk.c optional ctau dev/ctau/if_ct.c optional ctau dev/cx/csigma.c optional cx dev/cx/cxddk.c optional cx dev/cx/if_cx.c optional cx dev/dpms/dpms.c optional dpms dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/ep/elink.c optional ep dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fe/if_fe_isa.c optional fe isa dev/glxiic/glxiic.c optional glxiic dev/glxsb/glxsb.c optional glxsb dev/glxsb/glxsb_hash.c optional glxsb dev/gpio/bytgpio.c optional bytgpio dev/gpio/chvgpio.c optional chvgpio dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci dev/hyperv/netvsc/hn_nvs.c optional hyperv dev/hyperv/netvsc/hn_rndis.c optional hyperv dev/hyperv/netvsc/if_hn.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv dev/hyperv/utilities/hv_snapshot.c optional hyperv dev/hyperv/utilities/vmbus_heartbeat.c optional hyperv dev/hyperv/utilities/vmbus_ic.c optional hyperv dev/hyperv/utilities/vmbus_shutdown.c optional hyperv dev/hyperv/utilities/vmbus_timesync.c optional hyperv dev/hyperv/vmbus/hyperv.c optional hyperv dev/hyperv/vmbus/hyperv_busdma.c optional hyperv dev/hyperv/vmbus/vmbus.c optional hyperv pci dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv dev/hyperv/vmbus/vmbus_res.c optional hyperv dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/i386/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/i386/vmbus_vector.S optional hyperv dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/imcsmb/imcsmb.c optional imcsmb dev/imcsmb/imcsmb_pci.c optional imcsmb pci dev/intel/spi.c optional intelspi dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux dev/le/if_le_isa.c optional le isa dev/nctgpio/nctgpio.c optional nctgpio dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_transport.c optional ntb_transport | if_ntb dev/ntb/ntb.c optional ntb | ntb_transport | if_ntb | ntb_hw_intel | ntb_hw_plx | ntb_hw dev/ntb/ntb_if.m optional ntb | ntb_transport | if_ntb | ntb_hw_intel | ntb_hw_plx | ntb_hw dev/ntb/ntb_hw/ntb_hw_intel.c optional ntb_hw_intel | ntb_hw dev/ntb/ntb_hw/ntb_hw_plx.c optional ntb_hw_plx | ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/ofw/ofwpci.c optional fdt pci dev/pcf/pcf_isa.c optional pcf dev/random/ivy.c optional rdrand_rng dev/random/nehemiah.c optional padlock_rng dev/sbni/if_sbni.c optional sbni dev/sbni/if_sbni_isa.c optional sbni isa dev/sbni/if_sbni_pci.c optional sbni pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/vmware/vmci/vmci.c optional vmci dev/vmware/vmci/vmci_datagram.c optional vmci dev/vmware/vmci/vmci_doorbell.c optional vmci dev/vmware/vmci/vmci_driver.c optional vmci dev/vmware/vmci/vmci_event.c optional vmci dev/vmware/vmci/vmci_hashtable.c optional vmci dev/vmware/vmci/vmci_kernel_if.c optional vmci dev/vmware/vmci/vmci_qpair.c optional vmci dev/vmware/vmci/vmci_queue_pair.c optional vmci dev/vmware/vmci/vmci_resource.c optional vmci dev/acpica/acpi_if.m standard dev/acpica/acpi_hpet.c optional acpi dev/acpica/acpi_timer.c optional acpi dev/acpica/acpi_pxm.c optional acpi dev/acpi_support/acpi_wmi_if.m standard dev/wbwd/wbwd.c optional wbwd dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci i386/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/i386/acpica/acpi_wakecode.S assym.inc" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # i386/bios/apm.c optional apm i386/bios/smapi.c optional smapi i386/bios/smapi_bios.S optional smapi i386/cloudabi32/cloudabi32_sysvec.c optional compat_cloudabi32 #i386/i386/apic_vector.s optional apic i386/i386/bios.c standard i386/i386/bioscall.s standard i386/i386/bpf_jit_machdep.c optional bpf_jitter i386/i386/copyout.c standard i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris i386/i386/elf_machdep.c standard i386/i386/exception.s standard i386/i386/gdb_machdep.c optional gdb i386/i386/geode.c optional cpu_geode i386/i386/in_cksum.c optional inet | inet6 i386/i386/initcpu.c standard i386/i386/io.c optional io i386/i386/k6_mem.c optional mem i386/i386/locore.s standard no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard +i386/i386/minidump_machdep_pae.c standard +i386/i386/minidump_machdep_nopae.c standard i386/i386/mp_clock.c optional smp i386/i386/mp_machdep.c optional smp i386/i386/mpboot.s optional smp i386/i386/npx.c standard i386/i386/perfmon.c optional perfmon -i386/i386/pmap.c standard +i386/i386/pmap_base.c standard +i386/i386/pmap_nopae.c standard +i386/i386/pmap_pae.c standard i386/i386/prof_machdep.c optional profiling-routine i386/i386/ptrace_machdep.c standard i386/i386/sigtramp.s standard i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard i386/linux/imgact_linux.c optional compat_linux i386/linux/linux_copyout.c optional compat_linux i386/linux/linux_dummy.c optional compat_linux i386/linux/linux_machdep.c optional compat_linux i386/linux/linux_ptrace.c optional compat_linux i386/linux/linux_sysent.c optional compat_linux i386/linux/linux_sysvec.c optional compat_linux i386/pci/pci_cfgreg.c optional pci i386/pci/pci_pir.c optional pci isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip kern/subr_sfbuf.c standard libkern/divdi3.c standard libkern/ffsll.c standard libkern/flsll.c standard libkern/memcmp.c standard libkern/memset.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard libkern/x86/crc32_sse42.c standard # # x86 real mode BIOS support, required by dpms/pci/vesa # compat/x86bios/x86bios.c optional x86bios | dpms | pci | vesa # # bvm console # dev/bvm/bvm_console.c optional bvmconsole dev/bvm/bvm_dbg.c optional bvmdebug # # x86 shared code between IA32 and AMD64 architectures # x86/acpica/OsdEnvironment.c optional acpi x86/acpica/acpi_apm.c optional acpi x86/acpica/acpi_wakeup.c optional acpi x86/acpica/madt.c optional acpi apic x86/acpica/srat.c optional acpi x86/bios/smbios.c optional smbios x86/bios/vpd.c optional vpd x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/smist.c optional cpufreq x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci x86/iommu/intel_ctx.c optional acpi acpi_dmar pci x86/iommu/intel_drv.c optional acpi acpi_dmar pci x86/iommu/intel_fault.c optional acpi acpi_dmar pci x86/iommu/intel_gas.c optional acpi acpi_dmar pci x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci x86/iommu/intel_intrmap.c optional acpi acpi_dmar pci x86/iommu/intel_qi.c optional acpi acpi_dmar pci x86/iommu/intel_quirks.c optional acpi acpi_dmar pci x86/iommu/intel_utils.c optional acpi acpi_dmar pci x86/isa/atpic.c optional atpic x86/isa/atrtc.c standard x86/isa/clock.c standard x86/isa/elcr.c optional atpic | apic x86/isa/isa.c optional isa x86/isa/isa_dma.c optional isa x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/autoconf.c standard x86/x86/bus_machdep.c standard x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard x86/x86/io_apic.c optional apic x86/x86/legacy.c standard x86/x86/local_apic.c optional apic x86/x86/mca.c standard x86/x86/x86_mem.c optional mem x86/x86/mptable.c optional apic x86/x86/mptable_pci.c optional apic pci x86/x86/mp_x86.c optional smp x86/x86/mp_watchdog.c optional mp_watchdog smp x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/ucode.c standard x86/x86/pvclock.c standard x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xenhvm x86/xen/xen_apic.c optional xenhvm x86/xen/xenpv.c optional xenhvm x86/xen/xen_nexus.c optional xenhvm x86/xen/xen_msi.c optional xenhvm Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 (revision 343566) +++ head/sys/conf/options.i386 (revision 343567) @@ -1,121 +1,116 @@ # $FreeBSD$ # Options specific to the i386 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h BROKEN_KEYBOARD_RESET opt_reset.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h DISABLE_PG_G opt_pmap.h DISABLE_PSE opt_pmap.h I586_PMC_GUPROF opt_i586_guprof.h MAXMEM MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PERFMON PMAP_SHPGPERPROC opt_pmap.h POWERFAIL_NMI opt_trap.h PV_STATS opt_pmap.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h COMPAT_LINUX opt_dontuse.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h # Physical address extensions and support for >4G ram. As above. PAE opt_global.h -# Use PAE page tables, but limit memory support to 4GB. -# This keeps the i386 non-PAE KBI, in particular, drivers see -# 32bit vm_paddr_t. -PAE_TABLES opt_global.h - TIMER_FREQ opt_clock.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h CPU_ELAN opt_cpu.h CPU_ELAN_PPS opt_cpu.h CPU_ELAN_XTAL opt_cpu.h CPU_ENABLE_LONGRUN opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_GEODE opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SOEKRIS opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h NO_F00F_HACK opt_cpu.h NO_MEMORY_HOLE opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. I486_CPU opt_global.h I586_CPU opt_global.h I686_CPU opt_global.h # options for serial support COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h CONSPEED opt_sio.h GDBSPEED opt_sio.h COM_NO_ACPI opt_sio.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA VESA_DEBUG opt_vesa.h # AGP debugging support AGP_DEBUG opt_agp.h PSM_DEBUG opt_psm.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h ATKBD_DFLT_KEYMAP opt_atkbd.h # Video spigot SPIGOT_UNSECURE opt_spigot.h # Enables NETGRAPH support for Cronyx adapters NETGRAPH_CRONYX opt_ng_cronyx.h # Device options DEV_APIC opt_apic.h DEV_ATPIC opt_atpic.h # Debugging NPX_DEBUG opt_npx.h # BPF just-in-time compiler BPF_JITTER opt_bpf.h XENHVM opt_global.h # options for the Intel C600 SAS driver (isci) ISCI_LOGGING opt_isci.h Index: head/sys/dev/dcons/dcons_os.c =================================================================== --- head/sys/dev/dcons/dcons_os.c (revision 343566) +++ head/sys/dev/dcons/dcons_os.c (revision 343567) @@ -1,495 +1,495 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 2003,2004 * Hidetoshi Shimokawa. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * * This product includes software developed by Hidetoshi Shimokawa. * * 4. Neither the name of the author nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_dcons.h" #include "opt_kdb.h" #include "opt_gdb.h" #include "opt_ddb.h" #ifndef DCONS_POLL_HZ #define DCONS_POLL_HZ 25 #endif #ifndef DCONS_POLL_IDLE #define DCONS_POLL_IDLE 256 #endif #ifndef DCONS_BUF_SIZE #define DCONS_BUF_SIZE (16*1024) #endif #ifndef DCONS_FORCE_CONSOLE #define DCONS_FORCE_CONSOLE 0 /* Mostly for FreeBSD-4/DragonFly */ #endif #ifndef KLD_MODULE static char bssbuf[DCONS_BUF_SIZE]; /* buf in bss */ #endif /* global data */ static struct dcons_global dg; struct dcons_global *dcons_conf; static int poll_hz = DCONS_POLL_HZ; static u_int poll_idle = DCONS_POLL_HZ * DCONS_POLL_IDLE; static struct dcons_softc sc[DCONS_NPORT]; static SYSCTL_NODE(_kern, OID_AUTO, dcons, CTLFLAG_RD, 0, "Dumb Console"); SYSCTL_INT(_kern_dcons, OID_AUTO, poll_hz, CTLFLAG_RW, &poll_hz, 0, "dcons polling rate"); static int drv_init = 0; static struct callout dcons_callout; struct dcons_buf *dcons_buf; /* for local dconschat */ static void dcons_timeout(void *); static int dcons_drv_init(int); static cn_probe_t dcons_cnprobe; static cn_init_t dcons_cninit; static cn_term_t dcons_cnterm; static cn_getc_t dcons_cngetc; static cn_putc_t dcons_cnputc; static cn_grab_t dcons_cngrab; static cn_ungrab_t dcons_cnungrab; CONSOLE_DRIVER(dcons); #if defined(GDB) static gdb_probe_f dcons_dbg_probe; static gdb_init_f dcons_dbg_init; static gdb_term_f dcons_dbg_term; static gdb_getc_f dcons_dbg_getc; static gdb_putc_f dcons_dbg_putc; GDB_DBGPORT(dcons, dcons_dbg_probe, dcons_dbg_init, dcons_dbg_term, dcons_dbg_getc, dcons_dbg_putc); extern struct gdb_dbgport *gdb_cur; #endif static tsw_outwakeup_t dcons_outwakeup; static struct ttydevsw dcons_ttydevsw = { .tsw_flags = TF_NOPREFIX, .tsw_outwakeup = dcons_outwakeup, }; #if (defined(GDB) || defined(DDB)) static int dcons_check_break(struct dcons_softc *dc, int c) { if (c < 0) return (c); #ifdef GDB if ((dc->flags & DC_GDB) != 0 && gdb_cur == &dcons_gdb_dbgport) kdb_alt_break_gdb(c, &dc->brk_state); else #endif kdb_alt_break(c, &dc->brk_state); return (c); } #else #define dcons_check_break(dc, c) (c) #endif static int dcons_os_checkc_nopoll(struct dcons_softc *dc) { int c; if (dg.dma_tag != NULL) bus_dmamap_sync(dg.dma_tag, dg.dma_map, BUS_DMASYNC_POSTREAD); c = dcons_check_break(dc, dcons_checkc(dc)); if (dg.dma_tag != NULL) bus_dmamap_sync(dg.dma_tag, dg.dma_map, BUS_DMASYNC_PREREAD); return (c); } static int dcons_os_checkc(struct dcons_softc *dc) { EVENTHANDLER_INVOKE(dcons_poll, 0); return (dcons_os_checkc_nopoll(dc)); } static void dcons_os_putc(struct dcons_softc *dc, int c) { if (dg.dma_tag != NULL) bus_dmamap_sync(dg.dma_tag, dg.dma_map, BUS_DMASYNC_POSTWRITE); dcons_putc(dc, c); if (dg.dma_tag != NULL) bus_dmamap_sync(dg.dma_tag, dg.dma_map, BUS_DMASYNC_PREWRITE); } static void dcons_outwakeup(struct tty *tp) { struct dcons_softc *dc; char ch; dc = tty_softc(tp); while (ttydisc_getc(tp, &ch, sizeof ch) != 0) dcons_os_putc(dc, ch); } static void dcons_timeout(void *v) { struct tty *tp; struct dcons_softc *dc; int i, c, polltime; for (i = 0; i < DCONS_NPORT; i ++) { dc = &sc[i]; tp = dc->tty; tty_lock(tp); while ((c = dcons_os_checkc_nopoll(dc)) != -1) { ttydisc_rint(tp, c, 0); poll_idle = 0; } ttydisc_rint_done(tp); tty_unlock(tp); } poll_idle++; polltime = hz; if (poll_idle <= (poll_hz * DCONS_POLL_IDLE)) polltime /= poll_hz; callout_reset(&dcons_callout, polltime, dcons_timeout, tp); } static void dcons_cnprobe(struct consdev *cp) { sprintf(cp->cn_name, "dcons"); #if DCONS_FORCE_CONSOLE cp->cn_pri = CN_REMOTE; #else cp->cn_pri = CN_NORMAL; #endif } static void dcons_cninit(struct consdev *cp) { dcons_drv_init(0); cp->cn_arg = (void *)&sc[DCONS_CON]; /* share port0 with unit0 */ } static void dcons_cnterm(struct consdev *cp) { } static void dcons_cngrab(struct consdev *cp) { } static void dcons_cnungrab(struct consdev *cp) { } static int dcons_cngetc(struct consdev *cp) { struct dcons_softc *dc = (struct dcons_softc *)cp->cn_arg; return (dcons_os_checkc(dc)); } static void dcons_cnputc(struct consdev *cp, int c) { struct dcons_softc *dc = (struct dcons_softc *)cp->cn_arg; dcons_os_putc(dc, c); } static int dcons_drv_init(int stage) { #if defined(__i386__) || defined(__amd64__) quad_t addr, size; #endif if (drv_init) return(drv_init); drv_init = -1; bzero(&dg, sizeof(dg)); dcons_conf = &dg; dg.cdev = &dcons_consdev; dg.buf = NULL; dg.size = DCONS_BUF_SIZE; #if defined(__i386__) || defined(__amd64__) if (getenv_quad("dcons.addr", &addr) > 0 && getenv_quad("dcons.size", &size) > 0) { #ifdef __i386__ vm_paddr_t pa; /* * Allow read/write access to dcons buffer. */ for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE) - *vtopte(PMAP_MAP_LOW + pa) |= PG_RW; + pmap_ksetrw(PMAP_MAP_LOW + pa); invltlb(); #endif /* XXX P to V */ #ifdef __amd64__ dg.buf = (struct dcons_buf *)(vm_offset_t)(KERNBASE + addr); #else /* __i386__ */ dg.buf = (struct dcons_buf *)((vm_offset_t)PMAP_MAP_LOW + addr); #endif dg.size = size; if (dcons_load_buffer(dg.buf, dg.size, sc) < 0) dg.buf = NULL; } #endif if (dg.buf != NULL) goto ok; #ifndef KLD_MODULE if (stage == 0) { /* XXX or cold */ /* * DCONS_FORCE_CONSOLE == 1 and statically linked. * called from cninit(). can't use contigmalloc yet . */ dg.buf = (struct dcons_buf *) bssbuf; dcons_init(dg.buf, dg.size, sc); } else #endif { /* * DCONS_FORCE_CONSOLE == 0 or kernel module case. * if the module is loaded after boot, * bssbuf could be non-continuous. */ dg.buf = (struct dcons_buf *) contigmalloc(dg.size, M_DEVBUF, 0, 0x10000, 0xffffffff, PAGE_SIZE, 0ul); if (dg.buf == NULL) return (-1); dcons_init(dg.buf, dg.size, sc); } ok: dcons_buf = dg.buf; drv_init = 1; return 0; } static int dcons_attach_port(int port, char *name, int flags) { struct dcons_softc *dc; struct tty *tp; dc = &sc[port]; tp = tty_alloc(&dcons_ttydevsw, dc); dc->flags = flags; dc->tty = tp; tty_init_console(tp, 0); tty_makedev(tp, NULL, "%s", name); return(0); } static int dcons_attach(void) { int polltime; dcons_attach_port(DCONS_CON, "dcons", 0); dcons_attach_port(DCONS_GDB, "dgdb", DC_GDB); callout_init(&dcons_callout, 1); polltime = hz / poll_hz; callout_reset(&dcons_callout, polltime, dcons_timeout, NULL); return(0); } static int dcons_detach(int port) { struct tty *tp; struct dcons_softc *dc; dc = &sc[port]; tp = dc->tty; tty_lock(tp); tty_rel_gone(tp); return(0); } static int dcons_modevent(module_t mode, int type, void *data) { int err = 0, ret; switch (type) { case MOD_LOAD: ret = dcons_drv_init(1); if (ret != -1) dcons_attach(); if (ret == 0) { dcons_cnprobe(&dcons_consdev); dcons_cninit(&dcons_consdev); cnadd(&dcons_consdev); } break; case MOD_UNLOAD: printf("dcons: unload\n"); if (drv_init == 1) { callout_stop(&dcons_callout); cnremove(&dcons_consdev); dcons_detach(DCONS_CON); dcons_detach(DCONS_GDB); dg.buf->magic = 0; contigfree(dg.buf, DCONS_BUF_SIZE, M_DEVBUF); } break; case MOD_SHUTDOWN: #if 0 /* Keep connection after halt */ dg.buf->magic = 0; #endif break; default: err = EOPNOTSUPP; break; } return(err); } #if defined(GDB) /* Debugger interface */ static int dcons_os_getc(struct dcons_softc *dc) { int c; while ((c = dcons_os_checkc(dc)) == -1); return (c & 0xff); } static int dcons_dbg_probe(void) { int dcons_gdb; if (getenv_int("dcons_gdb", &dcons_gdb) == 0) return (-1); return (dcons_gdb); } static void dcons_dbg_init(void) { } static void dcons_dbg_term(void) { } static void dcons_dbg_putc(int c) { struct dcons_softc *dc = &sc[DCONS_GDB]; dcons_os_putc(dc, c); } static int dcons_dbg_getc(void) { struct dcons_softc *dc = &sc[DCONS_GDB]; return (dcons_os_getc(dc)); } #endif DEV_MODULE(dcons, dcons_modevent, NULL); MODULE_VERSION(dcons, DCONS_VERSION); Index: head/sys/dev/fb/fb.c =================================================================== --- head/sys/dev/fb/fb.c (revision 343566) +++ head/sys/dev/fb/fb.c (revision 343567) @@ -1,760 +1,760 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1999 Kazutaka YOKOTA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_fb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include SET_DECLARE(videodriver_set, const video_driver_t); /* local arrays */ /* * We need at least one entry each in order to initialize a video card * for the kernel console. The arrays will be increased dynamically * when necessary. */ static int vid_malloc; static int adapters = 1; static video_adapter_t *adp_ini; static video_adapter_t **adapter = &adp_ini; static video_switch_t *vidsw_ini; video_switch_t **vidsw = &vidsw_ini; #ifdef FB_INSTALL_CDEV static struct cdevsw *vidcdevsw_ini; static struct cdevsw **vidcdevsw = &vidcdevsw_ini; #endif #define ARRAY_DELTA 4 static int vid_realloc_array(void) { video_adapter_t **new_adp; video_switch_t **new_vidsw; #ifdef FB_INSTALL_CDEV struct cdevsw **new_cdevsw; #endif int newsize; int s; if (!vid_malloc) return ENOMEM; s = spltty(); newsize = rounddown(adapters + ARRAY_DELTA, ARRAY_DELTA); new_adp = malloc(sizeof(*new_adp)*newsize, M_DEVBUF, M_WAITOK | M_ZERO); new_vidsw = malloc(sizeof(*new_vidsw)*newsize, M_DEVBUF, M_WAITOK | M_ZERO); #ifdef FB_INSTALL_CDEV new_cdevsw = malloc(sizeof(*new_cdevsw)*newsize, M_DEVBUF, M_WAITOK | M_ZERO); #endif bcopy(adapter, new_adp, sizeof(*adapter)*adapters); bcopy(vidsw, new_vidsw, sizeof(*vidsw)*adapters); #ifdef FB_INSTALL_CDEV bcopy(vidcdevsw, new_cdevsw, sizeof(*vidcdevsw)*adapters); #endif if (adapters > 1) { free(adapter, M_DEVBUF); free(vidsw, M_DEVBUF); #ifdef FB_INSTALL_CDEV free(vidcdevsw, M_DEVBUF); #endif } adapter = new_adp; vidsw = new_vidsw; #ifdef FB_INSTALL_CDEV vidcdevsw = new_cdevsw; #endif adapters = newsize; splx(s); if (bootverbose) printf("fb: new array size %d\n", adapters); return 0; } static void vid_malloc_init(void *arg) { vid_malloc = TRUE; } SYSINIT(vid_mem, SI_SUB_KMEM, SI_ORDER_ANY, vid_malloc_init, NULL); /* * Low-level frame buffer driver functions * frame buffer subdrivers, such as the VGA driver, call these functions * to initialize the video_adapter structure and register it to the virtual * frame buffer driver `fb'. */ /* initialize the video_adapter_t structure */ void vid_init_struct(video_adapter_t *adp, char *name, int type, int unit) { adp->va_flags = 0; adp->va_name = name; adp->va_type = type; adp->va_unit = unit; } /* Register a video adapter */ int vid_register(video_adapter_t *adp) { const video_driver_t **list; const video_driver_t *p; int index; for (index = 0; index < adapters; ++index) { if (adapter[index] == NULL) break; } if (index >= adapters) { if (vid_realloc_array()) return -1; } adp->va_index = index; adp->va_token = NULL; SET_FOREACH(list, videodriver_set) { p = *list; if (strcmp(p->name, adp->va_name) == 0) { adapter[index] = adp; vidsw[index] = p->vidsw; return index; } } return -1; } int vid_unregister(video_adapter_t *adp) { if ((adp->va_index < 0) || (adp->va_index >= adapters)) return ENOENT; if (adapter[adp->va_index] != adp) return ENOENT; adapter[adp->va_index] = NULL; vidsw[adp->va_index] = NULL; return 0; } /* Get video I/O function table */ video_switch_t *vid_get_switch(char *name) { const video_driver_t **list; const video_driver_t *p; SET_FOREACH(list, videodriver_set) { p = *list; if (strcmp(p->name, name) == 0) return p->vidsw; } return NULL; } /* * Video card client functions * Video card clients, such as the console driver `syscons' and the frame * buffer cdev driver, use these functions to claim and release a card for * exclusive use. */ /* find the video card specified by a driver name and a unit number */ int vid_find_adapter(char *driver, int unit) { int i; for (i = 0; i < adapters; ++i) { if (adapter[i] == NULL) continue; if (strcmp("*", driver) && strcmp(adapter[i]->va_name, driver)) continue; if ((unit != -1) && (adapter[i]->va_unit != unit)) continue; return i; } return -1; } /* allocate a video card */ int vid_allocate(char *driver, int unit, void *id) { int index; int s; s = spltty(); index = vid_find_adapter(driver, unit); if (index >= 0) { if (adapter[index]->va_token) { splx(s); return -1; } adapter[index]->va_token = id; } splx(s); return index; } int vid_release(video_adapter_t *adp, void *id) { int error; int s; s = spltty(); if (adp->va_token == NULL) { error = EINVAL; } else if (adp->va_token != id) { error = EPERM; } else { adp->va_token = NULL; error = 0; } splx(s); return error; } /* Get a video adapter structure */ video_adapter_t *vid_get_adapter(int index) { if ((index < 0) || (index >= adapters)) return NULL; return adapter[index]; } /* Configure drivers: this is a backdoor for the console driver XXX */ int vid_configure(int flags) { const video_driver_t **list; const video_driver_t *p; SET_FOREACH(list, videodriver_set) { p = *list; if (p->configure != NULL) (*p->configure)(flags); } return 0; } /* * Virtual frame buffer cdev driver functions * The virtual frame buffer driver dispatches driver functions to * appropriate subdrivers. */ #define FB_DRIVER_NAME "fb" #ifdef FB_INSTALL_CDEV #if 0 /* experimental */ static devclass_t fb_devclass; static int fbprobe(device_t dev); static int fbattach(device_t dev); static device_method_t fb_methods[] = { DEVMETHOD(device_probe, fbprobe), DEVMETHOD(device_attach, fbattach), DEVMETHOD_END }; static driver_t fb_driver = { FB_DRIVER_NAME, fb_methods, 0, }; static int fbprobe(device_t dev) { int unit; unit = device_get_unit(dev); if (unit >= adapters) return ENXIO; if (adapter[unit] == NULL) return ENXIO; device_set_desc(dev, "generic frame buffer"); return 0; } static int fbattach(device_t dev) { printf("fbattach: about to attach children\n"); bus_generic_attach(dev); return 0; } #endif #define FB_UNIT(dev) dev2unit(dev) #define FB_MKMINOR(unit) (u) #if 0 /* experimental */ static d_open_t fbopen; static d_close_t fbclose; static d_read_t fbread; static d_write_t fbwrite; static d_ioctl_t fbioctl; static d_mmap_t fbmmap; static struct cdevsw fb_cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT, .d_open = fbopen, .d_close = fbclose, .d_read = fbread, .d_write = fbwrite, .d_ioctl = fbioctl, .d_mmap = fbmmap, .d_name = FB_DRIVER_NAME, }; #endif static int fb_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: printf("fb module unload - not possible for this module type\n"); return EINVAL; default: return EOPNOTSUPP; } return 0; } static moduledata_t fb_mod = { "fb", fb_modevent, NULL }; DECLARE_MODULE(fb, fb_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); int fb_attach(int unit, video_adapter_t *adp, struct cdevsw *cdevsw) { int s; if (adp->va_index >= adapters) return EINVAL; if (adapter[adp->va_index] != adp) return EINVAL; s = spltty(); adp->va_minor = unit; vidcdevsw[adp->va_index] = cdevsw; splx(s); printf("fb%d at %s%d\n", adp->va_index, adp->va_name, adp->va_unit); return 0; } int fb_detach(int unit, video_adapter_t *adp, struct cdevsw *cdevsw) { int s; if (adp->va_index >= adapters) return EINVAL; if (adapter[adp->va_index] != adp) return EINVAL; if (vidcdevsw[adp->va_index] != cdevsw) return EINVAL; s = spltty(); vidcdevsw[adp->va_index] = NULL; splx(s); return 0; } /* * Generic frame buffer cdev driver functions * Frame buffer subdrivers may call these functions to implement common * driver functions. */ int genfbopen(genfb_softc_t *sc, video_adapter_t *adp, int flag, int mode, struct thread *td) { int s; s = spltty(); if (!(sc->gfb_flags & FB_OPEN)) sc->gfb_flags |= FB_OPEN; splx(s); return 0; } int genfbclose(genfb_softc_t *sc, video_adapter_t *adp, int flag, int mode, struct thread *td) { int s; s = spltty(); sc->gfb_flags &= ~FB_OPEN; splx(s); return 0; } int genfbread(genfb_softc_t *sc, video_adapter_t *adp, struct uio *uio, int flag) { int size; int offset; int error; int len; error = 0; size = adp->va_buffer_size/adp->va_info.vi_planes; while (uio->uio_resid > 0) { if (uio->uio_offset >= size) break; offset = uio->uio_offset%adp->va_window_size; len = imin(uio->uio_resid, size - uio->uio_offset); len = imin(len, adp->va_window_size - offset); if (len <= 0) break; vidd_set_win_org(adp, uio->uio_offset); error = uiomove((caddr_t)(adp->va_window + offset), len, uio); if (error) break; } return error; } int genfbwrite(genfb_softc_t *sc, video_adapter_t *adp, struct uio *uio, int flag) { return ENODEV; } int genfbioctl(genfb_softc_t *sc, video_adapter_t *adp, u_long cmd, caddr_t arg, int flag, struct thread *td) { int error; if (adp == NULL) /* XXX */ return ENXIO; error = vidd_ioctl(adp, cmd, arg); if (error == ENOIOCTL) error = ENODEV; return error; } int genfbmmap(genfb_softc_t *sc, video_adapter_t *adp, vm_ooffset_t offset, - vm_offset_t *paddr, int prot, vm_memattr_t *memattr) + vm_paddr_t *paddr, int prot, vm_memattr_t *memattr) { return vidd_mmap(adp, offset, paddr, prot, memattr); } #endif /* FB_INSTALL_CDEV */ static char *adapter_name(int type) { static struct { int type; char *name; } names[] = { { KD_MONO, "MDA" }, { KD_HERCULES, "Hercules" }, { KD_CGA, "CGA" }, { KD_EGA, "EGA" }, { KD_VGA, "VGA" }, { KD_TGA, "TGA" }, { -1, "Unknown" }, }; int i; for (i = 0; names[i].type != -1; ++i) if (names[i].type == type) break; return names[i].name; } /* * Generic low-level frame buffer functions * The low-level functions in the frame buffer subdriver may use these * functions. */ void fb_dump_adp_info(char *driver, video_adapter_t *adp, int level) { if (level <= 0) return; printf("%s%d: %s%d, %s, type:%s (%d), flags:0x%x\n", FB_DRIVER_NAME, adp->va_index, driver, adp->va_unit, adp->va_name, adapter_name(adp->va_type), adp->va_type, adp->va_flags); printf("%s%d: port:0x%lx-0x%lx, crtc:0x%lx, mem:0x%lx 0x%x\n", FB_DRIVER_NAME, adp->va_index, (u_long)adp->va_io_base, (u_long)adp->va_io_base + adp->va_io_size - 1, (u_long)adp->va_crtc_addr, (u_long)adp->va_mem_base, adp->va_mem_size); printf("%s%d: init mode:%d, bios mode:%d, current mode:%d\n", FB_DRIVER_NAME, adp->va_index, adp->va_initial_mode, adp->va_initial_bios_mode, adp->va_mode); printf("%s%d: window:%p size:%dk gran:%dk, buf:%p size:%dk\n", FB_DRIVER_NAME, adp->va_index, (void *)adp->va_window, (int)adp->va_window_size/1024, (int)adp->va_window_gran/1024, (void *)adp->va_buffer, (int)adp->va_buffer_size/1024); } void fb_dump_mode_info(char *driver, video_adapter_t *adp, video_info_t *info, int level) { if (level <= 0) return; printf("%s%d: %s, mode:%d, flags:0x%x ", driver, adp->va_unit, adp->va_name, info->vi_mode, info->vi_flags); if (info->vi_flags & V_INFO_GRAPHICS) printf("G %dx%dx%d, %d plane(s), font:%dx%d, ", info->vi_width, info->vi_height, info->vi_depth, info->vi_planes, info->vi_cwidth, info->vi_cheight); else printf("T %dx%d, font:%dx%d, ", info->vi_width, info->vi_height, info->vi_cwidth, info->vi_cheight); printf("win:0x%lx\n", (u_long)info->vi_window); } int fb_type(int adp_type) { static struct { int fb_type; int va_type; } types[] = { { FBTYPE_MDA, KD_MONO }, { FBTYPE_HERCULES, KD_HERCULES }, { FBTYPE_CGA, KD_CGA }, { FBTYPE_EGA, KD_EGA }, { FBTYPE_VGA, KD_VGA }, { FBTYPE_TGA, KD_TGA }, }; int i; for (i = 0; i < nitems(types); ++i) { if (types[i].va_type == adp_type) return types[i].fb_type; } return -1; } int fb_commonioctl(video_adapter_t *adp, u_long cmd, caddr_t arg) { int error; int s; /* assert(adp != NULL) */ error = 0; s = spltty(); switch (cmd) { case FBIO_ADAPTER: /* get video adapter index */ *(int *)arg = adp->va_index; break; case FBIO_ADPTYPE: /* get video adapter type */ *(int *)arg = adp->va_type; break; case FBIO_ADPINFO: /* get video adapter info */ ((video_adapter_info_t *)arg)->va_index = adp->va_index; ((video_adapter_info_t *)arg)->va_type = adp->va_type; bcopy(adp->va_name, ((video_adapter_info_t *)arg)->va_name, imin(strlen(adp->va_name) + 1, sizeof(((video_adapter_info_t *)arg)->va_name))); ((video_adapter_info_t *)arg)->va_unit = adp->va_unit; ((video_adapter_info_t *)arg)->va_flags = adp->va_flags; ((video_adapter_info_t *)arg)->va_io_base = adp->va_io_base; ((video_adapter_info_t *)arg)->va_io_size = adp->va_io_size; ((video_adapter_info_t *)arg)->va_crtc_addr = adp->va_crtc_addr; ((video_adapter_info_t *)arg)->va_mem_base = adp->va_mem_base; ((video_adapter_info_t *)arg)->va_mem_size = adp->va_mem_size; ((video_adapter_info_t *)arg)->va_window #if defined(__amd64__) || defined(__i386__) = vtophys(adp->va_window); #else = adp->va_window; #endif ((video_adapter_info_t *)arg)->va_window_size = adp->va_window_size; ((video_adapter_info_t *)arg)->va_window_gran = adp->va_window_gran; ((video_adapter_info_t *)arg)->va_window_orig = adp->va_window_orig; ((video_adapter_info_t *)arg)->va_unused0 #if defined(__amd64__) || defined(__i386__) = adp->va_buffer != 0 ? vtophys(adp->va_buffer) : 0; #else = adp->va_buffer; #endif ((video_adapter_info_t *)arg)->va_buffer_size = adp->va_buffer_size; ((video_adapter_info_t *)arg)->va_mode = adp->va_mode; ((video_adapter_info_t *)arg)->va_initial_mode = adp->va_initial_mode; ((video_adapter_info_t *)arg)->va_initial_bios_mode = adp->va_initial_bios_mode; ((video_adapter_info_t *)arg)->va_line_width = adp->va_line_width; ((video_adapter_info_t *)arg)->va_disp_start.x = adp->va_disp_start.x; ((video_adapter_info_t *)arg)->va_disp_start.y = adp->va_disp_start.y; break; case FBIO_MODEINFO: /* get mode information */ error = vidd_get_info(adp, ((video_info_t *)arg)->vi_mode, (video_info_t *)arg); if (error) error = ENODEV; break; case FBIO_FINDMODE: /* find a matching video mode */ error = vidd_query_mode(adp, (video_info_t *)arg); break; case FBIO_GETMODE: /* get video mode */ *(int *)arg = adp->va_mode; break; case FBIO_SETMODE: /* set video mode */ error = vidd_set_mode(adp, *(int *)arg); if (error) error = ENODEV; /* EINVAL? */ break; case FBIO_GETWINORG: /* get frame buffer window origin */ *(u_int *)arg = adp->va_window_orig; break; case FBIO_GETDISPSTART: /* get display start address */ ((video_display_start_t *)arg)->x = adp->va_disp_start.x; ((video_display_start_t *)arg)->y = adp->va_disp_start.y; break; case FBIO_GETLINEWIDTH: /* get scan line width in bytes */ *(u_int *)arg = adp->va_line_width; break; case FBIO_BLANK: /* blank display */ error = vidd_blank_display(adp, *(int *)arg); break; case FBIO_GETPALETTE: /* get color palette */ case FBIO_SETPALETTE: /* set color palette */ /* XXX */ case FBIOPUTCMAP: case FBIOGETCMAP: case FBIOPUTCMAPI: case FBIOGETCMAPI: /* XXX */ case FBIO_SETWINORG: /* set frame buffer window origin */ case FBIO_SETDISPSTART: /* set display start address */ case FBIO_SETLINEWIDTH: /* set scan line width in pixel */ case FBIOGTYPE: case FBIOGATTR: case FBIOSVIDEO: case FBIOGVIDEO: case FBIOVERTICAL: case FBIOSCURSOR: case FBIOGCURSOR: case FBIOSCURPOS: case FBIOGCURPOS: case FBIOGCURMAX: case FBIOMONINFO: case FBIOGXINFO: default: error = ENODEV; break; } splx(s); return error; } Index: head/sys/dev/fb/fbreg.h =================================================================== --- head/sys/dev/fb/fbreg.h (revision 343566) +++ head/sys/dev/fb/fbreg.h (revision 343567) @@ -1,345 +1,345 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Kazutaka YOKOTA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _DEV_FB_FBREG_H_ #define _DEV_FB_FBREG_H_ #ifdef _KERNEL #define V_MAX_ADAPTERS 8 /* XXX */ /* some macros */ #if defined(__amd64__) || defined(__i386__) static __inline void copyw(uint16_t *src, uint16_t *dst, size_t size) { size >>= 1; while (size--) *dst++ = *src++; } #define bcopy_io(s, d, c) copyw((void*)(s), (void*)(d), (c)) #define bcopy_toio(s, d, c) copyw((void*)(s), (void*)(d), (c)) #define bcopy_fromio(s, d, c) copyw((void*)(s), (void*)(d), (c)) #define bzero_io(d, c) bzero((void *)(d), (c)) #define fill_io(p, d, c) fill((p), (void *)(d), (c)) #define fillw_io(p, d, c) fillw((p), (void *)(d), (c)) #elif defined(__sparc64__) static __inline void fillw(int val, uint16_t *buf, size_t size) { while (size--) *buf++ = val; } #elif defined(__powerpc__) #define bcopy_io(s, d, c) ofwfb_bcopy((void *)(s), (void *)(d), (c)) #define bcopy_toio(s, d, c) ofwfb_bcopy((void *)(s), (void *)(d), (c)) #define bcopy_fromio(s, d, c) ofwfb_bcopy((void *)(s), (void *)(d), (c)) #define bzero_io(d, c) ofwfb_bzero((void *)(d), (c)) #define fillw(p, d, c) ofwfb_fillw((p), (void *)(d), (c)) #define fillw_io(p, d, c) ofwfb_fillw((p), (void *)(d), (c)) #define readw(a) ofwfb_readw((u_int16_t *)(a)) #define writew(a, v) ofwfb_writew((u_int16_t *)(a), (v)) void ofwfb_bcopy(const void *s, void *d, size_t c); void ofwfb_bzero(void *d, size_t c); void ofwfb_fillw(int pat, void *base, size_t cnt); u_int16_t ofwfb_readw(u_int16_t *addr); void ofwfb_writew(u_int16_t *addr, u_int16_t val); #elif defined(__mips__) || defined(__arm__) /* * Use amd64/i386-like settings under the assumption that MIPS-based display * drivers will have to add a level of indirection between a syscons-managed * frame buffer and the actual video hardware. We are forced to do this * because syscons doesn't carry around required busspace handles and tags to * use here. This is only really a problem for true VGA devices hooked up to * MIPS, as others will be performing a translation anyway. */ #define bcopy_io(s, d, c) memcpy((void *)(d), (void *)(s), (c)) #define bcopy_toio(s, d, c) memcpy((void *)(d), (void *)(s), (c)) #define bcopy_fromio(s, d, c) memcpy((void *)(d), (void *)(s), (c)) #define bzero_io(d, c) memset((void *)(d), 0, (c)) #define fill_io(p, d, c) memset((void *)(d), (p), (c)) static __inline void fillw(int val, uint16_t *buf, size_t size) { while (size--) *buf++ = val; } #define fillw_io(p, d, c) fillw((p), (void *)(d), (c)) #if defined(__arm__) #define readw(a) (*(uint16_t*)(a)) #define writew(a, v) (*(uint16_t*)(a) = (v)) #endif #else /* !__i386__ && !__amd64__ && !__sparc64__ && !__powerpc__ */ #define bcopy_io(s, d, c) memcpy_io((d), (s), (c)) #define bcopy_toio(s, d, c) memcpy_toio((d), (void *)(s), (c)) #define bcopy_fromio(s, d, c) memcpy_fromio((void *)(d), (s), (c)) #define bzero_io(d, c) memset_io((d), 0, (c)) #define fill_io(p, d, c) memset_io((d), (p), (c)) #define fillw(p, d, c) memsetw((d), (p), (c)) #define fillw_io(p, d, c) memsetw_io((d), (p), (c)) #endif /* !__i386__ */ /* video function table */ typedef int vi_probe_t(int unit, video_adapter_t **adpp, void *arg, int flags); typedef int vi_init_t(int unit, video_adapter_t *adp, int flags); typedef int vi_get_info_t(video_adapter_t *adp, int mode, video_info_t *info); typedef int vi_query_mode_t(video_adapter_t *adp, video_info_t *info); typedef int vi_set_mode_t(video_adapter_t *adp, int mode); typedef int vi_save_font_t(video_adapter_t *adp, int page, int size, int width, u_char *data, int c, int count); typedef int vi_load_font_t(video_adapter_t *adp, int page, int size, int width, u_char *data, int c, int count); typedef int vi_show_font_t(video_adapter_t *adp, int page); typedef int vi_save_palette_t(video_adapter_t *adp, u_char *palette); typedef int vi_load_palette_t(video_adapter_t *adp, u_char *palette); typedef int vi_set_border_t(video_adapter_t *adp, int border); typedef int vi_save_state_t(video_adapter_t *adp, void *p, size_t size); typedef int vi_load_state_t(video_adapter_t *adp, void *p); typedef int vi_set_win_org_t(video_adapter_t *adp, off_t offset); typedef int vi_read_hw_cursor_t(video_adapter_t *adp, int *col, int *row); typedef int vi_set_hw_cursor_t(video_adapter_t *adp, int col, int row); typedef int vi_set_hw_cursor_shape_t(video_adapter_t *adp, int base, int height, int celsize, int blink); typedef int vi_blank_display_t(video_adapter_t *adp, int mode); /* defined in sys/fbio.h #define V_DISPLAY_ON 0 #define V_DISPLAY_BLANK 1 #define V_DISPLAY_STAND_BY 2 #define V_DISPLAY_SUSPEND 3 */ typedef int vi_mmap_t(video_adapter_t *adp, vm_ooffset_t offset, vm_paddr_t *paddr, int prot, vm_memattr_t *memattr); typedef int vi_ioctl_t(video_adapter_t *adp, u_long cmd, caddr_t data); typedef int vi_clear_t(video_adapter_t *adp); typedef int vi_fill_rect_t(video_adapter_t *adp, int val, int x, int y, int cx, int cy); typedef int vi_bitblt_t(video_adapter_t *adp, ...); typedef int vi_diag_t(video_adapter_t *adp, int level); typedef int vi_save_cursor_palette_t(video_adapter_t *adp, u_char *palette); typedef int vi_load_cursor_palette_t(video_adapter_t *adp, u_char *palette); typedef int vi_copy_t(video_adapter_t *adp, vm_offset_t src, vm_offset_t dst, int n); typedef int vi_putp_t(video_adapter_t *adp, vm_offset_t off, u_int32_t p, u_int32_t a, int size, int bpp, int bit_ltor, int byte_ltor); typedef int vi_putc_t(video_adapter_t *adp, vm_offset_t off, u_int8_t c, u_int8_t a); typedef int vi_puts_t(video_adapter_t *adp, vm_offset_t off, u_int16_t *s, int len); typedef int vi_putm_t(video_adapter_t *adp, int x, int y, u_int8_t *pixel_image, u_int32_t pixel_mask, int size, int width); typedef struct video_switch { vi_probe_t *probe; vi_init_t *init; vi_get_info_t *get_info; vi_query_mode_t *query_mode; vi_set_mode_t *set_mode; vi_save_font_t *save_font; vi_load_font_t *load_font; vi_show_font_t *show_font; vi_save_palette_t *save_palette; vi_load_palette_t *load_palette; vi_set_border_t *set_border; vi_save_state_t *save_state; vi_load_state_t *load_state; vi_set_win_org_t *set_win_org; vi_read_hw_cursor_t *read_hw_cursor; vi_set_hw_cursor_t *set_hw_cursor; vi_set_hw_cursor_shape_t *set_hw_cursor_shape; vi_blank_display_t *blank_display; vi_mmap_t *mmap; vi_ioctl_t *ioctl; vi_clear_t *clear; vi_fill_rect_t *fill_rect; vi_bitblt_t *bitblt; int (*reserved1)(void); int (*reserved2)(void); vi_diag_t *diag; vi_save_cursor_palette_t *save_cursor_palette; vi_load_cursor_palette_t *load_cursor_palette; vi_copy_t *copy; vi_putp_t *putp; vi_putc_t *putc; vi_puts_t *puts; vi_putm_t *putm; } video_switch_t; #define vidd_probe(unit, adpp, arg, flags) \ (*vidsw[(adp)->va_index]->probe)((unit), (adpp), (arg), (flags)) #define vidd_init(unit, adp, flags) \ (*vidsw[(adp)->va_index]->init)((unit), (adp), (flags)) #define vidd_get_info(adp, mode, info) \ (*vidsw[(adp)->va_index]->get_info)((adp), (mode), (info)) #define vidd_query_mode(adp, mode) \ (*vidsw[(adp)->va_index]->query_mode)((adp), (mode)) #define vidd_set_mode(adp, mode) \ (*vidsw[(adp)->va_index]->set_mode)((adp), (mode)) #define vidd_save_font(adp, page, size, width, data, c, count) \ (*vidsw[(adp)->va_index]->save_font)((adp), (page), (size), \ (width), (data), (c), (count)) #define vidd_load_font(adp, page, size, width, data, c, count) \ (*vidsw[(adp)->va_index]->load_font)((adp), (page), (size), \ (width), (data), (c), (count)) #define vidd_show_font(adp, page) \ (*vidsw[(adp)->va_index]->show_font)((adp), (page)) #define vidd_save_palette(adp, pallete) \ (*vidsw[(adp)->va_index]->save_palette)((adp), (pallete)) #define vidd_load_palette(adp, pallete) \ (*vidsw[(adp)->va_index]->load_palette)((adp), (pallete)) #define vidd_set_border(adp, border) \ (*vidsw[(adp)->va_index]->set_border)((adp), (border)) #define vidd_save_state(adp, p, size) \ (*vidsw[(adp)->va_index]->save_state)((adp), (p), (size)) #define vidd_load_state(adp, p) \ (*vidsw[(adp)->va_index]->load_state)((adp), (p)) #define vidd_set_win_org(adp, offset) \ (*vidsw[(adp)->va_index]->set_win_org)((adp), (offset)) #define vidd_read_hw_cursor(adp, col, row) \ (*vidsw[(adp)->va_index]->read_hw_cursor)((adp), (col), (row)) #define vidd_set_hw_cursor(adp, col, row) \ (*vidsw[(adp)->va_index]->set_hw_cursor)((adp), (col), (row)) #define vidd_set_hw_cursor_shape(adp, base, height, celsize, blink) \ (*vidsw[(adp)->va_index]->set_hw_cursor_shape)((adp), (base), \ (height), (celsize), (blink)) #define vidd_blank_display(adp, mode) \ (*vidsw[(adp)->va_index]->blank_display)((adp), (mode)) #define vidd_mmap(adp, offset, paddr, prot, memattr) \ (*vidsw[(adp)->va_index]->mmap)((adp), (offset), (paddr), \ (prot), (memattr)) #define vidd_ioctl(adp, cmd, data) \ (*vidsw[(adp)->va_index]->ioctl)((adp), (cmd), (data)) #define vidd_clear(adp) \ (*vidsw[(adp)->va_index]->clear)((adp)) #define vidd_fill_rect(adp, val, x, y, cx, cy) \ (*vidsw[(adp)->va_index]->fill_rect)((adp), (val), (x), (y), \ (cx), (cy)) #define vidd_bitblt(adp, ...) \ (*vidsw[(adp)->va_index]->bitblt)(adp, __VA_ARGS__) #define vidd_diag(adp, level) \ (*vidsw[(adp)->va_index]->diag)((adp), (level)) #define vidd_save_cursor_palette(adp, palette) \ (*vidsw[(adp)->va_index]->save_cursor_palette)((adp), (palette)) #define vidd_load_cursor_palette(adp, palette) \ (*vidsw[(adp)->va_index]->load_cursor_palette)((adp), (palette)) #define vidd_copy(adp, src, dst, n) \ (*vidsw[(adp)->va_index]->copy)((adp), (src), (dst), (n)) #define vidd_putp(adp, offset, p, a, size, bpp, bit_ltor1, byte_ltor2) \ (*vidsw[(adp)->va_index]->putp)((adp), (offset), (p), (a), \ (size), (bpp), (bit_ltor1), (bit_ltor2)) #define vidd_putc(adp, offset, c, a) \ (*vidsw[(adp)->va_index]->putc)((adp), (offset), (c), (a)) #define vidd_puts(adp, offset, s, len) \ (*vidsw[(adp)->va_index]->puts)((adp), (offset), (s), (len)) #define vidd_putm(adp, x, y, pixel_image, pixel_mask, size, width) \ (*vidsw[(adp)->va_index]->putm)((adp), (x), (y), (pixel_image), \ (pixel_mask), (size), (width)) /* video driver */ typedef struct video_driver { char *name; video_switch_t *vidsw; int (*configure)(int); /* backdoor for the console driver */ } video_driver_t; #define VIDEO_DRIVER(name, sw, config) \ static struct video_driver name##_driver = { \ #name, &sw, config \ }; \ DATA_SET(videodriver_set, name##_driver); /* global variables */ extern struct video_switch **vidsw; /* functions for the video card driver */ int vid_register(video_adapter_t *adp); int vid_unregister(video_adapter_t *adp); video_switch_t *vid_get_switch(char *name); void vid_init_struct(video_adapter_t *adp, char *name, int type, int unit); /* functions for the video card client */ int vid_allocate(char *driver, int unit, void *id); int vid_release(video_adapter_t *adp, void *id); int vid_find_adapter(char *driver, int unit); video_adapter_t *vid_get_adapter(int index); /* a backdoor for the console driver to tickle the video driver XXX */ int vid_configure(int flags); #define VIO_PROBE_ONLY (1 << 0) /* probe only, don't initialize */ #ifdef FB_INSTALL_CDEV /* virtual frame buffer driver functions */ int fb_attach(int unit, video_adapter_t *adp, struct cdevsw *cdevsw); int fb_detach(int unit, video_adapter_t *adp, struct cdevsw *cdevsw); /* generic frame buffer cdev driver functions */ typedef struct genfb_softc { int gfb_flags; /* flag/status bits */ #define FB_OPEN (1 << 0) } genfb_softc_t; int genfbopen(genfb_softc_t *sc, video_adapter_t *adp, int flag, int mode, struct thread *td); int genfbclose(genfb_softc_t *sc, video_adapter_t *adp, int flag, int mode, struct thread *td); int genfbread(genfb_softc_t *sc, video_adapter_t *adp, struct uio *uio, int flag); int genfbwrite(genfb_softc_t *sc, video_adapter_t *adp, struct uio *uio, int flag); int genfbioctl(genfb_softc_t *sc, video_adapter_t *adp, u_long cmd, caddr_t arg, int flag, struct thread *td); int genfbmmap(genfb_softc_t *sc, video_adapter_t *adp, - vm_ooffset_t offset, vm_offset_t *paddr, + vm_ooffset_t offset, vm_paddr_t *paddr, int prot, vm_memattr_t *memattr); #endif /* FB_INSTALL_CDEV */ /* generic low-level driver functions */ void fb_dump_adp_info(char *driver, video_adapter_t *adp, int level); void fb_dump_mode_info(char *driver, video_adapter_t *adp, video_info_t *info, int level); int fb_type(int adp_type); int fb_commonioctl(video_adapter_t *adp, u_long cmd, caddr_t arg); #endif /* _KERNEL */ #endif /* !_DEV_FB_FBREG_H_ */ Index: head/sys/dev/fb/vga.c =================================================================== --- head/sys/dev/fb/vga.c (revision 343566) +++ head/sys/dev/fb/vga.c (revision 343567) @@ -1,3119 +1,3119 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1999 Kazutaka YOKOTA * Copyright (c) 1992-1998 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_vga.h" #include "opt_fb.h" #ifndef FB_DEBUG #define FB_DEBUG 0 #endif #include "opt_syscons.h" /* should be removed in the future, XXX */ #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) || defined(__amd64__) #include #endif #include #include #include #include #ifndef VGA_DEBUG #define VGA_DEBUG 0 #endif /* XXX machine/pc/bios.h has got too much i386-specific stuff in it */ #ifndef BIOS_PADDRTOVADDR #define BIOS_PADDRTOVADDR(x) (x) #endif int vga_probe_unit(int unit, video_adapter_t *buf, int flags) { video_adapter_t *adp; video_switch_t *sw; int error; sw = vid_get_switch(VGA_DRIVER_NAME); if (sw == NULL) return 0; error = (*sw->probe)(unit, &adp, NULL, flags); if (error) return error; bcopy(adp, buf, sizeof(*buf)); return 0; } int vga_attach_unit(int unit, vga_softc_t *sc, int flags) { video_switch_t *sw; int error; sw = vid_get_switch(VGA_DRIVER_NAME); if (sw == NULL) return ENXIO; error = (*sw->probe)(unit, &sc->adp, NULL, flags); if (error) return error; return (*sw->init)(unit, sc->adp, flags); } /* cdev driver functions */ #ifdef FB_INSTALL_CDEV int vga_open(struct cdev *dev, vga_softc_t *sc, int flag, int mode, struct thread *td) { if (sc == NULL) return ENXIO; if (mode & (O_CREAT | O_APPEND | O_TRUNC)) return ENODEV; return genfbopen(&sc->gensc, sc->adp, flag, mode, td); } int vga_close(struct cdev *dev, vga_softc_t *sc, int flag, int mode, struct thread *td) { return genfbclose(&sc->gensc, sc->adp, flag, mode, td); } int vga_read(struct cdev *dev, vga_softc_t *sc, struct uio *uio, int flag) { return genfbread(&sc->gensc, sc->adp, uio, flag); } int vga_write(struct cdev *dev, vga_softc_t *sc, struct uio *uio, int flag) { return genfbread(&sc->gensc, sc->adp, uio, flag); } int vga_ioctl(struct cdev *dev, vga_softc_t *sc, u_long cmd, caddr_t arg, int flag, struct thread *td) { return genfbioctl(&sc->gensc, sc->adp, cmd, arg, flag, td); } int vga_mmap(struct cdev *dev, vga_softc_t *sc, vm_ooffset_t offset, - vm_offset_t *paddr, int prot, vm_memattr_t *memattr) + vm_paddr_t *paddr, int prot, vm_memattr_t *memattr) { return genfbmmap(&sc->gensc, sc->adp, offset, paddr, prot, memattr); } #endif /* FB_INSTALL_CDEV */ /* LOW-LEVEL */ #include #ifdef __i386__ #include #endif #define probe_done(adp) ((adp)->va_flags & V_ADP_PROBED) #define init_done(adp) ((adp)->va_flags & V_ADP_INITIALIZED) #define config_done(adp) ((adp)->va_flags & V_ADP_REGISTERED) /* for compatibility with old kernel options */ #ifdef SC_ALT_SEQACCESS #undef SC_ALT_SEQACCESS #undef VGA_ALT_SEQACCESS #define VGA_ALT_SEQACCESS 1 #endif #ifdef SLOW_VGA #undef SLOW_VGA #undef VGA_SLOW_IOACCESS #define VGA_SLOW_IOACCESS #endif /* architecture dependent option */ #if !defined(__i386__) && !defined(__amd64__) #define VGA_NO_BIOS 1 #endif /* this should really be in `rtc.h' */ #define RTC_EQUIPMENT 0x14 /* various sizes */ #define V_MODE_MAP_SIZE (M_VGA_CG320 + 1) #define V_MODE_PARAM_SIZE 64 /* video adapter state buffer */ struct adp_state { int sig; #define V_STATE_SIG 0x736f6962 u_char regs[V_MODE_PARAM_SIZE]; }; typedef struct adp_state adp_state_t; /* video adapter information */ #define DCC_MONO 0 #define DCC_CGA40 1 #define DCC_CGA80 2 #define DCC_EGAMONO 3 #define DCC_EGA40 4 #define DCC_EGA80 5 /* * NOTE: `va_window' should have a virtual address, but is initialized * with a physical address in the following table, as verify_adapter() * will perform address conversion at run-time. */ static video_adapter_t adapter_init_value[] = { /* DCC_MONO */ { 0, KD_MONO, "mda", 0, 0, 0, IO_MDA, IO_MDASIZE, MONO_CRTC, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, 0, 0, 7, 0, }, /* DCC_CGA40 */ { 0, KD_CGA, "cga", 0, 0, V_ADP_COLOR, IO_CGA, IO_CGASIZE, COLOR_CRTC, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, 0, 0, 3, 0, }, /* DCC_CGA80 */ { 0, KD_CGA, "cga", 0, 0, V_ADP_COLOR, IO_CGA, IO_CGASIZE, COLOR_CRTC, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, 0, 0, 3, 0, }, /* DCC_EGAMONO */ { 0, KD_EGA, "ega", 0, 0, 0, IO_MDA, 48, MONO_CRTC, EGA_BUF_BASE, EGA_BUF_SIZE, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, 0, 0, 7, 0, }, /* DCC_EGA40 */ { 0, KD_EGA, "ega", 0, 0, V_ADP_COLOR, IO_MDA, 48, COLOR_CRTC, EGA_BUF_BASE, EGA_BUF_SIZE, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, 0, 0, 3, 0, }, /* DCC_EGA80 */ { 0, KD_EGA, "ega", 0, 0, V_ADP_COLOR, IO_MDA, 48, COLOR_CRTC, EGA_BUF_BASE, EGA_BUF_SIZE, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, 0, 0, 3, 0, }, }; static video_adapter_t biosadapter[2]; static int biosadapters = 0; /* video driver declarations */ static int vga_configure(int flags); int (*vga_sub_configure)(int flags); #if 0 static int vga_nop(void); #endif static int vga_error(void); static vi_probe_t vga_probe; static vi_init_t vga_init; static vi_get_info_t vga_get_info; static vi_query_mode_t vga_query_mode; static vi_set_mode_t vga_set_mode; static vi_save_font_t vga_save_font; static vi_load_font_t vga_load_font; static vi_show_font_t vga_show_font; static vi_save_palette_t vga_save_palette; static vi_load_palette_t vga_load_palette; static vi_set_border_t vga_set_border; static vi_save_state_t vga_save_state; static vi_load_state_t vga_load_state; static vi_set_win_org_t vga_set_origin; static vi_read_hw_cursor_t vga_read_hw_cursor; static vi_set_hw_cursor_t vga_set_hw_cursor; static vi_set_hw_cursor_shape_t vga_set_hw_cursor_shape; static vi_blank_display_t vga_blank_display; static vi_mmap_t vga_mmap_buf; static vi_ioctl_t vga_dev_ioctl; #ifndef VGA_NO_MODE_CHANGE static vi_clear_t vga_clear; static vi_fill_rect_t vga_fill_rect; static vi_bitblt_t vga_bitblt; #else /* VGA_NO_MODE_CHANGE */ #define vga_clear (vi_clear_t *)vga_error #define vga_fill_rect (vi_fill_rect_t *)vga_error #define vga_bitblt (vi_bitblt_t *)vga_error #endif static vi_diag_t vga_diag; static video_switch_t vgavidsw = { vga_probe, vga_init, vga_get_info, vga_query_mode, vga_set_mode, vga_save_font, vga_load_font, vga_show_font, vga_save_palette, vga_load_palette, vga_set_border, vga_save_state, vga_load_state, vga_set_origin, vga_read_hw_cursor, vga_set_hw_cursor, vga_set_hw_cursor_shape, vga_blank_display, vga_mmap_buf, vga_dev_ioctl, vga_clear, vga_fill_rect, vga_bitblt, vga_error, vga_error, vga_diag, }; VIDEO_DRIVER(mda, vgavidsw, NULL); VIDEO_DRIVER(cga, vgavidsw, NULL); VIDEO_DRIVER(ega, vgavidsw, NULL); VIDEO_DRIVER(vga, vgavidsw, vga_configure); /* VGA BIOS standard video modes */ #define EOT (-1) #define NA (-2) static video_info_t bios_vmode[] = { /* CGA */ { M_B40x25, V_INFO_COLOR, 40, 25, 8, 8, 2, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_C40x25, V_INFO_COLOR, 40, 25, 8, 8, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_B80x25, V_INFO_COLOR, 80, 25, 8, 8, 2, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_C80x25, V_INFO_COLOR, 80, 25, 8, 8, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, /* EGA */ { M_ENH_B40x25, V_INFO_COLOR, 40, 25, 8, 14, 2, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_ENH_C40x25, V_INFO_COLOR, 40, 25, 8, 14, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_ENH_B80x25, V_INFO_COLOR, 80, 25, 8, 14, 2, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_ENH_C80x25, V_INFO_COLOR, 80, 25, 8, 14, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, /* VGA */ { M_VGA_C40x25, V_INFO_COLOR, 40, 25, 8, 16, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_M80x25, 0, 80, 25, 8, 16, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C80x25, V_INFO_COLOR, 80, 25, 8, 16, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, /* MDA */ { M_EGAMONO80x25, 0, 80, 25, 8, 14, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, /* EGA */ { M_ENH_B80x43, V_INFO_COLOR, 80, 43, 8, 8, 2, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_ENH_C80x43, V_INFO_COLOR, 80, 43, 8, 8, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, /* VGA */ { M_VGA_M80x30, 0, 80, 30, 8, 16, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C80x30, V_INFO_COLOR, 80, 30, 8, 16, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_M80x50, 0, 80, 50, 8, 8, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C80x50, V_INFO_COLOR, 80, 50, 8, 8, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_M80x60, 0, 80, 60, 8, 8, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C80x60, V_INFO_COLOR, 80, 60, 8, 8, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, #ifndef VGA_NO_MODE_CHANGE #ifdef VGA_WIDTH90 { M_VGA_M90x25, 0, 90, 25, 8, 16, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C90x25, V_INFO_COLOR, 90, 25, 8, 16, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_M90x30, 0, 90, 30, 8, 16, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C90x30, V_INFO_COLOR, 90, 30, 8, 16, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_M90x43, 0, 90, 43, 8, 8, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C90x43, V_INFO_COLOR, 90, 43, 8, 8, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_M90x50, 0, 90, 50, 8, 8, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C90x50, V_INFO_COLOR, 90, 50, 8, 8, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_M90x60, 0, 90, 60, 8, 8, 2, 1, MDA_BUF_BASE, MDA_BUF_SIZE, MDA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, { M_VGA_C90x60, V_INFO_COLOR, 90, 60, 8, 8, 4, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_TEXT }, #endif /* VGA_WIDTH90 */ /* CGA */ { M_BG320, V_INFO_COLOR | V_INFO_GRAPHICS, 320, 200, 8, 8, 2, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_CGA }, { M_CG320, V_INFO_COLOR | V_INFO_GRAPHICS, 320, 200, 8, 8, 2, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_CGA }, { M_BG640, V_INFO_COLOR | V_INFO_GRAPHICS, 640, 200, 8, 8, 1, 1, CGA_BUF_BASE, CGA_BUF_SIZE, CGA_BUF_SIZE, 0, 0, V_INFO_MM_CGA }, /* EGA */ { M_CG320_D, V_INFO_COLOR | V_INFO_GRAPHICS, 320, 200, 8, 8, 4, 4, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0, V_INFO_MM_PLANAR }, { M_CG640_E, V_INFO_COLOR | V_INFO_GRAPHICS, 640, 200, 8, 8, 4, 4, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0 , V_INFO_MM_PLANAR }, { M_EGAMONOAPA, V_INFO_GRAPHICS, 640, 350, 8, 14, 4, 4, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, 64*1024, 0, 0 , V_INFO_MM_PLANAR }, { M_ENHMONOAPA2,V_INFO_GRAPHICS, 640, 350, 8, 14, 4, 4, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0 , V_INFO_MM_PLANAR }, { M_CG640x350, V_INFO_COLOR | V_INFO_GRAPHICS, 640, 350, 8, 14, 2, 2, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0 , V_INFO_MM_PLANAR }, { M_ENH_CG640, V_INFO_COLOR | V_INFO_GRAPHICS, 640, 350, 8, 14, 4, 4, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0 , V_INFO_MM_PLANAR }, /* VGA */ { M_BG640x480, V_INFO_COLOR | V_INFO_GRAPHICS, 640, 480, 8, 16, 4, 4, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0 , V_INFO_MM_PLANAR }, { M_CG640x480, V_INFO_COLOR | V_INFO_GRAPHICS, 640, 480, 8, 16, 4, 4, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0 , V_INFO_MM_PLANAR }, { M_VGA_CG320, V_INFO_COLOR | V_INFO_GRAPHICS, 320, 200, 8, 8, 8, 1, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0, V_INFO_MM_PACKED, 1 }, { M_VGA_MODEX, V_INFO_COLOR | V_INFO_GRAPHICS, 320, 240, 8, 8, 8, 4, GRAPHICS_BUF_BASE, GRAPHICS_BUF_SIZE, GRAPHICS_BUF_SIZE, 0, 0, V_INFO_MM_VGAX, 1 }, #endif /* VGA_NO_MODE_CHANGE */ { EOT }, }; static int vga_init_done = FALSE; #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) static u_char *video_mode_ptr = NULL; /* EGA/VGA */ static u_char *video_mode_ptr2 = NULL; /* CGA/MDA */ #endif static u_char *mode_map[V_MODE_MAP_SIZE]; static adp_state_t adpstate; static adp_state_t adpstate2; static int rows_offset = 1; /* local macros and functions */ #define BIOS_SADDRTOLADDR(p) ((((p) & 0xffff0000) >> 12) + ((p) & 0x0000ffff)) #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) static void map_mode_table(u_char *map[], u_char *table, int max); #endif static void clear_mode_map(video_adapter_t *adp, u_char *map[], int max, int color); #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) static int map_mode_num(int mode); #endif static int map_gen_mode_num(int type, int color, int mode); static int map_bios_mode_num(int type, int color, int bios_mode); static u_char *get_mode_param(int mode); #ifndef VGA_NO_BIOS static void fill_adapter_param(int code, video_adapter_t *adp); #endif static int verify_adapter(video_adapter_t *adp); static void update_adapter_info(video_adapter_t *adp, video_info_t *info); #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) #define COMP_IDENTICAL 0 #define COMP_SIMILAR 1 #define COMP_DIFFERENT 2 static int comp_adpregs(u_char *buf1, u_char *buf2); #endif static int probe_adapters(void); static int set_line_length(video_adapter_t *adp, int pixel); static int set_display_start(video_adapter_t *adp, int x, int y); #ifndef VGA_NO_MODE_CHANGE #ifdef VGA_WIDTH90 static void set_width90(adp_state_t *params); #endif #endif /* !VGA_NO_MODE_CHANGE */ #ifndef VGA_NO_FONT_LOADING #define PARAM_BUFSIZE 6 static void set_font_mode(video_adapter_t *adp, u_char *buf); static void set_normal_mode(video_adapter_t *adp, u_char *buf); #endif #ifndef VGA_NO_MODE_CHANGE static void filll_io(int val, vm_offset_t d, size_t size); static void planar_fill(video_adapter_t *adp, int val); static void packed_fill(video_adapter_t *adp, int val); static void direct_fill(video_adapter_t *adp, int val); #ifdef notyet static void planar_fill_rect(video_adapter_t *adp, int val, int x, int y, int cx, int cy); static void packed_fill_rect(video_adapter_t *adp, int val, int x, int y, int cx, int cy); static void direct_fill_rect16(video_adapter_t *adp, int val, int x, int y, int cx, int cy); static void direct_fill_rect24(video_adapter_t *adp, int val, int x, int y, int cx, int cy); static void direct_fill_rect32(video_adapter_t *adp, int val, int x, int y, int cx, int cy); #endif /* notyet */ #endif /* !VGA_NO_MODE_CHANGE */ static void dump_buffer(u_char *buf, size_t len); #define ISMAPPED(pa, width) \ (((pa) <= (u_long)0x1000 - (width)) \ || ((pa) >= ISA_HOLE_START && (pa) <= 0x100000 - (width))) #define prologue(adp, flag, err) \ if (!vga_init_done || !((adp)->va_flags & (flag))) \ return (err) /* a backdoor for the console driver */ static int vga_configure(int flags) { int i; probe_adapters(); for (i = 0; i < biosadapters; ++i) { if (!probe_done(&biosadapter[i])) continue; biosadapter[i].va_flags |= V_ADP_INITIALIZED; if (!config_done(&biosadapter[i])) { if (vid_register(&biosadapter[i]) < 0) continue; biosadapter[i].va_flags |= V_ADP_REGISTERED; } } if (vga_sub_configure != NULL) (*vga_sub_configure)(flags); return biosadapters; } /* local subroutines */ #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) /* construct the mode parameter map */ static void map_mode_table(u_char *map[], u_char *table, int max) { int i; for(i = 0; i < max; ++i) map[i] = table + i*V_MODE_PARAM_SIZE; for(; i < V_MODE_MAP_SIZE; ++i) map[i] = NULL; } #endif /* !VGA_NO_BIOS && !VGA_NO_MODE_CHANGE */ static void clear_mode_map(video_adapter_t *adp, u_char *map[], int max, int color) { video_info_t info; int i; /* * NOTE: we don't touch `bios_vmode[]' because it is shared * by all adapters. */ for(i = 0; i < max; ++i) { if (vga_get_info(adp, i, &info)) continue; if ((info.vi_flags & V_INFO_COLOR) != color) map[i] = NULL; } } #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) /* map the non-standard video mode to a known mode number */ static int map_mode_num(int mode) { static struct { int from; int to; } mode_map[] = { { M_ENH_B80x43, M_ENH_B80x25 }, { M_ENH_C80x43, M_ENH_C80x25 }, { M_VGA_M80x30, M_VGA_M80x25 }, { M_VGA_C80x30, M_VGA_C80x25 }, { M_VGA_M80x50, M_VGA_M80x25 }, { M_VGA_C80x50, M_VGA_C80x25 }, { M_VGA_M80x60, M_VGA_M80x25 }, { M_VGA_C80x60, M_VGA_C80x25 }, #ifdef VGA_WIDTH90 { M_VGA_M90x25, M_VGA_M80x25 }, { M_VGA_C90x25, M_VGA_C80x25 }, { M_VGA_M90x30, M_VGA_M80x25 }, { M_VGA_C90x30, M_VGA_C80x25 }, { M_VGA_M90x43, M_VGA_M80x25 }, { M_VGA_C90x43, M_ENH_C80x25 }, { M_VGA_M90x50, M_VGA_M80x25 }, { M_VGA_C90x50, M_VGA_C80x25 }, { M_VGA_M90x60, M_VGA_M80x25 }, { M_VGA_C90x60, M_VGA_C80x25 }, #endif { M_VGA_MODEX, M_VGA_CG320 }, }; int i; for (i = 0; i < nitems(mode_map); ++i) { if (mode_map[i].from == mode) return mode_map[i].to; } return mode; } #endif /* !VGA_NO_BIOS && !VGA_NO_MODE_CHANGE */ /* map a generic video mode to a known mode number */ static int map_gen_mode_num(int type, int color, int mode) { static struct { int from; int to_color; int to_mono; } mode_map[] = { { M_TEXT_80x30, M_VGA_C80x30, M_VGA_M80x30, }, { M_TEXT_80x43, M_ENH_C80x43, M_ENH_B80x43, }, { M_TEXT_80x50, M_VGA_C80x50, M_VGA_M80x50, }, { M_TEXT_80x60, M_VGA_C80x60, M_VGA_M80x60, }, }; int i; if (mode == M_TEXT_80x25) { switch (type) { case KD_VGA: if (color) return M_VGA_C80x25; else return M_VGA_M80x25; break; case KD_EGA: if (color) return M_ENH_C80x25; else return M_EGAMONO80x25; break; case KD_CGA: return M_C80x25; case KD_MONO: case KD_HERCULES: return M_EGAMONO80x25; /* XXX: this name is confusing */ default: return -1; } } for (i = 0; i < nitems(mode_map); ++i) { if (mode_map[i].from == mode) return ((color) ? mode_map[i].to_color : mode_map[i].to_mono); } return mode; } /* turn the BIOS video number into our video mode number */ static int map_bios_mode_num(int type, int color, int bios_mode) { static int cga_modes[7] = { M_B40x25, M_C40x25, /* 0, 1 */ M_B80x25, M_C80x25, /* 2, 3 */ M_BG320, M_CG320, M_BG640, }; static int ega_modes[17] = { M_ENH_B40x25, M_ENH_C40x25, /* 0, 1 */ M_ENH_B80x25, M_ENH_C80x25, /* 2, 3 */ M_BG320, M_CG320, M_BG640, M_EGAMONO80x25, /* 7 */ 8, 9, 10, 11, 12, M_CG320_D, M_CG640_E, M_ENHMONOAPA2, /* XXX: video momery > 64K */ M_ENH_CG640, /* XXX: video momery > 64K */ }; static int vga_modes[20] = { M_VGA_C40x25, M_VGA_C40x25, /* 0, 1 */ M_VGA_C80x25, M_VGA_C80x25, /* 2, 3 */ M_BG320, M_CG320, M_BG640, M_VGA_M80x25, /* 7 */ 8, 9, 10, 11, 12, M_CG320_D, M_CG640_E, M_ENHMONOAPA2, M_ENH_CG640, M_BG640x480, M_CG640x480, M_VGA_CG320, }; switch (type) { case KD_VGA: if (bios_mode < nitems(vga_modes)) return vga_modes[bios_mode]; else if (color) return M_VGA_C80x25; else return M_VGA_M80x25; break; case KD_EGA: if (bios_mode < nitems(ega_modes)) return ega_modes[bios_mode]; else if (color) return M_ENH_C80x25; else return M_EGAMONO80x25; break; case KD_CGA: if (bios_mode < nitems(cga_modes)) return cga_modes[bios_mode]; else return M_C80x25; break; case KD_MONO: case KD_HERCULES: return M_EGAMONO80x25; /* XXX: this name is confusing */ default: break; } return -1; } /* look up a parameter table entry */ static u_char *get_mode_param(int mode) { #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) if (mode >= V_MODE_MAP_SIZE) mode = map_mode_num(mode); #endif if ((mode >= 0) && (mode < V_MODE_MAP_SIZE)) return mode_map[mode]; else return NULL; } #ifndef VGA_NO_BIOS static void fill_adapter_param(int code, video_adapter_t *adp) { static struct { int primary; int secondary; } dcc[] = { { DCC_MONO, DCC_EGA40 /* CGA monitor */ }, { DCC_MONO, DCC_EGA80 /* CGA monitor */ }, { DCC_MONO, DCC_EGA80 }, { DCC_MONO, DCC_EGA80 }, { DCC_CGA40, DCC_EGAMONO }, { DCC_CGA80, DCC_EGAMONO }, { DCC_EGA40 /* CGA monitor */, DCC_MONO}, { DCC_EGA80 /* CGA monitor */, DCC_MONO}, { DCC_EGA80, DCC_MONO }, { DCC_EGA80, DCC_MONO }, { DCC_EGAMONO, DCC_CGA40 }, { DCC_EGAMONO, DCC_CGA80 }, }; if ((code < 0) || (code >= nitems(dcc))) { adp[V_ADP_PRIMARY] = adapter_init_value[DCC_MONO]; adp[V_ADP_SECONDARY] = adapter_init_value[DCC_CGA80]; } else { adp[V_ADP_PRIMARY] = adapter_init_value[dcc[code].primary]; adp[V_ADP_SECONDARY] = adapter_init_value[dcc[code].secondary]; } } #endif /* VGA_NO_BIOS */ static int verify_adapter(video_adapter_t *adp) { vm_offset_t buf; u_int16_t v; #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) u_int32_t p; #endif buf = BIOS_PADDRTOVADDR(adp->va_window); v = readw(buf); writew(buf, 0xA55A); if (readw(buf) != 0xA55A) return ENXIO; writew(buf, v); switch (adp->va_type) { case KD_EGA: outb(adp->va_crtc_addr, 7); if (inb(adp->va_crtc_addr) == 7) { adp->va_type = KD_VGA; adp->va_name = "vga"; adp->va_flags |= V_ADP_STATESAVE | V_ADP_PALETTE; } adp->va_flags |= V_ADP_STATELOAD | V_ADP_BORDER; /* the color adapter may be in the 40x25 mode... XXX */ #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) /* get the BIOS video mode pointer */ p = *(u_int32_t *)BIOS_PADDRTOVADDR(0x4a8); p = BIOS_SADDRTOLADDR(p); if (ISMAPPED(p, sizeof(u_int32_t))) { p = *(u_int32_t *)BIOS_PADDRTOVADDR(p); p = BIOS_SADDRTOLADDR(p); if (ISMAPPED(p, V_MODE_PARAM_SIZE)) video_mode_ptr = (u_char *)BIOS_PADDRTOVADDR(p); } #endif break; case KD_CGA: adp->va_flags |= V_ADP_COLOR | V_ADP_BORDER; /* may be in the 40x25 mode... XXX */ #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) /* get the BIOS video mode pointer */ p = *(u_int32_t *)BIOS_PADDRTOVADDR(0x1d*4); p = BIOS_SADDRTOLADDR(p); video_mode_ptr2 = (u_char *)BIOS_PADDRTOVADDR(p); #endif break; case KD_MONO: #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) /* get the BIOS video mode pointer */ p = *(u_int32_t *)BIOS_PADDRTOVADDR(0x1d*4); p = BIOS_SADDRTOLADDR(p); video_mode_ptr2 = (u_char *)BIOS_PADDRTOVADDR(p); #endif break; } return 0; } static void update_adapter_info(video_adapter_t *adp, video_info_t *info) { adp->va_flags &= ~V_ADP_COLOR; adp->va_flags |= (info->vi_flags & V_INFO_COLOR) ? V_ADP_COLOR : 0; adp->va_crtc_addr = (adp->va_flags & V_ADP_COLOR) ? COLOR_CRTC : MONO_CRTC; adp->va_window = BIOS_PADDRTOVADDR(info->vi_window); adp->va_window_size = info->vi_window_size; adp->va_window_gran = info->vi_window_gran; adp->va_window_orig = 0; /* XXX */ adp->va_buffer = info->vi_buffer; adp->va_buffer_size = info->vi_buffer_size; adp->va_flags &= ~V_ADP_CWIDTH9; if (info->vi_flags & V_INFO_CWIDTH9) adp->va_flags |= V_ADP_CWIDTH9; if (info->vi_mem_model == V_INFO_MM_VGAX) { adp->va_line_width = info->vi_width/2; } else if (info->vi_flags & V_INFO_GRAPHICS) { switch (info->vi_depth/info->vi_planes) { case 1: adp->va_line_width = info->vi_width/8; break; case 2: adp->va_line_width = info->vi_width/4; break; case 4: adp->va_line_width = info->vi_width/2; break; case 8: default: /* shouldn't happen */ adp->va_line_width = info->vi_width; break; } } else { adp->va_line_width = info->vi_width; } adp->va_disp_start.x = 0; adp->va_disp_start.y = 0; bcopy(info, &adp->va_info, sizeof(adp->va_info)); } #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) /* compare two parameter table entries */ static int comp_adpregs(u_char *buf1, u_char *buf2) { static struct { u_char mask; } params[V_MODE_PARAM_SIZE] = { {0xff}, {0x00}, {0xff}, /* COLS}, ROWS}, POINTS */ {0x00}, {0x00}, /* page length */ {0xfe}, {0xff}, {0xff}, {0xff}, /* sequencer registers */ {0xf3}, /* misc register */ {0xff}, {0xff}, {0xff}, {0x7f}, {0xff}, /* CRTC */ {0xff}, {0xff}, {0xff}, {0x7f}, {0xff}, {0x00}, {0x00}, {0x00}, {0x00}, {0x00}, {0x00}, {0xff}, {0x7f}, {0xff}, {0xff}, {0x7f}, {0xff}, {0xff}, {0xef}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, /* attribute controller regs */ {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, {0xf0}, {0xff}, {0xff}, {0xff}, {0xff}, {0xff}, /* GDC register */ {0xff}, {0xff}, {0xff}, {0xff}, }; int identical = TRUE; int i; if ((buf1 == NULL) || (buf2 == NULL)) return COMP_DIFFERENT; for (i = 0; i < nitems(params); ++i) { if (params[i].mask == 0) /* don't care */ continue; if ((buf1[i] & params[i].mask) != (buf2[i] & params[i].mask)) return COMP_DIFFERENT; if (buf1[i] != buf2[i]) identical = FALSE; } return (identical) ? COMP_IDENTICAL : COMP_SIMILAR; } #endif /* !VGA_NO_BIOS && !VGA_NO_MODE_CHANGE */ /* probe video adapters and return the number of detected adapters */ static int probe_adapters(void) { video_adapter_t *adp; video_info_t info; #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) u_char *mp; #endif int height, i, width; /* do this test only once */ if (vga_init_done) return biosadapters; vga_init_done = TRUE; /* * Locate display adapters. * The AT architecture supports up to two adapters. `syscons' allows * the following combinations of adapters: * 1) MDA + CGA * 2) MDA + EGA/VGA color * 3) CGA + EGA/VGA mono * Note that `syscons' doesn't bother with MCGA as it is only * avaiable for low end PS/2 models which has 80286 or earlier CPUs, * thus, they are not running FreeBSD! * When there are two adapaters in the system, one becomes `primary' * and the other `secondary'. The EGA adapter has a set of DIP * switches on board for this information and the EGA BIOS copies * it in the BIOS data area BIOSDATA_VIDEOSWITCH (40:88). * The VGA BIOS has more sophisticated mechanism and has this * information in BIOSDATA_DCCINDEX (40:8a), but it also maintains * compatibility with the EGA BIOS by updating BIOSDATA_VIDEOSWITCH. */ /* * Check rtc and BIOS data area. * XXX: we don't use BIOSDATA_EQUIPMENT, since it is not a dead * copy of RTC_EQUIPMENT. Bits 4 and 5 of ETC_EQUIPMENT are * zeros for EGA and VGA. However, the EGA/VGA BIOS sets * these bits in BIOSDATA_EQUIPMENT according to the monitor * type detected. */ #ifndef VGA_NO_BIOS if (*(u_int32_t *)BIOS_PADDRTOVADDR(0x4a8)) { /* EGA/VGA BIOS is present */ fill_adapter_param(readb(BIOS_PADDRTOVADDR(0x488)) & 0x0f, biosadapter); } else { switch ((rtcin(RTC_EQUIPMENT) >> 4) & 3) { /* bit 4 and 5 */ case 0: /* EGA/VGA: shouldn't be happening */ fill_adapter_param(readb(BIOS_PADDRTOVADDR(0x488)) & 0x0f, biosadapter); break; case 1: /* CGA 40x25 */ /* FIXME: switch to the 80x25 mode? XXX */ biosadapter[V_ADP_PRIMARY] = adapter_init_value[DCC_CGA40]; biosadapter[V_ADP_SECONDARY] = adapter_init_value[DCC_MONO]; break; case 2: /* CGA 80x25 */ biosadapter[V_ADP_PRIMARY] = adapter_init_value[DCC_CGA80]; biosadapter[V_ADP_SECONDARY] = adapter_init_value[DCC_MONO]; break; case 3: /* MDA */ biosadapter[V_ADP_PRIMARY] = adapter_init_value[DCC_MONO]; biosadapter[V_ADP_SECONDARY] = adapter_init_value[DCC_CGA80]; break; } } #else /* assume EGA/VGA? XXX */ biosadapter[V_ADP_PRIMARY] = adapter_init_value[DCC_EGA80]; biosadapter[V_ADP_SECONDARY] = adapter_init_value[DCC_MONO]; #endif /* VGA_NO_BIOS */ biosadapters = 0; if (verify_adapter(&biosadapter[V_ADP_SECONDARY]) == 0) { ++biosadapters; biosadapter[V_ADP_SECONDARY].va_flags |= V_ADP_PROBED; biosadapter[V_ADP_SECONDARY].va_mode = biosadapter[V_ADP_SECONDARY].va_initial_mode = map_bios_mode_num(biosadapter[V_ADP_SECONDARY].va_type, biosadapter[V_ADP_SECONDARY].va_flags & V_ADP_COLOR, biosadapter[V_ADP_SECONDARY].va_initial_bios_mode); } else { biosadapter[V_ADP_SECONDARY].va_type = -1; } if (verify_adapter(&biosadapter[V_ADP_PRIMARY]) == 0) { ++biosadapters; biosadapter[V_ADP_PRIMARY].va_flags |= V_ADP_PROBED; #ifndef VGA_NO_BIOS biosadapter[V_ADP_PRIMARY].va_initial_bios_mode = readb(BIOS_PADDRTOVADDR(0x449)); #else biosadapter[V_ADP_PRIMARY].va_initial_bios_mode = 3; /* XXX */ #endif biosadapter[V_ADP_PRIMARY].va_mode = biosadapter[V_ADP_PRIMARY].va_initial_mode = map_bios_mode_num(biosadapter[V_ADP_PRIMARY].va_type, biosadapter[V_ADP_PRIMARY].va_flags & V_ADP_COLOR, biosadapter[V_ADP_PRIMARY].va_initial_bios_mode); } else { biosadapter[V_ADP_PRIMARY] = biosadapter[V_ADP_SECONDARY]; biosadapter[V_ADP_SECONDARY].va_type = -1; } if (biosadapters == 0) return biosadapters; biosadapter[V_ADP_PRIMARY].va_unit = V_ADP_PRIMARY; biosadapter[V_ADP_SECONDARY].va_unit = V_ADP_SECONDARY; #if 0 /* we don't need these... */ fb_init_struct(&biosadapter[V_ADP_PRIMARY], ...); fb_init_struct(&biosadapter[V_ADP_SECONDARY], ...); #endif #ifdef notyet /* * We cannot have two video adapter of the same type; there must be * only one of color or mono adapter, or one each of them. */ if (biosadapters > 1) { if (!((biosadapter[0].va_flags ^ biosadapter[1].va_flags) & V_ADP_COLOR)) /* we have two mono or color adapters!! */ return (biosadapters = 0); } #endif /* * Ensure a zero start address. The registers are w/o * for old hardware so it's too hard to relocate the active screen * memory. * This must be done before vga_save_state() for VGA. */ outb(biosadapter[V_ADP_PRIMARY].va_crtc_addr, 12); outb(biosadapter[V_ADP_PRIMARY].va_crtc_addr + 1, 0); outb(biosadapter[V_ADP_PRIMARY].va_crtc_addr, 13); outb(biosadapter[V_ADP_PRIMARY].va_crtc_addr + 1, 0); /* the video mode parameter table in EGA/VGA BIOS */ /* NOTE: there can be only one EGA/VGA, wheather color or mono, * recognized by the video BIOS. */ if ((biosadapter[V_ADP_PRIMARY].va_type == KD_EGA) || (biosadapter[V_ADP_PRIMARY].va_type == KD_VGA)) { adp = &biosadapter[V_ADP_PRIMARY]; } else if ((biosadapter[V_ADP_SECONDARY].va_type == KD_EGA) || (biosadapter[V_ADP_SECONDARY].va_type == KD_VGA)) { adp = &biosadapter[V_ADP_SECONDARY]; } else { adp = NULL; } bzero(mode_map, sizeof(mode_map)); if (adp != NULL) { if (adp->va_type == KD_VGA) { vga_save_state(adp, &adpstate, sizeof(adpstate)); #if defined(VGA_NO_BIOS) || defined(VGA_NO_MODE_CHANGE) mode_map[adp->va_initial_mode] = adpstate.regs; rows_offset = 1; #else /* VGA_NO_BIOS || VGA_NO_MODE_CHANGE */ if (video_mode_ptr == NULL) { mode_map[adp->va_initial_mode] = adpstate.regs; rows_offset = 1; } else { /* discard the table if we are not familiar with it... */ map_mode_table(mode_map, video_mode_ptr, M_VGA_CG320 + 1); mp = get_mode_param(adp->va_initial_mode); if (mp != NULL) bcopy(mp, adpstate2.regs, sizeof(adpstate2.regs)); switch (comp_adpregs(adpstate.regs, mp)) { case COMP_IDENTICAL: /* * OK, this parameter table looks reasonably familiar * to us... */ /* * This is a kludge for Toshiba DynaBook SS433 * whose BIOS video mode table entry has the actual # * of rows at the offset 1; BIOSes from other * manufacturers store the # of rows - 1 there. XXX */ rows_offset = adpstate.regs[1] + 1 - mp[1]; break; case COMP_SIMILAR: /* * Not exactly the same, but similar enough to be * trusted. However, use the saved register values * for the initial mode and other modes which are * based on the initial mode. */ mode_map[adp->va_initial_mode] = adpstate.regs; rows_offset = adpstate.regs[1] + 1 - mp[1]; adpstate.regs[1] -= rows_offset - 1; break; case COMP_DIFFERENT: default: /* * Don't use the parameter table in the BIOS, since * even the BIOS doesn't use it for the initial mode. * Restrict the tweaked modes to (in practice) 80x50 * from 80x25 with 400 scan lines, since the only safe * tweak is changing the characters from 8x16 to 8x8. */ video_mode_ptr = NULL; bzero(mode_map, sizeof(mode_map)); mode_map[adp->va_initial_mode] = adpstate.regs; rows_offset = 1; width = height = -1; for (i = 0; i < nitems(bios_vmode); ++i) { if (bios_vmode[i].vi_mode == adp->va_initial_mode) { width = bios_vmode[i].vi_width; height = bios_vmode[i].vi_height; break; } } for (i = 0; i < nitems(bios_vmode); ++i) { if (bios_vmode[i].vi_mode != adp->va_initial_mode && map_mode_num(bios_vmode[i].vi_mode) == adp->va_initial_mode && (bios_vmode[i].vi_width != width || bios_vmode[i].vi_height != 2 * height)) { bios_vmode[i].vi_mode = NA; } } break; } } #endif /* VGA_NO_BIOS || VGA_NO_MODE_CHANGE */ #ifndef VGA_NO_MODE_CHANGE adp->va_flags |= V_ADP_MODECHANGE; #endif #ifndef VGA_NO_FONT_LOADING adp->va_flags |= V_ADP_FONT; #endif } else if (adp->va_type == KD_EGA) { #if defined(VGA_NO_BIOS) || defined(VGA_NO_MODE_CHANGE) rows_offset = 1; #else /* VGA_NO_BIOS || VGA_NO_MODE_CHANGE */ if (video_mode_ptr == NULL) { rows_offset = 1; } else { map_mode_table(mode_map, video_mode_ptr, M_ENH_C80x25 + 1); /* XXX how can one validate the EGA table... */ mp = get_mode_param(adp->va_initial_mode); if (mp != NULL) { adp->va_flags |= V_ADP_MODECHANGE; #ifndef VGA_NO_FONT_LOADING adp->va_flags |= V_ADP_FONT; #endif rows_offset = 1; } else { /* * This is serious. We will not be able to switch video * modes at all... */ video_mode_ptr = NULL; bzero(mode_map, sizeof(mode_map)); rows_offset = 1; } } #endif /* VGA_NO_BIOS || VGA_NO_MODE_CHANGE */ } } /* remove conflicting modes if we have more than one adapter */ if (biosadapters > 0) { for (i = 0; i < biosadapters; ++i) { if (!(biosadapter[i].va_flags & V_ADP_MODECHANGE)) continue; clear_mode_map(&biosadapter[i], mode_map, M_VGA_CG320 + 1, (biosadapter[i].va_flags & V_ADP_COLOR) ? V_INFO_COLOR : 0); if ((biosadapter[i].va_type == KD_VGA) || (biosadapter[i].va_type == KD_EGA)) { biosadapter[i].va_io_base = (biosadapter[i].va_flags & V_ADP_COLOR) ? IO_VGA : IO_MDA; biosadapter[i].va_io_size = 32; } } } #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) /* * Attempt to determine the real character width for each mode. 9 wide * is supposed to be standard for EGA mono mode and most VGA text modes, * but some hardware doesn't support it, so dynamic configuration is * needed. Bit 0 in sequencer register 1 is supposed control the width * (set = 8), but this is unreliable too. Trust that 0 in the sequencer * bit means 9 wide after verifying that 9 is consistent with some CRTC * timing. The ratio (Horizontal Total) / (Horizontal Displayed) is * about 1.2 in all standard 9-wide modes and should be about 9/8 larger * again in similar 8-wide modes; in practice it is usually about 1.4 * times larger. */ for (i = 0; i < nitems(bios_vmode); ++i) { if (bios_vmode[i].vi_mem_model == V_INFO_MM_TEXT && bios_vmode[i].vi_width != 90) { mp = get_mode_param(map_mode_num(bios_vmode[i].vi_mode)); if (mp != NULL && !(mp[5] & 1) && mp[10] <= mp[11] * 125 / 100) bios_vmode[i].vi_flags |= V_INFO_CWIDTH9; } } #endif /* buffer address */ vga_get_info(&biosadapter[V_ADP_PRIMARY], biosadapter[V_ADP_PRIMARY].va_initial_mode, &info); info.vi_flags &= ~V_INFO_LINEAR; /* XXX */ update_adapter_info(&biosadapter[V_ADP_PRIMARY], &info); if (biosadapters > 1) { vga_get_info(&biosadapter[V_ADP_SECONDARY], biosadapter[V_ADP_SECONDARY].va_initial_mode, &info); info.vi_flags &= ~V_INFO_LINEAR; /* XXX */ update_adapter_info(&biosadapter[V_ADP_SECONDARY], &info); } /* * XXX: we should verify the following values for the primary adapter... * crtc I/O port address: *(u_int16_t *)BIOS_PADDRTOVADDR(0x463); * color/mono display: (*(u_int8_t *)BIOS_PADDRTOVADDR(0x487) & 0x02) * ? 0 : V_ADP_COLOR; * columns: *(u_int8_t *)BIOS_PADDRTOVADDR(0x44a); * rows: *(u_int8_t *)BIOS_PADDRTOVADDR(0x484); * font size: *(u_int8_t *)BIOS_PADDRTOVADDR(0x485); * buffer size: *(u_int16_t *)BIOS_PADDRTOVADDR(0x44c); */ return biosadapters; } /* set the scan line length in pixel */ static int set_line_length(video_adapter_t *adp, int pixel) { u_char *mp; int ppw; /* pixels per word */ int bpl; /* bytes per line */ int count; if ((adp->va_type != KD_VGA) && (adp->va_type != KD_EGA)) return ENODEV; mp = get_mode_param(adp->va_mode); if (mp == NULL) return EINVAL; switch (adp->va_info.vi_mem_model) { case V_INFO_MM_PLANAR: ppw = 16/(adp->va_info.vi_depth/adp->va_info.vi_planes); count = howmany(pixel, ppw)/2; bpl = (howmany(pixel, ppw)/2)*4; break; case V_INFO_MM_PACKED: count = (pixel + 7)/8; bpl = rounddown(pixel + 7, 8); break; case V_INFO_MM_TEXT: count = (pixel + 7)/8; /* columns */ bpl = (pixel + 7)/8; /* columns */ break; default: return ENODEV; } if (mp[10 + 0x17] & 0x40) /* CRTC mode control reg */ count *= 2; /* byte mode */ outb(adp->va_crtc_addr, 0x13); outb(adp->va_crtc_addr + 1, count); adp->va_line_width = bpl; return 0; } static int set_display_start(video_adapter_t *adp, int x, int y) { int off; /* byte offset (graphics mode)/word offset (text mode) */ int poff; /* pixel offset */ int roff; /* row offset */ int ppb; /* pixels per byte */ if ((adp->va_type != KD_VGA) && (adp->va_type != KD_EGA)) x &= ~7; if (adp->va_info.vi_flags & V_INFO_GRAPHICS) { ppb = 8/(adp->va_info.vi_depth/adp->va_info.vi_planes); off = y*adp->va_line_width + x/ppb; roff = 0; poff = x%ppb; } else { if ((adp->va_type == KD_VGA) || (adp->va_type == KD_EGA)) { outb(TSIDX, 1); if (inb(TSREG) & 1) ppb = 9; else ppb = 8; } else { ppb = 8; } off = y/adp->va_info.vi_cheight*adp->va_line_width + x/ppb; roff = y%adp->va_info.vi_cheight; /* FIXME: is this correct? XXX */ if (ppb == 8) poff = x%ppb; else poff = (x + 8)%ppb; } /* start address */ outb(adp->va_crtc_addr, 0xc); /* high */ outb(adp->va_crtc_addr + 1, off >> 8); outb(adp->va_crtc_addr, 0xd); /* low */ outb(adp->va_crtc_addr + 1, off & 0xff); /* horizontal pel pan */ if ((adp->va_type == KD_VGA) || (adp->va_type == KD_EGA)) { inb(adp->va_crtc_addr + 6); outb(ATC, 0x13 | 0x20); outb(ATC, poff); inb(adp->va_crtc_addr + 6); outb(ATC, 0x20); } /* preset raw scan */ outb(adp->va_crtc_addr, 8); outb(adp->va_crtc_addr + 1, roff); adp->va_disp_start.x = x; adp->va_disp_start.y = y; return 0; } #ifndef VGA_NO_MODE_CHANGE #if defined(__i386__) || defined(__amd64__) /* XXX */ static void fill(int val, void *d, size_t size) { u_char *p = d; while (size-- > 0) *p++ = val; } #endif /* __i386__ */ static void filll_io(int val, vm_offset_t d, size_t size) { while (size-- > 0) { writel(d, val); d += sizeof(u_int32_t); } } #endif /* !VGA_NO_MODE_CHANGE */ /* entry points */ #if 0 static int vga_nop(void) { return 0; } #endif static int vga_error(void) { return ENODEV; } static int vga_probe(int unit, video_adapter_t **adpp, void *arg, int flags) { probe_adapters(); if (unit >= biosadapters) return ENXIO; *adpp = &biosadapter[unit]; return 0; } static int vga_init(int unit, video_adapter_t *adp, int flags) { if ((unit >= biosadapters) || (adp == NULL) || !probe_done(adp)) return ENXIO; if (!init_done(adp)) { /* nothing to do really... */ adp->va_flags |= V_ADP_INITIALIZED; } if (!config_done(adp)) { if (vid_register(adp) < 0) return ENXIO; adp->va_flags |= V_ADP_REGISTERED; } if (vga_sub_configure != NULL) (*vga_sub_configure)(0); return 0; } /* * get_info(): * Return the video_info structure of the requested video mode. * * all adapters */ static int vga_get_info(video_adapter_t *adp, int mode, video_info_t *info) { int i; if (!vga_init_done) return ENXIO; mode = map_gen_mode_num(adp->va_type, adp->va_flags & V_ADP_COLOR, mode); #ifndef VGA_NO_MODE_CHANGE if (adp->va_flags & V_ADP_MODECHANGE) { /* * If the parameter table entry for this mode is not found, * the mode is not supported... */ if (get_mode_param(mode) == NULL) return EINVAL; } else #endif /* VGA_NO_MODE_CHANGE */ { /* * Even if we don't support video mode switching on this adapter, * the information on the initial (thus current) video mode * should be made available. */ if (mode != adp->va_initial_mode) return EINVAL; } for (i = 0; bios_vmode[i].vi_mode != EOT; ++i) { if (bios_vmode[i].vi_mode == NA) continue; if (mode == bios_vmode[i].vi_mode) { *info = bios_vmode[i]; /* XXX */ info->vi_buffer_size = info->vi_window_size*info->vi_planes; return 0; } } return EINVAL; } /* * query_mode(): * Find a video mode matching the requested parameters. * Fields filled with 0 are considered "don't care" fields and * match any modes. * * all adapters */ static int vga_query_mode(video_adapter_t *adp, video_info_t *info) { int i; if (!vga_init_done) return ENXIO; for (i = 0; bios_vmode[i].vi_mode != EOT; ++i) { if (bios_vmode[i].vi_mode == NA) continue; if ((info->vi_width != 0) && (info->vi_width != bios_vmode[i].vi_width)) continue; if ((info->vi_height != 0) && (info->vi_height != bios_vmode[i].vi_height)) continue; if ((info->vi_cwidth != 0) && (info->vi_cwidth != bios_vmode[i].vi_cwidth)) continue; if ((info->vi_cheight != 0) && (info->vi_cheight != bios_vmode[i].vi_cheight)) continue; if ((info->vi_depth != 0) && (info->vi_depth != bios_vmode[i].vi_depth)) continue; if ((info->vi_planes != 0) && (info->vi_planes != bios_vmode[i].vi_planes)) continue; /* XXX: should check pixel format, memory model */ if ((info->vi_flags != 0) && (info->vi_flags != bios_vmode[i].vi_flags)) continue; /* verify if this mode is supported on this adapter */ if (vga_get_info(adp, bios_vmode[i].vi_mode, info)) continue; return 0; } return ENODEV; } /* * set_mode(): * Change the video mode. * * EGA/VGA */ #ifndef VGA_NO_MODE_CHANGE #ifdef VGA_WIDTH90 static void set_width90(adp_state_t *params) { /* * Based on code submitted by Kelly Yancey (kbyanc@freedomnet.com) * and alexv@sui.gda.itesm.mx. */ params->regs[5] |= 1; /* toggle 8 pixel wide fonts */ params->regs[10+0x0] = 0x6b; params->regs[10+0x1] = 0x59; params->regs[10+0x2] = 0x5a; params->regs[10+0x3] = 0x8e; params->regs[10+0x4] = 0x5e; params->regs[10+0x5] = 0x8a; params->regs[10+0x13] = 45; params->regs[35+0x13] = 0; } #endif /* VGA_WIDTH90 */ #endif /* !VGA_NO_MODE_CHANGE */ static int vga_set_mode(video_adapter_t *adp, int mode) { #ifndef VGA_NO_MODE_CHANGE video_info_t info; adp_state_t params; prologue(adp, V_ADP_MODECHANGE, ENODEV); mode = map_gen_mode_num(adp->va_type, adp->va_flags & V_ADP_COLOR, mode); if (vga_get_info(adp, mode, &info)) return EINVAL; #if VGA_DEBUG > 1 printf("vga_set_mode(): setting mode %d\n", mode); #endif params.sig = V_STATE_SIG; bcopy(get_mode_param(mode), params.regs, sizeof(params.regs)); switch (mode) { #ifdef VGA_WIDTH90 case M_VGA_C90x60: case M_VGA_M90x60: set_width90(¶ms); /* FALLTHROUGH */ #endif case M_VGA_C80x60: case M_VGA_M80x60: params.regs[2] = 0x08; params.regs[19] = 0x47; goto special_480l; #ifdef VGA_WIDTH90 case M_VGA_C90x30: case M_VGA_M90x30: set_width90(¶ms); /* FALLTHROUGH */ #endif case M_VGA_C80x30: case M_VGA_M80x30: params.regs[19] = 0x4f; special_480l: params.regs[9] |= 0xc0; params.regs[16] = 0x08; params.regs[17] = 0x3e; params.regs[26] = 0xea; params.regs[28] = 0xdf; params.regs[31] = 0xe7; params.regs[32] = 0x04; goto setup_mode; #ifdef VGA_WIDTH90 case M_VGA_C90x43: case M_VGA_M90x43: set_width90(¶ms); /* FALLTHROUGH */ #endif case M_ENH_C80x43: case M_ENH_B80x43: params.regs[28] = 87; goto special_80x50; #ifdef VGA_WIDTH90 case M_VGA_C90x50: case M_VGA_M90x50: set_width90(¶ms); /* FALLTHROUGH */ #endif case M_VGA_C80x50: case M_VGA_M80x50: special_80x50: params.regs[2] = 8; params.regs[19] = 7; goto setup_mode; #ifdef VGA_WIDTH90 case M_VGA_C90x25: case M_VGA_M90x25: set_width90(¶ms); /* FALLTHROUGH */ #endif case M_VGA_C40x25: case M_VGA_C80x25: case M_VGA_M80x25: case M_B40x25: case M_C40x25: case M_B80x25: case M_C80x25: case M_ENH_B40x25: case M_ENH_C40x25: case M_ENH_B80x25: case M_ENH_C80x25: case M_EGAMONO80x25: setup_mode: vga_load_state(adp, ¶ms); break; case M_VGA_MODEX: /* "unchain" the VGA mode */ params.regs[5-1+0x04] &= 0xf7; params.regs[5-1+0x04] |= 0x04; /* turn off doubleword mode */ params.regs[10+0x14] &= 0xbf; /* turn off word addressing */ params.regs[10+0x17] |= 0x40; /* set logical screen width */ params.regs[10+0x13] = 80; /* set 240 lines */ params.regs[10+0x11] = 0x2c; params.regs[10+0x06] = 0x0d; params.regs[10+0x07] = 0x3e; params.regs[10+0x10] = 0xea; params.regs[10+0x11] = 0xac; params.regs[10+0x12] = 0xdf; params.regs[10+0x15] = 0xe7; params.regs[10+0x16] = 0x06; /* set vertical sync polarity to reflect aspect ratio */ params.regs[9] = 0xe3; goto setup_grmode; case M_BG320: case M_CG320: case M_BG640: case M_CG320_D: case M_CG640_E: case M_CG640x350: case M_ENH_CG640: case M_BG640x480: case M_CG640x480: case M_VGA_CG320: setup_grmode: vga_load_state(adp, ¶ms); break; default: return EINVAL; } adp->va_mode = mode; info.vi_flags &= ~V_INFO_LINEAR; /* XXX */ update_adapter_info(adp, &info); /* move hardware cursor out of the way */ vidd_set_hw_cursor(adp, -1, -1); return 0; #else /* VGA_NO_MODE_CHANGE */ return ENODEV; #endif /* VGA_NO_MODE_CHANGE */ } #ifndef VGA_NO_FONT_LOADING static void set_font_mode(video_adapter_t *adp, u_char *buf) { u_char *mp; int s; s = splhigh(); /* save register values */ if (adp->va_type == KD_VGA) { outb(TSIDX, 0x02); buf[0] = inb(TSREG); outb(TSIDX, 0x04); buf[1] = inb(TSREG); outb(GDCIDX, 0x04); buf[2] = inb(GDCREG); outb(GDCIDX, 0x05); buf[3] = inb(GDCREG); outb(GDCIDX, 0x06); buf[4] = inb(GDCREG); inb(adp->va_crtc_addr + 6); outb(ATC, 0x10); buf[5] = inb(ATC + 1); } else /* if (adp->va_type == KD_EGA) */ { /* * EGA cannot be read; copy parameters from the mode parameter * table. */ mp = get_mode_param(adp->va_mode); buf[0] = mp[5 + 0x02 - 1]; buf[1] = mp[5 + 0x04 - 1]; buf[2] = mp[55 + 0x04]; buf[3] = mp[55 + 0x05]; buf[4] = mp[55 + 0x06]; buf[5] = mp[35 + 0x10]; } /* setup vga for loading fonts */ inb(adp->va_crtc_addr + 6); /* reset flip-flop */ outb(ATC, 0x10); outb(ATC, buf[5] & ~0x01); inb(adp->va_crtc_addr + 6); /* reset flip-flop */ outb(ATC, 0x20); /* enable palette */ #ifdef VGA_SLOW_IOACCESS #ifdef VGA_ALT_SEQACCESS outb(TSIDX, 0x00); outb(TSREG, 0x01); #endif outb(TSIDX, 0x02); outb(TSREG, 0x04); outb(TSIDX, 0x04); outb(TSREG, 0x07); #ifdef VGA_ALT_SEQACCESS outb(TSIDX, 0x00); outb(TSREG, 0x03); #endif outb(GDCIDX, 0x04); outb(GDCREG, 0x02); outb(GDCIDX, 0x05); outb(GDCREG, 0x00); outb(GDCIDX, 0x06); outb(GDCREG, 0x04); #else /* VGA_SLOW_IOACCESS */ #ifdef VGA_ALT_SEQACCESS outw(TSIDX, 0x0100); #endif outw(TSIDX, 0x0402); outw(TSIDX, 0x0704); #ifdef VGA_ALT_SEQACCESS outw(TSIDX, 0x0300); #endif outw(GDCIDX, 0x0204); outw(GDCIDX, 0x0005); outw(GDCIDX, 0x0406); /* addr = a0000, 64kb */ #endif /* VGA_SLOW_IOACCESS */ splx(s); } static void set_normal_mode(video_adapter_t *adp, u_char *buf) { int s; s = splhigh(); /* setup vga for normal operation mode again */ inb(adp->va_crtc_addr + 6); /* reset flip-flop */ outb(ATC, 0x10); outb(ATC, buf[5]); inb(adp->va_crtc_addr + 6); /* reset flip-flop */ outb(ATC, 0x20); /* enable palette */ #ifdef VGA_SLOW_IOACCESS #ifdef VGA_ALT_SEQACCESS outb(TSIDX, 0x00); outb(TSREG, 0x01); #endif outb(TSIDX, 0x02); outb(TSREG, buf[0]); outb(TSIDX, 0x04); outb(TSREG, buf[1]); #ifdef VGA_ALT_SEQACCESS outb(TSIDX, 0x00); outb(TSREG, 0x03); #endif outb(GDCIDX, 0x04); outb(GDCREG, buf[2]); outb(GDCIDX, 0x05); outb(GDCREG, buf[3]); if (adp->va_crtc_addr == MONO_CRTC) { outb(GDCIDX, 0x06); outb(GDCREG,(buf[4] & 0x03) | 0x08); } else { outb(GDCIDX, 0x06); outb(GDCREG,(buf[4] & 0x03) | 0x0c); } #else /* VGA_SLOW_IOACCESS */ #ifdef VGA_ALT_SEQACCESS outw(TSIDX, 0x0100); #endif outw(TSIDX, 0x0002 | (buf[0] << 8)); outw(TSIDX, 0x0004 | (buf[1] << 8)); #ifdef VGA_ALT_SEQACCESS outw(TSIDX, 0x0300); #endif outw(GDCIDX, 0x0004 | (buf[2] << 8)); outw(GDCIDX, 0x0005 | (buf[3] << 8)); if (adp->va_crtc_addr == MONO_CRTC) outw(GDCIDX, 0x0006 | (((buf[4] & 0x03) | 0x08)<<8)); else outw(GDCIDX, 0x0006 | (((buf[4] & 0x03) | 0x0c)<<8)); #endif /* VGA_SLOW_IOACCESS */ splx(s); } #endif /* VGA_NO_FONT_LOADING */ /* * save_font(): * Read the font data in the requested font page from the video adapter. * * EGA/VGA */ static int vga_save_font(video_adapter_t *adp, int page, int fontsize, int fontwidth, u_char *data, int ch, int count) { #ifndef VGA_NO_FONT_LOADING u_char buf[PARAM_BUFSIZE]; vm_offset_t segment; int c; #ifdef VGA_ALT_SEQACCESS int s; u_char val = 0; #endif prologue(adp, V_ADP_FONT, ENODEV); if (fontsize < 14) { /* FONT_8 */ fontsize = 8; } else if (fontsize >= 32) { fontsize = 32; } else if (fontsize >= 16) { /* FONT_16 */ fontsize = 16; } else { /* FONT_14 */ fontsize = 14; } if (page < 0 || page >= 8 || fontwidth != 8) return EINVAL; segment = FONT_BUF + 0x4000*page; if (page > 3) segment -= 0xe000; #ifdef VGA_ALT_SEQACCESS if (adp->va_type == KD_VGA) { /* what about EGA? XXX */ s = splhigh(); outb(TSIDX, 0x00); outb(TSREG, 0x01); outb(TSIDX, 0x01); val = inb(TSREG); /* disable screen */ outb(TSIDX, 0x01); outb(TSREG, val | 0x20); outb(TSIDX, 0x00); outb(TSREG, 0x03); splx(s); } #endif set_font_mode(adp, buf); if (fontsize == 32) { bcopy_fromio((uintptr_t)segment + ch*32, data, fontsize*count); } else { for (c = ch; count > 0; ++c, --count) { bcopy_fromio((uintptr_t)segment + c*32, data, fontsize); data += fontsize; } } set_normal_mode(adp, buf); #ifdef VGA_ALT_SEQACCESS if (adp->va_type == KD_VGA) { s = splhigh(); outb(TSIDX, 0x00); outb(TSREG, 0x01); outb(TSIDX, 0x01); outb(TSREG, val & 0xdf); /* enable screen */ outb(TSIDX, 0x00); outb(TSREG, 0x03); splx(s); } #endif return 0; #else /* VGA_NO_FONT_LOADING */ return ENODEV; #endif /* VGA_NO_FONT_LOADING */ } /* * load_font(): * Set the font data in the requested font page. * NOTE: it appears that some recent video adapters do not support * the font page other than 0... XXX * * EGA/VGA */ static int vga_load_font(video_adapter_t *adp, int page, int fontsize, int fontwidth, u_char *data, int ch, int count) { #ifndef VGA_NO_FONT_LOADING u_char buf[PARAM_BUFSIZE]; vm_offset_t segment; int c; #ifdef VGA_ALT_SEQACCESS int s; u_char val = 0; #endif prologue(adp, V_ADP_FONT, ENODEV); if (fontsize < 14) { /* FONT_8 */ fontsize = 8; } else if (fontsize >= 32) { fontsize = 32; } else if (fontsize >= 16) { /* FONT_16 */ fontsize = 16; } else { /* FONT_14 */ fontsize = 14; } if (page < 0 || page >= 8 || fontwidth != 8) return EINVAL; segment = FONT_BUF + 0x4000*page; if (page > 3) segment -= 0xe000; #ifdef VGA_ALT_SEQACCESS if (adp->va_type == KD_VGA) { /* what about EGA? XXX */ s = splhigh(); outb(TSIDX, 0x00); outb(TSREG, 0x01); outb(TSIDX, 0x01); val = inb(TSREG); /* disable screen */ outb(TSIDX, 0x01); outb(TSREG, val | 0x20); outb(TSIDX, 0x00); outb(TSREG, 0x03); splx(s); } #endif set_font_mode(adp, buf); if (fontsize == 32) { bcopy_toio(data, (uintptr_t)segment + ch*32, fontsize*count); } else { for (c = ch; count > 0; ++c, --count) { bcopy_toio(data, (uintptr_t)segment + c*32, fontsize); data += fontsize; } } set_normal_mode(adp, buf); #ifdef VGA_ALT_SEQACCESS if (adp->va_type == KD_VGA) { s = splhigh(); outb(TSIDX, 0x00); outb(TSREG, 0x01); outb(TSIDX, 0x01); outb(TSREG, val & 0xdf); /* enable screen */ outb(TSIDX, 0x00); outb(TSREG, 0x03); splx(s); } #endif return 0; #else /* VGA_NO_FONT_LOADING */ return ENODEV; #endif /* VGA_NO_FONT_LOADING */ } /* * show_font(): * Activate the requested font page. * NOTE: it appears that some recent video adapters do not support * the font page other than 0... XXX * * EGA/VGA */ static int vga_show_font(video_adapter_t *adp, int page) { #ifndef VGA_NO_FONT_LOADING static u_char cg[] = { 0x00, 0x05, 0x0a, 0x0f, 0x30, 0x35, 0x3a, 0x3f }; int s; prologue(adp, V_ADP_FONT, ENODEV); if (page < 0 || page >= 8) return EINVAL; s = splhigh(); outb(TSIDX, 0x03); outb(TSREG, cg[page]); splx(s); return 0; #else /* VGA_NO_FONT_LOADING */ return ENODEV; #endif /* VGA_NO_FONT_LOADING */ } /* * save_palette(): * Read DAC values. The values have expressed in 8 bits. * * VGA */ static int vga_save_palette(video_adapter_t *adp, u_char *palette) { int bits; int i; prologue(adp, V_ADP_PALETTE, ENODEV); /* * We store 8 bit values in the palette buffer, while the standard * VGA has 6 bit DAC . */ outb(PALRADR, 0x00); bits = (adp->va_flags & V_ADP_DAC8) != 0 ? 0 : 2; for (i = 0; i < 256*3; ++i) palette[i] = inb(PALDATA) << bits; inb(adp->va_crtc_addr + 6); /* reset flip/flop */ return 0; } static int vga_save_palette2(video_adapter_t *adp, int base, int count, u_char *r, u_char *g, u_char *b) { int bits; int i; prologue(adp, V_ADP_PALETTE, ENODEV); outb(PALRADR, base); bits = (adp->va_flags & V_ADP_DAC8) != 0 ? 0 : 2; for (i = 0; i < count; ++i) { r[i] = inb(PALDATA) << bits; g[i] = inb(PALDATA) << bits; b[i] = inb(PALDATA) << bits; } inb(adp->va_crtc_addr + 6); /* reset flip/flop */ return 0; } /* * load_palette(): * Set DAC values. * * VGA */ static int vga_load_palette(video_adapter_t *adp, u_char *palette) { int bits; int i; prologue(adp, V_ADP_PALETTE, ENODEV); outb(PIXMASK, 0xff); /* no pixelmask */ outb(PALWADR, 0x00); bits = (adp->va_flags & V_ADP_DAC8) != 0 ? 0 : 2; for (i = 0; i < 256*3; ++i) outb(PALDATA, palette[i] >> bits); inb(adp->va_crtc_addr + 6); /* reset flip/flop */ outb(ATC, 0x20); /* enable palette */ return 0; } static int vga_load_palette2(video_adapter_t *adp, int base, int count, u_char *r, u_char *g, u_char *b) { int bits; int i; prologue(adp, V_ADP_PALETTE, ENODEV); outb(PIXMASK, 0xff); /* no pixelmask */ outb(PALWADR, base); bits = (adp->va_flags & V_ADP_DAC8) != 0 ? 0 : 2; for (i = 0; i < count; ++i) { outb(PALDATA, r[i] >> bits); outb(PALDATA, g[i] >> bits); outb(PALDATA, b[i] >> bits); } inb(adp->va_crtc_addr + 6); /* reset flip/flop */ outb(ATC, 0x20); /* enable palette */ return 0; } /* * set_border(): * Change the border color. * * CGA/EGA/VGA */ static int vga_set_border(video_adapter_t *adp, int color) { prologue(adp, V_ADP_BORDER, ENODEV); switch (adp->va_type) { case KD_EGA: case KD_VGA: inb(adp->va_crtc_addr + 6); /* reset flip-flop */ outb(ATC, 0x31); outb(ATC, color & 0xff); break; case KD_CGA: outb(adp->va_crtc_addr + 5, color & 0x0f); /* color select register */ break; case KD_MONO: case KD_HERCULES: default: break; } return 0; } /* * save_state(): * Read video register values. * NOTE: this function only reads the standard EGA/VGA registers. * any extra/extended registers of SVGA adapters are not saved. * * VGA */ static int vga_save_state(video_adapter_t *adp, void *p, size_t size) { video_info_t info; u_char *buf; int crtc_addr; int i, j; int s; if (size == 0) { /* return the required buffer size */ prologue(adp, V_ADP_STATESAVE, 0); return sizeof(adp_state_t); } else { prologue(adp, V_ADP_STATESAVE, ENODEV); if (size < sizeof(adp_state_t)) return EINVAL; } ((adp_state_t *)p)->sig = V_STATE_SIG; buf = ((adp_state_t *)p)->regs; bzero(buf, V_MODE_PARAM_SIZE); crtc_addr = adp->va_crtc_addr; s = splhigh(); outb(TSIDX, 0x00); outb(TSREG, 0x01); /* stop sequencer */ for (i = 0, j = 5; i < 4; i++) { outb(TSIDX, i + 1); buf[j++] = inb(TSREG); } buf[9] = inb(MISC + 10); /* dot-clock */ outb(TSIDX, 0x00); outb(TSREG, 0x03); /* start sequencer */ for (i = 0, j = 10; i < 25; i++) { /* crtc */ outb(crtc_addr, i); buf[j++] = inb(crtc_addr + 1); } for (i = 0, j = 35; i < 20; i++) { /* attribute ctrl */ inb(crtc_addr + 6); /* reset flip-flop */ outb(ATC, i); buf[j++] = inb(ATC + 1); } for (i = 0, j = 55; i < 9; i++) { /* graph data ctrl */ outb(GDCIDX, i); buf[j++] = inb(GDCREG); } inb(crtc_addr + 6); /* reset flip-flop */ outb(ATC, 0x20); /* enable palette */ splx(s); #if 1 if (vga_get_info(adp, adp->va_mode, &info) == 0) { if (info.vi_flags & V_INFO_GRAPHICS) { buf[0] = info.vi_width/info.vi_cwidth; /* COLS */ buf[1] = info.vi_height/info.vi_cheight - 1; /* ROWS */ } else { buf[0] = info.vi_width; /* COLS */ buf[1] = info.vi_height - 1; /* ROWS */ } buf[2] = info.vi_cheight; /* POINTS */ } #else buf[0] = readb(BIOS_PADDRTOVADDR(0x44a)); /* COLS */ buf[1] = readb(BIOS_PADDRTOVADDR(0x484)); /* ROWS */ buf[2] = readb(BIOS_PADDRTOVADDR(0x485)); /* POINTS */ buf[3] = readb(BIOS_PADDRTOVADDR(0x44c)); buf[4] = readb(BIOS_PADDRTOVADDR(0x44d)); #endif return 0; } /* * load_state(): * Set video registers at once. * NOTE: this function only updates the standard EGA/VGA registers. * any extra/extended registers of SVGA adapters are not changed. * * EGA/VGA */ static int vga_load_state(video_adapter_t *adp, void *p) { u_char *buf; int crtc_addr; int s; int i; prologue(adp, V_ADP_STATELOAD, ENODEV); if (((adp_state_t *)p)->sig != V_STATE_SIG) return EINVAL; buf = ((adp_state_t *)p)->regs; crtc_addr = adp->va_crtc_addr; #if VGA_DEBUG > 1 dump_buffer(buf, V_MODE_PARAM_SIZE); #endif s = splhigh(); outb(TSIDX, 0x00); outb(TSREG, 0x01); /* stop sequencer */ for (i = 0; i < 4; ++i) { /* program sequencer */ outb(TSIDX, i + 1); outb(TSREG, buf[i + 5]); } outb(MISC, buf[9]); /* set dot-clock */ outb(TSIDX, 0x00); outb(TSREG, 0x03); /* start sequencer */ outb(crtc_addr, 0x11); outb(crtc_addr + 1, inb(crtc_addr + 1) & 0x7F); for (i = 0; i < 25; ++i) { /* program crtc */ outb(crtc_addr, i); outb(crtc_addr + 1, buf[i + 10]); } inb(crtc_addr+6); /* reset flip-flop */ for (i = 0; i < 20; ++i) { /* program attribute ctrl */ outb(ATC, i); outb(ATC, buf[i + 35]); } for (i = 0; i < 9; ++i) { /* program graph data ctrl */ outb(GDCIDX, i); outb(GDCREG, buf[i + 55]); } inb(crtc_addr + 6); /* reset flip-flop */ outb(ATC, 0x20); /* enable palette */ #ifdef notyet /* a temporary workaround for kernel panic, XXX */ #ifndef VGA_NO_BIOS if (adp->va_unit == V_ADP_PRIMARY) { writeb(BIOS_PADDRTOVADDR(0x44a), buf[0]); /* COLS */ writeb(BIOS_PADDRTOVADDR(0x484), buf[1] + rows_offset - 1); /* ROWS */ writeb(BIOS_PADDRTOVADDR(0x485), buf[2]); /* POINTS */ #if 0 writeb(BIOS_PADDRTOVADDR(0x44c), buf[3]); writeb(BIOS_PADDRTOVADDR(0x44d), buf[4]); #endif } #endif /* VGA_NO_BIOS */ #endif /* notyet */ splx(s); return 0; } /* * set_origin(): * Change the origin (window mapping) of the banked frame buffer. */ static int vga_set_origin(video_adapter_t *adp, off_t offset) { /* * The standard video modes do not require window mapping; * always return error. */ return ENODEV; } /* * read_hw_cursor(): * Read the position of the hardware text cursor. * * all adapters */ static int vga_read_hw_cursor(video_adapter_t *adp, int *col, int *row) { u_int16_t off; int s; if (!vga_init_done) return ENXIO; if (adp->va_info.vi_flags & V_INFO_GRAPHICS) return ENODEV; s = spltty(); outb(adp->va_crtc_addr, 14); off = inb(adp->va_crtc_addr + 1); outb(adp->va_crtc_addr, 15); off = (off << 8) | inb(adp->va_crtc_addr + 1); splx(s); *row = off / adp->va_info.vi_width; *col = off % adp->va_info.vi_width; return 0; } /* * set_hw_cursor(): * Move the hardware text cursor. If col and row are both -1, * the cursor won't be shown. * * all adapters */ static int vga_set_hw_cursor(video_adapter_t *adp, int col, int row) { u_int16_t off; int s; if (!vga_init_done) return ENXIO; if ((col == -1) && (row == -1)) { off = -1; } else { if (adp->va_info.vi_flags & V_INFO_GRAPHICS) return ENODEV; off = row*adp->va_info.vi_width + col; } s = spltty(); outb(adp->va_crtc_addr, 14); outb(adp->va_crtc_addr + 1, off >> 8); outb(adp->va_crtc_addr, 15); outb(adp->va_crtc_addr + 1, off & 0x00ff); splx(s); return 0; } /* * set_hw_cursor_shape(): * Change the shape of the hardware text cursor. If the height is * zero or negative, the cursor won't be shown. * * all adapters */ static int vga_set_hw_cursor_shape(video_adapter_t *adp, int base, int height, int celsize, int blink) { int s; if (!vga_init_done) return ENXIO; s = spltty(); switch (adp->va_type) { case KD_VGA: case KD_CGA: case KD_MONO: case KD_HERCULES: default: if (height <= 0) { /* make the cursor invisible */ outb(adp->va_crtc_addr, 10); outb(adp->va_crtc_addr + 1, 32); outb(adp->va_crtc_addr, 11); outb(adp->va_crtc_addr + 1, 0); } else { outb(adp->va_crtc_addr, 10); outb(adp->va_crtc_addr + 1, celsize - base - height); outb(adp->va_crtc_addr, 11); outb(adp->va_crtc_addr + 1, celsize - base - 1); } break; case KD_EGA: if (height <= 0) { /* make the cursor invisible */ outb(adp->va_crtc_addr, 10); outb(adp->va_crtc_addr + 1, celsize); outb(adp->va_crtc_addr, 11); outb(adp->va_crtc_addr + 1, 0); } else { outb(adp->va_crtc_addr, 10); outb(adp->va_crtc_addr + 1, celsize - base - height); outb(adp->va_crtc_addr, 11); outb(adp->va_crtc_addr + 1, celsize - base); } break; } splx(s); return 0; } /* * blank_display() * Put the display in power save/power off mode. * * all adapters */ static int vga_blank_display(video_adapter_t *adp, int mode) { u_char val; int s; s = splhigh(); switch (adp->va_type) { case KD_VGA: switch (mode) { case V_DISPLAY_SUSPEND: case V_DISPLAY_STAND_BY: outb(TSIDX, 0x01); val = inb(TSREG); outb(TSIDX, 0x01); outb(TSREG, val | 0x20); outb(adp->va_crtc_addr, 0x17); val = inb(adp->va_crtc_addr + 1); outb(adp->va_crtc_addr + 1, val & ~0x80); break; case V_DISPLAY_BLANK: outb(TSIDX, 0x01); val = inb(TSREG); outb(TSIDX, 0x01); outb(TSREG, val | 0x20); break; case V_DISPLAY_ON: outb(TSIDX, 0x01); val = inb(TSREG); outb(TSIDX, 0x01); outb(TSREG, val & 0xDF); outb(adp->va_crtc_addr, 0x17); val = inb(adp->va_crtc_addr + 1); outb(adp->va_crtc_addr + 1, val | 0x80); break; } break; case KD_EGA: /* no support yet */ splx(s); return ENODEV; case KD_CGA: switch (mode) { case V_DISPLAY_SUSPEND: case V_DISPLAY_STAND_BY: case V_DISPLAY_BLANK: outb(adp->va_crtc_addr + 4, 0x25); break; case V_DISPLAY_ON: outb(adp->va_crtc_addr + 4, 0x2d); break; } break; case KD_MONO: case KD_HERCULES: switch (mode) { case V_DISPLAY_SUSPEND: case V_DISPLAY_STAND_BY: case V_DISPLAY_BLANK: outb(adp->va_crtc_addr + 4, 0x21); break; case V_DISPLAY_ON: outb(adp->va_crtc_addr + 4, 0x29); break; } break; default: break; } splx(s); return 0; } /* * mmap(): * Mmap frame buffer. * * all adapters */ static int vga_mmap_buf(video_adapter_t *adp, vm_ooffset_t offset, vm_paddr_t *paddr, int prot, vm_memattr_t *memattr) { if (adp->va_info.vi_flags & V_INFO_LINEAR) return -1; #if VGA_DEBUG > 0 printf("vga_mmap_buf(): window:0x%jx, offset:0x%jx\n", (uintmax_t)adp->va_info.vi_window, (uintmax_t)offset); #endif /* XXX: is this correct? */ if (offset > adp->va_window_size - PAGE_SIZE) return -1; *paddr = adp->va_info.vi_window + offset; return 0; } #ifndef VGA_NO_MODE_CHANGE static void planar_fill(video_adapter_t *adp, int val) { int length; int at; /* position in the frame buffer */ int l; outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ outw(GDCIDX, 0x0f01); /* set/reset enable */ outw(GDCIDX, 0xff08); /* bit mask */ outw(GDCIDX, (val << 8) | 0x00); /* set/reset */ at = 0; length = adp->va_line_width*adp->va_info.vi_height; while (length > 0) { l = imin(length, adp->va_window_size); vidd_set_win_org(adp, at); bzero_io(adp->va_window, l); length -= l; at += l; } outw(GDCIDX, 0x0000); /* set/reset */ outw(GDCIDX, 0x0001); /* set/reset enable */ } static void packed_fill(video_adapter_t *adp, int val) { int length; int at; /* position in the frame buffer */ int l; at = 0; length = adp->va_line_width*adp->va_info.vi_height; while (length > 0) { l = imin(length, adp->va_window_size); vidd_set_win_org(adp, at); fill_io(val, adp->va_window, l); length -= l; at += l; } } static void direct_fill(video_adapter_t *adp, int val) { int length; int at; /* position in the frame buffer */ int l; at = 0; length = adp->va_line_width*adp->va_info.vi_height; while (length > 0) { l = imin(length, adp->va_window_size); vidd_set_win_org(adp, at); switch (adp->va_info.vi_pixel_size) { case sizeof(u_int16_t): fillw_io(val, adp->va_window, l/sizeof(u_int16_t)); break; case 3: /* FIXME */ break; case sizeof(u_int32_t): filll_io(val, adp->va_window, l/sizeof(u_int32_t)); break; } length -= l; at += l; } } static int vga_clear(video_adapter_t *adp) { switch (adp->va_info.vi_mem_model) { case V_INFO_MM_TEXT: /* do nothing? XXX */ break; case V_INFO_MM_PLANAR: planar_fill(adp, 0); break; case V_INFO_MM_PACKED: packed_fill(adp, 0); break; case V_INFO_MM_DIRECT: direct_fill(adp, 0); break; } return 0; } #ifdef notyet static void planar_fill_rect(video_adapter_t *adp, int val, int x, int y, int cx, int cy) { int banksize; int bank; int pos; int offset; /* offset within window */ int bx; int l; outw(GDCIDX, 0x0005); /* read mode 0, write mode 0 */ outw(GDCIDX, 0x0003); /* data rotate/function select */ outw(GDCIDX, 0x0f01); /* set/reset enable */ outw(GDCIDX, 0xff08); /* bit mask */ outw(GDCIDX, (val << 8) | 0x00); /* set/reset */ banksize = adp->va_window_size; bank = -1; while (cy > 0) { pos = adp->va_line_width*y + x/8; if (bank != pos/banksize) { vidd_set_win_org(adp, pos); bank = pos/banksize; } offset = pos%banksize; bx = (x + cx)/8 - x/8; if (x % 8) { outw(GDCIDX, ((0xff00 >> (x % 8)) & 0xff00) | 0x08); writeb(adp->va_window + offset, 0); ++offset; --bx; if (offset >= banksize) { offset = 0; ++bank; /* next bank */ vidd_set_win_org(adp, bank*banksize); } outw(GDCIDX, 0xff08); /* bit mask */ } while (bx > 0) { l = imin(bx, banksize); bzero_io(adp->va_window + offset, l); offset += l; bx -= l; if (offset >= banksize) { offset = 0; ++bank; /* next bank */ vidd_set_win_org(adp, bank*banksize); } } if ((x + cx) % 8) { outw(GDCIDX, (~(0xff00 >> ((x + cx) % 8)) & 0xff00) | 0x08); writeb(adp->va_window + offset, 0); ++offset; if (offset >= banksize) { offset = 0; ++bank; /* next bank */ vidd_set_win_org(adp, bank*banksize); } outw(GDCIDX, 0xff08); /* bit mask */ } ++y; --cy; } outw(GDCIDX, 0xff08); /* bit mask */ outw(GDCIDX, 0x0000); /* set/reset */ outw(GDCIDX, 0x0001); /* set/reset enable */ } static void packed_fill_rect(video_adapter_t *adp, int val, int x, int y, int cx, int cy) { int banksize; int bank; int pos; int offset; /* offset within window */ int end; banksize = adp->va_window_size; bank = -1; cx *= adp->va_info.vi_pixel_size; while (cy > 0) { pos = adp->va_line_width*y + x*adp->va_info.vi_pixel_size; if (bank != pos/banksize) { vidd_set_win_org(adp, pos); bank = pos/banksize; } offset = pos%banksize; end = imin(offset + cx, banksize); fill_io(val, adp->va_window + offset, (end - offset)/adp->va_info.vi_pixel_size); /* the line may cross the window boundary */ if (offset + cx > banksize) { ++bank; /* next bank */ vidd_set_win_org(adp, bank*banksize); end = offset + cx - banksize; fill_io(val, adp->va_window, end/adp->va_info.vi_pixel_size); } ++y; --cy; } } static void direct_fill_rect16(video_adapter_t *adp, int val, int x, int y, int cx, int cy) { int banksize; int bank; int pos; int offset; /* offset within window */ int end; /* * XXX: the function assumes that banksize is a muliple of * sizeof(u_int16_t). */ banksize = adp->va_window_size; bank = -1; cx *= sizeof(u_int16_t); while (cy > 0) { pos = adp->va_line_width*y + x*sizeof(u_int16_t); if (bank != pos/banksize) { vidd_set_win_org(adp, pos); bank = pos/banksize; } offset = pos%banksize; end = imin(offset + cx, banksize); fillw_io(val, adp->va_window + offset, (end - offset)/sizeof(u_int16_t)); /* the line may cross the window boundary */ if (offset + cx > banksize) { ++bank; /* next bank */ vidd_set_win_org(adp, bank*banksize); end = offset + cx - banksize; fillw_io(val, adp->va_window, end/sizeof(u_int16_t)); } ++y; --cy; } } static void direct_fill_rect24(video_adapter_t *adp, int val, int x, int y, int cx, int cy) { int banksize; int bank; int pos; int offset; /* offset within window */ int end; int i; int j; u_int8_t b[3]; b[0] = val & 0x0000ff; b[1] = (val >> 8) & 0x0000ff; b[2] = (val >> 16) & 0x0000ff; banksize = adp->va_window_size; bank = -1; cx *= 3; while (cy > 0) { pos = adp->va_line_width*y + x*3; if (bank != pos/banksize) { vidd_set_win_org(adp, pos); bank = pos/banksize; } offset = pos%banksize; end = imin(offset + cx, banksize); for (i = 0, j = offset; j < end; i = (++i)%3, ++j) { writeb(adp->va_window + j, b[i]); } /* the line may cross the window boundary */ if (offset + cx >= banksize) { ++bank; /* next bank */ vidd_set_win_org(adp, bank*banksize); j = 0; end = offset + cx - banksize; for (; j < end; i = (++i)%3, ++j) { writeb(adp->va_window + j, b[i]); } } ++y; --cy; } } static void direct_fill_rect32(video_adapter_t *adp, int val, int x, int y, int cx, int cy) { int banksize; int bank; int pos; int offset; /* offset within window */ int end; /* * XXX: the function assumes that banksize is a muliple of * sizeof(u_int32_t). */ banksize = adp->va_window_size; bank = -1; cx *= sizeof(u_int32_t); while (cy > 0) { pos = adp->va_line_width*y + x*sizeof(u_int32_t); if (bank != pos/banksize) { vidd_set_win_org(adp, pos); bank = pos/banksize; } offset = pos%banksize; end = imin(offset + cx, banksize); filll_io(val, adp->va_window + offset, (end - offset)/sizeof(u_int32_t)); /* the line may cross the window boundary */ if (offset + cx > banksize) { ++bank; /* next bank */ vidd_set_win_org(adp, bank*banksize); end = offset + cx - banksize; filll_io(val, adp->va_window, end/sizeof(u_int32_t)); } ++y; --cy; } } static int vga_fill_rect(video_adapter_t *adp, int val, int x, int y, int cx, int cy) { switch (adp->va_info.vi_mem_model) { case V_INFO_MM_TEXT: /* do nothing? XXX */ break; case V_INFO_MM_PLANAR: planar_fill_rect(adp, val, x, y, cx, cy); break; case V_INFO_MM_PACKED: packed_fill_rect(adp, val, x, y, cx, cy); break; case V_INFO_MM_DIRECT: switch (adp->va_info.vi_pixel_size) { case sizeof(u_int16_t): direct_fill_rect16(adp, val, x, y, cx, cy); break; case 3: direct_fill_rect24(adp, val, x, y, cx, cy); break; case sizeof(u_int32_t): direct_fill_rect32(adp, val, x, y, cx, cy); break; } break; } return 0; } #else /* !notyet */ static int vga_fill_rect(video_adapter_t *adp, int val, int x, int y, int cx, int cy) { return ENODEV; } #endif /* notyet */ static int vga_bitblt(video_adapter_t *adp,...) { /* FIXME */ return ENODEV; } #endif /* !VGA_NO_MODE_CHANGE */ static int get_palette(video_adapter_t *adp, int base, int count, u_char *red, u_char *green, u_char *blue, u_char *trans) { u_char *r; u_char *g; u_char *b; if (count < 0 || base < 0 || count > 256 || base > 256 || base + count > 256) return EINVAL; r = malloc(count*3, M_DEVBUF, M_WAITOK); g = r + count; b = g + count; if (vga_save_palette2(adp, base, count, r, g, b)) { free(r, M_DEVBUF); return ENODEV; } copyout(r, red, count); copyout(g, green, count); copyout(b, blue, count); if (trans != NULL) { bzero(r, count); copyout(r, trans, count); } free(r, M_DEVBUF); return 0; } static int set_palette(video_adapter_t *adp, int base, int count, u_char *red, u_char *green, u_char *blue, u_char *trans) { u_char *r; u_char *g; u_char *b; int err; if (count < 0 || base < 0 || count > 256 || base > 256 || base + count > 256) return EINVAL; r = malloc(count*3, M_DEVBUF, M_WAITOK); g = r + count; b = g + count; err = copyin(red, r, count); if (!err) err = copyin(green, g, count); if (!err) err = copyin(blue, b, count); if (!err) err = vga_load_palette2(adp, base, count, r, g, b); free(r, M_DEVBUF); return (err ? ENODEV : 0); } static int vga_dev_ioctl(video_adapter_t *adp, u_long cmd, caddr_t arg) { switch (cmd) { case FBIO_GETWINORG: /* get frame buffer window origin */ *(u_int *)arg = 0; return 0; case FBIO_SETWINORG: /* set frame buffer window origin */ return ENODEV; case FBIO_SETDISPSTART: /* set display start address */ return (set_display_start(adp, ((video_display_start_t *)arg)->x, ((video_display_start_t *)arg)->y) ? ENODEV : 0); case FBIO_SETLINEWIDTH: /* set scan line length in pixel */ return (set_line_length(adp, *(u_int *)arg) ? ENODEV : 0); case FBIO_GETPALETTE: /* get color palette */ return get_palette(adp, ((video_color_palette_t *)arg)->index, ((video_color_palette_t *)arg)->count, ((video_color_palette_t *)arg)->red, ((video_color_palette_t *)arg)->green, ((video_color_palette_t *)arg)->blue, ((video_color_palette_t *)arg)->transparent); case FBIO_SETPALETTE: /* set color palette */ return set_palette(adp, ((video_color_palette_t *)arg)->index, ((video_color_palette_t *)arg)->count, ((video_color_palette_t *)arg)->red, ((video_color_palette_t *)arg)->green, ((video_color_palette_t *)arg)->blue, ((video_color_palette_t *)arg)->transparent); case FBIOGTYPE: /* get frame buffer type info. */ ((struct fbtype *)arg)->fb_type = fb_type(adp->va_type); ((struct fbtype *)arg)->fb_height = adp->va_info.vi_height; ((struct fbtype *)arg)->fb_width = adp->va_info.vi_width; ((struct fbtype *)arg)->fb_depth = adp->va_info.vi_depth; if ((adp->va_info.vi_depth <= 1) || (adp->va_info.vi_depth > 8)) ((struct fbtype *)arg)->fb_cmsize = 0; else ((struct fbtype *)arg)->fb_cmsize = 1 << adp->va_info.vi_depth; ((struct fbtype *)arg)->fb_size = adp->va_buffer_size; return 0; case FBIOGETCMAP: /* get color palette */ return get_palette(adp, ((struct fbcmap *)arg)->index, ((struct fbcmap *)arg)->count, ((struct fbcmap *)arg)->red, ((struct fbcmap *)arg)->green, ((struct fbcmap *)arg)->blue, NULL); case FBIOPUTCMAP: /* set color palette */ return set_palette(adp, ((struct fbcmap *)arg)->index, ((struct fbcmap *)arg)->count, ((struct fbcmap *)arg)->red, ((struct fbcmap *)arg)->green, ((struct fbcmap *)arg)->blue, NULL); default: return fb_commonioctl(adp, cmd, arg); } } static void dump_buffer(u_char *buf, size_t len) { int i; for(i = 0; i < len;) { printf("%02x ", buf[i]); if ((++i % 16) == 0) printf("\n"); } } /* * diag(): * Print some information about the video adapter and video modes, * with requested level of details. * * all adapters */ static int vga_diag(video_adapter_t *adp, int level) { u_char *mp; #if FB_DEBUG > 1 video_info_t info; int i; #endif if (!vga_init_done) return ENXIO; #if FB_DEBUG > 1 #ifndef VGA_NO_BIOS printf("vga: RTC equip. code:0x%02x, DCC code:0x%02x\n", rtcin(RTC_EQUIPMENT), readb(BIOS_PADDRTOVADDR(0x488))); printf("vga: CRTC:0x%x, video option:0x%02x, ", readw(BIOS_PADDRTOVADDR(0x463)), readb(BIOS_PADDRTOVADDR(0x487))); printf("rows:%d, cols:%d, font height:%d\n", readb(BIOS_PADDRTOVADDR(0x44a)), readb(BIOS_PADDRTOVADDR(0x484)) + 1, readb(BIOS_PADDRTOVADDR(0x485))); #endif /* VGA_NO_BIOS */ #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) printf("vga: param table EGA/VGA:%p", video_mode_ptr); printf(", CGA/MDA:%p\n", video_mode_ptr2); printf("vga: rows_offset:%d\n", rows_offset); #endif #endif /* FB_DEBUG > 1 */ fb_dump_adp_info(VGA_DRIVER_NAME, adp, level); #if FB_DEBUG > 1 if (adp->va_flags & V_ADP_MODECHANGE) { for (i = 0; bios_vmode[i].vi_mode != EOT; ++i) { if (bios_vmode[i].vi_mode == NA) continue; if (get_mode_param(bios_vmode[i].vi_mode) == NULL) continue; fb_dump_mode_info(VGA_DRIVER_NAME, adp, &bios_vmode[i], level); } } else { vga_get_info(adp, adp->va_initial_mode, &info); /* shouldn't fail */ fb_dump_mode_info(VGA_DRIVER_NAME, adp, &info, level); } #endif /* FB_DEBUG > 1 */ if ((adp->va_type != KD_EGA) && (adp->va_type != KD_VGA)) return 0; #if !defined(VGA_NO_BIOS) && !defined(VGA_NO_MODE_CHANGE) if (video_mode_ptr == NULL) printf("vga%d: %s: WARNING: video mode switching is not " "fully supported on this adapter\n", adp->va_unit, adp->va_name); #endif if (level <= 0) return 0; if (adp->va_type == KD_VGA) { printf("VGA parameters upon power-up\n"); dump_buffer(adpstate.regs, sizeof(adpstate.regs)); printf("VGA parameters in BIOS for mode %d\n", adp->va_initial_mode); dump_buffer(adpstate2.regs, sizeof(adpstate2.regs)); } mp = get_mode_param(adp->va_initial_mode); if (mp == NULL) /* this shouldn't be happening */ return 0; printf("EGA/VGA parameters to be used for mode %d\n", adp->va_initial_mode); dump_buffer(mp, V_MODE_PARAM_SIZE); return 0; } Index: head/sys/dev/fb/vgareg.h =================================================================== --- head/sys/dev/fb/vgareg.h (revision 343566) +++ head/sys/dev/fb/vgareg.h (revision 343567) @@ -1,101 +1,101 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Kazutaka YOKOTA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _DEV_FB_VGAREG_H_ #define _DEV_FB_VGAREG_H_ /* physical addresses */ #define MDA_BUF_BASE 0xb0000 #define MDA_BUF_SIZE 0x08000 #define MDA_BUF BIOS_PADDRTOVADDR(MDA_BUF_BASE) #define CGA_BUF_BASE 0xb8000 #define CGA_BUF_SIZE 0x08000 #define CGA_BUF BIOS_PADDRTOVADDR(CGA_BUF_BASE) #define EGA_BUF_BASE 0xa0000 #define EGA_BUF_SIZE 0x20000 #define EGA_BUF BIOS_PADDRTOVADDR(EGA_BUF_BASE) #define GRAPHICS_BUF_BASE 0xa0000 #define GRAPHICS_BUF_SIZE 0x10000 #define GRAPHICS_BUF BIOS_PADDRTOVADDR(GRAPHICS_BUF_BASE) #define FONT_BUF BIOS_PADDRTOVADDR(GRAPHICS_BUF_BASE) #define VIDEO_BUF_BASE 0xa0000 #define VIDEO_BUF_SIZE 0x20000 /* I/O port addresses */ #define MONO_CRTC (IO_MDA + 0x04) /* crt controller base mono */ #define COLOR_CRTC (IO_CGA + 0x04) /* crt controller base color */ #define MISC (IO_VGA + 0x02) /* misc output register */ #define ATC (IO_VGA + 0x00) /* attribute controller */ #define TSIDX (IO_VGA + 0x04) /* timing sequencer idx */ #define TSREG (IO_VGA + 0x05) /* timing sequencer data */ #define PIXMASK (IO_VGA + 0x06) /* pixel write mask */ #define PALRADR (IO_VGA + 0x07) /* palette read address */ #define PALWADR (IO_VGA + 0x08) /* palette write address */ #define PALDATA (IO_VGA + 0x09) /* palette data register */ #define GDCIDX (IO_VGA + 0x0E) /* graph data controller idx */ #define GDCREG (IO_VGA + 0x0F) /* graph data controller data */ #define VGA_DRIVER_NAME "vga" #define VGA_UNIT(dev) dev2unit(dev) #define VGA_MKMINOR(unit) (unit) #ifdef _KERNEL struct video_adapter; typedef struct vga_softc { struct video_adapter *adp; void *state_buf; void *pal_buf; #ifdef FB_INSTALL_CDEV genfb_softc_t gensc; #endif } vga_softc_t; int vga_probe_unit(int unit, struct video_adapter *adp, int flags); int vga_attach_unit(int unit, vga_softc_t *sc, int flags); #ifdef FB_INSTALL_CDEV int vga_open(struct cdev *dev, vga_softc_t *sc, int flag, int mode, struct thread *td); int vga_close(struct cdev *dev, vga_softc_t *sc, int flag, int mode, struct thread *td); int vga_read(struct cdev *dev, vga_softc_t *sc, struct uio *uio, int flag); int vga_write(struct cdev *dev, vga_softc_t *sc, struct uio *uio, int flag); int vga_ioctl(struct cdev *dev, vga_softc_t *sc, u_long cmd, caddr_t arg, int flag, struct thread *td); int vga_mmap(struct cdev *dev, vga_softc_t *sc, vm_ooffset_t offset, - vm_offset_t *paddr, int prot, vm_memattr_t *memattr); + vm_paddr_t *paddr, int prot, vm_memattr_t *memattr); #endif extern int (*vga_sub_configure)(int flags); #endif /* _KERNEL */ #endif /* _DEV_FB_VGAREG_H_ */ Index: head/sys/dev/syscons/syscons.c =================================================================== --- head/sys/dev/syscons/syscons.c (revision 343566) +++ head/sys/dev/syscons/syscons.c (revision 343567) @@ -1,4274 +1,4274 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992-1998 Søren Schmidt * All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Sascha Wildner * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_syscons.h" #include "opt_splash.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__arm__) || defined(__mips__) || \ defined(__powerpc__) || defined(__sparc64__) #include #else #include #endif #if defined( __i386__) || defined(__amd64__) #include #include #endif #include #if defined(__amd64__) || defined(__i386__) #include #include #include #endif #include #include #include #include #define COLD 0 #define WARM 1 #define DEFAULT_BLANKTIME (5*60) /* 5 minutes */ #define MAX_BLANKTIME (7*24*60*60) /* 7 days!? */ #define KEYCODE_BS 0x0e /* "<-- Backspace" key, XXX */ /* NULL-safe version of "tty_opened()" */ #define tty_opened_ns(tp) ((tp) != NULL && tty_opened(tp)) static u_char sc_kattrtab[MAXCPU]; static int sc_console_unit = -1; static int sc_saver_keyb_only = 1; static scr_stat *sc_console; static struct consdev *sc_consptr; static void *sc_kts[MAXCPU]; static struct sc_term_sw *sc_ktsw; static scr_stat main_console; static struct tty *main_devs[MAXCONS]; static char init_done = COLD; static int shutdown_in_progress = FALSE; static int suspend_in_progress = FALSE; static char sc_malloc = FALSE; static int saver_mode = CONS_NO_SAVER; /* LKM/user saver */ static int run_scrn_saver = FALSE; /* should run the saver? */ static int enable_bell = TRUE; /* enable beeper */ #ifndef SC_DISABLE_REBOOT static int enable_reboot = TRUE; /* enable keyboard reboot */ #endif #ifndef SC_DISABLE_KDBKEY static int enable_kdbkey = TRUE; /* enable keyboard debug */ #endif static long scrn_blank_time = 0; /* screen saver timeout value */ #ifdef DEV_SPLASH static int scrn_blanked; /* # of blanked screen */ static int sticky_splash = FALSE; static void none_saver(sc_softc_t *sc, int blank) { } static void (*current_saver)(sc_softc_t *, int) = none_saver; #endif #ifdef SC_NO_SUSPEND_VTYSWITCH static int sc_no_suspend_vtswitch = 1; #else static int sc_no_suspend_vtswitch = 0; #endif static int sc_susp_scr; static SYSCTL_NODE(_hw, OID_AUTO, syscons, CTLFLAG_RD, 0, "syscons"); static SYSCTL_NODE(_hw_syscons, OID_AUTO, saver, CTLFLAG_RD, 0, "saver"); SYSCTL_INT(_hw_syscons_saver, OID_AUTO, keybonly, CTLFLAG_RW, &sc_saver_keyb_only, 0, "screen saver interrupted by input only"); SYSCTL_INT(_hw_syscons, OID_AUTO, bell, CTLFLAG_RW, &enable_bell, 0, "enable bell"); #ifndef SC_DISABLE_REBOOT SYSCTL_INT(_hw_syscons, OID_AUTO, kbd_reboot, CTLFLAG_RW|CTLFLAG_SECURE, &enable_reboot, 0, "enable keyboard reboot"); #endif #ifndef SC_DISABLE_KDBKEY SYSCTL_INT(_hw_syscons, OID_AUTO, kbd_debug, CTLFLAG_RW|CTLFLAG_SECURE, &enable_kdbkey, 0, "enable keyboard debug"); #endif SYSCTL_INT(_hw_syscons, OID_AUTO, sc_no_suspend_vtswitch, CTLFLAG_RWTUN, &sc_no_suspend_vtswitch, 0, "Disable VT switch before suspend."); #if !defined(SC_NO_FONT_LOADING) && defined(SC_DFLT_FONT) #include "font.h" #endif tsw_ioctl_t *sc_user_ioctl; static bios_values_t bios_value; static int enable_panic_key; SYSCTL_INT(_machdep, OID_AUTO, enable_panic_key, CTLFLAG_RW, &enable_panic_key, 0, "Enable panic via keypress specified in kbdmap(5)"); #define SC_CONSOLECTL 255 #define VTY_WCHAN(sc, vty) (&SC_DEV(sc, vty)) /* prototypes */ static int sc_allocate_keyboard(sc_softc_t *sc, int unit); static int scvidprobe(int unit, int flags, int cons); static int sckbdprobe(int unit, int flags, int cons); static void scmeminit(void *arg); static int scdevtounit(struct tty *tp); static kbd_callback_func_t sckbdevent; static void scinit(int unit, int flags); static scr_stat *sc_get_stat(struct tty *tp); static void scterm(int unit, int flags); static void scshutdown(void *, int); static void scsuspend(void *); static void scresume(void *); static u_int scgetc(sc_softc_t *sc, u_int flags, struct sc_cnstate *sp); static void sc_puts(scr_stat *scp, u_char *buf, int len); #define SCGETC_CN 1 #define SCGETC_NONBLOCK 2 static void sccnupdate(scr_stat *scp); static scr_stat *alloc_scp(sc_softc_t *sc, int vty); static void init_scp(sc_softc_t *sc, int vty, scr_stat *scp); static timeout_t scrn_timer; static int and_region(int *s1, int *e1, int s2, int e2); static void scrn_update(scr_stat *scp, int show_cursor); #ifdef DEV_SPLASH static int scsplash_callback(int event, void *arg); static void scsplash_saver(sc_softc_t *sc, int show); static int add_scrn_saver(void (*this_saver)(sc_softc_t *, int)); static int remove_scrn_saver(void (*this_saver)(sc_softc_t *, int)); static int set_scrn_saver_mode(scr_stat *scp, int mode, u_char *pal, int border); static int restore_scrn_saver_mode(scr_stat *scp, int changemode); static void stop_scrn_saver(sc_softc_t *sc, void (*saver)(sc_softc_t *, int)); static int wait_scrn_saver_stop(sc_softc_t *sc); #define scsplash_stick(stick) (sticky_splash = (stick)) #else /* !DEV_SPLASH */ #define scsplash_stick(stick) #endif /* DEV_SPLASH */ static int do_switch_scr(sc_softc_t *sc, int s); static int vt_proc_alive(scr_stat *scp); static int signal_vt_rel(scr_stat *scp); static int signal_vt_acq(scr_stat *scp); static int finish_vt_rel(scr_stat *scp, int release, int *s); static int finish_vt_acq(scr_stat *scp); static void exchange_scr(sc_softc_t *sc); static void update_cursor_image(scr_stat *scp); static void change_cursor_shape(scr_stat *scp, int flags, int base, int height); static void update_font(scr_stat *); static int save_kbd_state(scr_stat *scp); static int update_kbd_state(scr_stat *scp, int state, int mask); static int update_kbd_leds(scr_stat *scp, int which); static int sc_kattr(void); static timeout_t blink_screen; static struct tty *sc_alloc_tty(int, int); static cn_probe_t sc_cnprobe; static cn_init_t sc_cninit; static cn_term_t sc_cnterm; static cn_getc_t sc_cngetc; static cn_putc_t sc_cnputc; static cn_grab_t sc_cngrab; static cn_ungrab_t sc_cnungrab; CONSOLE_DRIVER(sc); static tsw_open_t sctty_open; static tsw_close_t sctty_close; static tsw_outwakeup_t sctty_outwakeup; static tsw_ioctl_t sctty_ioctl; static tsw_mmap_t sctty_mmap; static struct ttydevsw sc_ttydevsw = { .tsw_open = sctty_open, .tsw_close = sctty_close, .tsw_outwakeup = sctty_outwakeup, .tsw_ioctl = sctty_ioctl, .tsw_mmap = sctty_mmap, }; static d_ioctl_t consolectl_ioctl; static d_close_t consolectl_close; static struct cdevsw consolectl_devsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT | D_TRACKCLOSE, .d_ioctl = consolectl_ioctl, .d_close = consolectl_close, .d_name = "consolectl", }; /* ec -- emergency console. */ static u_int ec_scroffset; static void ec_putc(int c) { uintptr_t fb; u_short *scrptr; u_int ind; int attr, column, mysize, width, xsize, yborder, ysize; if (c < 0 || c > 0xff || c == '\a') return; if (sc_console == NULL) { #if !defined(__amd64__) && !defined(__i386__) return; #else /* * This is enough for ec_putc() to work very early on x86 * if the kernel starts in normal color text mode. */ #ifdef __amd64__ fb = KERNBASE + 0xb8000; #else /* __i386__ */ - fb = PMAP_MAP_LOW + 0xb8000; + fb = pmap_get_map_low() + 0xb8000; #endif xsize = 80; ysize = 25; #endif } else { if (!ISTEXTSC(&main_console)) return; fb = main_console.sc->adp->va_window; xsize = main_console.xsize; ysize = main_console.ysize; } yborder = ysize / 5; scrptr = (u_short *)(void *)fb + xsize * yborder; mysize = xsize * (ysize - 2 * yborder); do { ind = ec_scroffset; column = ind % xsize; width = (c == '\b' ? -1 : c == '\t' ? (column + 8) & ~7 : c == '\r' ? -column : c == '\n' ? xsize - column : 1); if (width == 0 || (width < 0 && ind < -width)) return; } while (atomic_cmpset_rel_int(&ec_scroffset, ind, ind + width) == 0); if (c == '\b' || c == '\r') return; if (c == '\n') ind += xsize; /* XXX clearing from new pos is not atomic */ attr = sc_kattr(); if (c == '\t' || c == '\n') c = ' '; do scrptr[ind++ % mysize] = (attr << 8) | c; while (--width != 0); } int sc_probe_unit(int unit, int flags) { if (!vty_enabled(VTY_SC)) return ENXIO; if (!scvidprobe(unit, flags, FALSE)) { if (bootverbose) printf("%s%d: no video adapter found.\n", SC_DRIVER_NAME, unit); return ENXIO; } /* syscons will be attached even when there is no keyboard */ sckbdprobe(unit, flags, FALSE); return 0; } /* probe video adapters, return TRUE if found */ static int scvidprobe(int unit, int flags, int cons) { /* * Access the video adapter driver through the back door! * Video adapter drivers need to be configured before syscons. * However, when syscons is being probed as the low-level console, * they have not been initialized yet. We force them to initialize * themselves here. XXX */ vid_configure(cons ? VIO_PROBE_ONLY : 0); return (vid_find_adapter("*", unit) >= 0); } /* probe the keyboard, return TRUE if found */ static int sckbdprobe(int unit, int flags, int cons) { /* access the keyboard driver through the backdoor! */ kbd_configure(cons ? KB_CONF_PROBE_ONLY : 0); return (kbd_find_keyboard("*", unit) >= 0); } static char *adapter_name(video_adapter_t *adp) { static struct { int type; char *name[2]; } names[] = { { KD_MONO, { "MDA", "MDA" } }, { KD_HERCULES, { "Hercules", "Hercules" } }, { KD_CGA, { "CGA", "CGA" } }, { KD_EGA, { "EGA", "EGA (mono)" } }, { KD_VGA, { "VGA", "VGA (mono)" } }, { KD_TGA, { "TGA", "TGA" } }, { -1, { "Unknown", "Unknown" } }, }; int i; for (i = 0; names[i].type != -1; ++i) if (names[i].type == adp->va_type) break; return names[i].name[(adp->va_flags & V_ADP_COLOR) ? 0 : 1]; } static void sctty_outwakeup(struct tty *tp) { size_t len; u_char buf[PCBURST]; scr_stat *scp = sc_get_stat(tp); if (scp->status & SLKED || (scp == scp->sc->cur_scp && scp->sc->blink_in_progress)) return; for (;;) { len = ttydisc_getc(tp, buf, sizeof buf); if (len == 0) break; SC_VIDEO_LOCK(scp->sc); sc_puts(scp, buf, len); SC_VIDEO_UNLOCK(scp->sc); } } static struct tty * sc_alloc_tty(int index, int devnum) { struct sc_ttysoftc *stc; struct tty *tp; /* Allocate TTY object and softc to store unit number. */ stc = malloc(sizeof(struct sc_ttysoftc), M_DEVBUF, M_WAITOK); stc->st_index = index; stc->st_stat = NULL; tp = tty_alloc_mutex(&sc_ttydevsw, stc, &Giant); /* Create device node. */ tty_makedev(tp, NULL, "v%r", devnum); return (tp); } #ifdef SC_PIXEL_MODE static void sc_set_vesa_mode(scr_stat *scp, sc_softc_t *sc, int unit) { video_info_t info; u_char *font; int depth; int fontsize; int i; int vmode; vmode = 0; (void)resource_int_value("sc", unit, "vesa_mode", &vmode); if (vmode < M_VESA_BASE || vmode > M_VESA_MODE_MAX || vidd_get_info(sc->adp, vmode, &info) != 0 || !sc_support_pixel_mode(&info)) vmode = 0; /* * If the mode is unset or unsupported, search for an available * 800x600 graphics mode with the highest color depth. */ if (vmode == 0) { for (depth = 0, i = M_VESA_BASE; i <= M_VESA_MODE_MAX; i++) if (vidd_get_info(sc->adp, i, &info) == 0 && info.vi_width == 800 && info.vi_height == 600 && sc_support_pixel_mode(&info) && info.vi_depth > depth) { vmode = i; depth = info.vi_depth; } if (vmode == 0) return; vidd_get_info(sc->adp, vmode, &info); } #if !defined(SC_NO_FONT_LOADING) && defined(SC_DFLT_FONT) fontsize = info.vi_cheight; #else fontsize = scp->font_size; #endif if (fontsize < 14) fontsize = 8; else if (fontsize >= 16) fontsize = 16; else fontsize = 14; #ifndef SC_NO_FONT_LOADING switch (fontsize) { case 8: if ((sc->fonts_loaded & FONT_8) == 0) return; font = sc->font_8; break; case 14: if ((sc->fonts_loaded & FONT_14) == 0) return; font = sc->font_14; break; case 16: if ((sc->fonts_loaded & FONT_16) == 0) return; font = sc->font_16; break; } #else font = NULL; #endif #ifdef DEV_SPLASH if ((sc->flags & SC_SPLASH_SCRN) != 0) splash_term(sc->adp); #endif #ifndef SC_NO_HISTORY if (scp->history != NULL) { sc_vtb_append(&scp->vtb, 0, scp->history, scp->ypos * scp->xsize + scp->xpos); scp->history_pos = sc_vtb_tail(scp->history); } #endif vidd_set_mode(sc->adp, vmode); scp->status |= (UNKNOWN_MODE | PIXEL_MODE | MOUSE_HIDDEN); scp->status &= ~(GRAPHICS_MODE | MOUSE_VISIBLE); scp->xpixel = info.vi_width; scp->ypixel = info.vi_height; scp->xsize = scp->xpixel / 8; scp->ysize = scp->ypixel / fontsize; scp->xpos = 0; scp->ypos = scp->ysize - 1; scp->xoff = scp->yoff = 0; scp->font = font; scp->font_size = fontsize; scp->font_width = 8; scp->start = scp->xsize * scp->ysize - 1; scp->end = 0; scp->cursor_pos = scp->cursor_oldpos = scp->xsize * scp->xsize; scp->mode = sc->initial_mode = vmode; #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)sc->adp->va_window, FALSE); #endif sc_alloc_scr_buffer(scp, FALSE, FALSE); sc_init_emulator(scp, NULL); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(scp, FALSE); #endif #ifndef SC_NO_HISTORY sc_alloc_history_buffer(scp, 0, 0, FALSE); #endif sc_set_border(scp, scp->border); sc_set_cursor_image(scp); scp->status &= ~UNKNOWN_MODE; #ifdef DEV_SPLASH if ((sc->flags & SC_SPLASH_SCRN) != 0) splash_init(sc->adp, scsplash_callback, sc); #endif } #endif int sc_attach_unit(int unit, int flags) { sc_softc_t *sc; scr_stat *scp; struct cdev *dev; void *oldts, *ts; int i, vc; if (!vty_enabled(VTY_SC)) return ENXIO; flags &= ~SC_KERNEL_CONSOLE; if (sc_console_unit == unit) { /* * If this unit is being used as the system console, we need to * adjust some variables and buffers before and after scinit(). */ /* assert(sc_console != NULL) */ flags |= SC_KERNEL_CONSOLE; scmeminit(NULL); scinit(unit, flags); if (sc_console->tsw->te_size > 0) { sc_ktsw = sc_console->tsw; /* assert(sc_console->ts != NULL); */ oldts = sc_console->ts; for (i = 0; i <= mp_maxid; i++) { ts = malloc(sc_console->tsw->te_size, M_DEVBUF, M_WAITOK | M_ZERO); (*sc_console->tsw->te_init)(sc_console, &ts, SC_TE_COLD_INIT); sc_console->ts = ts; (*sc_console->tsw->te_default_attr)(sc_console, sc_kattrtab[i], SC_KERNEL_CONS_REV_ATTR); sc_kts[i] = ts; } sc_console->ts = oldts; (*sc_console->tsw->te_default_attr)(sc_console, SC_NORM_ATTR, SC_NORM_REV_ATTR); } } else { scinit(unit, flags); } sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); sc->config = flags; callout_init(&sc->ctimeout, 0); callout_init(&sc->cblink, 0); scp = sc_get_stat(sc->dev[0]); if (sc_console == NULL) /* sc_console_unit < 0 */ sc_console = scp; #ifdef SC_PIXEL_MODE if ((sc->config & SC_VESAMODE) != 0) sc_set_vesa_mode(scp, sc, unit); #endif /* SC_PIXEL_MODE */ /* initialize cursor */ if (!ISGRAPHSC(scp)) update_cursor_image(scp); /* get screen update going */ scrn_timer(sc); /* set up the keyboard */ (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); update_kbd_state(scp, scp->status, LOCK_MASK); printf("%s%d: %s <%d virtual consoles, flags=0x%x>\n", SC_DRIVER_NAME, unit, adapter_name(sc->adp), sc->vtys, sc->config); if (bootverbose) { printf("%s%d:", SC_DRIVER_NAME, unit); if (sc->adapter >= 0) printf(" fb%d", sc->adapter); if (sc->keyboard >= 0) printf(", kbd%d", sc->keyboard); if (scp->tsw) printf(", terminal emulator: %s (%s)", scp->tsw->te_name, scp->tsw->te_desc); printf("\n"); } /* Register suspend/resume/shutdown callbacks for the kernel console. */ if (sc_console_unit == unit) { EVENTHANDLER_REGISTER(power_suspend_early, scsuspend, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(power_resume, scresume, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(shutdown_pre_sync, scshutdown, NULL, SHUTDOWN_PRI_DEFAULT); } for (vc = 0; vc < sc->vtys; vc++) { if (sc->dev[vc] == NULL) { sc->dev[vc] = sc_alloc_tty(vc, vc + unit * MAXCONS); if (vc == 0 && sc->dev == main_devs) SC_STAT(sc->dev[0]) = &main_console; } /* * The first vty already has struct tty and scr_stat initialized * in scinit(). The other vtys will have these structs when * first opened. */ } dev = make_dev(&consolectl_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "consolectl"); dev->si_drv1 = sc->dev[0]; return 0; } static void scmeminit(void *arg) { if (!vty_enabled(VTY_SC)) return; if (sc_malloc) return; sc_malloc = TRUE; /* * As soon as malloc() becomes functional, we had better allocate * various buffers for the kernel console. */ if (sc_console_unit < 0) /* sc_console == NULL */ return; /* copy the temporary buffer to the final buffer */ sc_alloc_scr_buffer(sc_console, FALSE, FALSE); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(sc_console, FALSE); #endif #ifndef SC_NO_HISTORY /* initialize history buffer & pointers */ sc_alloc_history_buffer(sc_console, 0, 0, FALSE); #endif } /* XXX */ SYSINIT(sc_mem, SI_SUB_KMEM, SI_ORDER_ANY, scmeminit, NULL); static int scdevtounit(struct tty *tp) { int vty = SC_VTY(tp); if (vty == SC_CONSOLECTL) return ((sc_console != NULL) ? sc_console->sc->unit : -1); else if ((vty < 0) || (vty >= MAXCONS*sc_max_unit())) return -1; else return vty/MAXCONS; } static int sctty_open(struct tty *tp) { int unit = scdevtounit(tp); sc_softc_t *sc; scr_stat *scp; #ifndef __sparc64__ keyarg_t key; #endif DPRINTF(5, ("scopen: dev:%s, unit:%d, vty:%d\n", devtoname(tp->t_dev), unit, SC_VTY(tp))); sc = sc_get_softc(unit, (sc_console_unit == unit) ? SC_KERNEL_CONSOLE : 0); if (sc == NULL) return ENXIO; if (!tty_opened(tp)) { /* Use the current setting of the <-- key as default VERASE. */ /* If the Delete key is preferable, an stty is necessary */ #ifndef __sparc64__ if (sc->kbd != NULL) { key.keynum = KEYCODE_BS; (void)kbdd_ioctl(sc->kbd, GIO_KEYMAPENT, (caddr_t)&key); tp->t_termios.c_cc[VERASE] = key.key.map[0]; } #endif } scp = sc_get_stat(tp); if (scp == NULL) { scp = SC_STAT(tp) = alloc_scp(sc, SC_VTY(tp)); if (ISGRAPHSC(scp)) sc_set_pixel_mode(scp, NULL, 0, 0, 16, 8); } if (!tp->t_winsize.ws_col && !tp->t_winsize.ws_row) { tp->t_winsize.ws_col = scp->xsize; tp->t_winsize.ws_row = scp->ysize; } return (0); } static void sctty_close(struct tty *tp) { scr_stat *scp; int s; if (SC_VTY(tp) != SC_CONSOLECTL) { scp = sc_get_stat(tp); /* were we in the middle of the VT switching process? */ DPRINTF(5, ("sc%d: scclose(), ", scp->sc->unit)); s = spltty(); if ((scp == scp->sc->cur_scp) && (scp->sc->unit == sc_console_unit)) cnavailable(sc_consptr, TRUE); if (finish_vt_rel(scp, TRUE, &s) == 0) /* force release */ DPRINTF(5, ("reset WAIT_REL, ")); if (finish_vt_acq(scp) == 0) /* force acknowledge */ DPRINTF(5, ("reset WAIT_ACQ, ")); #ifdef not_yet_done if (scp == &main_console) { scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; } else { sc_vtb_destroy(&scp->vtb); #ifndef __sparc64__ sc_vtb_destroy(&scp->scr); #endif sc_free_history_buffer(scp, scp->ysize); SC_STAT(tp) = NULL; free(scp, M_DEVBUF); } #else scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; #endif scp->kbd_mode = K_XLATE; if (scp == scp->sc->cur_scp) (void)kbdd_ioctl(scp->sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); DPRINTF(5, ("done.\n")); } } #if 0 /* XXX mpsafetty: fix screensaver. What about outwakeup? */ static int scread(struct cdev *dev, struct uio *uio, int flag) { if (!sc_saver_keyb_only) sc_touch_scrn_saver(); return ttyread(dev, uio, flag); } #endif static int sckbdevent(keyboard_t *thiskbd, int event, void *arg) { sc_softc_t *sc; struct tty *cur_tty; int c, error = 0; size_t len; const u_char *cp; sc = (sc_softc_t *)arg; /* assert(thiskbd == sc->kbd) */ mtx_lock(&Giant); switch (event) { case KBDIO_KEYINPUT: break; case KBDIO_UNLOADING: sc->kbd = NULL; sc->keyboard = -1; kbd_release(thiskbd, (void *)&sc->keyboard); goto done; default: error = EINVAL; goto done; } /* * Loop while there is still input to get from the keyboard. * I don't think this is nessesary, and it doesn't fix * the Xaccel-2.1 keyboard hang, but it can't hurt. XXX */ while ((c = scgetc(sc, SCGETC_NONBLOCK, NULL)) != NOKEY) { cur_tty = SC_DEV(sc, sc->cur_scp->index); if (!tty_opened_ns(cur_tty)) continue; if ((*sc->cur_scp->tsw->te_input)(sc->cur_scp, c, cur_tty)) continue; switch (KEYFLAGS(c)) { case 0x0000: /* normal key */ ttydisc_rint(cur_tty, KEYCHAR(c), 0); break; case FKEY: /* function key, return string */ cp = (*sc->cur_scp->tsw->te_fkeystr)(sc->cur_scp, c); if (cp != NULL) { ttydisc_rint_simple(cur_tty, cp, strlen(cp)); break; } cp = kbdd_get_fkeystr(thiskbd, KEYCHAR(c), &len); if (cp != NULL) ttydisc_rint_simple(cur_tty, cp, len); break; case MKEY: /* meta is active, prepend ESC */ ttydisc_rint(cur_tty, 0x1b, 0); ttydisc_rint(cur_tty, KEYCHAR(c), 0); break; case BKEY: /* backtab fixed sequence (esc [ Z) */ ttydisc_rint_simple(cur_tty, "\x1B[Z", 3); break; } ttydisc_rint_done(cur_tty); } sc->cur_scp->status |= MOUSE_HIDDEN; done: mtx_unlock(&Giant); return (error); } static int sctty_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td) { int error; int i; struct cursor_attr *cap; sc_softc_t *sc; scr_stat *scp; int s; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) int ival; #endif /* If there is a user_ioctl function call that first */ if (sc_user_ioctl) { error = (*sc_user_ioctl)(tp, cmd, data, td); if (error != ENOIOCTL) return error; } error = sc_vid_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #ifndef SC_NO_HISTORY error = sc_hist_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #endif #ifndef SC_NO_SYSMOUSE error = sc_mouse_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #endif scp = sc_get_stat(tp); /* assert(scp != NULL) */ /* scp is sc_console, if SC_VTY(dev) == SC_CONSOLECTL. */ sc = scp->sc; if (scp->tsw) { error = (*scp->tsw->te_ioctl)(scp, tp, cmd, data, td); if (error != ENOIOCTL) return error; } switch (cmd) { /* process console hardware related ioctl's */ case GIO_ATTR: /* get current attributes */ /* this ioctl is not processed here, but in the terminal emulator */ return ENOTTY; case GIO_COLOR: /* is this a color console ? */ *(int *)data = (sc->adp->va_flags & V_ADP_COLOR) ? 1 : 0; return 0; case CONS_BLANKTIME: /* set screen saver timeout (0 = no saver) */ if (*(int *)data < 0 || *(int *)data > MAX_BLANKTIME) return EINVAL; s = spltty(); scrn_blank_time = *(int *)data; run_scrn_saver = (scrn_blank_time != 0); splx(s); return 0; case CONS_CURSORTYPE: /* set cursor type (old interface + HIDDEN) */ s = spltty(); *(int *)data &= CONS_CURSOR_ATTRS; sc_change_cursor_shape(scp, *(int *)data, -1, -1); splx(s); return 0; case CONS_GETCURSORSHAPE: /* get cursor shape (new interface) */ switch (((int *)data)[0] & (CONS_DEFAULT_CURSOR | CONS_LOCAL_CURSOR)) { case 0: cap = &sc->curs_attr; break; case CONS_LOCAL_CURSOR: cap = &scp->base_curs_attr; break; case CONS_DEFAULT_CURSOR: cap = &sc->dflt_curs_attr; break; case CONS_DEFAULT_CURSOR | CONS_LOCAL_CURSOR: cap = &scp->dflt_curs_attr; break; } if (((int *)data)[0] & CONS_CHARCURSOR_COLORS) { ((int *)data)[1] = cap->bg[0]; ((int *)data)[2] = cap->bg[1]; } else if (((int *)data)[0] & CONS_MOUSECURSOR_COLORS) { ((int *)data)[1] = cap->mouse_ba; ((int *)data)[2] = cap->mouse_ia; } else { ((int *)data)[1] = cap->base; ((int *)data)[2] = cap->height; } ((int *)data)[0] = cap->flags; return 0; case CONS_SETCURSORSHAPE: /* set cursor shape (new interface) */ s = spltty(); sc_change_cursor_shape(scp, ((int *)data)[0], ((int *)data)[1], ((int *)data)[2]); splx(s); return 0; case CONS_BELLTYPE: /* set bell type sound/visual */ if ((*(int *)data) & CONS_VISUAL_BELL) sc->flags |= SC_VISUAL_BELL; else sc->flags &= ~SC_VISUAL_BELL; if ((*(int *)data) & CONS_QUIET_BELL) sc->flags |= SC_QUIET_BELL; else sc->flags &= ~SC_QUIET_BELL; return 0; case CONS_GETINFO: /* get current (virtual) console info */ { vid_info_t *ptr = (vid_info_t*)data; if (ptr->size == sizeof(struct vid_info)) { ptr->m_num = sc->cur_scp->index; ptr->font_size = scp->font_size; ptr->mv_col = scp->xpos; ptr->mv_row = scp->ypos; ptr->mv_csz = scp->xsize; ptr->mv_rsz = scp->ysize; ptr->mv_hsz = (scp->history != NULL) ? scp->history->vtb_rows : 0; /* * The following fields are filled by the terminal emulator. XXX * * ptr->mv_norm.fore * ptr->mv_norm.back * ptr->mv_rev.fore * ptr->mv_rev.back */ ptr->mv_grfc.fore = 0; /* not supported */ ptr->mv_grfc.back = 0; /* not supported */ ptr->mv_ovscan = scp->border; if (scp == sc->cur_scp) save_kbd_state(scp); ptr->mk_keylock = scp->status & LOCK_MASK; return 0; } return EINVAL; } case CONS_GETVERS: /* get version number */ *(int*)data = 0x200; /* version 2.0 */ return 0; case CONS_IDLE: /* see if the screen has been idle */ /* * When the screen is in the GRAPHICS_MODE or UNKNOWN_MODE, * the user process may have been writing something on the * screen and syscons is not aware of it. Declare the screen * is NOT idle if it is in one of these modes. But there is * an exception to it; if a screen saver is running in the * graphics mode in the current screen, we should say that the * screen has been idle. */ *(int *)data = (sc->flags & SC_SCRN_IDLE) && (!ISGRAPHSC(sc->cur_scp) || (sc->cur_scp->status & SAVER_RUNNING)); return 0; case CONS_SAVERMODE: /* set saver mode */ switch(*(int *)data) { case CONS_NO_SAVER: case CONS_USR_SAVER: /* if a LKM screen saver is running, stop it first. */ scsplash_stick(FALSE); saver_mode = *(int *)data; s = spltty(); #ifdef DEV_SPLASH if ((error = wait_scrn_saver_stop(NULL))) { splx(s); return error; } #endif run_scrn_saver = TRUE; if (saver_mode == CONS_USR_SAVER) scp->status |= SAVER_RUNNING; else scp->status &= ~SAVER_RUNNING; scsplash_stick(TRUE); splx(s); break; case CONS_LKM_SAVER: s = spltty(); if ((saver_mode == CONS_USR_SAVER) && (scp->status & SAVER_RUNNING)) scp->status &= ~SAVER_RUNNING; saver_mode = *(int *)data; splx(s); break; default: return EINVAL; } return 0; case CONS_SAVERSTART: /* immediately start/stop the screen saver */ /* * Note that this ioctl does not guarantee the screen saver * actually starts or stops. It merely attempts to do so... */ s = spltty(); run_scrn_saver = (*(int *)data != 0); if (run_scrn_saver) sc->scrn_time_stamp -= scrn_blank_time; splx(s); return 0; case CONS_SCRSHOT: /* get a screen shot */ { int retval, hist_rsz; size_t lsize, csize; vm_offset_t frbp, hstp; unsigned lnum; scrshot_t *ptr = (scrshot_t *)data; void *outp = ptr->buf; if (ptr->x < 0 || ptr->y < 0 || ptr->xsize < 0 || ptr->ysize < 0) return EINVAL; s = spltty(); if (ISGRAPHSC(scp)) { splx(s); return EOPNOTSUPP; } hist_rsz = (scp->history != NULL) ? scp->history->vtb_rows : 0; if (((u_int)ptr->x + ptr->xsize) > scp->xsize || ((u_int)ptr->y + ptr->ysize) > (scp->ysize + hist_rsz)) { splx(s); return EINVAL; } lsize = scp->xsize * sizeof(u_int16_t); csize = ptr->xsize * sizeof(u_int16_t); /* Pointer to the last line of framebuffer */ frbp = scp->vtb.vtb_buffer + scp->ysize * lsize + ptr->x * sizeof(u_int16_t); /* Pointer to the last line of target buffer */ outp = (char *)outp + ptr->ysize * csize; /* Pointer to the last line of history buffer */ if (scp->history != NULL) hstp = scp->history->vtb_buffer + sc_vtb_tail(scp->history) * sizeof(u_int16_t) + ptr->x * sizeof(u_int16_t); else hstp = 0; retval = 0; for (lnum = 0; lnum < (ptr->y + ptr->ysize); lnum++) { if (lnum < scp->ysize) { frbp -= lsize; } else { hstp -= lsize; if (hstp < scp->history->vtb_buffer) hstp += scp->history->vtb_rows * lsize; frbp = hstp; } if (lnum < ptr->y) continue; outp = (char *)outp - csize; retval = copyout((void *)frbp, outp, csize); if (retval != 0) break; } splx(s); return retval; } case VT_SETMODE: /* set screen switcher mode */ { struct vt_mode *mode; struct proc *p1; mode = (struct vt_mode *)data; DPRINTF(5, ("%s%d: VT_SETMODE ", SC_DRIVER_NAME, sc->unit)); if (scp->smode.mode == VT_PROCESS) { p1 = pfind(scp->pid); if (scp->proc == p1 && scp->proc != td->td_proc) { if (p1) PROC_UNLOCK(p1); DPRINTF(5, ("error EPERM\n")); return EPERM; } if (p1) PROC_UNLOCK(p1); } s = spltty(); if (mode->mode == VT_AUTO) { scp->smode.mode = VT_AUTO; scp->proc = NULL; scp->pid = 0; DPRINTF(5, ("VT_AUTO, ")); if ((scp == sc->cur_scp) && (sc->unit == sc_console_unit)) cnavailable(sc_consptr, TRUE); /* were we in the middle of the vty switching process? */ if (finish_vt_rel(scp, TRUE, &s) == 0) DPRINTF(5, ("reset WAIT_REL, ")); if (finish_vt_acq(scp) == 0) DPRINTF(5, ("reset WAIT_ACQ, ")); } else { if (!ISSIGVALID(mode->relsig) || !ISSIGVALID(mode->acqsig) || !ISSIGVALID(mode->frsig)) { splx(s); DPRINTF(5, ("error EINVAL\n")); return EINVAL; } DPRINTF(5, ("VT_PROCESS %d, ", td->td_proc->p_pid)); bcopy(data, &scp->smode, sizeof(struct vt_mode)); scp->proc = td->td_proc; scp->pid = scp->proc->p_pid; if ((scp == sc->cur_scp) && (sc->unit == sc_console_unit)) cnavailable(sc_consptr, FALSE); } splx(s); DPRINTF(5, ("\n")); return 0; } case VT_GETMODE: /* get screen switcher mode */ bcopy(&scp->smode, data, sizeof(struct vt_mode)); return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 4): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_RELDISP: /* screen switcher ioctl */ s = spltty(); /* * This must be the current vty which is in the VT_PROCESS * switching mode... */ if ((scp != sc->cur_scp) || (scp->smode.mode != VT_PROCESS)) { splx(s); return EINVAL; } /* ...and this process is controlling it. */ if (scp->proc != td->td_proc) { splx(s); return EPERM; } error = EINVAL; switch(*(int *)data) { case VT_FALSE: /* user refuses to release screen, abort */ if ((error = finish_vt_rel(scp, FALSE, &s)) == 0) DPRINTF(5, ("%s%d: VT_FALSE\n", SC_DRIVER_NAME, sc->unit)); break; case VT_TRUE: /* user has released screen, go on */ if ((error = finish_vt_rel(scp, TRUE, &s)) == 0) DPRINTF(5, ("%s%d: VT_TRUE\n", SC_DRIVER_NAME, sc->unit)); break; case VT_ACKACQ: /* acquire acknowledged, switch completed */ if ((error = finish_vt_acq(scp)) == 0) DPRINTF(5, ("%s%d: VT_ACKACQ\n", SC_DRIVER_NAME, sc->unit)); break; default: break; } splx(s); return error; case VT_OPENQRY: /* return free virtual console */ for (i = sc->first_vty; i < sc->first_vty + sc->vtys; i++) { tp = SC_DEV(sc, i); if (!tty_opened_ns(tp)) { *(int *)data = i + 1; return 0; } } return EINVAL; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 5): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_ACTIVATE: /* switch to screen *data */ i = (*(int *)data == 0) ? scp->index : (*(int *)data - 1); s = spltty(); error = sc_clean_up(sc->cur_scp); splx(s); if (error) return error; error = sc_switch_scr(sc, i); return (error); #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 6): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_WAITACTIVE: /* wait for switch to occur */ i = (*(int *)data == 0) ? scp->index : (*(int *)data - 1); if ((i < sc->first_vty) || (i >= sc->first_vty + sc->vtys)) return EINVAL; if (i == sc->cur_scp->index) return 0; error = tsleep(VTY_WCHAN(sc, i), (PZERO + 1) | PCATCH, "waitvt", 0); return error; case VT_GETACTIVE: /* get active vty # */ *(int *)data = sc->cur_scp->index + 1; return 0; case VT_GETINDEX: /* get this vty # */ *(int *)data = scp->index + 1; return 0; case VT_LOCKSWITCH: /* prevent vty switching */ if ((*(int *)data) & 0x01) sc->flags |= SC_SCRN_VTYLOCK; else sc->flags &= ~SC_SCRN_VTYLOCK; return 0; case KDENABIO: /* allow io operations */ error = priv_check(td, PRIV_IO); if (error != 0) return error; error = securelevel_gt(td->td_ucred, 0); if (error != 0) return error; #ifdef __i386__ td->td_frame->tf_eflags |= PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags |= PSL_IOPL; #endif return 0; case KDDISABIO: /* disallow io operations (default) */ #ifdef __i386__ td->td_frame->tf_eflags &= ~PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags &= ~PSL_IOPL; #endif return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 20): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSKBSTATE: /* set keyboard state (locks) */ if (*(int *)data & ~LOCK_MASK) return EINVAL; scp->status &= ~LOCK_MASK; scp->status |= *(int *)data; if (scp == sc->cur_scp) update_kbd_state(scp, scp->status, LOCK_MASK); return 0; case KDGKBSTATE: /* get keyboard state (locks) */ if (scp == sc->cur_scp) save_kbd_state(scp); *(int *)data = scp->status & LOCK_MASK; return 0; case KDGETREPEAT: /* get keyboard repeat & delay rates */ case KDSETREPEAT: /* set keyboard repeat & delay rates (new) */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 67): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSETRAD: /* set keyboard repeat & delay rates (old) */ if (*(int *)data & ~0x7f) return EINVAL; error = kbdd_ioctl(sc->kbd, KDSETRAD, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 7): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSKBMODE: /* set keyboard mode */ switch (*(int *)data) { case K_XLATE: /* switch to XLT ascii mode */ case K_RAW: /* switch to RAW scancode mode */ case K_CODE: /* switch to CODE mode */ scp->kbd_mode = *(int *)data; if (scp == sc->cur_scp) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, data); return 0; default: return EINVAL; } /* NOT REACHED */ case KDGKBMODE: /* get keyboard mode */ *(int *)data = scp->kbd_mode; return 0; case KDGKBINFO: error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 8): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDMKTONE: /* sound the bell */ if (*(int*)data) sc_bell(scp, (*(int*)data)&0xffff, (((*(int*)data)>>16)&0xffff)*hz/1000); else sc_bell(scp, scp->bell_pitch, scp->bell_duration); return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 63): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KIOCSOUND: /* make tone (*data) hz */ if (scp == sc->cur_scp) { if (*(int *)data) return sc_tone(*(int *)data); else return sc_tone(0); } return 0; case KDGKBTYPE: /* get keyboard type */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) { /* always return something? XXX */ *(int *)data = 0; } return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 66): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSETLED: /* set keyboard LED status */ if (*(int *)data & ~LED_MASK) /* FIXME: LOCK_MASK? */ return EINVAL; scp->status &= ~LED_MASK; scp->status |= *(int *)data; if (scp == sc->cur_scp) update_kbd_leds(scp, scp->status); return 0; case KDGETLED: /* get keyboard LED status */ if (scp == sc->cur_scp) save_kbd_state(scp); *(int *)data = scp->status & LED_MASK; return 0; case KBADDKBD: /* add/remove keyboard to/from mux */ case KBRELKBD: error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('c', 110): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case CONS_SETKBD: /* set the new keyboard */ { keyboard_t *newkbd; s = spltty(); newkbd = kbd_get_keyboard(*(int *)data); if (newkbd == NULL) { splx(s); return EINVAL; } error = 0; if (sc->kbd != newkbd) { i = kbd_allocate(newkbd->kb_name, newkbd->kb_unit, (void *)&sc->keyboard, sckbdevent, sc); /* i == newkbd->kb_index */ if (i >= 0) { if (sc->kbd != NULL) { save_kbd_state(sc->cur_scp); kbd_release(sc->kbd, (void *)&sc->keyboard); } sc->kbd = kbd_get_keyboard(i); /* sc->kbd == newkbd */ sc->keyboard = i; (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); update_kbd_state(sc->cur_scp, sc->cur_scp->status, LOCK_MASK); } else { error = EPERM; /* XXX */ } } splx(s); return error; } case CONS_RELKBD: /* release the current keyboard */ s = spltty(); error = 0; if (sc->kbd != NULL) { save_kbd_state(sc->cur_scp); error = kbd_release(sc->kbd, (void *)&sc->keyboard); if (error == 0) { sc->kbd = NULL; sc->keyboard = -1; } } splx(s); return error; case CONS_GETTERM: /* get the current terminal emulator info */ { sc_term_sw_t *sw; if (((term_info_t *)data)->ti_index == 0) { sw = scp->tsw; } else { sw = sc_term_match_by_number(((term_info_t *)data)->ti_index); } if (sw != NULL) { strncpy(((term_info_t *)data)->ti_name, sw->te_name, sizeof(((term_info_t *)data)->ti_name)); strncpy(((term_info_t *)data)->ti_desc, sw->te_desc, sizeof(((term_info_t *)data)->ti_desc)); ((term_info_t *)data)->ti_flags = 0; return 0; } else { ((term_info_t *)data)->ti_name[0] = '\0'; ((term_info_t *)data)->ti_desc[0] = '\0'; ((term_info_t *)data)->ti_flags = 0; return EINVAL; } } case CONS_SETTERM: /* set the current terminal emulator */ s = spltty(); error = sc_init_emulator(scp, ((term_info_t *)data)->ti_name); /* FIXME: what if scp == sc_console! XXX */ splx(s); return error; case GIO_SCRNMAP: /* get output translation table */ bcopy(&sc->scr_map, data, sizeof(sc->scr_map)); return 0; case PIO_SCRNMAP: /* set output translation table */ bcopy(data, &sc->scr_map, sizeof(sc->scr_map)); for (i=0; iscr_map); i++) { sc->scr_rmap[sc->scr_map[i]] = i; } return 0; case GIO_KEYMAP: /* get keyboard translation table */ case PIO_KEYMAP: /* set keyboard translation table */ case OGIO_KEYMAP: /* get keyboard translation table (compat) */ case OPIO_KEYMAP: /* set keyboard translation table (compat) */ case GIO_DEADKEYMAP: /* get accent key translation table */ case PIO_DEADKEYMAP: /* set accent key translation table */ case GETFKEY: /* get function key string */ case SETFKEY: /* set function key string */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #ifndef SC_NO_FONT_LOADING case PIO_FONT8x8: /* set 8x8 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_8, 8*256); sc->fonts_loaded |= FONT_8; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x8. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size < 14)) sc_load_font(sc->cur_scp, 0, 8, 8, sc->font_8, 0, 256); return 0; case GIO_FONT8x8: /* get 8x8 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_8) { bcopy(sc->font_8, data, 8*256); return 0; } else return ENXIO; case PIO_FONT8x14: /* set 8x14 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_14, 14*256); sc->fonts_loaded |= FONT_14; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x14. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size >= 14) && (sc->cur_scp->font_size < 16)) sc_load_font(sc->cur_scp, 0, 14, 8, sc->font_14, 0, 256); return 0; case GIO_FONT8x14: /* get 8x14 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_14) { bcopy(sc->font_14, data, 14*256); return 0; } else return ENXIO; case PIO_FONT8x16: /* set 8x16 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_16, 16*256); sc->fonts_loaded |= FONT_16; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x16. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size >= 16)) sc_load_font(sc->cur_scp, 0, 16, 8, sc->font_16, 0, 256); return 0; case GIO_FONT8x16: /* get 8x16 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_16) { bcopy(sc->font_16, data, 16*256); return 0; } else return ENXIO; #endif /* SC_NO_FONT_LOADING */ default: break; } return (ENOIOCTL); } static int consolectl_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { return sctty_ioctl(dev->si_drv1, cmd, data, td); } static int consolectl_close(struct cdev *dev, int flags, int mode, struct thread *td) { #ifndef SC_NO_SYSMOUSE mouse_info_t info; memset(&info, 0, sizeof(info)); info.operation = MOUSE_ACTION; /* * Make sure all buttons are released when moused and other * console daemons exit, so that no buttons are left pressed. */ (void) sctty_ioctl(dev->si_drv1, CONS_MOUSECTL, (caddr_t)&info, td); #endif return (0); } static void sc_cnprobe(struct consdev *cp) { int unit; int flags; if (!vty_enabled(VTY_SC)) { cp->cn_pri = CN_DEAD; return; } cp->cn_pri = sc_get_cons_priority(&unit, &flags); /* a video card is always required */ if (!scvidprobe(unit, flags, TRUE)) cp->cn_pri = CN_DEAD; /* syscons will become console even when there is no keyboard */ sckbdprobe(unit, flags, TRUE); if (cp->cn_pri == CN_DEAD) return; /* initialize required fields */ strcpy(cp->cn_name, "ttyv0"); } static void sc_cninit(struct consdev *cp) { int unit; int flags; sc_get_cons_priority(&unit, &flags); scinit(unit, flags | SC_KERNEL_CONSOLE); sc_console_unit = unit; sc_console = sc_get_stat(sc_get_softc(unit, SC_KERNEL_CONSOLE)->dev[0]); sc_consptr = cp; } static void sc_cnterm(struct consdev *cp) { void *ts; int i; /* we are not the kernel console any more, release everything */ if (sc_console_unit < 0) return; /* shouldn't happen */ #if 0 /* XXX */ sc_clear_screen(sc_console); sccnupdate(sc_console); #endif if (sc_ktsw != NULL) { for (i = 0; i <= mp_maxid; i++) { ts = sc_kts[i]; sc_kts[i] = NULL; (*sc_ktsw->te_term)(sc_console, &ts); free(ts, M_DEVBUF); } sc_ktsw = NULL; } scterm(sc_console_unit, SC_KERNEL_CONSOLE); sc_console_unit = -1; sc_console = NULL; } static void sccnclose(sc_softc_t *sc, struct sc_cnstate *sp); static int sc_cngetc_locked(struct sc_cnstate *sp); static void sccnkbdlock(sc_softc_t *sc, struct sc_cnstate *sp); static void sccnkbdunlock(sc_softc_t *sc, struct sc_cnstate *sp); static void sccnopen(sc_softc_t *sc, struct sc_cnstate *sp, int flags); static void sccnscrlock(sc_softc_t *sc, struct sc_cnstate *sp); static void sccnscrunlock(sc_softc_t *sc, struct sc_cnstate *sp); static void sccnkbdlock(sc_softc_t *sc, struct sc_cnstate *sp) { /* * Locking method: hope for the best. * The keyboard is supposed to be Giant locked. We can't handle that * in general. The kdb_active case here is not safe, and we will * proceed without the lock in all cases. */ sp->kbd_locked = !kdb_active && mtx_trylock(&Giant); } static void sccnkbdunlock(sc_softc_t *sc, struct sc_cnstate *sp) { if (sp->kbd_locked) mtx_unlock(&Giant); sp->kbd_locked = FALSE; } static void sccnscrlock(sc_softc_t *sc, struct sc_cnstate *sp) { int retries; /** * Locking method: * - if kdb_active and video_mtx is not owned by anyone, then lock * by kdb remaining active * - if !kdb_active, try to acquire video_mtx without blocking or * recursing; if we get it then it works normally. * Note that video_mtx is especially unusable if we already own it, * since then it is protecting something and syscons is not reentrant * enough to ignore the protection even in the kdb_active case. */ if (kdb_active) { sp->kdb_locked = sc->video_mtx.mtx_lock == MTX_UNOWNED || SCHEDULER_STOPPED(); sp->mtx_locked = FALSE; } else { sp->kdb_locked = FALSE; for (retries = 0; retries < 1000; retries++) { sp->mtx_locked = mtx_trylock_spin_flags(&sc->video_mtx, MTX_QUIET) != 0; if (SCHEDULER_STOPPED()) { sp->kdb_locked = TRUE; sp->mtx_locked = FALSE; break; } if (sp->mtx_locked) break; DELAY(1); } } } static void sccnscrunlock(sc_softc_t *sc, struct sc_cnstate *sp) { if (sp->mtx_locked) mtx_unlock_spin(&sc->video_mtx); sp->mtx_locked = sp->kdb_locked = FALSE; } static void sccnopen(sc_softc_t *sc, struct sc_cnstate *sp, int flags) { int kbd_mode; /* assert(sc_console_unit >= 0) */ sp->kbd_opened = FALSE; sp->scr_opened = FALSE; sp->kbd_locked = FALSE; /* Opening the keyboard is optional. */ if (!(flags & 1) || sc->kbd == NULL) goto over_keyboard; sccnkbdlock(sc, sp); /* * Make sure the keyboard is accessible even when the kbd device * driver is disabled. */ kbdd_enable(sc->kbd); /* Switch the keyboard to console mode (K_XLATE, polled) on all scp's. */ kbd_mode = K_XLATE; (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&kbd_mode); sc->kbd_open_level++; kbdd_poll(sc->kbd, TRUE); sp->kbd_opened = TRUE; over_keyboard: ; /* The screen is opened iff locking it succeeds. */ sccnscrlock(sc, sp); if (!sp->kdb_locked && !sp->mtx_locked) return; sp->scr_opened = TRUE; /* The screen switch is optional. */ if (!(flags & 2)) return; /* try to switch to the kernel console screen */ if (!cold && sc->cur_scp->index != sc_console->index && sc->cur_scp->smode.mode == VT_AUTO && sc_console->smode.mode == VT_AUTO) sc_switch_scr(sc, sc_console->index); } static void sccnclose(sc_softc_t *sc, struct sc_cnstate *sp) { sp->scr_opened = FALSE; sccnscrunlock(sc, sp); if (!sp->kbd_opened) return; /* Restore keyboard mode (for the current, possibly-changed scp). */ kbdd_poll(sc->kbd, FALSE); if (--sc->kbd_open_level == 0) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); kbdd_disable(sc->kbd); sp->kbd_opened = FALSE; sccnkbdunlock(sc, sp); } /* * Grabbing switches the screen and keyboard focus to sc_console and the * keyboard mode to (K_XLATE, polled). Only switching to polled mode is * essential (for preventing the interrupt handler from eating input * between polls). Focus is part of the UI, and the other switches are * work just was well when they are done on every entry and exit. * * Screen switches while grabbed are supported, and to maintain focus for * this ungrabbing and closing only restore the polling state and then * the keyboard mode if on the original screen. */ static void sc_cngrab(struct consdev *cp) { sc_softc_t *sc; int lev; sc = sc_console->sc; lev = atomic_fetchadd_int(&sc->grab_level, 1); if (lev >= 0 && lev < 2) { sccnopen(sc, &sc->grab_state[lev], 1 | 2); sccnscrunlock(sc, &sc->grab_state[lev]); sccnkbdunlock(sc, &sc->grab_state[lev]); } } static void sc_cnungrab(struct consdev *cp) { sc_softc_t *sc; int lev; sc = sc_console->sc; lev = atomic_load_acq_int(&sc->grab_level) - 1; if (lev >= 0 && lev < 2) { sccnkbdlock(sc, &sc->grab_state[lev]); sccnscrlock(sc, &sc->grab_state[lev]); sccnclose(sc, &sc->grab_state[lev]); } atomic_add_int(&sc->grab_level, -1); } static char sc_cnputc_log[0x1000]; static u_int sc_cnputc_loghead; static u_int sc_cnputc_logtail; static void sc_cnputc(struct consdev *cd, int c) { struct sc_cnstate st; u_char buf[1]; scr_stat *scp = sc_console; void *oldts, *ts; struct sc_term_sw *oldtsw; #ifndef SC_NO_HISTORY #if 0 struct tty *tp; #endif #endif /* !SC_NO_HISTORY */ u_int head; int s; /* assert(sc_console != NULL) */ sccnopen(scp->sc, &st, 0); /* * Log the output. * * In the unlocked case, the logging is intentionally only * perfectly atomic for the indexes. */ head = atomic_fetchadd_int(&sc_cnputc_loghead, 1); sc_cnputc_log[head % sizeof(sc_cnputc_log)] = c; /* * If we couldn't open, do special reentrant output and return to defer * normal output. */ if (!st.scr_opened) { ec_putc(c); return; } #ifndef SC_NO_HISTORY if (scp == scp->sc->cur_scp && scp->status & SLKED) { scp->status &= ~SLKED; update_kbd_state(scp, scp->status, SLKED); if (scp->status & BUFFER_SAVED) { if (!sc_hist_restore(scp)) sc_remove_cutmarking(scp); scp->status &= ~BUFFER_SAVED; scp->status |= CURSOR_ENABLED; sc_draw_cursor_image(scp); } #if 0 /* * XXX: Now that TTY's have their own locks, we cannot process * any data after disabling scroll lock. cnputs already holds a * spinlock. */ tp = SC_DEV(scp->sc, scp->index); /* XXX "tp" can be NULL */ tty_lock(tp); if (tty_opened(tp)) sctty_outwakeup(tp); tty_unlock(tp); #endif } #endif /* !SC_NO_HISTORY */ /* Play any output still in the log (our char may already be done). */ while (sc_cnputc_logtail != atomic_load_acq_int(&sc_cnputc_loghead)) { buf[0] = sc_cnputc_log[sc_cnputc_logtail++ % sizeof(sc_cnputc_log)]; if (atomic_load_acq_int(&sc_cnputc_loghead) - sc_cnputc_logtail >= sizeof(sc_cnputc_log)) continue; /* Console output has a per-CPU "input" state. Switch for it. */ ts = sc_kts[curcpu]; if (ts != NULL) { oldtsw = scp->tsw; oldts = scp->ts; scp->tsw = sc_ktsw; scp->ts = ts; (*scp->tsw->te_sync)(scp); } else { /* Only 1 tsw early. Switch only its attr. */ (*scp->tsw->te_default_attr)(scp, sc_kattrtab[curcpu], SC_KERNEL_CONS_REV_ATTR); } sc_puts(scp, buf, 1); if (ts != NULL) { scp->tsw = oldtsw; scp->ts = oldts; (*scp->tsw->te_sync)(scp); } else { (*scp->tsw->te_default_attr)(scp, SC_KERNEL_CONS_ATTR, SC_KERNEL_CONS_REV_ATTR); } } s = spltty(); /* block sckbdevent and scrn_timer */ sccnupdate(scp); splx(s); sccnclose(scp->sc, &st); } static int sc_cngetc(struct consdev *cd) { struct sc_cnstate st; int c, s; /* assert(sc_console != NULL) */ sccnopen(sc_console->sc, &st, 1); s = spltty(); /* block sckbdevent and scrn_timer while we poll */ if (!st.kbd_opened) { splx(s); sccnclose(sc_console->sc, &st); return -1; /* means no keyboard since we fudged the locking */ } c = sc_cngetc_locked(&st); splx(s); sccnclose(sc_console->sc, &st); return c; } static int sc_cngetc_locked(struct sc_cnstate *sp) { static struct fkeytab fkey; static int fkeycp; scr_stat *scp; const u_char *p; int c; /* * Stop the screen saver and update the screen if necessary. * What if we have been running in the screen saver code... XXX */ if (sp->scr_opened) sc_touch_scrn_saver(); scp = sc_console->sc->cur_scp; /* XXX */ if (sp->scr_opened) sccnupdate(scp); if (fkeycp < fkey.len) return fkey.str[fkeycp++]; c = scgetc(scp->sc, SCGETC_CN | SCGETC_NONBLOCK, sp); switch (KEYFLAGS(c)) { case 0: /* normal char */ return KEYCHAR(c); case FKEY: /* function key */ p = (*scp->tsw->te_fkeystr)(scp, c); if (p != NULL) { fkey.len = strlen(p); bcopy(p, fkey.str, fkey.len); fkeycp = 1; return fkey.str[0]; } p = kbdd_get_fkeystr(scp->sc->kbd, KEYCHAR(c), (size_t *)&fkeycp); fkey.len = fkeycp; if ((p != NULL) && (fkey.len > 0)) { bcopy(p, fkey.str, fkey.len); fkeycp = 1; return fkey.str[0]; } return c; /* XXX */ case NOKEY: case ERRKEY: default: return -1; } /* NOT REACHED */ } static void sccnupdate(scr_stat *scp) { /* this is a cut-down version of scrn_timer()... */ if (suspend_in_progress || scp->sc->font_loading_in_progress) return; if (kdb_active || panicstr || shutdown_in_progress) { sc_touch_scrn_saver(); } else if (scp != scp->sc->cur_scp) { return; } if (!run_scrn_saver) scp->sc->flags &= ~SC_SCRN_IDLE; #ifdef DEV_SPLASH if ((saver_mode != CONS_LKM_SAVER) || !(scp->sc->flags & SC_SCRN_IDLE)) if (scp->sc->flags & SC_SCRN_BLANKED) stop_scrn_saver(scp->sc, current_saver); #endif if (scp != scp->sc->cur_scp || scp->sc->blink_in_progress || scp->sc->switch_in_progress) return; /* * FIXME: unlike scrn_timer(), we call scrn_update() from here even * when write_in_progress is non-zero. XXX */ if (!ISGRAPHSC(scp) && !(scp->sc->flags & SC_SCRN_BLANKED)) scrn_update(scp, TRUE); } static void scrn_timer(void *arg) { static time_t kbd_time_stamp = 0; sc_softc_t *sc; scr_stat *scp; int again, rate; again = (arg != NULL); if (arg != NULL) sc = (sc_softc_t *)arg; else if (sc_console != NULL) sc = sc_console->sc; else return; /* find the vty to update */ scp = sc->cur_scp; /* don't do anything when we are performing some I/O operations */ if (suspend_in_progress || sc->font_loading_in_progress) goto done; if ((sc->kbd == NULL) && (sc->config & SC_AUTODETECT_KBD)) { /* try to allocate a keyboard automatically */ if (kbd_time_stamp != time_uptime) { kbd_time_stamp = time_uptime; sc->keyboard = sc_allocate_keyboard(sc, -1); if (sc->keyboard >= 0) { sc->kbd = kbd_get_keyboard(sc->keyboard); (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); update_kbd_state(sc->cur_scp, sc->cur_scp->status, LOCK_MASK); } } } /* should we stop the screen saver? */ if (kdb_active || panicstr || shutdown_in_progress) sc_touch_scrn_saver(); if (run_scrn_saver) { if (time_uptime > sc->scrn_time_stamp + scrn_blank_time) sc->flags |= SC_SCRN_IDLE; else sc->flags &= ~SC_SCRN_IDLE; } else { sc->scrn_time_stamp = time_uptime; sc->flags &= ~SC_SCRN_IDLE; if (scrn_blank_time > 0) run_scrn_saver = TRUE; } #ifdef DEV_SPLASH if ((saver_mode != CONS_LKM_SAVER) || !(sc->flags & SC_SCRN_IDLE)) if (sc->flags & SC_SCRN_BLANKED) stop_scrn_saver(sc, current_saver); #endif /* should we just return ? */ if (sc->blink_in_progress || sc->switch_in_progress || sc->write_in_progress) goto done; /* Update the screen */ scp = sc->cur_scp; /* cur_scp may have changed... */ if (!ISGRAPHSC(scp) && !(sc->flags & SC_SCRN_BLANKED)) scrn_update(scp, TRUE); #ifdef DEV_SPLASH /* should we activate the screen saver? */ if ((saver_mode == CONS_LKM_SAVER) && (sc->flags & SC_SCRN_IDLE)) if (!ISGRAPHSC(scp) || (sc->flags & SC_SCRN_BLANKED)) (*current_saver)(sc, TRUE); #endif done: if (again) { /* * Use reduced "refresh" rate if we are in graphics and that is not a * graphical screen saver. In such case we just have nothing to do. */ if (ISGRAPHSC(scp) && !(sc->flags & SC_SCRN_BLANKED)) rate = 2; else rate = 30; callout_reset_sbt(&sc->ctimeout, SBT_1S / rate, 0, scrn_timer, sc, C_PREL(1)); } } static int and_region(int *s1, int *e1, int s2, int e2) { if (*e1 < s2 || e2 < *s1) return FALSE; *s1 = imax(*s1, s2); *e1 = imin(*e1, e2); return TRUE; } static void scrn_update(scr_stat *scp, int show_cursor) { int start; int end; int s; int e; /* assert(scp == scp->sc->cur_scp) */ SC_VIDEO_LOCK(scp->sc); #ifndef SC_NO_CUTPASTE /* remove the previous mouse pointer image if necessary */ if (scp->status & MOUSE_VISIBLE) { s = scp->mouse_pos; e = scp->mouse_pos + scp->xsize + 1; if ((scp->status & (MOUSE_MOVED | MOUSE_HIDDEN)) || and_region(&s, &e, scp->start, scp->end) || ((scp->status & CURSOR_ENABLED) && (scp->cursor_pos != scp->cursor_oldpos) && (and_region(&s, &e, scp->cursor_pos, scp->cursor_pos) || and_region(&s, &e, scp->cursor_oldpos, scp->cursor_oldpos)))) { sc_remove_mouse_image(scp); if (scp->end >= scp->xsize*scp->ysize) scp->end = scp->xsize*scp->ysize - 1; } } #endif /* !SC_NO_CUTPASTE */ #if 1 /* debug: XXX */ if (scp->end >= scp->xsize*scp->ysize) { printf("scrn_update(): scp->end %d > size_of_screen!!\n", scp->end); scp->end = scp->xsize*scp->ysize - 1; } if (scp->start < 0) { printf("scrn_update(): scp->start %d < 0\n", scp->start); scp->start = 0; } #endif /* update screen image */ if (scp->start <= scp->end) { if (scp->mouse_cut_end >= 0) { /* there is a marked region for cut & paste */ if (scp->mouse_cut_start <= scp->mouse_cut_end) { start = scp->mouse_cut_start; end = scp->mouse_cut_end; } else { start = scp->mouse_cut_end; end = scp->mouse_cut_start - 1; } s = start; e = end; /* does the cut-mark region overlap with the update region? */ if (and_region(&s, &e, scp->start, scp->end)) { (*scp->rndr->draw)(scp, s, e - s + 1, TRUE); s = 0; e = start - 1; if (and_region(&s, &e, scp->start, scp->end)) (*scp->rndr->draw)(scp, s, e - s + 1, FALSE); s = end + 1; e = scp->xsize*scp->ysize - 1; if (and_region(&s, &e, scp->start, scp->end)) (*scp->rndr->draw)(scp, s, e - s + 1, FALSE); } else { (*scp->rndr->draw)(scp, scp->start, scp->end - scp->start + 1, FALSE); } } else { (*scp->rndr->draw)(scp, scp->start, scp->end - scp->start + 1, FALSE); } } /* we are not to show the cursor and the mouse pointer... */ if (!show_cursor) { scp->end = 0; scp->start = scp->xsize*scp->ysize - 1; SC_VIDEO_UNLOCK(scp->sc); return; } /* update cursor image */ if (scp->status & CURSOR_ENABLED) { s = scp->start; e = scp->end; /* did cursor move since last time ? */ if (scp->cursor_pos != scp->cursor_oldpos) { /* do we need to remove old cursor image ? */ if (!and_region(&s, &e, scp->cursor_oldpos, scp->cursor_oldpos)) sc_remove_cursor_image(scp); sc_draw_cursor_image(scp); } else { if (and_region(&s, &e, scp->cursor_pos, scp->cursor_pos)) /* cursor didn't move, but has been overwritten */ sc_draw_cursor_image(scp); else if (scp->curs_attr.flags & CONS_BLINK_CURSOR) /* if it's a blinking cursor, update it */ (*scp->rndr->blink_cursor)(scp, scp->cursor_pos, sc_inside_cutmark(scp, scp->cursor_pos)); } } #ifndef SC_NO_CUTPASTE /* update "pseudo" mouse pointer image */ if (scp->sc->flags & SC_MOUSE_ENABLED) { if (!(scp->status & (MOUSE_VISIBLE | MOUSE_HIDDEN))) { scp->status &= ~MOUSE_MOVED; sc_draw_mouse_image(scp); } } #endif /* SC_NO_CUTPASTE */ scp->end = 0; scp->start = scp->xsize*scp->ysize - 1; SC_VIDEO_UNLOCK(scp->sc); } #ifdef DEV_SPLASH static int scsplash_callback(int event, void *arg) { sc_softc_t *sc; int error; sc = (sc_softc_t *)arg; switch (event) { case SPLASH_INIT: if (add_scrn_saver(scsplash_saver) == 0) { sc->flags &= ~SC_SAVER_FAILED; run_scrn_saver = TRUE; if (cold && !(boothowto & RB_VERBOSE)) { scsplash_stick(TRUE); (*current_saver)(sc, TRUE); } } return 0; case SPLASH_TERM: if (current_saver == scsplash_saver) { scsplash_stick(FALSE); error = remove_scrn_saver(scsplash_saver); if (error) return error; } return 0; default: return EINVAL; } } static void scsplash_saver(sc_softc_t *sc, int show) { static int busy = FALSE; scr_stat *scp; if (busy) return; busy = TRUE; scp = sc->cur_scp; if (show) { if (!(sc->flags & SC_SAVER_FAILED)) { if (!(sc->flags & SC_SCRN_BLANKED)) set_scrn_saver_mode(scp, -1, NULL, 0); switch (splash(sc->adp, TRUE)) { case 0: /* succeeded */ break; case EAGAIN: /* try later */ restore_scrn_saver_mode(scp, FALSE); sc_touch_scrn_saver(); /* XXX */ break; default: sc->flags |= SC_SAVER_FAILED; scsplash_stick(FALSE); restore_scrn_saver_mode(scp, TRUE); printf("scsplash_saver(): failed to put up the image\n"); break; } } } else if (!sticky_splash) { if ((sc->flags & SC_SCRN_BLANKED) && (splash(sc->adp, FALSE) == 0)) restore_scrn_saver_mode(scp, TRUE); } busy = FALSE; } static int add_scrn_saver(void (*this_saver)(sc_softc_t *, int)) { #if 0 int error; if (current_saver != none_saver) { error = remove_scrn_saver(current_saver); if (error) return error; } #endif if (current_saver != none_saver) return EBUSY; run_scrn_saver = FALSE; saver_mode = CONS_LKM_SAVER; current_saver = this_saver; return 0; } static int remove_scrn_saver(void (*this_saver)(sc_softc_t *, int)) { if (current_saver != this_saver) return EINVAL; #if 0 /* * In order to prevent `current_saver' from being called by * the timeout routine `scrn_timer()' while we manipulate * the saver list, we shall set `current_saver' to `none_saver' * before stopping the current saver, rather than blocking by `splXX()'. */ current_saver = none_saver; if (scrn_blanked) stop_scrn_saver(this_saver); #endif /* unblank all blanked screens */ wait_scrn_saver_stop(NULL); if (scrn_blanked) return EBUSY; current_saver = none_saver; return 0; } static int set_scrn_saver_mode(scr_stat *scp, int mode, u_char *pal, int border) { int s; /* assert(scp == scp->sc->cur_scp) */ s = spltty(); if (!ISGRAPHSC(scp)) sc_remove_cursor_image(scp); scp->splash_save_mode = scp->mode; scp->splash_save_status = scp->status & (GRAPHICS_MODE | PIXEL_MODE); scp->status &= ~(GRAPHICS_MODE | PIXEL_MODE); scp->status |= (UNKNOWN_MODE | SAVER_RUNNING); scp->sc->flags |= SC_SCRN_BLANKED; ++scrn_blanked; splx(s); if (mode < 0) return 0; scp->mode = mode; if (set_mode(scp) == 0) { if (scp->sc->adp->va_info.vi_flags & V_INFO_GRAPHICS) scp->status |= GRAPHICS_MODE; #ifndef SC_NO_PALETTE_LOADING if (pal != NULL) vidd_load_palette(scp->sc->adp, pal); #endif sc_set_border(scp, border); return 0; } else { s = spltty(); scp->mode = scp->splash_save_mode; scp->status &= ~(UNKNOWN_MODE | SAVER_RUNNING); scp->status |= scp->splash_save_status; splx(s); return 1; } } static int restore_scrn_saver_mode(scr_stat *scp, int changemode) { int mode; int status; int s; /* assert(scp == scp->sc->cur_scp) */ s = spltty(); mode = scp->mode; status = scp->status; scp->mode = scp->splash_save_mode; scp->status &= ~(UNKNOWN_MODE | SAVER_RUNNING); scp->status |= scp->splash_save_status; scp->sc->flags &= ~SC_SCRN_BLANKED; if (!changemode) { if (!ISGRAPHSC(scp)) sc_draw_cursor_image(scp); --scrn_blanked; splx(s); return 0; } if (set_mode(scp) == 0) { #ifndef SC_NO_PALETTE_LOADING #ifdef SC_PIXEL_MODE if (scp->sc->adp->va_info.vi_mem_model == V_INFO_MM_DIRECT) vidd_load_palette(scp->sc->adp, scp->sc->palette2); else #endif vidd_load_palette(scp->sc->adp, scp->sc->palette); #endif --scrn_blanked; splx(s); return 0; } else { scp->mode = mode; scp->status = status; splx(s); return 1; } } static void stop_scrn_saver(sc_softc_t *sc, void (*saver)(sc_softc_t *, int)) { (*saver)(sc, FALSE); run_scrn_saver = FALSE; /* the screen saver may have chosen not to stop after all... */ if (sc->flags & SC_SCRN_BLANKED) return; mark_all(sc->cur_scp); if (sc->delayed_next_scr) sc_switch_scr(sc, sc->delayed_next_scr - 1); if (!kdb_active) wakeup(&scrn_blanked); } static int wait_scrn_saver_stop(sc_softc_t *sc) { int error = 0; while (scrn_blanked > 0) { run_scrn_saver = FALSE; if (sc && !(sc->flags & SC_SCRN_BLANKED)) { error = 0; break; } error = tsleep(&scrn_blanked, PZERO | PCATCH, "scrsav", 0); if ((error != 0) && (error != ERESTART)) break; } run_scrn_saver = FALSE; return error; } #endif /* DEV_SPLASH */ void sc_touch_scrn_saver(void) { scsplash_stick(FALSE); run_scrn_saver = FALSE; } int sc_switch_scr(sc_softc_t *sc, u_int next_scr) { scr_stat *cur_scp; struct tty *tp; struct proc *p; int s; DPRINTF(5, ("sc0: sc_switch_scr() %d ", next_scr + 1)); if (sc->cur_scp == NULL) return (0); /* prevent switch if previously requested */ if (sc->flags & SC_SCRN_VTYLOCK) { sc_bell(sc->cur_scp, sc->cur_scp->bell_pitch, sc->cur_scp->bell_duration); return EPERM; } /* delay switch if the screen is blanked or being updated */ if ((sc->flags & SC_SCRN_BLANKED) || sc->write_in_progress || sc->blink_in_progress) { sc->delayed_next_scr = next_scr + 1; sc_touch_scrn_saver(); DPRINTF(5, ("switch delayed\n")); return 0; } sc->delayed_next_scr = 0; s = spltty(); cur_scp = sc->cur_scp; /* we are in the middle of the vty switching process... */ if (sc->switch_in_progress && (cur_scp->smode.mode == VT_PROCESS) && cur_scp->proc) { p = pfind(cur_scp->pid); if (cur_scp->proc != p) { if (p) PROC_UNLOCK(p); /* * The controlling process has died!!. Do some clean up. * NOTE:`cur_scp->proc' and `cur_scp->smode.mode' * are not reset here yet; they will be cleared later. */ DPRINTF(5, ("cur_scp controlling process %d died, ", cur_scp->pid)); if (cur_scp->status & SWITCH_WAIT_REL) { /* * Force the previous switch to finish, but return now * with error. */ DPRINTF(5, ("reset WAIT_REL, ")); finish_vt_rel(cur_scp, TRUE, &s); splx(s); DPRINTF(5, ("finishing previous switch\n")); return EINVAL; } else if (cur_scp->status & SWITCH_WAIT_ACQ) { /* let's assume screen switch has been completed. */ DPRINTF(5, ("reset WAIT_ACQ, ")); finish_vt_acq(cur_scp); } else { /* * We are in between screen release and acquisition, and * reached here via scgetc() or scrn_timer() which has * interrupted exchange_scr(). Don't do anything stupid. */ DPRINTF(5, ("waiting nothing, ")); } } else { if (p) PROC_UNLOCK(p); /* * The controlling process is alive, but not responding... * It is either buggy or it may be just taking time. * The following code is a gross kludge to cope with this * problem for which there is no clean solution. XXX */ if (cur_scp->status & SWITCH_WAIT_REL) { switch (sc->switch_in_progress++) { case 1: break; case 2: DPRINTF(5, ("sending relsig again, ")); signal_vt_rel(cur_scp); break; case 3: break; case 4: default: /* * Act as if the controlling program returned * VT_FALSE. */ DPRINTF(5, ("force reset WAIT_REL, ")); finish_vt_rel(cur_scp, FALSE, &s); splx(s); DPRINTF(5, ("act as if VT_FALSE was seen\n")); return EINVAL; } } else if (cur_scp->status & SWITCH_WAIT_ACQ) { switch (sc->switch_in_progress++) { case 1: break; case 2: DPRINTF(5, ("sending acqsig again, ")); signal_vt_acq(cur_scp); break; case 3: break; case 4: default: /* clear the flag and finish the previous switch */ DPRINTF(5, ("force reset WAIT_ACQ, ")); finish_vt_acq(cur_scp); break; } } } } /* * Return error if an invalid argument is given, or vty switch * is still in progress. */ if ((next_scr < sc->first_vty) || (next_scr >= sc->first_vty + sc->vtys) || sc->switch_in_progress) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error 1\n")); return EINVAL; } /* * Don't allow switching away from the graphics mode vty * if the switch mode is VT_AUTO, unless the next vty is the same * as the current or the current vty has been closed (but showing). */ tp = SC_DEV(sc, cur_scp->index); if ((cur_scp->index != next_scr) && tty_opened_ns(tp) && (cur_scp->smode.mode == VT_AUTO) && ISGRAPHSC(cur_scp)) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error, graphics mode\n")); return EINVAL; } /* * Is the wanted vty open? Don't allow switching to a closed vty. * If we are in DDB, don't switch to a vty in the VT_PROCESS mode. * Note that we always allow the user to switch to the kernel * console even if it is closed. */ if ((sc_console == NULL) || (next_scr != sc_console->index)) { tp = SC_DEV(sc, next_scr); if (!tty_opened_ns(tp)) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error 2, requested vty isn't open!\n")); return EINVAL; } if (kdb_active && SC_STAT(tp)->smode.mode == VT_PROCESS) { splx(s); DPRINTF(5, ("error 3, requested vty is in the VT_PROCESS mode\n")); return EINVAL; } } /* this is the start of vty switching process... */ ++sc->switch_in_progress; sc->old_scp = cur_scp; sc->new_scp = sc_get_stat(SC_DEV(sc, next_scr)); if (sc->new_scp == sc->old_scp) { sc->switch_in_progress = 0; /* * XXX wakeup() locks the scheduler lock which will hang if * the lock is in an in-between state, e.g., when we stop at * a breakpoint at fork_exit. It has always been wrong to call * wakeup() when the debugger is active. In RELENG_4, wakeup() * is supposed to be locked by splhigh(), but the debugger may * be invoked at splhigh(). */ if (!kdb_active) wakeup(VTY_WCHAN(sc,next_scr)); splx(s); DPRINTF(5, ("switch done (new == old)\n")); return 0; } /* has controlling process died? */ vt_proc_alive(sc->old_scp); vt_proc_alive(sc->new_scp); /* wait for the controlling process to release the screen, if necessary */ if (signal_vt_rel(sc->old_scp)) { splx(s); return 0; } /* go set up the new vty screen */ splx(s); exchange_scr(sc); s = spltty(); /* wake up processes waiting for this vty */ if (!kdb_active) wakeup(VTY_WCHAN(sc,next_scr)); /* wait for the controlling process to acknowledge, if necessary */ if (signal_vt_acq(sc->cur_scp)) { splx(s); return 0; } sc->switch_in_progress = 0; if (sc->unit == sc_console_unit) cnavailable(sc_consptr, TRUE); splx(s); DPRINTF(5, ("switch done\n")); return 0; } static int do_switch_scr(sc_softc_t *sc, int s) { vt_proc_alive(sc->new_scp); splx(s); exchange_scr(sc); s = spltty(); /* sc->cur_scp == sc->new_scp */ wakeup(VTY_WCHAN(sc,sc->cur_scp->index)); /* wait for the controlling process to acknowledge, if necessary */ if (!signal_vt_acq(sc->cur_scp)) { sc->switch_in_progress = 0; if (sc->unit == sc_console_unit) cnavailable(sc_consptr, TRUE); } return s; } static int vt_proc_alive(scr_stat *scp) { struct proc *p; if (scp->proc) { if ((p = pfind(scp->pid)) != NULL) PROC_UNLOCK(p); if (scp->proc == p) return TRUE; scp->proc = NULL; scp->smode.mode = VT_AUTO; DPRINTF(5, ("vt controlling process %d died\n", scp->pid)); } return FALSE; } static int signal_vt_rel(scr_stat *scp) { if (scp->smode.mode != VT_PROCESS) return FALSE; scp->status |= SWITCH_WAIT_REL; PROC_LOCK(scp->proc); kern_psignal(scp->proc, scp->smode.relsig); PROC_UNLOCK(scp->proc); DPRINTF(5, ("sending relsig to %d\n", scp->pid)); return TRUE; } static int signal_vt_acq(scr_stat *scp) { if (scp->smode.mode != VT_PROCESS) return FALSE; if (scp->sc->unit == sc_console_unit) cnavailable(sc_consptr, FALSE); scp->status |= SWITCH_WAIT_ACQ; PROC_LOCK(scp->proc); kern_psignal(scp->proc, scp->smode.acqsig); PROC_UNLOCK(scp->proc); DPRINTF(5, ("sending acqsig to %d\n", scp->pid)); return TRUE; } static int finish_vt_rel(scr_stat *scp, int release, int *s) { if (scp == scp->sc->old_scp && scp->status & SWITCH_WAIT_REL) { scp->status &= ~SWITCH_WAIT_REL; if (release) *s = do_switch_scr(scp->sc, *s); else scp->sc->switch_in_progress = 0; return 0; } return EINVAL; } static int finish_vt_acq(scr_stat *scp) { if (scp == scp->sc->new_scp && scp->status & SWITCH_WAIT_ACQ) { scp->status &= ~SWITCH_WAIT_ACQ; scp->sc->switch_in_progress = 0; return 0; } return EINVAL; } static void exchange_scr(sc_softc_t *sc) { scr_stat *scp; /* save the current state of video and keyboard */ sc_move_cursor(sc->old_scp, sc->old_scp->xpos, sc->old_scp->ypos); if (!ISGRAPHSC(sc->old_scp)) sc_remove_cursor_image(sc->old_scp); if (sc->old_scp->kbd_mode == K_XLATE) save_kbd_state(sc->old_scp); /* set up the video for the new screen */ scp = sc->cur_scp = sc->new_scp; if (sc->old_scp->mode != scp->mode || ISUNKNOWNSC(sc->old_scp)) set_mode(scp); #ifndef __sparc64__ else sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)sc->adp->va_window, FALSE); #endif scp->status |= MOUSE_HIDDEN; sc_move_cursor(scp, scp->xpos, scp->ypos); if (!ISGRAPHSC(scp)) sc_set_cursor_image(scp); #ifndef SC_NO_PALETTE_LOADING if (ISGRAPHSC(sc->old_scp)) { #ifdef SC_PIXEL_MODE if (sc->adp->va_info.vi_mem_model == V_INFO_MM_DIRECT) vidd_load_palette(sc->adp, sc->palette2); else #endif vidd_load_palette(sc->adp, sc->palette); } #endif sc_set_border(scp, scp->border); /* set up the keyboard for the new screen */ if (sc->kbd_open_level == 0 && sc->old_scp->kbd_mode != scp->kbd_mode) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); update_kbd_state(scp, scp->status, LOCK_MASK); mark_all(scp); } static void sc_puts(scr_stat *scp, u_char *buf, int len) { #ifdef DEV_SPLASH /* make screensaver happy */ if (!sticky_splash && scp == scp->sc->cur_scp && !sc_saver_keyb_only) run_scrn_saver = FALSE; #endif if (scp->tsw) (*scp->tsw->te_puts)(scp, buf, len); if (scp->sc->delayed_next_scr) sc_switch_scr(scp->sc, scp->sc->delayed_next_scr - 1); } void sc_draw_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_cursor)(scp, scp->cursor_pos, scp->curs_attr.flags & CONS_BLINK_CURSOR, TRUE, sc_inside_cutmark(scp, scp->cursor_pos)); scp->cursor_oldpos = scp->cursor_pos; SC_VIDEO_UNLOCK(scp->sc); } void sc_remove_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_cursor)(scp, scp->cursor_oldpos, scp->curs_attr.flags & CONS_BLINK_CURSOR, FALSE, sc_inside_cutmark(scp, scp->cursor_oldpos)); SC_VIDEO_UNLOCK(scp->sc); } static void update_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ sc_remove_cursor_image(scp); sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } void sc_set_cursor_image(scr_stat *scp) { scp->curs_attr = scp->base_curs_attr; if (scp->curs_attr.flags & CONS_HIDDEN_CURSOR) { /* hidden cursor is internally represented as zero-height underline */ scp->curs_attr.flags = CONS_CHAR_CURSOR; scp->curs_attr.base = scp->curs_attr.height = 0; } else if (scp->curs_attr.flags & CONS_CHAR_CURSOR) { scp->curs_attr.base = imin(scp->base_curs_attr.base, scp->font_size - 1); scp->curs_attr.height = imin(scp->base_curs_attr.height, scp->font_size - scp->curs_attr.base); } else { /* block cursor */ scp->curs_attr.base = 0; scp->curs_attr.height = scp->font_size; } /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->set_cursor)(scp, scp->curs_attr.base, scp->curs_attr.height, scp->curs_attr.flags & CONS_BLINK_CURSOR); SC_VIDEO_UNLOCK(scp->sc); } static void sc_adjust_ca(struct cursor_attr *cap, int flags, int base, int height) { if (flags & CONS_CHARCURSOR_COLORS) { cap->bg[0] = base & 0xff; cap->bg[1] = height & 0xff; } else if (flags & CONS_MOUSECURSOR_COLORS) { cap->mouse_ba = base & 0xff; cap->mouse_ia = height & 0xff; } else { if (base >= 0) cap->base = base; if (height >= 0) cap->height = height; if (!(flags & CONS_SHAPEONLY_CURSOR)) cap->flags = flags & CONS_CURSOR_ATTRS; } } static void change_cursor_shape(scr_stat *scp, int flags, int base, int height) { if ((scp == scp->sc->cur_scp) && !ISGRAPHSC(scp)) sc_remove_cursor_image(scp); if (flags & CONS_RESET_CURSOR) scp->base_curs_attr = scp->dflt_curs_attr; else if (flags & CONS_DEFAULT_CURSOR) { sc_adjust_ca(&scp->dflt_curs_attr, flags, base, height); scp->base_curs_attr = scp->dflt_curs_attr; } else sc_adjust_ca(&scp->base_curs_attr, flags, base, height); if ((scp == scp->sc->cur_scp) && !ISGRAPHSC(scp)) { sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } } void sc_change_cursor_shape(scr_stat *scp, int flags, int base, int height) { sc_softc_t *sc; struct tty *tp; int s; int i; if (flags == -1) flags = CONS_SHAPEONLY_CURSOR; s = spltty(); if (flags & CONS_LOCAL_CURSOR) { /* local (per vty) change */ change_cursor_shape(scp, flags, base, height); splx(s); return; } /* global change */ sc = scp->sc; if (flags & CONS_RESET_CURSOR) sc->curs_attr = sc->dflt_curs_attr; else if (flags & CONS_DEFAULT_CURSOR) { sc_adjust_ca(&sc->dflt_curs_attr, flags, base, height); sc->curs_attr = sc->dflt_curs_attr; } else sc_adjust_ca(&sc->curs_attr, flags, base, height); for (i = sc->first_vty; i < sc->first_vty + sc->vtys; ++i) { if ((tp = SC_DEV(sc, i)) == NULL) continue; if ((scp = sc_get_stat(tp)) == NULL) continue; scp->dflt_curs_attr = sc->curs_attr; change_cursor_shape(scp, CONS_RESET_CURSOR, -1, -1); } splx(s); } static void scinit(int unit, int flags) { /* * When syscons is being initialized as the kernel console, malloc() * is not yet functional, because various kernel structures has not been * fully initialized yet. Therefore, we need to declare the following * static buffers for the console. This is less than ideal, * but is necessry evil for the time being. XXX */ static u_short sc_buffer[ROW*COL]; /* XXX */ #ifndef SC_NO_FONT_LOADING static u_char font_8[256*8]; static u_char font_14[256*14]; static u_char font_16[256*16]; #endif #ifdef SC_KERNEL_CONS_ATTRS static const u_char dflt_kattrtab[] = SC_KERNEL_CONS_ATTRS; #elif SC_KERNEL_CONS_ATTR == FG_WHITE static const u_char dflt_kattrtab[] = { FG_WHITE, FG_YELLOW, FG_LIGHTMAGENTA, FG_LIGHTRED, FG_LIGHTCYAN, FG_LIGHTGREEN, FG_LIGHTBLUE, FG_GREEN, 0, }; #else static const u_char dflt_kattrtab[] = { FG_WHITE, 0, }; #endif sc_softc_t *sc; scr_stat *scp; video_adapter_t *adp; int col; int row; int i; /* one time initialization */ if (init_done == COLD) { sc_get_bios_values(&bios_value); for (i = 0; i < nitems(sc_kattrtab); i++) sc_kattrtab[i] = dflt_kattrtab[i % (nitems(dflt_kattrtab) - 1)]; } init_done = WARM; /* * Allocate resources. Even if we are being called for the second * time, we must allocate them again, because they might have * disappeared... */ sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); if ((sc->flags & SC_INIT_DONE) == 0) SC_VIDEO_LOCKINIT(sc); adp = NULL; if (sc->adapter >= 0) { vid_release(sc->adp, (void *)&sc->adapter); adp = sc->adp; sc->adp = NULL; } if (sc->keyboard >= 0) { DPRINTF(5, ("sc%d: releasing kbd%d\n", unit, sc->keyboard)); i = kbd_release(sc->kbd, (void *)&sc->keyboard); DPRINTF(5, ("sc%d: kbd_release returned %d\n", unit, i)); if (sc->kbd != NULL) { DPRINTF(5, ("sc%d: kbd != NULL!, index:%d, unit:%d, flags:0x%x\n", unit, sc->kbd->kb_index, sc->kbd->kb_unit, sc->kbd->kb_flags)); } sc->kbd = NULL; } sc->adapter = vid_allocate("*", unit, (void *)&sc->adapter); sc->adp = vid_get_adapter(sc->adapter); /* assert((sc->adapter >= 0) && (sc->adp != NULL)) */ sc->keyboard = sc_allocate_keyboard(sc, unit); DPRINTF(1, ("sc%d: keyboard %d\n", unit, sc->keyboard)); sc->kbd = kbd_get_keyboard(sc->keyboard); if (sc->kbd != NULL) { DPRINTF(1, ("sc%d: kbd index:%d, unit:%d, flags:0x%x\n", unit, sc->kbd->kb_index, sc->kbd->kb_unit, sc->kbd->kb_flags)); } if (!(sc->flags & SC_INIT_DONE) || (adp != sc->adp)) { sc->initial_mode = sc->adp->va_initial_mode; #ifndef SC_NO_FONT_LOADING if (flags & SC_KERNEL_CONSOLE) { sc->font_8 = font_8; sc->font_14 = font_14; sc->font_16 = font_16; } else if (sc->font_8 == NULL) { /* assert(sc_malloc) */ sc->font_8 = malloc(sizeof(font_8), M_DEVBUF, M_WAITOK); sc->font_14 = malloc(sizeof(font_14), M_DEVBUF, M_WAITOK); sc->font_16 = malloc(sizeof(font_16), M_DEVBUF, M_WAITOK); } #endif /* extract the hardware cursor location and hide the cursor for now */ vidd_read_hw_cursor(sc->adp, &col, &row); vidd_set_hw_cursor(sc->adp, -1, -1); /* set up the first console */ sc->first_vty = unit*MAXCONS; sc->vtys = MAXCONS; /* XXX: should be configurable */ if (flags & SC_KERNEL_CONSOLE) { /* * Set up devs structure but don't use it yet, calling make_dev() * might panic kernel. Wait for sc_attach_unit() to actually * create the devices. */ sc->dev = main_devs; scp = &main_console; init_scp(sc, sc->first_vty, scp); sc_vtb_init(&scp->vtb, VTB_MEMORY, scp->xsize, scp->ysize, (void *)sc_buffer, FALSE); if (sc_init_emulator(scp, SC_DFLT_TERM)) sc_init_emulator(scp, "*"); (*scp->tsw->te_default_attr)(scp, SC_KERNEL_CONS_ATTR, SC_KERNEL_CONS_REV_ATTR); } else { /* assert(sc_malloc) */ sc->dev = malloc(sizeof(struct tty *)*sc->vtys, M_DEVBUF, M_WAITOK|M_ZERO); sc->dev[0] = sc_alloc_tty(0, unit * MAXCONS); scp = alloc_scp(sc, sc->first_vty); SC_STAT(sc->dev[0]) = scp; } sc->cur_scp = scp; #ifndef __sparc64__ /* copy screen to temporary buffer */ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)scp->sc->adp->va_window, FALSE); if (ISTEXTSC(scp)) sc_vtb_copy(&scp->scr, 0, &scp->vtb, 0, scp->xsize*scp->ysize); #endif /* Sync h/w cursor position to s/w (sc and teken). */ if (col >= scp->xsize) col = 0; if (row >= scp->ysize) row = scp->ysize - 1; scp->xpos = col; scp->ypos = row; scp->cursor_pos = scp->cursor_oldpos = row*scp->xsize + col; (*scp->tsw->te_sync)(scp); sc->dflt_curs_attr.base = 0; sc->dflt_curs_attr.height = howmany(scp->font_size, 8); sc->dflt_curs_attr.flags = 0; sc->dflt_curs_attr.bg[0] = FG_RED; sc->dflt_curs_attr.bg[1] = FG_LIGHTGREY; sc->dflt_curs_attr.bg[2] = FG_BLUE; sc->dflt_curs_attr.mouse_ba = FG_WHITE; sc->dflt_curs_attr.mouse_ia = FG_RED; sc->curs_attr = sc->dflt_curs_attr; scp->base_curs_attr = scp->dflt_curs_attr = sc->curs_attr; scp->curs_attr = scp->base_curs_attr; #ifndef SC_NO_SYSMOUSE sc_mouse_move(scp, scp->xpixel/2, scp->ypixel/2); #endif if (!ISGRAPHSC(scp)) { sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } /* save font and palette */ #ifndef SC_NO_FONT_LOADING sc->fonts_loaded = 0; if (ISFONTAVAIL(sc->adp->va_flags)) { #ifdef SC_DFLT_FONT bcopy(dflt_font_8, sc->font_8, sizeof(dflt_font_8)); bcopy(dflt_font_14, sc->font_14, sizeof(dflt_font_14)); bcopy(dflt_font_16, sc->font_16, sizeof(dflt_font_16)); sc->fonts_loaded = FONT_16 | FONT_14 | FONT_8; if (scp->font_size < 14) { sc_load_font(scp, 0, 8, 8, sc->font_8, 0, 256); } else if (scp->font_size >= 16) { sc_load_font(scp, 0, 16, 8, sc->font_16, 0, 256); } else { sc_load_font(scp, 0, 14, 8, sc->font_14, 0, 256); } #else /* !SC_DFLT_FONT */ if (scp->font_size < 14) { sc_save_font(scp, 0, 8, 8, sc->font_8, 0, 256); sc->fonts_loaded = FONT_8; } else if (scp->font_size >= 16) { sc_save_font(scp, 0, 16, 8, sc->font_16, 0, 256); sc->fonts_loaded = FONT_16; } else { sc_save_font(scp, 0, 14, 8, sc->font_14, 0, 256); sc->fonts_loaded = FONT_14; } #endif /* SC_DFLT_FONT */ /* FONT KLUDGE: always use the font page #0. XXX */ sc_show_font(scp, 0); } #endif /* !SC_NO_FONT_LOADING */ #ifndef SC_NO_PALETTE_LOADING vidd_save_palette(sc->adp, sc->palette); #ifdef SC_PIXEL_MODE for (i = 0; i < sizeof(sc->palette2); i++) sc->palette2[i] = i / 3; #endif #endif #ifdef DEV_SPLASH if (!(sc->flags & SC_SPLASH_SCRN)) { /* we are ready to put up the splash image! */ splash_init(sc->adp, scsplash_callback, sc); sc->flags |= SC_SPLASH_SCRN; } #endif } /* the rest is not necessary, if we have done it once */ if (sc->flags & SC_INIT_DONE) return; /* initialize mapscrn arrays to a one to one map */ for (i = 0; i < sizeof(sc->scr_map); i++) sc->scr_map[i] = sc->scr_rmap[i] = i; sc->flags |= SC_INIT_DONE; } static void scterm(int unit, int flags) { sc_softc_t *sc; scr_stat *scp; sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); if (sc == NULL) return; /* shouldn't happen */ #ifdef DEV_SPLASH /* this console is no longer available for the splash screen */ if (sc->flags & SC_SPLASH_SCRN) { splash_term(sc->adp); sc->flags &= ~SC_SPLASH_SCRN; } #endif #if 0 /* XXX */ /* move the hardware cursor to the upper-left corner */ vidd_set_hw_cursor(sc->adp, 0, 0); #endif /* release the keyboard and the video card */ if (sc->keyboard >= 0) kbd_release(sc->kbd, &sc->keyboard); if (sc->adapter >= 0) vid_release(sc->adp, &sc->adapter); /* stop the terminal emulator, if any */ scp = sc_get_stat(sc->dev[0]); if (scp->tsw) (*scp->tsw->te_term)(scp, &scp->ts); mtx_destroy(&sc->video_mtx); /* clear the structure */ if (!(flags & SC_KERNEL_CONSOLE)) { free(scp->ts, M_DEVBUF); /* XXX: We need delete_dev() for this */ free(sc->dev, M_DEVBUF); #if 0 /* XXX: We need a ttyunregister for this */ free(sc->tty, M_DEVBUF); #endif #ifndef SC_NO_FONT_LOADING free(sc->font_8, M_DEVBUF); free(sc->font_14, M_DEVBUF); free(sc->font_16, M_DEVBUF); #endif /* XXX vtb, history */ } bzero(sc, sizeof(*sc)); sc->keyboard = -1; sc->adapter = -1; } static void scshutdown(__unused void *arg, __unused int howto) { KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); sc_touch_scrn_saver(); if (!cold && sc_console->sc->cur_scp->index != sc_console->index && sc_console->sc->cur_scp->smode.mode == VT_AUTO && sc_console->smode.mode == VT_AUTO) sc_switch_scr(sc_console->sc, sc_console->index); shutdown_in_progress = TRUE; } static void scsuspend(__unused void *arg) { int retry; KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); sc_susp_scr = sc_console->sc->cur_scp->index; if (sc_no_suspend_vtswitch || sc_susp_scr == sc_console->index) { sc_touch_scrn_saver(); sc_susp_scr = -1; return; } for (retry = 0; retry < 10; retry++) { sc_switch_scr(sc_console->sc, sc_console->index); if (!sc_console->sc->switch_in_progress) break; pause("scsuspend", hz); } suspend_in_progress = TRUE; } static void scresume(__unused void *arg) { KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); suspend_in_progress = FALSE; if (sc_susp_scr < 0) { update_font(sc_console->sc->cur_scp); return; } sc_switch_scr(sc_console->sc, sc_susp_scr); } int sc_clean_up(scr_stat *scp) { #ifdef DEV_SPLASH int error; #endif if (scp->sc->flags & SC_SCRN_BLANKED) { sc_touch_scrn_saver(); #ifdef DEV_SPLASH if ((error = wait_scrn_saver_stop(scp->sc))) return error; #endif } scp->status |= MOUSE_HIDDEN; sc_remove_mouse_image(scp); sc_remove_cutmarking(scp); return 0; } void sc_alloc_scr_buffer(scr_stat *scp, int wait, int discard) { sc_vtb_t new; sc_vtb_t old; old = scp->vtb; sc_vtb_init(&new, VTB_MEMORY, scp->xsize, scp->ysize, NULL, wait); if (!discard && (old.vtb_flags & VTB_VALID)) { /* retain the current cursor position and buffer contants */ scp->cursor_oldpos = scp->cursor_pos; /* * This works only if the old buffer has the same size as or larger * than the new one. XXX */ sc_vtb_copy(&old, 0, &new, 0, scp->xsize*scp->ysize); scp->vtb = new; } else { scp->vtb = new; sc_vtb_destroy(&old); } #ifndef SC_NO_SYSMOUSE /* move the mouse cursor at the center of the screen */ sc_mouse_move(scp, scp->xpixel / 2, scp->ypixel / 2); #endif } static scr_stat *alloc_scp(sc_softc_t *sc, int vty) { scr_stat *scp; /* assert(sc_malloc) */ scp = (scr_stat *)malloc(sizeof(scr_stat), M_DEVBUF, M_WAITOK); init_scp(sc, vty, scp); sc_alloc_scr_buffer(scp, TRUE, TRUE); if (sc_init_emulator(scp, SC_DFLT_TERM)) sc_init_emulator(scp, "*"); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(scp, TRUE); #endif #ifndef SC_NO_HISTORY sc_alloc_history_buffer(scp, 0, 0, TRUE); #endif return scp; } static void init_scp(sc_softc_t *sc, int vty, scr_stat *scp) { video_info_t info; bzero(scp, sizeof(*scp)); scp->index = vty; scp->sc = sc; scp->status = 0; scp->mode = sc->initial_mode; vidd_get_info(sc->adp, scp->mode, &info); if (info.vi_flags & V_INFO_GRAPHICS) { scp->status |= GRAPHICS_MODE; scp->xpixel = info.vi_width; scp->ypixel = info.vi_height; scp->xsize = info.vi_width/info.vi_cwidth; scp->ysize = info.vi_height/info.vi_cheight; scp->font_size = 0; scp->font = NULL; } else { scp->xsize = info.vi_width; scp->ysize = info.vi_height; scp->xpixel = scp->xsize*info.vi_cwidth; scp->ypixel = scp->ysize*info.vi_cheight; } scp->font_size = info.vi_cheight; scp->font_width = info.vi_cwidth; #ifndef SC_NO_FONT_LOADING if (info.vi_cheight < 14) scp->font = sc->font_8; else if (info.vi_cheight >= 16) scp->font = sc->font_16; else scp->font = sc->font_14; #else scp->font = NULL; #endif sc_vtb_init(&scp->vtb, VTB_MEMORY, 0, 0, NULL, FALSE); #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, 0, 0, NULL, FALSE); #endif scp->xoff = scp->yoff = 0; scp->xpos = scp->ypos = 0; scp->start = scp->xsize * scp->ysize - 1; scp->end = 0; scp->tsw = NULL; scp->ts = NULL; scp->rndr = NULL; scp->border = (SC_NORM_ATTR >> 4) & 0x0f; scp->base_curs_attr = scp->dflt_curs_attr = sc->curs_attr; scp->mouse_cut_start = scp->xsize*scp->ysize; scp->mouse_cut_end = -1; scp->mouse_signal = 0; scp->mouse_pid = 0; scp->mouse_proc = NULL; scp->kbd_mode = K_XLATE; scp->bell_pitch = bios_value.bell_pitch; scp->bell_duration = BELL_DURATION; scp->status |= (bios_value.shift_state & NLKED); scp->status |= CURSOR_ENABLED | MOUSE_HIDDEN; scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; scp->history = NULL; scp->history_pos = 0; scp->history_size = 0; } int sc_init_emulator(scr_stat *scp, char *name) { sc_term_sw_t *sw; sc_rndr_sw_t *rndr; void *p; int error; if (name == NULL) /* if no name is given, use the current emulator */ sw = scp->tsw; else /* ...otherwise find the named emulator */ sw = sc_term_match(name); if (sw == NULL) return EINVAL; rndr = NULL; if (strcmp(sw->te_renderer, "*") != 0) { rndr = sc_render_match(scp, sw->te_renderer, scp->status & (GRAPHICS_MODE | PIXEL_MODE)); } if (rndr == NULL) { rndr = sc_render_match(scp, scp->sc->adp->va_name, scp->status & (GRAPHICS_MODE | PIXEL_MODE)); if (rndr == NULL) return ENODEV; } if (sw == scp->tsw) { error = (*sw->te_init)(scp, &scp->ts, SC_TE_WARM_INIT); scp->rndr = rndr; scp->rndr->init(scp); sc_clear_screen(scp); /* assert(error == 0); */ return error; } if (sc_malloc && (sw->te_size > 0)) p = malloc(sw->te_size, M_DEVBUF, M_NOWAIT); else p = NULL; error = (*sw->te_init)(scp, &p, SC_TE_COLD_INIT); if (error) return error; if (scp->tsw) (*scp->tsw->te_term)(scp, &scp->ts); if (scp->ts != NULL) free(scp->ts, M_DEVBUF); scp->tsw = sw; scp->ts = p; scp->rndr = rndr; scp->rndr->init(scp); (*sw->te_default_attr)(scp, SC_NORM_ATTR, SC_NORM_REV_ATTR); sc_clear_screen(scp); return 0; } /* * scgetc(flags) - get character from keyboard. * If flags & SCGETC_CN, then avoid harmful side effects. * If flags & SCGETC_NONBLOCK, then wait until a key is pressed, else * return NOKEY if there is nothing there. */ static u_int scgetc(sc_softc_t *sc, u_int flags, struct sc_cnstate *sp) { scr_stat *scp; #ifndef SC_NO_HISTORY struct tty *tp; #endif u_int c; int this_scr; int f; int i; if (sc->kbd == NULL) return NOKEY; next_code: #if 1 /* I don't like this, but... XXX */ if (flags & SCGETC_CN) sccnupdate(sc->cur_scp); #endif scp = sc->cur_scp; /* first see if there is something in the keyboard port */ for (;;) { if (flags & SCGETC_CN) sccnscrunlock(sc, sp); c = kbdd_read_char(sc->kbd, !(flags & SCGETC_NONBLOCK)); if (flags & SCGETC_CN) sccnscrlock(sc, sp); if (c == ERRKEY) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); } else if (c == NOKEY) return c; else break; } /* make screensaver happy */ if (!(c & RELKEY)) sc_touch_scrn_saver(); if (!(flags & SCGETC_CN)) random_harvest_queue(&c, sizeof(c), RANDOM_KEYBOARD); if (sc->kbd_open_level == 0 && scp->kbd_mode != K_XLATE) return KEYCHAR(c); /* if scroll-lock pressed allow history browsing */ if (!ISGRAPHSC(scp) && scp->history && scp->status & SLKED) { scp->status &= ~CURSOR_ENABLED; sc_remove_cursor_image(scp); #ifndef SC_NO_HISTORY if (!(scp->status & BUFFER_SAVED)) { scp->status |= BUFFER_SAVED; sc_hist_save(scp); } switch (c) { /* FIXME: key codes */ case SPCLKEY | FKEY | F(49): /* home key */ sc_remove_cutmarking(scp); sc_hist_home(scp); goto next_code; case SPCLKEY | FKEY | F(57): /* end key */ sc_remove_cutmarking(scp); sc_hist_end(scp); goto next_code; case SPCLKEY | FKEY | F(50): /* up arrow key */ sc_remove_cutmarking(scp); if (sc_hist_up_line(scp)) if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); goto next_code; case SPCLKEY | FKEY | F(58): /* down arrow key */ sc_remove_cutmarking(scp); if (sc_hist_down_line(scp)) if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); goto next_code; case SPCLKEY | FKEY | F(51): /* page up key */ sc_remove_cutmarking(scp); for (i=0; iysize; i++) if (sc_hist_up_line(scp)) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); break; } goto next_code; case SPCLKEY | FKEY | F(59): /* page down key */ sc_remove_cutmarking(scp); for (i=0; iysize; i++) if (sc_hist_down_line(scp)) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); break; } goto next_code; } #endif /* SC_NO_HISTORY */ } /* * Process and consume special keys here. Return a plain char code * or a char code with the META flag or a function key code. */ if (c & RELKEY) { /* key released */ /* goto next_code */ } else { /* key pressed */ if (c & SPCLKEY) { c &= ~SPCLKEY; switch (KEYCHAR(c)) { /* LOCKING KEYS */ case NLK: case CLK: case ALK: break; case SLK: (void)kbdd_ioctl(sc->kbd, KDGKBSTATE, (caddr_t)&f); if (f & SLKED) { scp->status |= SLKED; } else { if (scp->status & SLKED) { scp->status &= ~SLKED; #ifndef SC_NO_HISTORY if (scp->status & BUFFER_SAVED) { if (!sc_hist_restore(scp)) sc_remove_cutmarking(scp); scp->status &= ~BUFFER_SAVED; scp->status |= CURSOR_ENABLED; sc_draw_cursor_image(scp); } /* Only safe in Giant-locked context. */ tp = SC_DEV(sc, scp->index); if (!(flags & SCGETC_CN) && tty_opened_ns(tp)) sctty_outwakeup(tp); #endif } } break; case PASTE: #ifndef SC_NO_CUTPASTE sc_mouse_paste(scp); #endif break; /* NON-LOCKING KEYS */ case NOP: case LSH: case RSH: case LCTR: case RCTR: case LALT: case RALT: case ASH: case META: break; case BTAB: if (!(sc->flags & SC_SCRN_BLANKED)) return c; break; case SPSC: #ifdef DEV_SPLASH /* force activatation/deactivation of the screen saver */ if (!(sc->flags & SC_SCRN_BLANKED)) { run_scrn_saver = TRUE; sc->scrn_time_stamp -= scrn_blank_time; } if (cold) { /* * While devices are being probed, the screen saver need * to be invoked explicitly. XXX */ if (sc->flags & SC_SCRN_BLANKED) { scsplash_stick(FALSE); stop_scrn_saver(sc, current_saver); } else { if (!ISGRAPHSC(scp)) { scsplash_stick(TRUE); (*current_saver)(sc, TRUE); } } } #endif /* DEV_SPLASH */ break; case RBT: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(0); #endif break; case HALT: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(RB_HALT); #endif break; case PDWN: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(RB_HALT|RB_POWEROFF); #endif break; case SUSP: power_pm_suspend(POWER_SLEEP_STATE_SUSPEND); break; case STBY: power_pm_suspend(POWER_SLEEP_STATE_STANDBY); break; case DBG: #ifndef SC_DISABLE_KDBKEY if (enable_kdbkey) kdb_break(); #endif break; case PNC: if (enable_panic_key) panic("Forced by the panic key"); break; case NEXT: this_scr = scp->index; for (i = (this_scr - sc->first_vty + 1)%sc->vtys; sc->first_vty + i != this_scr; i = (i + 1)%sc->vtys) { struct tty *tp = SC_DEV(sc, sc->first_vty + i); if (tty_opened_ns(tp)) { sc_switch_scr(scp->sc, sc->first_vty + i); break; } } break; case PREV: this_scr = scp->index; for (i = (this_scr - sc->first_vty + sc->vtys - 1)%sc->vtys; sc->first_vty + i != this_scr; i = (i + sc->vtys - 1)%sc->vtys) { struct tty *tp = SC_DEV(sc, sc->first_vty + i); if (tty_opened_ns(tp)) { sc_switch_scr(scp->sc, sc->first_vty + i); break; } } break; default: if (KEYCHAR(c) >= F_SCR && KEYCHAR(c) <= L_SCR) { sc_switch_scr(scp->sc, sc->first_vty + KEYCHAR(c) - F_SCR); break; } /* assert(c & FKEY) */ if (!(sc->flags & SC_SCRN_BLANKED)) return c; break; } /* goto next_code */ } else { /* regular keys (maybe MKEY is set) */ #if !defined(SC_DISABLE_KDBKEY) && defined(KDB) if (enable_kdbkey) kdb_alt_break(c, &sc->sc_altbrk); #endif if (!(sc->flags & SC_SCRN_BLANKED)) return c; } } goto next_code; } static int sctty_mmap(struct tty *tp, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { scr_stat *scp; scp = sc_get_stat(tp); if (scp != scp->sc->cur_scp) return -1; return vidd_mmap(scp->sc->adp, offset, paddr, nprot, memattr); } static void update_font(scr_stat *scp) { #ifndef SC_NO_FONT_LOADING /* load appropriate font */ if (!(scp->status & GRAPHICS_MODE)) { if (!(scp->status & PIXEL_MODE) && ISFONTAVAIL(scp->sc->adp->va_flags)) { if (scp->font_size < 14) { if (scp->sc->fonts_loaded & FONT_8) sc_load_font(scp, 0, 8, 8, scp->sc->font_8, 0, 256); } else if (scp->font_size >= 16) { if (scp->sc->fonts_loaded & FONT_16) sc_load_font(scp, 0, 16, 8, scp->sc->font_16, 0, 256); } else { if (scp->sc->fonts_loaded & FONT_14) sc_load_font(scp, 0, 14, 8, scp->sc->font_14, 0, 256); } /* * FONT KLUDGE: * This is an interim kludge to display correct font. * Always use the font page #0 on the video plane 2. * Somehow we cannot show the font in other font pages on * some video cards... XXX */ sc_show_font(scp, 0); } mark_all(scp); } #endif /* !SC_NO_FONT_LOADING */ } static int save_kbd_state(scr_stat *scp) { int state; int error; error = kbdd_ioctl(scp->sc->kbd, KDGKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; if (error == 0) { scp->status &= ~LOCK_MASK; scp->status |= state; } return error; } static int update_kbd_state(scr_stat *scp, int new_bits, int mask) { int state; int error; if (mask != LOCK_MASK) { error = kbdd_ioctl(scp->sc->kbd, KDGKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; if (error) return error; state &= ~mask; state |= new_bits & mask; } else { state = new_bits & LOCK_MASK; } error = kbdd_ioctl(scp->sc->kbd, KDSKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; return error; } static int update_kbd_leds(scr_stat *scp, int which) { int error; which &= LOCK_MASK; error = kbdd_ioctl(scp->sc->kbd, KDSETLED, (caddr_t)&which); if (error == ENOIOCTL) error = ENODEV; return error; } int set_mode(scr_stat *scp) { video_info_t info; /* reject unsupported mode */ if (vidd_get_info(scp->sc->adp, scp->mode, &info)) return 1; /* if this vty is not currently showing, do nothing */ if (scp != scp->sc->cur_scp) return 0; /* setup video hardware for the given mode */ vidd_set_mode(scp->sc->adp, scp->mode); scp->rndr->init(scp); #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)scp->sc->adp->va_window, FALSE); #endif update_font(scp); sc_set_border(scp, scp->border); sc_set_cursor_image(scp); return 0; } void sc_set_border(scr_stat *scp, int color) { SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_border)(scp, color); SC_VIDEO_UNLOCK(scp->sc); } #ifndef SC_NO_FONT_LOADING void sc_load_font(scr_stat *scp, int page, int size, int width, u_char *buf, int base, int count) { sc_softc_t *sc; sc = scp->sc; sc->font_loading_in_progress = TRUE; vidd_load_font(sc->adp, page, size, width, buf, base, count); sc->font_loading_in_progress = FALSE; } void sc_save_font(scr_stat *scp, int page, int size, int width, u_char *buf, int base, int count) { sc_softc_t *sc; sc = scp->sc; sc->font_loading_in_progress = TRUE; vidd_save_font(sc->adp, page, size, width, buf, base, count); sc->font_loading_in_progress = FALSE; } void sc_show_font(scr_stat *scp, int page) { vidd_show_font(scp->sc->adp, page); } #endif /* !SC_NO_FONT_LOADING */ void sc_paste(scr_stat *scp, const u_char *p, int count) { struct tty *tp; u_char *rmap; tp = SC_DEV(scp->sc, scp->sc->cur_scp->index); if (!tty_opened_ns(tp)) return; rmap = scp->sc->scr_rmap; for (; count > 0; --count) ttydisc_rint(tp, rmap[*p++], 0); ttydisc_rint_done(tp); } void sc_respond(scr_stat *scp, const u_char *p, int count, int wakeup) { struct tty *tp; tp = SC_DEV(scp->sc, scp->sc->cur_scp->index); if (!tty_opened_ns(tp)) return; ttydisc_rint_simple(tp, p, count); if (wakeup) { /* XXX: we can't always call ttydisc_rint_done() here! */ ttydisc_rint_done(tp); } } void sc_bell(scr_stat *scp, int pitch, int duration) { if (cold || kdb_active || shutdown_in_progress || !enable_bell) return; if (scp != scp->sc->cur_scp && (scp->sc->flags & SC_QUIET_BELL)) return; if (scp->sc->flags & SC_VISUAL_BELL) { if (scp->sc->blink_in_progress) return; scp->sc->blink_in_progress = 3; if (scp != scp->sc->cur_scp) scp->sc->blink_in_progress += 2; blink_screen(scp->sc->cur_scp); } else if (duration != 0 && pitch != 0) { if (scp != scp->sc->cur_scp) pitch *= 2; sysbeep(1193182 / pitch, duration); } } static int sc_kattr(void) { if (sc_console == NULL) return (SC_KERNEL_CONS_ATTR); /* for very early, before pcpu */ return (sc_kattrtab[curcpu % nitems(sc_kattrtab)]); } static void blink_screen(void *arg) { scr_stat *scp = arg; struct tty *tp; if (ISGRAPHSC(scp) || (scp->sc->blink_in_progress <= 1)) { scp->sc->blink_in_progress = 0; mark_all(scp); tp = SC_DEV(scp->sc, scp->index); if (tty_opened_ns(tp)) sctty_outwakeup(tp); if (scp->sc->delayed_next_scr) sc_switch_scr(scp->sc, scp->sc->delayed_next_scr - 1); } else { (*scp->rndr->draw)(scp, 0, scp->xsize*scp->ysize, scp->sc->blink_in_progress & 1); scp->sc->blink_in_progress--; callout_reset_sbt(&scp->sc->cblink, SBT_1S / 15, 0, blink_screen, scp, C_PREL(0)); } } /* * Until sc_attach_unit() gets called no dev structures will be available * to store the per-screen current status. This is the case when the * kernel is initially booting and needs access to its console. During * this early phase of booting the console's current status is kept in * one statically defined scr_stat structure, and any pointers to the * dev structures will be NULL. */ static scr_stat * sc_get_stat(struct tty *tp) { if (tp == NULL) return (&main_console); return (SC_STAT(tp)); } /* * Allocate active keyboard. Try to allocate "kbdmux" keyboard first, and, * if found, add all non-busy keyboards to "kbdmux". Otherwise look for * any keyboard. */ static int sc_allocate_keyboard(sc_softc_t *sc, int unit) { int idx0, idx; keyboard_t *k0, *k; keyboard_info_t ki; idx0 = kbd_allocate("kbdmux", -1, (void *)&sc->keyboard, sckbdevent, sc); if (idx0 != -1) { k0 = kbd_get_keyboard(idx0); for (idx = kbd_find_keyboard2("*", -1, 0); idx != -1; idx = kbd_find_keyboard2("*", -1, idx + 1)) { k = kbd_get_keyboard(idx); if (idx == idx0 || KBD_IS_BUSY(k)) continue; bzero(&ki, sizeof(ki)); strcpy(ki.kb_name, k->kb_name); ki.kb_unit = k->kb_unit; (void)kbdd_ioctl(k0, KBADDKBD, (caddr_t) &ki); } } else idx0 = kbd_allocate("*", unit, (void *)&sc->keyboard, sckbdevent, sc); return (idx0); } Index: head/sys/i386/acpica/acpi_machdep.c =================================================================== --- head/sys/i386/acpica/acpi_machdep.c (revision 343566) +++ head/sys/i386/acpica/acpi_machdep.c (revision 343567) @@ -1,389 +1,389 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 Mitsuru IWASAKI * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include uint32_t acpi_resume_beep; SYSCTL_UINT(_debug_acpi, OID_AUTO, resume_beep, CTLFLAG_RWTUN, &acpi_resume_beep, 0, "Beep the PC speaker when resuming"); uint32_t acpi_reset_video; TUNABLE_INT("hw.acpi.reset_video", &acpi_reset_video); static int intr_model = ACPI_INTR_PIC; int acpi_machdep_init(device_t dev) { struct acpi_softc *sc; sc = device_get_softc(dev); acpi_apm_init(sc); acpi_install_wakeup_handler(sc); if (intr_model == ACPI_INTR_PIC) BUS_CONFIG_INTR(dev, AcpiGbl_FADT.SciInterrupt, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); else acpi_SetIntrModel(intr_model); SYSCTL_ADD_UINT(&sc->acpi_sysctl_ctx, SYSCTL_CHILDREN(sc->acpi_sysctl_tree), OID_AUTO, "reset_video", CTLFLAG_RW, &acpi_reset_video, 0, "Call the VESA reset BIOS vector on the resume path"); return (0); } void acpi_SetDefaultIntrModel(int model) { intr_model = model; } /* Check BIOS date. If 1998 or older, disable ACPI. */ int acpi_machdep_quirks(int *quirks) { char *va; int year; /* BIOS address 0xffff5 contains the date in the format mm/dd/yy. */ va = pmap_mapbios(0xffff0, 16); sscanf(va + 11, "%2d", &year); pmap_unmapbios((vm_offset_t)va, 16); /* * Date must be >= 1/1/1999 or we don't trust ACPI. Note that this * check must be changed by my 114th birthday. */ if (year > 90 && year < 99) *quirks = ACPI_Q_BROKEN; return (0); } /* * Support for mapping ACPI tables during early boot. This abuses the * crashdump map because the kernel cannot allocate KVA in * pmap_mapbios() when this is used. This makes the following * assumptions about how we use this KVA: pages 0 and 1 are used to * map in the header of each table found via the RSDT or XSDT and * pages 2 to n are used to map in the RSDT or XSDT. This has to use * 2 pages for the table headers in case a header spans a page * boundary. * * XXX: We don't ensure the table fits in the available address space * in the crashdump map. */ /* * Map some memory using the crashdump map. 'offset' is an offset in * pages into the crashdump map to use for the start of the mapping. */ static void * table_map(vm_paddr_t pa, int offset, vm_offset_t length) { vm_offset_t va, off; void *data; off = pa & PAGE_MASK; length = round_page(length + off); - pa = pa & PG_FRAME; + pa = pmap_pg_frame(pa); va = (vm_offset_t)pmap_kenter_temporary(pa, offset) + (offset * PAGE_SIZE); data = (void *)(va + off); length -= PAGE_SIZE; while (length > 0) { va += PAGE_SIZE; pa += PAGE_SIZE; length -= PAGE_SIZE; pmap_kenter(va, pa); invlpg(va); } return (data); } /* Unmap memory previously mapped with table_map(). */ static void table_unmap(void *data, vm_offset_t length) { vm_offset_t va, off; va = (vm_offset_t)data; off = va & PAGE_MASK; length = round_page(length + off); va &= ~PAGE_MASK; while (length > 0) { pmap_kremove(va); invlpg(va); va += PAGE_SIZE; length -= PAGE_SIZE; } } /* * Map a table at a given offset into the crashdump map. It first * maps the header to determine the table length and then maps the * entire table. */ static void * map_table(vm_paddr_t pa, int offset, const char *sig) { ACPI_TABLE_HEADER *header; vm_offset_t length; void *table; header = table_map(pa, offset, sizeof(ACPI_TABLE_HEADER)); if (strncmp(header->Signature, sig, ACPI_NAME_SIZE) != 0) { table_unmap(header, sizeof(ACPI_TABLE_HEADER)); return (NULL); } length = header->Length; table_unmap(header, sizeof(ACPI_TABLE_HEADER)); table = table_map(pa, offset, length); if (ACPI_FAILURE(AcpiTbChecksum(table, length))) { if (bootverbose) printf("ACPI: Failed checksum for table %s\n", sig); #if (ACPI_CHECKSUM_ABORT) table_unmap(table, length); return (NULL); #endif } return (table); } /* * See if a given ACPI table is the requested table. Returns the * length of the able if it matches or zero on failure. */ static int probe_table(vm_paddr_t address, const char *sig) { ACPI_TABLE_HEADER *table; table = table_map(address, 0, sizeof(ACPI_TABLE_HEADER)); if (table == NULL) { if (bootverbose) printf("ACPI: Failed to map table at 0x%jx\n", (uintmax_t)address); return (0); } if (bootverbose) printf("Table '%.4s' at 0x%jx\n", table->Signature, (uintmax_t)address); if (strncmp(table->Signature, sig, ACPI_NAME_SIZE) != 0) { table_unmap(table, sizeof(ACPI_TABLE_HEADER)); return (0); } table_unmap(table, sizeof(ACPI_TABLE_HEADER)); return (1); } /* * Try to map a table at a given physical address previously returned * by acpi_find_table(). */ void * acpi_map_table(vm_paddr_t pa, const char *sig) { return (map_table(pa, 0, sig)); } /* Unmap a table previously mapped via acpi_map_table(). */ void acpi_unmap_table(void *table) { ACPI_TABLE_HEADER *header; header = (ACPI_TABLE_HEADER *)table; table_unmap(table, header->Length); } /* * Return the physical address of the requested table or zero if one * is not found. */ vm_paddr_t acpi_find_table(const char *sig) { ACPI_PHYSICAL_ADDRESS rsdp_ptr; ACPI_TABLE_RSDP *rsdp; ACPI_TABLE_RSDT *rsdt; ACPI_TABLE_XSDT *xsdt; ACPI_TABLE_HEADER *table; vm_paddr_t addr; int i, count; if (resource_disabled("acpi", 0)) return (0); /* * Map in the RSDP. Since ACPI uses AcpiOsMapMemory() which in turn * calls pmap_mapbios() to find the RSDP, we assume that we can use * pmap_mapbios() to map the RSDP. */ if ((rsdp_ptr = AcpiOsGetRootPointer()) == 0) return (0); rsdp = pmap_mapbios(rsdp_ptr, sizeof(ACPI_TABLE_RSDP)); if (rsdp == NULL) { if (bootverbose) printf("ACPI: Failed to map RSDP\n"); return (0); } /* * For ACPI >= 2.0, use the XSDT if it is available. * Otherwise, use the RSDT. We map the XSDT or RSDT at page 2 * in the crashdump area. Pages 0 and 1 are used to map in the * headers of candidate ACPI tables. */ addr = 0; if (rsdp->Revision >= 2 && rsdp->XsdtPhysicalAddress != 0) { /* * AcpiOsGetRootPointer only verifies the checksum for * the version 1.0 portion of the RSDP. Version 2.0 has * an additional checksum that we verify first. */ if (AcpiTbChecksum((UINT8 *)rsdp, ACPI_RSDP_XCHECKSUM_LENGTH)) { if (bootverbose) printf("ACPI: RSDP failed extended checksum\n"); return (0); } xsdt = map_table(rsdp->XsdtPhysicalAddress, 2, ACPI_SIG_XSDT); if (xsdt == NULL) { if (bootverbose) printf("ACPI: Failed to map XSDT\n"); return (0); } count = (xsdt->Header.Length - sizeof(ACPI_TABLE_HEADER)) / sizeof(UINT64); for (i = 0; i < count; i++) if (probe_table(xsdt->TableOffsetEntry[i], sig)) { addr = xsdt->TableOffsetEntry[i]; break; } acpi_unmap_table(xsdt); } else { rsdt = map_table(rsdp->RsdtPhysicalAddress, 2, ACPI_SIG_RSDT); if (rsdt == NULL) { if (bootverbose) printf("ACPI: Failed to map RSDT\n"); return (0); } count = (rsdt->Header.Length - sizeof(ACPI_TABLE_HEADER)) / sizeof(UINT32); for (i = 0; i < count; i++) if (probe_table(rsdt->TableOffsetEntry[i], sig)) { addr = rsdt->TableOffsetEntry[i]; break; } acpi_unmap_table(rsdt); } pmap_unmapbios((vm_offset_t)rsdp, sizeof(ACPI_TABLE_RSDP)); if (addr == 0) { if (bootverbose) printf("ACPI: No %s table found\n", sig); return (0); } if (bootverbose) printf("%s: Found table at 0x%jx\n", sig, (uintmax_t)addr); /* * Verify that we can map the full table and that its checksum is * correct, etc. */ table = map_table(addr, 0, sig); if (table == NULL) return (0); acpi_unmap_table(table); return (addr); } /* * ACPI nexus(4) driver. */ static int nexus_acpi_probe(device_t dev) { int error; error = acpi_identify(); if (error) return (error); return (BUS_PROBE_DEFAULT); } static int nexus_acpi_attach(device_t dev) { nexus_init_resources(); bus_generic_probe(dev); if (BUS_ADD_CHILD(dev, 10, "acpi", 0) == NULL) panic("failed to add acpi0 device"); return (bus_generic_attach(dev)); } static device_method_t nexus_acpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_acpi_probe), DEVMETHOD(device_attach, nexus_acpi_attach), { 0, 0 } }; DEFINE_CLASS_1(nexus, nexus_acpi_driver, nexus_acpi_methods, 1, nexus_driver); static devclass_t nexus_devclass; DRIVER_MODULE(nexus_acpi, root, nexus_acpi_driver, nexus_devclass, 0, 0); Index: head/sys/i386/i386/bios.c =================================================================== --- head/sys/i386/i386/bios.c (revision 343566) +++ head/sys/i386/i386/bios.c (revision 343567) @@ -1,750 +1,734 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997 Michael Smith * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Code for dealing with the BIOS in x86 PC systems. */ #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #include #endif #define BIOS_START 0xe0000 #define BIOS_SIZE 0x20000 /* exported lookup results */ struct bios32_SDentry PCIbios; static struct PnPBIOS_table *PnPBIOStable; static u_int bios32_SDCI; /* start fairly early */ static void bios32_init(void *junk); SYSINIT(bios32, SI_SUB_CPU, SI_ORDER_ANY, bios32_init, NULL); /* * bios32_init * * Locate various bios32 entities. */ static void bios32_init(void *junk) { u_long sigaddr; struct bios32_SDheader *sdh; struct PnPBIOS_table *pt; u_int8_t ck, *cv; int i; char *p; /* * BIOS32 Service Directory, PCI BIOS */ /* look for the signature */ if ((sigaddr = bios_sigsearch(0, "_32_", 4, 16, 0)) != 0) { /* get a virtual pointer to the structure */ sdh = (struct bios32_SDheader *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)sdh, ck = 0, i = 0; i < (sdh->len * 16); i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if ((ck == 0) && (BIOS_START <= sdh->entry ) && (sdh->entry < (BIOS_START + BIOS_SIZE))) { bios32_SDCI = BIOS_PADDRTOVADDR(sdh->entry); if (bootverbose) { printf("bios32: Found BIOS32 Service Directory header at %p\n", sdh); printf("bios32: Entry = 0x%x (%x) Rev = %d Len = %d\n", sdh->entry, bios32_SDCI, sdh->revision, sdh->len); } /* Allow user override of PCI BIOS search */ if (((p = kern_getenv("machdep.bios.pci")) == NULL) || strcmp(p, "disable")) { /* See if there's a PCI BIOS entrypoint here */ PCIbios.ident.id = 0x49435024; /* PCI systems should have this */ if (!bios32_SDlookup(&PCIbios) && bootverbose) printf("pcibios: PCI BIOS entry at 0x%x+0x%x\n", PCIbios.base, PCIbios.entry); } if (p != NULL) freeenv(p); } else { printf("bios32: Bad BIOS32 Service Directory\n"); } } /* * PnP BIOS * * Allow user override of PnP BIOS search */ if ((((p = kern_getenv("machdep.bios.pnp")) == NULL) || strcmp(p, "disable")) && ((sigaddr = bios_sigsearch(0, "$PnP", 4, 16, 0)) != 0)) { /* get a virtual pointer to the structure */ pt = (struct PnPBIOS_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)pt, ck = 0, i = 0; i < pt->len; i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if (ck == 0) { PnPBIOStable = pt; if (bootverbose) { printf("pnpbios: Found PnP BIOS data at %p\n", pt); printf("pnpbios: Entry = %x:%x Rev = %d.%d\n", pt->pmentrybase, pt->pmentryoffset, pt->version >> 4, pt->version & 0xf); if ((pt->control & 0x3) == 0x01) printf("pnpbios: Event flag at %x\n", pt->evflagaddr); if (pt->oemdevid != 0) printf("pnpbios: OEM ID %x\n", pt->oemdevid); } } else { printf("pnpbios: Bad PnP BIOS data checksum\n"); } } if (p != NULL) freeenv(p); if (bootverbose) { /* look for other know signatures */ printf("Other BIOS signatures found:\n"); } } /* * bios32_SDlookup * * Query the BIOS32 Service Directory for the service named in (ent), * returns nonzero if the lookup fails. The caller must fill in * (ent->ident), the remainder are populated on a successful lookup. */ int bios32_SDlookup(struct bios32_SDentry *ent) { struct bios_regs args; if (bios32_SDCI == 0) return (1); args.eax = ent->ident.id; /* set up arguments */ args.ebx = args.ecx = args.edx = 0; bios32(&args, bios32_SDCI, GSEL(GCODE_SEL, SEL_KPL)); if ((args.eax & 0xff) == 0) { /* success? */ ent->base = args.ebx; ent->len = args.ecx; ent->entry = args.edx; ent->ventry = BIOS_PADDRTOVADDR(ent->base + ent->entry); return (0); /* all OK */ } return (1); /* failed */ } /* * bios_sigsearch * * Search some or all of the BIOS region for a signature string. * * (start) Optional offset returned from this function * (for searching for multiple matches), or NULL * to start the search from the base of the BIOS. * Note that this will be a _physical_ address in * the range 0xe0000 - 0xfffff. * (sig) is a pointer to the byte(s) of the signature. * (siglen) number of bytes in the signature. * (paralen) signature paragraph (alignment) size. * (sigofs) offset of the signature within the paragraph. * * Returns the _physical_ address of the found signature, 0 if the * signature was not found. */ u_int32_t bios_sigsearch(u_int32_t start, u_char *sig, int siglen, int paralen, int sigofs) { u_char *sp, *end; /* compute the starting address */ if ((start >= BIOS_START) && (start <= (BIOS_START + BIOS_SIZE))) { sp = (char *)BIOS_PADDRTOVADDR(start); } else if (start == 0) { sp = (char *)BIOS_PADDRTOVADDR(BIOS_START); } else { return 0; /* bogus start address */ } /* compute the end address */ end = (u_char *)BIOS_PADDRTOVADDR(BIOS_START + BIOS_SIZE); /* loop searching */ while ((sp + sigofs + siglen) < end) { /* compare here */ if (!bcmp(sp + sigofs, sig, siglen)) { /* convert back to physical address */ return((u_int32_t)BIOS_VADDRTOPADDR(sp)); } sp += paralen; } return(0); } /* * do not staticize, used by bioscall.s */ union { struct { u_short offset; u_short segment; } vec16; struct { u_int offset; u_short segment; } vec32; } bioscall_vector; /* bios jump vector */ void set_bios_selectors(struct bios_segments *seg, int flags) { struct soft_segment_descriptor ssd = { 0, /* segment base address (overwritten) */ 0, /* length (overwritten) */ SDT_MEMERA, /* segment type (overwritten) */ 0, /* priority level */ 1, /* descriptor present */ 0, 0, 1, /* descriptor size (overwritten) */ 0 /* granularity == byte units */ }; union descriptor *p_gdt; #ifdef SMP p_gdt = &gdt[PCPU_GET(cpuid) * NGDT]; #else p_gdt = gdt; #endif ssd.ssd_base = seg->code32.base; ssd.ssd_limit = seg->code32.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE32_SEL].sd); ssd.ssd_def32 = 0; if (flags & BIOSCODE_FLAG) { ssd.ssd_base = seg->code16.base; ssd.ssd_limit = seg->code16.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE16_SEL].sd); } ssd.ssd_type = SDT_MEMRWA; if (flags & BIOSDATA_FLAG) { ssd.ssd_base = seg->data.base; ssd.ssd_limit = seg->data.limit; ssdtosd(&ssd, &p_gdt[GBIOSDATA_SEL].sd); } if (flags & BIOSUTIL_FLAG) { ssd.ssd_base = seg->util.base; ssd.ssd_limit = seg->util.limit; ssdtosd(&ssd, &p_gdt[GBIOSUTIL_SEL].sd); } if (flags & BIOSARGS_FLAG) { ssd.ssd_base = seg->args.base; ssd.ssd_limit = seg->args.limit; ssdtosd(&ssd, &p_gdt[GBIOSARGS_SEL].sd); } } extern int vm86pa; extern u_long vm86phystk; extern void bios16_jmp(void); /* * this routine is really greedy with selectors, and uses 5: * * 32-bit code selector: to return to kernel * 16-bit code selector: for running code * data selector: for 16-bit data * util selector: extra utility selector * args selector: to handle pointers * * the util selector is set from the util16 entry in bios16_args, if a * "U" specifier is seen. * * See for description of format specifiers */ int bios16(struct bios_args *args, char *fmt, ...) { char *p, *stack, *stack_top; va_list ap; int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; - pt_entry_t *pte; - pd_entry_t *ptd, orig_ptd; - + void *bios16_pmap_handle; arg_start = 0xffffffff; arg_end = 0; /* * Some BIOS entrypoints attempt to copy the largest-case * argument frame (in order to generalise handling for * different entry types). If our argument frame is * smaller than this, the BIOS will reach off the top of * our constructed stack segment. Pad the top of the stack * with some garbage to avoid this. */ stack = (caddr_t)PAGE_SIZE - 32; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); arg_start = min(arg_start, i); arg_end = max(arg_end, i); flags |= BIOSARGS_FLAG; stack -= 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); stack -= 4; break; case 'U': /* 16-bit selector */ flags |= BIOSUTIL_FLAG; /* FALLTHROUGH */ case 'D': /* 16-bit selector */ case 'C': /* 16-bit selector */ stack -= 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); stack -= 2; break; default: va_end(ap); return (EINVAL); } } va_end(ap); if (flags & BIOSARGS_FLAG) { if (arg_end - arg_start > ctob(16)) return (EACCES); args->seg.args.base = arg_start; args->seg.args.limit = 0xffff; } - args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; + args->seg.code32.base = pmap_pg_frame((u_int)&bios16_jmp); args->seg.code32.limit = 0xffff; - /* - * no page table, so create one and install it. - */ - pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = IdlePTD; - *pte = vm86phystk | PG_RW | PG_V; - orig_ptd = *ptd; - *ptd = vtophys(pte) | PG_RW | PG_V; - pmap_invalidate_all(kernel_pmap); /* XXX insurance for now */ + bios16_pmap_handle = pmap_bios16_enter(); stack_top = stack; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); *(u_int *)stack = (i - arg_start) | (GSEL(GBIOSARGS_SEL, SEL_KPL) << 16); stack += 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); *(u_int *)stack = i; stack += 4; break; case 'U': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSUTIL_SEL, SEL_KPL); stack += 2; break; case 'D': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSDATA_SEL, SEL_KPL); stack += 2; break; case 'C': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSCODE16_SEL, SEL_KPL); stack += 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); *(u_short *)stack = i; stack += 2; break; default: va_end(ap); return (EINVAL); } } va_end(ap); set_bios_selectors(&args->seg, flags); bioscall_vector.vec16.offset = (u_short)args->entry; bioscall_vector.vec16.segment = GSEL(GBIOSCODE16_SEL, SEL_KPL); i = bios16_call(&args->r, stack_top); - - *ptd = orig_ptd; /* remove page table */ - /* - * XXX only needs to be invlpg(0) but that doesn't work on the 386 - */ - pmap_invalidate_all(kernel_pmap); - free(pte, M_TEMP); /* ... and free it */ + pmap_bios16_leave(bios16_pmap_handle); return (i); } int bios_oem_strings(struct bios_oem *oem, u_char *buffer, size_t maxlen) { size_t idx = 0; struct bios_oem_signature *sig; u_int from, to; u_char c, *s, *se, *str, *bios_str; size_t i, off, len, tot; if ( !oem || !buffer || maxlen<2 ) return(-1); sig = oem->signature; if (!sig) return(-2); from = oem->range.from; to = oem->range.to; if ( (to<=from) || (from(BIOS_START+BIOS_SIZE)) ) return(-3); while (sig->anchor != NULL) { str = sig->anchor; len = strlen(str); off = sig->offset; tot = sig->totlen; /* make sure offset doesn't go beyond bios area */ if ( (to+off)>(BIOS_START+BIOS_SIZE) || ((from+off) maxlen - 1) { printf("sys/i386/i386/bios.c: sig '%s' " "idx %d + tot %d = %d > maxlen-1 %d\n", str, idx, tot, idx+tot, maxlen-1); return(-5); } bios_str = NULL; s = (u_char *)BIOS_PADDRTOVADDR(from); se = (u_char *)BIOS_PADDRTOVADDR(to-len); for (; s 0x7E) ) c = ' '; if (idx == 0) { if (c != ' ') buffer[idx++] = c; } else if ( (c != ' ') || ((c == ' ') && (buffer[idx-1] != ' ')) ) buffer[idx++] = c; } } sig++; } /* remove a final trailing space */ if ( (idx > 1) && (buffer[idx-1] == ' ') ) idx--; buffer[idx] = '\0'; return (idx); } #ifdef DEV_ISA /* * PnP BIOS interface; enumerate devices only known to the system * BIOS and save information about them for later use. */ struct pnp_sysdev { u_int16_t size; u_int8_t handle; u_int32_t devid; u_int8_t type[3]; u_int16_t attrib; #define PNPATTR_NODISABLE (1<<0) /* can't be disabled */ #define PNPATTR_NOCONFIG (1<<1) /* can't be configured */ #define PNPATTR_OUTPUT (1<<2) /* can be primary output */ #define PNPATTR_INPUT (1<<3) /* can be primary input */ #define PNPATTR_BOOTABLE (1<<4) /* can be booted from */ #define PNPATTR_DOCK (1<<5) /* is a docking station */ #define PNPATTR_REMOVEABLE (1<<6) /* device is removeable */ #define PNPATTR_CONFIG_STATIC (0) #define PNPATTR_CONFIG_DYNAMIC (1) #define PNPATTR_CONFIG_DYNONLY (3) #define PNPATTR_CONFIG(a) (((a) >> 7) & 0x3) /* device-specific data comes here */ u_int8_t devdata[0]; } __packed; /* We have to cluster arguments within a 64k range for the bios16 call */ struct pnp_sysdevargs { u_int16_t next; struct pnp_sysdev node; }; /* * This function is called after the bus has assigned resource * locations for a logical device. */ static void pnpbios_set_config(void *arg, struct isa_config *config, int enable) { } /* * Quiz the PnP BIOS, build a list of PNP IDs and resource data. */ static void pnpbios_identify(driver_t *driver, device_t parent) { struct PnPBIOS_table *pt = PnPBIOStable; struct bios_args args; struct pnp_sysdev *pd; struct pnp_sysdevargs *pda; u_int16_t ndevs, bigdev; int error, currdev; u_int8_t *devnodebuf, tag; u_int32_t *devid, *compid; int idx, left; device_t dev; /* no PnP BIOS information */ if (pt == NULL) return; /* Check to see if ACPI is already active. */ dev = devclass_get_device(devclass_find("acpi"), 0); if (dev != NULL && device_is_attached(dev)) return; /* get count of PnP devices */ bzero(&args, sizeof(args)); args.seg.code16.base = BIOS_PADDRTOVADDR(pt->pmentrybase); args.seg.code16.limit = 0xffff; /* XXX ? */ args.seg.data.base = BIOS_PADDRTOVADDR(pt->pmdataseg); args.seg.data.limit = 0xffff; args.entry = pt->pmentryoffset; if ((error = bios16(&args, PNP_COUNT_DEVNODES, &ndevs, &bigdev)) || (args.r.eax & 0xff)) { printf("pnpbios: error %d/%x getting device count/size limit\n", error, args.r.eax); return; } ndevs &= 0xff; /* clear high byte garbage */ if (bootverbose) printf("pnpbios: %d devices, largest %d bytes\n", ndevs, bigdev); devnodebuf = malloc(bigdev + (sizeof(struct pnp_sysdevargs) - sizeof(struct pnp_sysdev)), M_DEVBUF, M_NOWAIT); if (devnodebuf == NULL) { printf("pnpbios: cannot allocate memory, bailing\n"); return; } pda = (struct pnp_sysdevargs *)devnodebuf; pd = &pda->node; for (currdev = 0, left = ndevs; (currdev != 0xff) && (left > 0); left--) { bzero(pd, bigdev); pda->next = currdev; /* get current configuration */ if ((error = bios16(&args, PNP_GET_DEVNODE, &pda->next, &pda->node, 1))) { printf("pnpbios: error %d making BIOS16 call\n", error); break; } if ((error = (args.r.eax & 0xff))) { if (bootverbose) printf("pnpbios: %s 0x%x fetching node %d\n", error & 0x80 ? "error" : "warning", error, currdev); if (error & 0x80) break; } currdev = pda->next; if (pd->size < sizeof(struct pnp_sysdev)) { printf("pnpbios: bogus system node data, aborting scan\n"); break; } /* * Ignore PICs so that we don't have to worry about the PICs * claiming IRQs to prevent their use. The PIC drivers * already ensure that invalid IRQs are not used. */ if (!strcmp(pnp_eisaformat(pd->devid), "PNP0000")) /* ISA PIC */ continue; if (!strcmp(pnp_eisaformat(pd->devid), "PNP0003")) /* APIC */ continue; /* Add the device and parse its resources */ dev = BUS_ADD_CHILD(parent, ISA_ORDER_PNPBIOS, NULL, -1); isa_set_vendorid(dev, pd->devid); isa_set_logicalid(dev, pd->devid); /* * It appears that some PnP BIOS doesn't allow us to re-enable * the embedded system device once it is disabled. We shall * mark all system device nodes as "cannot be disabled", regardless * of actual settings in the device attribute byte. * XXX isa_set_configattr(dev, ((pd->attrib & PNPATTR_NODISABLE) ? 0 : ISACFGATTR_CANDISABLE) | ((!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0)); */ isa_set_configattr(dev, (!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0); isa_set_pnpbios_handle(dev, pd->handle); ISA_SET_CONFIG_CALLBACK(parent, dev, pnpbios_set_config, 0); pnp_parse_resources(dev, &pd->devdata[0], pd->size - sizeof(struct pnp_sysdev), 0); if (!device_get_desc(dev)) device_set_desc_copy(dev, pnp_eisaformat(pd->devid)); /* Find device IDs */ devid = &pd->devid; compid = NULL; /* look for a compatible device ID too */ left = pd->size - sizeof(struct pnp_sysdev); idx = 0; while (idx < left) { tag = pd->devdata[idx++]; if (PNP_RES_TYPE(tag) == 0) { /* Small resource */ switch (PNP_SRES_NUM(tag)) { case PNP_TAG_COMPAT_DEVICE: compid = (u_int32_t *)(pd->devdata + idx); if (bootverbose) printf("pnpbios: node %d compat ID 0x%08x\n", pd->handle, *compid); /* FALLTHROUGH */ case PNP_TAG_END: idx = left; break; default: idx += PNP_SRES_LEN(tag); break; } } else /* Large resource, skip it */ idx += *(u_int16_t *)(pd->devdata + idx) + 2; } if (bootverbose) { printf("pnpbios: handle %d device ID %s (%08x)", pd->handle, pnp_eisaformat(*devid), *devid); if (compid != NULL) printf(" compat ID %s (%08x)", pnp_eisaformat(*compid), *compid); printf("\n"); } } } static device_method_t pnpbios_methods[] = { /* Device interface */ DEVMETHOD(device_identify, pnpbios_identify), { 0, 0 } }; static driver_t pnpbios_driver = { "pnpbios", pnpbios_methods, 1, /* no softc */ }; static devclass_t pnpbios_devclass; DRIVER_MODULE(pnpbios, isa, pnpbios_driver, pnpbios_devclass, 0, 0); #endif /* DEV_ISA */ Index: head/sys/i386/i386/copyout.c =================================================================== --- head/sys/i386/i386/copyout.c (revision 343566) +++ head/sys/i386/i386/copyout.c (revision 343567) @@ -1,486 +1,475 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#if defined(PAE) || defined(PAE_TABLES) -#define KCR3 ((u_int)IdlePDPT) -#else -#define KCR3 ((u_int)IdlePTD) -#endif - int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int); static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int); int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int); static int (*copyout_fast_tramp)(const void *, void *, size_t, u_int); int fubyte_fast(volatile const void *base, u_int kcr3); static int (*fubyte_fast_tramp)(volatile const void *, u_int); int fuword16_fast(volatile const void *base, u_int kcr3); static int (*fuword16_fast_tramp)(volatile const void *, u_int); int fueword_fast(volatile const void *base, long *val, u_int kcr3); static int (*fueword_fast_tramp)(volatile const void *, long *, u_int); int subyte_fast(volatile void *base, int val, u_int kcr3); static int (*subyte_fast_tramp)(volatile void *, int, u_int); int suword16_fast(volatile void *base, int val, u_int kcr3); static int (*suword16_fast_tramp)(volatile void *, int, u_int); int suword_fast(volatile void *base, long val, u_int kcr3); static int (*suword_fast_tramp)(volatile void *, long, u_int); static int fast_copyout = 1; SYSCTL_INT(_machdep, OID_AUTO, fast_copyout, CTLFLAG_RWTUN, &fast_copyout, 0, ""); void copyout_init_tramp(void) { copyin_fast_tramp = (int (*)(const void *, void *, size_t, u_int))( (uintptr_t)copyin_fast + setidt_disp); copyout_fast_tramp = (int (*)(const void *, void *, size_t, u_int))( (uintptr_t)copyout_fast + setidt_disp); fubyte_fast_tramp = (int (*)(volatile const void *, u_int))( (uintptr_t)fubyte_fast + setidt_disp); fuword16_fast_tramp = (int (*)(volatile const void *, u_int))( (uintptr_t)fuword16_fast + setidt_disp); fueword_fast_tramp = (int (*)(volatile const void *, long *, u_int))( (uintptr_t)fueword_fast + setidt_disp); subyte_fast_tramp = (int (*)(volatile void *, int, u_int))( (uintptr_t)subyte_fast + setidt_disp); suword16_fast_tramp = (int (*)(volatile void *, int, u_int))( (uintptr_t)suword16_fast + setidt_disp); suword_fast_tramp = (int (*)(volatile void *, long, u_int))( (uintptr_t)suword_fast + setidt_disp); } int cp_slow0(vm_offset_t uva, size_t len, bool write, void (*f)(vm_offset_t, void *), void *arg) { struct pcpu *pc; vm_page_t m[2]; - pt_entry_t *pte; vm_offset_t kaddr; int error, i, plen; bool sleepable; plen = howmany(uva - trunc_page(uva) + len, PAGE_SIZE); MPASS(plen <= nitems(m)); error = 0; i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, uva, len, (write ? VM_PROT_WRITE : VM_PROT_READ) | VM_PROT_QUICK_NOFAULT, m, nitems(m)); if (i != plen) return (EFAULT); sched_pin(); pc = get_pcpu(); if (!THREAD_CAN_SLEEP() || curthread->td_vslock_sz > 0 || (curthread->td_pflags & TDP_NOFAULTING) != 0) { sleepable = false; mtx_lock(&pc->pc_copyout_mlock); kaddr = pc->pc_copyout_maddr; } else { sleepable = true; sx_xlock(&pc->pc_copyout_slock); kaddr = pc->pc_copyout_saddr; } - for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) { - *pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(m[i]) | - pmap_cache_bits(kernel_pmap, pmap_page_get_memattr(m[i]), - FALSE); - invlpg(kaddr + ptoa(i)); - } + pmap_cp_slow0_map(kaddr, plen, m); kaddr += uva - trunc_page(uva); f(kaddr, arg); sched_unpin(); if (sleepable) sx_xunlock(&pc->pc_copyout_slock); else mtx_unlock(&pc->pc_copyout_mlock); vm_page_unhold_pages(m, plen); return (error); } struct copyinstr_arg0 { vm_offset_t kc; size_t len; size_t alen; bool end; }; static void copyinstr_slow0(vm_offset_t kva, void *arg) { struct copyinstr_arg0 *ca; char c; ca = arg; MPASS(ca->alen == 0 && ca->len > 0 && !ca->end); while (ca->alen < ca->len && !ca->end) { c = *(char *)(kva + ca->alen); *(char *)ca->kc = c; ca->alen++; ca->kc++; if (c == '\0') ca->end = true; } } int copyinstr(const void *udaddr, void *kaddr, size_t maxlen, size_t *lencopied) { struct copyinstr_arg0 ca; vm_offset_t uc; size_t plen; int error; error = 0; ca.end = false; for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; plen < maxlen && !ca.end; uc += ca.alen, plen += ca.alen) { ca.len = round_page(uc) - uc; if (ca.len == 0) ca.len = PAGE_SIZE; if (plen + ca.len > maxlen) ca.len = maxlen - plen; ca.alen = 0; if (cp_slow0(uc, ca.len, false, copyinstr_slow0, &ca) != 0) { error = EFAULT; break; } } if (!ca.end && plen == maxlen && error == 0) error = ENAMETOOLONG; if (lencopied != NULL) *lencopied = plen; return (error); } struct copyin_arg0 { vm_offset_t kc; size_t len; }; static void copyin_slow0(vm_offset_t kva, void *arg) { struct copyin_arg0 *ca; ca = arg; bcopy((void *)kva, (void *)ca->kc, ca->len); } int copyin(const void *udaddr, void *kaddr, size_t len) { struct copyin_arg0 ca; vm_offset_t uc; size_t plen; if ((uintptr_t)udaddr + len < (uintptr_t)udaddr || (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS) return (EFAULT); if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ && - copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0)) + copyin_fast_tramp(udaddr, kaddr, len, pmap_get_kcr3()) == 0)) return (0); for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) { ca.len = round_page(uc) - uc; if (ca.len == 0) ca.len = PAGE_SIZE; if (plen + ca.len > len) ca.len = len - plen; if (cp_slow0(uc, ca.len, false, copyin_slow0, &ca) != 0) return (EFAULT); } return (0); } static void copyout_slow0(vm_offset_t kva, void *arg) { struct copyin_arg0 *ca; ca = arg; bcopy((void *)ca->kc, (void *)kva, ca->len); } int copyout(const void *kaddr, void *udaddr, size_t len) { struct copyin_arg0 ca; vm_offset_t uc; size_t plen; if ((uintptr_t)udaddr + len < (uintptr_t)udaddr || (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS) return (EFAULT); if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ && - copyout_fast_tramp(kaddr, udaddr, len, KCR3) == 0)) + copyout_fast_tramp(kaddr, udaddr, len, pmap_get_kcr3()) == 0)) return (0); for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) { ca.len = round_page(uc) - uc; if (ca.len == 0) ca.len = PAGE_SIZE; if (plen + ca.len > len) ca.len = len - plen; if (cp_slow0(uc, ca.len, true, copyout_slow0, &ca) != 0) return (EFAULT); } return (0); } /* * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user * memory. */ static void fubyte_slow0(vm_offset_t kva, void *arg) { *(int *)arg = *(u_char *)kva; } int fubyte(volatile const void *base) { int res; if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { - res = fubyte_fast_tramp(base, KCR3); + res = fubyte_fast_tramp(base, pmap_get_kcr3()); if (res != -1) return (res); } if (cp_slow0((vm_offset_t)base, sizeof(char), false, fubyte_slow0, &res) != 0) return (-1); return (res); } static void fuword16_slow0(vm_offset_t kva, void *arg) { *(int *)arg = *(uint16_t *)kva; } int fuword16(volatile const void *base) { int res; if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { - res = fuword16_fast_tramp(base, KCR3); + res = fuword16_fast_tramp(base, pmap_get_kcr3()); if (res != -1) return (res); } if (cp_slow0((vm_offset_t)base, sizeof(uint16_t), false, fuword16_slow0, &res) != 0) return (-1); return (res); } static void fueword_slow0(vm_offset_t kva, void *arg) { *(uint32_t *)arg = *(uint32_t *)kva; } int fueword(volatile const void *base, long *val) { uint32_t res; if ((uintptr_t)base + sizeof(*val) < (uintptr_t)base || (uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { - if (fueword_fast_tramp(base, val, KCR3) == 0) + if (fueword_fast_tramp(base, val, pmap_get_kcr3()) == 0) return (0); } if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0, &res) != 0) return (-1); *val = res; return (0); } int fueword32(volatile const void *base, int32_t *val) { return (fueword(base, (long *)val)); } /* * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory. */ static void subyte_slow0(vm_offset_t kva, void *arg) { *(u_char *)kva = *(int *)arg; } int subyte(volatile void *base, int byte) { if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS) return (-1); - if (fast_copyout && subyte_fast_tramp(base, byte, KCR3) == 0) + if (fast_copyout && subyte_fast_tramp(base, byte, pmap_get_kcr3()) == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(u_char), true, subyte_slow0, &byte) != 0 ? -1 : 0); } static void suword16_slow0(vm_offset_t kva, void *arg) { *(int *)kva = *(uint16_t *)arg; } int suword16(volatile void *base, int word) { if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS) return (-1); - if (fast_copyout && suword16_fast_tramp(base, word, KCR3) == 0) + if (fast_copyout && suword16_fast_tramp(base, word, pmap_get_kcr3()) + == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(int16_t), true, suword16_slow0, &word) != 0 ? -1 : 0); } static void suword_slow0(vm_offset_t kva, void *arg) { *(int *)kva = *(uint32_t *)arg; } int suword(volatile void *base, long word) { if ((uintptr_t)base + sizeof(word) < (uintptr_t)base || (uintptr_t)base + sizeof(word) > VM_MAXUSER_ADDRESS) return (-1); - if (fast_copyout && suword_fast_tramp(base, word, KCR3) == 0) + if (fast_copyout && suword_fast_tramp(base, word, pmap_get_kcr3()) == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(long), true, suword_slow0, &word) != 0 ? -1 : 0); } int suword32(volatile void *base, int32_t word) { return (suword(base, word)); } struct casueword_arg0 { uint32_t oldval; uint32_t newval; }; static void casueword_slow0(vm_offset_t kva, void *arg) { struct casueword_arg0 *ca; ca = arg; atomic_fcmpset_int((u_int *)kva, &ca->oldval, ca->newval); } int casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, uint32_t newval) { struct casueword_arg0 ca; int res; ca.oldval = oldval; ca.newval = newval; res = cp_slow0((vm_offset_t)base, sizeof(int32_t), true, casueword_slow0, &ca); if (res == 0) { *oldvalp = ca.oldval; return (0); } return (-1); } int casueword(volatile u_long *base, u_long oldval, u_long *oldvalp, u_long newval) { struct casueword_arg0 ca; int res; ca.oldval = oldval; ca.newval = newval; res = cp_slow0((vm_offset_t)base, sizeof(int32_t), true, casueword_slow0, &ca); if (res == 0) { *oldvalp = ca.oldval; return (0); } return (-1); } Index: head/sys/i386/i386/genassym.c =================================================================== --- head/sys/i386/i386/genassym.c (revision 343566) +++ head/sys/i386/i386/genassym.c (revision 343567) @@ -1,245 +1,234 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_hwpmc_hooks.h" #include "opt_kstack_pages.h" #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include #endif #include #include #include #include #include #include ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TDP_CALLCHAIN, TDP_CALLCHAIN); ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(TD0_KSTACK_PAGES, TD0_KSTACK_PAGES); ASSYM(PAGE_SIZE, PAGE_SIZE); -ASSYM(NPTEPG, NPTEPG); -ASSYM(NPDEPG, NPDEPG); -ASSYM(NPDEPTD, NPDEPTD); -ASSYM(NPGPTD, NPGPTD); -ASSYM(PDESIZE, sizeof(pd_entry_t)); -ASSYM(PTESIZE, sizeof(pt_entry_t)); -ASSYM(PDESHIFT, PDESHIFT); -ASSYM(PTESHIFT, PTESHIFT); ASSYM(PAGE_SHIFT, PAGE_SHIFT); ASSYM(PAGE_MASK, PAGE_MASK); -ASSYM(PDRSHIFT, PDRSHIFT); -ASSYM(PDRMASK, PDRMASK); -ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); -ASSYM(KERNBASE, KERNBASE); -ASSYM(KERNLOAD, KERNLOAD); ASSYM(PCB_CR0, offsetof(struct pcb, pcb_cr0)); ASSYM(PCB_CR2, offsetof(struct pcb, pcb_cr2)); ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3)); ASSYM(PCB_CR4, offsetof(struct pcb, pcb_cr4)); ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi)); ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi)); ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp)); ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp)); ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx)); ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip)); ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0)); ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds)); ASSYM(PCB_ES, offsetof(struct pcb, pcb_es)); ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs)); ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); ASSYM(PCB_SS, offsetof(struct pcb, pcb_ss)); ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); ASSYM(PCB_EXT_TSS, offsetof(struct pcb_ext, ext_tss)); ASSYM(PCB_FSD, offsetof(struct pcb, pcb_fsd)); ASSYM(PCB_GSD, offsetof(struct pcb, pcb_gsd)); ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_VM86CALL, PCB_VM86CALL); ASSYM(PCB_GDT, offsetof(struct pcb, pcb_gdt)); ASSYM(PCB_IDT, offsetof(struct pcb, pcb_idt)); ASSYM(PCB_LDT, offsetof(struct pcb, pcb_ldt)); ASSYM(PCB_TR, offsetof(struct pcb, pcb_tr)); ASSYM(TF_FS, offsetof(struct trapframe, tf_fs)); ASSYM(TF_ES, offsetof(struct trapframe, tf_es)); ASSYM(TF_DS, offsetof(struct trapframe, tf_ds)); ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); ASSYM(TF_EIP, offsetof(struct trapframe, tf_eip)); ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags)); ASSYM(TF_SZ, sizeof(struct trapframe)); ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); #ifdef COMPAT_43 ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc)); #endif ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); #ifdef COMPAT_FREEBSD4 ASSYM(SIGF_UC4, offsetof(struct sigframe4, sf_uc)); #endif #ifdef COMPAT_43 ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps)); ASSYM(SC_FS, offsetof(struct osigcontext, sc_fs)); ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs)); ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno)); #endif #ifdef COMPAT_FREEBSD4 ASSYM(UC4_EFLAGS, offsetof(struct ucontext4, uc_mcontext.mc_eflags)); ASSYM(UC4_GS, offsetof(struct ucontext4, uc_mcontext.mc_gs)); #endif ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags)); ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs)); ASSYM(ENOENT, ENOENT); ASSYM(EFAULT, EFAULT); ASSYM(ENAMETOOLONG, ENAMETOOLONG); ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(MAXPATHLEN, MAXPATHLEN); ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo)); ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version)); ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname)); ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless)); ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon)); ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless)); ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size)); ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab)); ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab)); ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_COMMON_TSSP, offsetof(struct pcpu, pc_common_tssp)); ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd)); ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt)); ASSYM(PC_FSGS_GDT, offsetof(struct pcpu, pc_fsgs_gdt)); ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt)); ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss)); ASSYM(PC_KESP0, offsetof(struct pcpu, pc_kesp0)); ASSYM(PC_TRAMPSTK, offsetof(struct pcpu, pc_trampstk)); ASSYM(PC_COPYOUT_BUF, offsetof(struct pcpu, pc_copyout_buf)); ASSYM(PC_IBPB_SET, offsetof(struct pcpu, pc_ibpb_set)); +ASSYM(PMAP_TRM_MIN_ADDRESS, PMAP_TRM_MIN_ADDRESS); +ASSYM(KERNLOAD, KERNLOAD); +ASSYM(KERNBASE, KERNBASE); #ifdef DEV_APIC ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL); ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL); #endif ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL)); ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL)); ASSYM(GPROC0_SEL, GPROC0_SEL); ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame)); ASSYM(VM86_STACK_SPACE, VM86_STACK_SPACE); -ASSYM(PMAP_TRM_MIN_ADDRESS, PMAP_TRM_MIN_ADDRESS); ASSYM(TRAMP_COPYOUT_SZ, TRAMP_COPYOUT_SZ); #ifdef HWPMC_HOOKS ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN); #endif Index: head/sys/i386/i386/initcpu.c =================================================================== --- head/sys/i386/i386/initcpu.c (revision 343566) +++ head/sys/i386/i386/initcpu.c (revision 343567) @@ -1,985 +1,980 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) KATO Takenori, 1997, 1998. * * All rights reserved. Unpublished rights reserved under the copyright * laws of Japan. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #ifdef I486_CPU static void init_5x86(void); static void init_bluelightning(void); static void init_486dlc(void); static void init_cy486dx(void); #ifdef CPU_I486_ON_386 static void init_i486_on_386(void); #endif static void init_6x86(void); #endif /* I486_CPU */ #if defined(I586_CPU) && defined(CPU_WT_ALLOC) static void enable_K5_wt_alloc(void); static void enable_K6_wt_alloc(void); static void enable_K6_2_wt_alloc(void); #endif #ifdef I686_CPU static void init_6x86MX(void); static void init_ppro(void); static void init_mendocino(void); #endif static int hw_instruction_sse; SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); /* * -1: automatic (default) * 0: keep enable CLFLUSH * 1: force disable CLFLUSH */ static int hw_clflush_disable = -1; u_int cyrix_did; /* Device ID of Cyrix CPU */ #ifdef I486_CPU /* * IBM Blue Lightning */ static void init_bluelightning(void) { register_t saveintr; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); invd(); #ifdef CPU_BLUELIGHTNING_FPU_OP_CACHE wrmsr(0x1000, 0x9c92LL); /* FP operand can be cacheable on Cyrix FPU */ #else wrmsr(0x1000, 0x1c92LL); /* Intel FPU */ #endif /* Enables 13MB and 0-640KB cache. */ wrmsr(0x1001, (0xd0LL << 32) | 0x3ff); #ifdef CPU_BLUELIGHTNING_3X wrmsr(0x1002, 0x04000000LL); /* Enables triple-clock mode. */ #else wrmsr(0x1002, 0x03000000LL); /* Enables double-clock mode. */ #endif /* Enable caching in CR0. */ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ invd(); intr_restore(saveintr); } /* * Cyrix 486SLC/DLC/SR/DR series */ static void init_486dlc(void) { register_t saveintr; u_char ccr0; saveintr = intr_disable(); invd(); ccr0 = read_cyrix_reg(CCR0); #ifndef CYRIX_CACHE_WORKS ccr0 |= CCR0_NC1 | CCR0_BARB; write_cyrix_reg(CCR0, ccr0); invd(); #else ccr0 &= ~CCR0_NC0; #ifndef CYRIX_CACHE_REALLY_WORKS ccr0 |= CCR0_NC1 | CCR0_BARB; #else ccr0 |= CCR0_NC1; #endif #ifdef CPU_DIRECT_MAPPED_CACHE ccr0 |= CCR0_CO; /* Direct mapped mode. */ #endif write_cyrix_reg(CCR0, ccr0); /* Clear non-cacheable region. */ write_cyrix_reg(NCR1+2, NCR_SIZE_0K); write_cyrix_reg(NCR2+2, NCR_SIZE_0K); write_cyrix_reg(NCR3+2, NCR_SIZE_0K); write_cyrix_reg(NCR4+2, NCR_SIZE_0K); write_cyrix_reg(0, 0); /* dummy write */ /* Enable caching in CR0. */ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ invd(); #endif /* !CYRIX_CACHE_WORKS */ intr_restore(saveintr); } /* * Cyrix 486S/DX series */ static void init_cy486dx(void) { register_t saveintr; u_char ccr2; saveintr = intr_disable(); invd(); ccr2 = read_cyrix_reg(CCR2); #ifdef CPU_SUSP_HLT ccr2 |= CCR2_SUSP_HLT; #endif write_cyrix_reg(CCR2, ccr2); intr_restore(saveintr); } /* * Cyrix 5x86 */ static void init_5x86(void) { register_t saveintr; u_char ccr2, ccr3, ccr4, pcr0; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); (void)read_cyrix_reg(CCR3); /* dummy */ /* Initialize CCR2. */ ccr2 = read_cyrix_reg(CCR2); ccr2 |= CCR2_WB; #ifdef CPU_SUSP_HLT ccr2 |= CCR2_SUSP_HLT; #else ccr2 &= ~CCR2_SUSP_HLT; #endif ccr2 |= CCR2_WT1; write_cyrix_reg(CCR2, ccr2); /* Initialize CCR4. */ ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); ccr4 = read_cyrix_reg(CCR4); ccr4 |= CCR4_DTE; ccr4 |= CCR4_MEM; #ifdef CPU_FASTER_5X86_FPU ccr4 |= CCR4_FASTFPE; #else ccr4 &= ~CCR4_FASTFPE; #endif ccr4 &= ~CCR4_IOMASK; /******************************************************************** * WARNING: The "BIOS Writers Guide" mentions that I/O recovery time * should be 0 for errata fix. ********************************************************************/ #ifdef CPU_IORT ccr4 |= CPU_IORT & CCR4_IOMASK; #endif write_cyrix_reg(CCR4, ccr4); /* Initialize PCR0. */ /**************************************************************** * WARNING: RSTK_EN and LOOP_EN could make your system unstable. * BTB_EN might make your system unstable. ****************************************************************/ pcr0 = read_cyrix_reg(PCR0); #ifdef CPU_RSTK_EN pcr0 |= PCR0_RSTK; #else pcr0 &= ~PCR0_RSTK; #endif #ifdef CPU_BTB_EN pcr0 |= PCR0_BTB; #else pcr0 &= ~PCR0_BTB; #endif #ifdef CPU_LOOP_EN pcr0 |= PCR0_LOOP; #else pcr0 &= ~PCR0_LOOP; #endif /**************************************************************** * WARNING: if you use a memory mapped I/O device, don't use * DISABLE_5X86_LSSER option, which may reorder memory mapped * I/O access. * IF YOUR MOTHERBOARD HAS PCI BUS, DON'T DISABLE LSSER. ****************************************************************/ #ifdef CPU_DISABLE_5X86_LSSER pcr0 &= ~PCR0_LSSER; #else pcr0 |= PCR0_LSSER; #endif write_cyrix_reg(PCR0, pcr0); /* Restore CCR3. */ write_cyrix_reg(CCR3, ccr3); (void)read_cyrix_reg(0x80); /* dummy */ /* Unlock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */ /* Lock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); intr_restore(saveintr); } #ifdef CPU_I486_ON_386 /* * There are i486 based upgrade products for i386 machines. * In this case, BIOS doesn't enable CPU cache. */ static void init_i486_on_386(void) { register_t saveintr; saveintr = intr_disable(); load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */ intr_restore(saveintr); } #endif /* * Cyrix 6x86 * * XXX - What should I do here? Please let me know. */ static void init_6x86(void) { register_t saveintr; u_char ccr3, ccr4; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); /* Initialize CCR0. */ write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); /* Initialize CCR1. */ #ifdef CPU_CYRIX_NO_LOCK write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); #else write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); #endif /* Initialize CCR2. */ #ifdef CPU_SUSP_HLT write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); #else write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); #endif ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); /* Initialize CCR4. */ ccr4 = read_cyrix_reg(CCR4); ccr4 |= CCR4_DTE; ccr4 &= ~CCR4_IOMASK; #ifdef CPU_IORT write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); #else write_cyrix_reg(CCR4, ccr4 | 7); #endif /* Initialize CCR5. */ #ifdef CPU_WT_ALLOC write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); #endif /* Restore CCR3. */ write_cyrix_reg(CCR3, ccr3); /* Unlock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); /* * Earlier revision of the 6x86 CPU could crash the system if * L1 cache is in write-back mode. */ if ((cyrix_did & 0xff00) > 0x1600) load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ else { /* Revision 2.6 and lower. */ #ifdef CYRIX_CACHE_REALLY_WORKS load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ #else load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0 and NW = 1 */ #endif } /* Lock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); intr_restore(saveintr); } #endif /* I486_CPU */ #ifdef I586_CPU /* * Rise mP6 */ static void init_rise(void) { /* * The CMPXCHG8B instruction is always available but hidden. */ cpu_feature |= CPUID_CX8; } /* * IDT WinChip C6/2/2A/2B/3 * * http://www.centtech.com/winchip_bios_writers_guide_v4_0.pdf */ static void init_winchip(void) { u_int regs[4]; uint64_t fcr; fcr = rdmsr(0x0107); /* * Set ECX8, DSMC, DTLOCK/EDCTLB, EMMX, and ERETSTK and clear DPDC. */ fcr |= (1 << 1) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 16); fcr &= ~(1ULL << 11); /* * Additionally, set EBRPRED, E2MMX and EAMD3D for WinChip 2 and 3. */ if (CPUID_TO_MODEL(cpu_id) >= 8) fcr |= (1 << 12) | (1 << 19) | (1 << 20); wrmsr(0x0107, fcr); do_cpuid(1, regs); cpu_feature = regs[3]; } #endif #ifdef I686_CPU /* * Cyrix 6x86MX (code-named M2) * * XXX - What should I do here? Please let me know. */ static void init_6x86MX(void) { register_t saveintr; u_char ccr3, ccr4; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); /* Initialize CCR0. */ write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); /* Initialize CCR1. */ #ifdef CPU_CYRIX_NO_LOCK write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); #else write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); #endif /* Initialize CCR2. */ #ifdef CPU_SUSP_HLT write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); #else write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); #endif ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); /* Initialize CCR4. */ ccr4 = read_cyrix_reg(CCR4); ccr4 &= ~CCR4_IOMASK; #ifdef CPU_IORT write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); #else write_cyrix_reg(CCR4, ccr4 | 7); #endif /* Initialize CCR5. */ #ifdef CPU_WT_ALLOC write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); #endif /* Restore CCR3. */ write_cyrix_reg(CCR3, ccr3); /* Unlock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ /* Lock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); intr_restore(saveintr); } static int ppro_apic_used = -1; static void init_ppro(void) { u_int64_t apicbase; /* * Local APIC should be disabled if it is not going to be used. */ if (ppro_apic_used != 1) { apicbase = rdmsr(MSR_APICBASE); apicbase &= ~APICBASE_ENABLED; wrmsr(MSR_APICBASE, apicbase); ppro_apic_used = 0; } } /* * If the local APIC is going to be used after being disabled above, * re-enable it and don't disable it in the future. */ void ppro_reenable_apic(void) { u_int64_t apicbase; if (ppro_apic_used == 0) { apicbase = rdmsr(MSR_APICBASE); apicbase |= APICBASE_ENABLED; wrmsr(MSR_APICBASE, apicbase); ppro_apic_used = 1; } } /* * Initialize BBL_CR_CTL3 (Control register 3: used to configure the * L2 cache). */ static void init_mendocino(void) { #ifdef CPU_PPRO2CELERON register_t saveintr; u_int64_t bbl_cr_ctl3; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); bbl_cr_ctl3 = rdmsr(MSR_BBL_CR_CTL3); /* If the L2 cache is configured, do nothing. */ if (!(bbl_cr_ctl3 & 1)) { bbl_cr_ctl3 = 0x134052bLL; /* Set L2 Cache Latency (Default: 5). */ #ifdef CPU_CELERON_L2_LATENCY #if CPU_L2_LATENCY > 15 #error invalid CPU_L2_LATENCY. #endif bbl_cr_ctl3 |= CPU_L2_LATENCY << 1; #else bbl_cr_ctl3 |= 5 << 1; #endif wrmsr(MSR_BBL_CR_CTL3, bbl_cr_ctl3); } load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); intr_restore(saveintr); #endif /* CPU_PPRO2CELERON */ } /* * Initialize special VIA features */ static void init_via(void) { u_int regs[4], val; uint64_t fcr; /* * Explicitly enable CX8 and PGE on C3. * * http://www.via.com.tw/download/mainboards/6/13/VIA_C3_EBGA%20datasheet110.pdf */ if (CPUID_TO_MODEL(cpu_id) <= 9) fcr = (1 << 1) | (1 << 7); else fcr = 0; /* * Check extended CPUID for PadLock features. * * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf */ do_cpuid(0xc0000000, regs); if (regs[0] >= 0xc0000001) { do_cpuid(0xc0000001, regs); val = regs[3]; } else val = 0; /* Enable RNG if present. */ if ((val & VIA_CPUID_HAS_RNG) != 0) { via_feature_rng = VIA_HAS_RNG; wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG); } /* Enable PadLock if present. */ if ((val & VIA_CPUID_HAS_ACE) != 0) via_feature_xcrypt |= VIA_HAS_AES; if ((val & VIA_CPUID_HAS_ACE2) != 0) via_feature_xcrypt |= VIA_HAS_AESCTR; if ((val & VIA_CPUID_HAS_PHE) != 0) via_feature_xcrypt |= VIA_HAS_SHA; if ((val & VIA_CPUID_HAS_PMM) != 0) via_feature_xcrypt |= VIA_HAS_MM; if (via_feature_xcrypt != 0) fcr |= 1 << 28; wrmsr(0x1107, rdmsr(0x1107) | fcr); } #endif /* I686_CPU */ #if defined(I586_CPU) || defined(I686_CPU) static void init_transmeta(void) { u_int regs[0]; /* Expose all hidden features. */ wrmsr(0x80860004, rdmsr(0x80860004) | ~0UL); do_cpuid(1, regs); cpu_feature = regs[3]; } #endif extern int elf32_nxstack; void initializecpu(void) { + uint64_t msr; switch (cpu) { #ifdef I486_CPU case CPU_BLUE: init_bluelightning(); break; case CPU_486DLC: init_486dlc(); break; case CPU_CY486DX: init_cy486dx(); break; case CPU_M1SC: init_5x86(); break; #ifdef CPU_I486_ON_386 case CPU_486: init_i486_on_386(); break; #endif case CPU_M1: init_6x86(); break; #endif /* I486_CPU */ #ifdef I586_CPU case CPU_586: switch (cpu_vendor_id) { case CPU_VENDOR_AMD: #ifdef CPU_WT_ALLOC if (((cpu_id & 0x0f0) > 0) && ((cpu_id & 0x0f0) < 0x60) && ((cpu_id & 0x00f) > 3)) enable_K5_wt_alloc(); else if (((cpu_id & 0x0f0) > 0x80) || (((cpu_id & 0x0f0) == 0x80) && (cpu_id & 0x00f) > 0x07)) enable_K6_2_wt_alloc(); else if ((cpu_id & 0x0f0) > 0x50) enable_K6_wt_alloc(); #endif if ((cpu_id & 0xf0) == 0xa0) /* * Make sure the TSC runs through * suspension, otherwise we can't use * it as timecounter */ wrmsr(0x1900, rdmsr(0x1900) | 0x20ULL); break; case CPU_VENDOR_CENTAUR: init_winchip(); break; case CPU_VENDOR_TRANSMETA: init_transmeta(); break; case CPU_VENDOR_RISE: init_rise(); break; } break; #endif #ifdef I686_CPU case CPU_M2: init_6x86MX(); break; case CPU_686: switch (cpu_vendor_id) { case CPU_VENDOR_INTEL: switch (cpu_id & 0xff0) { case 0x610: init_ppro(); break; case 0x660: init_mendocino(); break; } break; #ifdef CPU_ATHLON_SSE_HACK case CPU_VENDOR_AMD: /* * Sometimes the BIOS doesn't enable SSE instructions. * According to AMD document 20734, the mobile * Duron, the (mobile) Athlon 4 and the Athlon MP * support SSE. These correspond to cpu_id 0x66X * or 0x67X. */ if ((cpu_feature & CPUID_XMM) == 0 && ((cpu_id & ~0xf) == 0x660 || (cpu_id & ~0xf) == 0x670 || (cpu_id & ~0xf) == 0x680)) { u_int regs[4]; wrmsr(MSR_HWCR, rdmsr(MSR_HWCR) & ~0x08000); do_cpuid(1, regs); cpu_feature = regs[3]; } break; #endif case CPU_VENDOR_CENTAUR: init_via(); break; case CPU_VENDOR_TRANSMETA: init_transmeta(); break; } break; #endif default: break; } if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { load_cr4(rcr4() | CR4_FXSR | CR4_XMM); cpu_fxsr = hw_instruction_sse = 1; } -#if defined(PAE) || defined(PAE_TABLES) - if ((amd_feature & AMDID_NX) != 0) { - uint64_t msr; - + if (elf32_nxstack) { msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); - pg_nx = PG_NX; - elf32_nxstack = 1; } -#endif } void initializecpucache(void) { /* * CPUID with %eax = 1, %ebx returns * Bits 15-8: CLFLUSH line size * (Value * 8 = cache line size in bytes) */ if ((cpu_feature & CPUID_CLFSH) != 0) cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8; /* * XXXKIB: (temporary) hack to work around traps generated * when CLFLUSHing APIC register window under virtualization * environments. These environments tend to disable the * CPUID_SS feature even though the native CPU supports it. */ TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable); if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) { cpu_feature &= ~CPUID_CLFSH; cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT; } /* * The kernel's use of CLFLUSH{,OPT} can be disabled manually * by setting the hw.clflush_disable tunable. */ if (hw_clflush_disable == 1) { cpu_feature &= ~CPUID_CLFSH; cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT; } } #if defined(I586_CPU) && defined(CPU_WT_ALLOC) /* * Enable write allocate feature of AMD processors. * Following two functions require the Maxmem variable being set. */ static void enable_K5_wt_alloc(void) { u_int64_t msr; register_t saveintr; /* * Write allocate is supported only on models 1, 2, and 3, with * a stepping of 4 or greater. */ if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { saveintr = intr_disable(); msr = rdmsr(0x83); /* HWCR */ wrmsr(0x83, msr & !(0x10)); /* * We have to tell the chip where the top of memory is, * since video cards could have frame bufferes there, * memory-mapped I/O could be there, etc. */ if(Maxmem > 0) msr = Maxmem / 16; else msr = 0; msr |= AMD_WT_ALLOC_TME | AMD_WT_ALLOC_FRE; /* * There is no way to know wheter 15-16M hole exists or not. * Therefore, we disable write allocate for this range. */ wrmsr(0x86, 0x0ff00f0); msr |= AMD_WT_ALLOC_PRE; wrmsr(0x85, msr); msr=rdmsr(0x83); wrmsr(0x83, msr|0x10); /* enable write allocate */ intr_restore(saveintr); } } static void enable_K6_wt_alloc(void) { quad_t size; u_int64_t whcr; register_t saveintr; saveintr = intr_disable(); wbinvd(); #ifdef CPU_DISABLE_CACHE /* * Certain K6-2 box becomes unstable when write allocation is * enabled. */ /* * The AMD-K6 processer provides the 64-bit Test Register 12(TR12), * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported. * All other bits in TR12 have no effect on the processer's operation. * The I/O Trap Restart function (bit 9 of TR12) is always enabled * on the AMD-K6. */ wrmsr(0x0000000e, (u_int64_t)0x0008); #endif /* Don't assume that memory size is aligned with 4M. */ if (Maxmem > 0) size = ((Maxmem >> 8) + 3) >> 2; else size = 0; /* Limit is 508M bytes. */ if (size > 0x7f) size = 0x7f; whcr = (rdmsr(0xc0000082) & ~(0x7fLL << 1)) | (size << 1); #if defined(NO_MEMORY_HOLE) if (whcr & (0x7fLL << 1)) whcr |= 0x0001LL; #else /* * There is no way to know wheter 15-16M hole exists or not. * Therefore, we disable write allocate for this range. */ whcr &= ~0x0001LL; #endif wrmsr(0x0c0000082, whcr); intr_restore(saveintr); } static void enable_K6_2_wt_alloc(void) { quad_t size; u_int64_t whcr; register_t saveintr; saveintr = intr_disable(); wbinvd(); #ifdef CPU_DISABLE_CACHE /* * Certain K6-2 box becomes unstable when write allocation is * enabled. */ /* * The AMD-K6 processer provides the 64-bit Test Register 12(TR12), * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported. * All other bits in TR12 have no effect on the processer's operation. * The I/O Trap Restart function (bit 9 of TR12) is always enabled * on the AMD-K6. */ wrmsr(0x0000000e, (u_int64_t)0x0008); #endif /* Don't assume that memory size is aligned with 4M. */ if (Maxmem > 0) size = ((Maxmem >> 8) + 3) >> 2; else size = 0; /* Limit is 4092M bytes. */ if (size > 0x3fff) size = 0x3ff; whcr = (rdmsr(0xc0000082) & ~(0x3ffLL << 22)) | (size << 22); #if defined(NO_MEMORY_HOLE) if (whcr & (0x3ffLL << 22)) whcr |= 1LL << 16; #else /* * There is no way to know wheter 15-16M hole exists or not. * Therefore, we disable write allocate for this range. */ whcr &= ~(1LL << 16); #endif wrmsr(0x0c0000082, whcr); intr_restore(saveintr); } #endif /* I585_CPU && CPU_WT_ALLOC */ #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(cyrixreg, cyrixreg) { register_t saveintr; u_int cr0; u_char ccr1, ccr2, ccr3; u_char ccr0 = 0, ccr4 = 0, ccr5 = 0, pcr0 = 0; cr0 = rcr0(); if (cpu_vendor_id == CPU_VENDOR_CYRIX) { saveintr = intr_disable(); if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) { ccr0 = read_cyrix_reg(CCR0); } ccr1 = read_cyrix_reg(CCR1); ccr2 = read_cyrix_reg(CCR2); ccr3 = read_cyrix_reg(CCR3); if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { write_cyrix_reg(CCR3, CCR3_MAPEN0); ccr4 = read_cyrix_reg(CCR4); if ((cpu == CPU_M1) || (cpu == CPU_M2)) ccr5 = read_cyrix_reg(CCR5); else pcr0 = read_cyrix_reg(PCR0); write_cyrix_reg(CCR3, ccr3); /* Restore CCR3. */ } intr_restore(saveintr); if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) printf("CCR0=%x, ", (u_int)ccr0); printf("CCR1=%x, CCR2=%x, CCR3=%x", (u_int)ccr1, (u_int)ccr2, (u_int)ccr3); if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { printf(", CCR4=%x, ", (u_int)ccr4); if (cpu == CPU_M1SC) printf("PCR0=%x\n", pcr0); else printf("CCR5=%x\n", ccr5); } } printf("CR0=%x\n", cr0); } #endif /* DDB */ Index: head/sys/i386/i386/locore.s =================================================================== --- head/sys/i386/i386/locore.s (revision 343566) +++ head/sys/i386/i386/locore.s (revision 343567) @@ -1,465 +1,456 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ * * originally from: locore.s, by William F. Jolitz * * Substantially rewritten by David Greenman, Rod Grimes, * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp * and many others. */ #include "opt_bootp.h" #include "opt_nfsroot.h" #include "opt_pmap.h" #include #include #include #include #include #include #include "assym.inc" /* - * PTmap is recursive pagemap at top of virtual address space. - * Within PTmap, the page directory can be found (third indirection). - */ - .globl PTmap,PTD,PTDpde - .set PTmap,(PTDPTDI << PDRSHIFT) - .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) - .set PTDpde,PTD + (PTDPTDI * PDESIZE) - -/* * Compiled KERNBASE location and the kernel load address, now identical. */ .globl kernbase .set kernbase,KERNBASE .globl kernload .set kernload,KERNLOAD /* * Globals */ .data ALIGN_DATA /* just to be sure */ .space 0x2000 /* space for tmpstk - temporary stack */ tmpstk: .globl bootinfo bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ .text /********************************************************************** * * This is where the bootblocks start us, set the ball rolling... * */ NON_GPROF_ENTRY(btext) /* Tell the bios to warmboot next time */ movw $0x1234,0x472 /* Set up a real frame in case the double return in newboot is executed. */ xorl %ebp,%ebp pushl %ebp movl %esp, %ebp /* Don't trust what the BIOS gives for eflags. */ pushl $PSL_KERNEL popfl /* * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap * to set %cs, %ds, %es and %ss. */ mov %ds, %ax mov %ax, %fs mov %ax, %gs /* * Clear the bss. Not all boot programs do it, and it is our job anyway. * * XXX we don't check that there is memory for our bss and page tables * before using it. * * Note: we must be careful to not overwrite an active gdt or idt. They * inactive from now until we switch to new ones, since we don't load any * more segment registers or permit interrupts until after the switch. */ movl $__bss_end,%ecx movl $__bss_start,%edi subl %edi,%ecx xorl %eax,%eax cld rep stosb call recover_bootinfo /* Get onto a stack that we can trust. */ /* * XXX this step is delayed in case recover_bootinfo needs to return via * the old stack, but it need not be, since recover_bootinfo actually * returns via the old frame. */ movl $tmpstk,%esp call identify_cpu call pmap_cold /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ /* * Only use bottom page for init386(). init386() calculates the * PCB + FPU save area size and returns the true top of stack. */ leal PAGE_SIZE(%eax),%esp xorl %ebp,%ebp /* mark end of frames */ pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ /* * Clean up the stack in a way that db_numargs() understands, so * that backtraces in ddb don't underrun the stack. Traps for * inaccessible memory are more fatal than usual this early. */ addl $4,%esp /* Switch to true top of stack. */ movl %eax,%esp call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ addl $0,%esp /* for db_numargs() again */ /********************************************************************** * * Recover the bootinfo passed to us from the boot program * */ recover_bootinfo: /* * This code is called in different ways depending on what loaded * and started the kernel. This is used to detect how we get the * arguments from the other code and what we do with them. * * Old disk boot blocks: * (*btext)(howto, bootdev, cyloffset, esym); * [return address == 0, and can NOT be returned to] * [cyloffset was not supported by the FreeBSD boot code * and always passed in as 0] * [esym is also known as total in the boot code, and * was never properly supported by the FreeBSD boot code] * * Old diskless netboot code: * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); * [return address != 0, and can NOT be returned to] * If we are being booted by this code it will NOT work, * so we are just going to halt if we find this case. * * New uniform boot code: * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) * [return address != 0, and can be returned to] * * There may seem to be a lot of wasted arguments in here, but * that is so the newer boot code can still load very old kernels * and old boot code can load new kernels. */ /* * The old style disk boot blocks fake a frame on the stack and * did an lret to get here. The frame on the stack has a return * address of 0. */ cmpl $0,4(%ebp) je olddiskboot /* * We have some form of return address, so this is either the * old diskless netboot code, or the new uniform code. That can * be detected by looking at the 5th argument, if it is 0 * we are being booted by the new uniform boot code. */ cmpl $0,24(%ebp) je newboot /* * Seems we have been loaded by the old diskless boot code, we * don't stand a chance of running as the diskless structure * changed considerably between the two, so just halt. */ hlt /* * We have been loaded by the new uniform boot code. * Let's check the bootinfo version, and if we do not understand * it we return to the loader with a status of 1 to indicate this error */ newboot: movl 28(%ebp),%ebx /* &bootinfo.version */ movl BI_VERSION(%ebx),%eax cmpl $1,%eax /* We only understand version 1 */ je 1f movl $1,%eax /* Return status */ leave /* * XXX this returns to our caller's caller (as is required) since * we didn't set up a frame and our caller did. */ ret 1: /* * If we have a kernelname copy it in */ movl BI_KERNELNAME(%ebx),%esi cmpl $0,%esi je 2f /* No kernelname */ movl $MAXPATHLEN,%ecx /* Brute force!!! */ movl $kernelname,%edi cmpb $'/',(%esi) /* Make sure it starts with a slash */ je 1f movb $'/',(%edi) incl %edi decl %ecx 1: cld rep movsb 2: /* * Determine the size of the boot loader's copy of the bootinfo * struct. This is impossible to do properly because old versions * of the struct don't contain a size field and there are 2 old * versions with the same version number. */ movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ je got_bi_size /* no, sizeless version */ movl BI_SIZE(%ebx),%ecx got_bi_size: /* * Copy the common part of the bootinfo struct */ movl %ebx,%esi movl $bootinfo,%edi cmpl $BOOTINFO_SIZE,%ecx jbe got_common_bi_size movl $BOOTINFO_SIZE,%ecx got_common_bi_size: cld rep movsb #ifdef NFS_ROOT #ifndef BOOTP_NFSV3 /* * If we have a nfs_diskless structure copy it in */ movl BI_NFS_DISKLESS(%ebx),%esi cmpl $0,%esi je olddiskboot movl $nfs_diskless,%edi movl $NFSDISKLESS_SIZE,%ecx cld rep movsb movl $nfs_diskless_valid,%edi movl $1,(%edi) #endif #endif /* * The old style disk boot. * (*btext)(howto, bootdev, cyloffset, esym); * Note that the newer boot code just falls into here to pick * up howto and bootdev, cyloffset and esym are no longer used */ olddiskboot: movl 8(%ebp),%eax movl %eax,boothowto movl 12(%ebp),%eax movl %eax,bootdev ret /********************************************************************** * * Identify the CPU and initialize anything special about it * */ ENTRY(identify_cpu) pushl %ebx /* Try to toggle alignment check flag; does not exist on 386. */ pushfl popl %eax movl %eax,%ecx orl $PSL_AC,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_AC,%eax pushl %ecx popfl testl %eax,%eax jnz try486 /* NexGen CPU does not have aligment check flag. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jz trynexgen popfl movl $CPU_386,cpu jmp 3f trynexgen: popfl movl $CPU_NX586,cpu movl $0x4778654e,cpu_vendor # store vendor string movl $0x72446e65,cpu_vendor+4 movl $0x6e657669,cpu_vendor+8 movl $0,cpu_vendor+12 jmp 3f try486: /* Try to toggle identification flag; does not exist on early 486s. */ pushfl popl %eax movl %eax,%ecx xorl $PSL_ID,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_ID,%eax pushl %ecx popfl testl %eax,%eax jnz trycpuid movl $CPU_486,cpu /* * Check Cyrix CPU * Cyrix CPUs do not change the undefined flags following * execution of the divide instruction which divides 5 by 2. * * Note: CPUID is enabled on M2, so it passes another way. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jnc trycyrix popfl jmp 3f /* You may use Intel CPU. */ trycyrix: popfl /* * IBM Bluelighting CPU also doesn't change the undefined flags. * Because IBM doesn't disclose the information for Bluelighting * CPU, we couldn't distinguish it from Cyrix's (including IBM * brand of Cyrix CPUs). */ movl $0x69727943,cpu_vendor # store vendor string movl $0x736e4978,cpu_vendor+4 movl $0x64616574,cpu_vendor+8 jmp 3f trycpuid: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid # cpuid 0 movl %eax,cpu_high # highest capability movl %ebx,cpu_vendor # store vendor string movl %edx,cpu_vendor+4 movl %ecx,cpu_vendor+8 movb $0,cpu_vendor+12 movl $1,%eax cpuid # cpuid 1 movl %eax,cpu_id # store cpu_id movl %ebx,cpu_procinfo # store cpu_procinfo movl %edx,cpu_feature # store cpu_feature movl %ecx,cpu_feature2 # store cpu_feature2 rorl $8,%eax # extract family type andl $15,%eax cmpl $5,%eax jae 1f /* less than Pentium; must be 486 */ movl $CPU_486,cpu jmp 3f 1: /* a Pentium? */ cmpl $5,%eax jne 2f movl $CPU_586,cpu jmp 3f 2: /* Greater than Pentium...call it a Pentium Pro */ movl $CPU_686,cpu 3: popl %ebx ret END(identify_cpu) #ifdef XENHVM /* Xen Hypercall page */ .text .p2align PAGE_SHIFT, 0x90 /* Hypercall_page needs to be PAGE aligned */ NON_GPROF_ENTRY(hypercall_page) .skip 0x1000, 0x90 /* Fill with "nop"s */ #endif Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 343566) +++ head/sys/i386/i386/machdep.c (revision 343567) @@ -1,3270 +1,3266 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2018 The FreeBSD Foundation * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atpic.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); register_t init386(int first); void dblfault_handler(void); void identify_cpu(void); static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; +static int above4g_allow = 1; +static int above24g_allow = 0; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2) #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; extern char start_exceptions[], end_exceptions[]; extern struct sysentvec elf32_freebsd_sysvec; /* Default init_ops implementation. */ struct init_ops init_ops = { .early_clock_source_init = i8254_init, .early_delay = i8254_delay, #ifdef DEV_APIC .msi_init = msi_init, #endif }; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_free_count())) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned int)sp & ~0x3F); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; if (regs->tf_eip == 0) regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) { uprintf( "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #ifdef COMPAT_43 static void setup_priv_lcall_gate(struct proc *p) { struct i386_ldt_args uap; union descriptor desc; u_int lcall_addr; bzero(&uap, sizeof(uap)); uap.start = 0; uap.num = 1; lcall_addr = p->p_sysent->sv_psstrings - sz_lcall_tramp; bzero(&desc, sizeof(desc)); desc.sd.sd_type = SDT_MEMERA; desc.sd.sd_dpl = SEL_UPL; desc.sd.sd_p = 1; desc.sd.sd_def32 = 1; desc.sd.sd_gran = 1; desc.sd.sd_lolimit = 0xffff; desc.sd.sd_hilimit = 0xf; desc.sd.sd_lobase = lcall_addr; desc.sd.sd_hibase = lcall_addr >> 24; i386_set_ldt(curthread, &uap, &desc); } #endif /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs; struct pcb *pcb; register_t saved_eflags; regs = td->td_frame; pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); #ifdef COMPAT_43 if (td->td_proc->p_sysent->sv_psstrings != elf32_freebsd_sysvec.sv_psstrings) setup_priv_lcall_gate(td->td_proc); #endif /* * Reset the fs and gs bases. The values from the old address * space do not make sense for the new program. In particular, * gsbase might be the TLS base for the old program but the new * program has no TLS now. */ set_fsbase(td, 0); set_gsbase(td, 0); /* Make sure edx is 0x0 on entry. Linux binaries depend on it. */ saved_eflags = regs->tf_eflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | saved_eflags; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = imgp->ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); static char bootmethod[16] = "BIOS"; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; struct mtx dt_lock; /* lock for GDT and LDT */ union descriptor gdt0[NGDT]; /* initial global descriptor table */ union descriptor *gdt = gdt0; /* global descriptor table */ union descriptor *ldt; /* local descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static struct i386tss *dblfault_tss; static char *dblfault_stack; static struct i386tss common_tss0; vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = 0, .ssd_limit = sizeof(union descriptor) * NLDT - 1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = 0, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = 0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; uintptr_t setidt_disp; void setidt(int idx, inthand_t *func, int typ, int dpl, int selec) { uintptr_t off; off = func != NULL ? (uintptr_t)func + setidt_disp : 0; setidt_nodisp(idx, off, typ, dpl, selec); } void setidt_nodisp(int idx, uintptr_t off, int typ, int dpl, int selec) { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = off; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((u_int)off) >> 16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func, func_trm; bool trm; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { if (ip->gd_type == SDT_SYSTASKGT) { db_printf("%3d\t\n", idx); } else { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func >= PMAP_TRM_MIN_ADDRESS) { func_trm = func; func -= setidt_disp; trm = true; } else trm = false; if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); if (trm) db_printf(" (trampoline %#x)", func_trm); db_printf("\n"); } } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016llx\n", rxcr(0)); if (amd_feature & (AMDID_NX | AMDID_LM)) db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t0x%016llx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); if ((cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR)); if (cpu_feature & CPUID_PAT) db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%08x\n", rdr0()); db_printf("dr1\t0x%08x\n", rdr1()); db_printf("dr2\t0x%08x\n", rdr2()); db_printf("dr3\t0x%08x\n", rdr3()); db_printf("dr6\t0x%08x\n", rdr6()); db_printf("dr7\t0x%08x\n", rdr7()); } DB_SHOW_COMMAND(frame, db_show_frame) { struct trapframe *frame; frame = have_addr ? (struct trapframe *)addr : curthread->td_frame; printf("ss %#x esp %#x efl %#x cs %#x eip %#x\n", frame->tf_ss, frame->tf_esp, frame->tf_eflags, frame->tf_cs, frame->tf_eip); printf("err %#x trapno %d\n", frame->tf_err, frame->tf_trapno); printf("ds %#x es %#x fs %#x\n", frame->tf_ds, frame->tf_es, frame->tf_fs); printf("eax %#x ecx %#x edx %#x ebx %#x\n", frame->tf_eax, frame->tf_ecx, frame->tf_edx, frame->tf_ebx); printf("ebp %#x esi %#x edi %#x\n", frame->tf_ebp, frame->tf_esi, frame->tf_edi); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { + uint64_t lim, ign; int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); -#ifndef PAE - if (base > 0xffffffff) { - printf("%uK of memory above 4GB ignored\n", - (u_int)(length / 1024)); + lim = 0x100000000; /* 4G */ + if (pae_mode && above4g_allow) + lim = above24g_allow ? -1ULL : 0x600000000; /* 24G */ + if (base >= lim) { + printf("%uK of memory above %uGB ignored, pae %d " + "above4g_allow %d above24g_allow %d\n", + (u_int)(length / 1024), (u_int)(lim >> 30), pae_mode, + above4g_allow, above24g_allow); return (1); } -#endif + if (base + length >= lim) { + ign = base + length - lim; + length -= ign; + printf("%uK of memory above %uGB ignored, pae %d " + "above4g_allow %d above24g_allow %d\n", + (u_int)(ign / 1024), (u_int)(lim >> 30), pae_mode, + above4g_allow, above24g_allow); + } /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); return (add_physmap_entry(smap->base, smap->length, physmap, physmap_idxp)); } static void add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap, int *physmap_idxp) { struct bios_smap *smap, *smapend; u_int32_t smapsize; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, physmap_idxp)) break; } static void basemem_setup(void) { - pt_entry_t *pte; - int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } - /* - * Map pages between basemem and ISA_HOLE_START, if any, r/w into - * the vm86 page table so that vm86 can scribble on them using - * the vm86 map too. XXX: why 2 ways for this and only 1 way for - * page 0, at least as initialized here? - */ - pte = (pt_entry_t *)vm86paddr; - for (i = basemem / 4; i < 160; i++) - pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; + pmap_basemem_setup(basemem); } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; - pt_entry_t *pte; quad_t dcons_addr, dcons_size, physmem_tunable; int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase; caddr_t kmdp; has_smap = 0; bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Tell the physical memory allocator about pages used to store * the kernel and preloaded data. See kmem_bootstrap_free(). */ vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first)); + TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow); + TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow); + /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. */ physmap_idx = 0; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { add_smap_entries(smapbase, physmap, &physmap_idx); has_smap = 1; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, PMAP_MAP_LOW + ptoa(1)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ alloc_ap_trampoline(physmap, &physmap_idx); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. * * This is especially confusing when it is much larger than the * memory size and is displayed as "realmem". */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; - pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; - int *ptr = (int *)CADDR3; + int *ptr; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ - *pte = pa | PG_V | PG_RW | PG_N; - invltlb(); + ptr = (int *)pmap_cmap3(pa, PG_V | PG_RW | PG_N); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } - *pte = 0; - invltlb(); + pmap_cmap3(0, 0); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } static void i386_kdb_init(void) { #ifdef DDB db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab); #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } static void fixup_idt(void) { struct gate_descriptor *ip; uintptr_t off; int x; for (x = 0; x < NIDT; x++) { ip = &idt[x]; if (ip->gd_type != SDT_SYS386IGT && ip->gd_type != SDT_SYS386TGT) continue; off = ip->gd_looffset + (((u_int)ip->gd_hioffset) << 16); KASSERT(off >= (uintptr_t)start_exceptions && off < (uintptr_t)end_exceptions, ("IDT[%d] type %d off %#x", x, ip->gd_type, off)); off += setidt_disp; MPASS(off >= PMAP_TRM_MIN_ADDRESS && off < PMAP_TRM_MAX_ADDRESS); ip->gd_looffset = off; ip->gd_hioffset = off >> 16; } } static void i386_setidt1(void) { int x; /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif } static void i386_setidt2(void) { setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } #if defined(DEV_ISA) && !defined(DEV_ATPIC) static void i386_setidt3(void) { setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); } #endif register_t init386(int first) { struct region_descriptor r_gdt, r_idt; /* table descriptors */ int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; struct xstate_hdr *xhdr; caddr_t kmdp; vm_offset_t addend; size_t ucode_len; int late_console; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = TD0_KSTACK_PAGES; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); if (bootinfo.bi_modulep) { metadata_missing = 0; addend = (vm_paddr_t)bootinfo.bi_modulep < KERNBASE ? PMAP_MAP_LOW : 0; preload_metadata = (caddr_t)bootinfo.bi_modulep + addend; preload_bootstrap_relocate(addend); } else { metadata_missing = 1; } if (bootinfo.bi_envp != 0) { addend = (vm_paddr_t)bootinfo.bi_envp < KERNBASE ? PMAP_MAP_LOW : 0; init_static_kenv((char *)bootinfo.bi_envp + addend, 0); } else { init_static_kenv(NULL, 0); } /* * Re-evaluate CPU features if we loaded a microcode update. */ ucode_len = ucode_load_bsp(first); if (ucode_len != 0) { identify_cpu(); first = roundup2(first + ucode_len, PAGE_SIZE); } identify_hypervisor(); /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int)pc; gdt_segs[GPROC0_SEL].ssd_base = (int)&common_tss0; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt0[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt0[0]) - 1; r_gdt.rd_base = (int)gdt0; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa, pa); dpcpu_init((void *)first, 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* Non-late cninit() and printf() can be moved up to here. */ /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); i386_setidt1(); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); finishidentcpu(); /* Final stage of CPU initialization */ i386_setidt2(); + pmap_set_nx(); initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); /* Initialize the tss (except for the final esp0) early for vm86. */ common_tss0.tss_esp0 = thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE - VM86_STACK_SPACE; common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); common_tss0.tss_ioopt = sizeof(struct i386tss) << 16; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); /* Initialize the PIC early for vm86 calls. */ #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ i386_setidt3(); #endif #endif /* * The console and kdb should be initialized even earlier than here, * but some console drivers don't work until after getmemsize(). * Default to late console initialization to support these drivers. * This loses mainly printf()s in getmemsize() and early debugging. */ late_console = 1; TUNABLE_INT_FETCH("debug.late_console", &late_console); if (!late_console) { cninit(); i386_kdb_init(); } kmdp = preload_search_by_type("elf kernel"); link_elf_ireloc(kmdp); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ if (late_console) cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); if (late_console) i386_kdb_init(); msgbufinit(msgbufp, msgbufsize); npxinit(true); /* * Set up thread0 pcb after npxinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } PCPU_SET(curpcb, thread0.td_pcb); /* Move esp0 in the tss to its final place. */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ common_tss0.tss_esp0 = (vm_offset_t)thread0.td_pcb - VM86_STACK_SPACE; PCPU_SET(kesp0, common_tss0.tss_esp0); gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; /* clear busy bit */ ltr(gsel_tss); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; -#if defined(PAE) || defined(PAE_TABLES) - thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; -#else - thread0.td_pcb->pcb_cr3 = (int)IdlePTD; -#endif + thread0.td_pcb->pcb_cr3 = pmap_get_kcr3(); thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((register_t)thread0.td_pcb); } static void machdep_init_trampoline(void) { struct region_descriptor r_gdt, r_idt; struct i386tss *tss; char *copyout_buf, *trampoline, *tramp_stack_base; int x; gdt = pmap_trm_alloc(sizeof(union descriptor) * NGDT * mp_ncpus, M_NOWAIT | M_ZERO); bcopy(gdt0, gdt, sizeof(union descriptor) * NGDT); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int)gdt; lgdt(&r_gdt); tss = pmap_trm_alloc(sizeof(struct i386tss) * mp_ncpus, M_NOWAIT | M_ZERO); bcopy(&common_tss0, tss, sizeof(struct i386tss)); gdt[GPROC0_SEL].sd.sd_lobase = (int)tss; gdt[GPROC0_SEL].sd.sd_hibase = (u_int)tss >> 24; gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tssp, tss); ltr(GSEL(GPROC0_SEL, SEL_KPL)); trampoline = pmap_trm_alloc(end_exceptions - start_exceptions, M_NOWAIT); bcopy(start_exceptions, trampoline, end_exceptions - start_exceptions); tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT); PCPU_SET(trampstk, (uintptr_t)tramp_stack_base + TRAMP_STACK_SZ - VM86_STACK_SPACE); tss[0].tss_esp0 = PCPU_GET(trampstk); idt = pmap_trm_alloc(sizeof(idt0), M_NOWAIT | M_ZERO); bcopy(idt0, idt, sizeof(idt0)); /* Re-initialize new IDT since the handlers were relocated */ setidt_disp = trampoline - start_exceptions; fixup_idt(); r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1; r_idt.rd_base = (int)idt; lidt(&r_idt); /* dblfault TSS */ dblfault_tss = pmap_trm_alloc(sizeof(struct i386tss), M_NOWAIT | M_ZERO); dblfault_stack = pmap_trm_alloc(PAGE_SIZE, M_NOWAIT); dblfault_tss->tss_esp = dblfault_tss->tss_esp0 = dblfault_tss->tss_esp1 = dblfault_tss->tss_esp2 = (int)dblfault_stack + PAGE_SIZE; dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 = dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); -#if defined(PAE) || defined(PAE_TABLES) - dblfault_tss->tss_cr3 = (int)IdlePDPT; -#else - dblfault_tss->tss_cr3 = (int)IdlePTD; -#endif + dblfault_tss->tss_cr3 = pmap_get_kcr3(); dblfault_tss->tss_eip = (int)dblfault_handler; dblfault_tss->tss_eflags = PSL_KERNEL; dblfault_tss->tss_ds = dblfault_tss->tss_es = dblfault_tss->tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss->tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL); gdt[GPANIC_SEL].sd.sd_lobase = (int)dblfault_tss; gdt[GPANIC_SEL].sd.sd_hibase = (u_int)dblfault_tss >> 24; /* make ldt memory segments */ ldt = pmap_trm_alloc(sizeof(union descriptor) * NLDT, M_NOWAIT | M_ZERO); gdt[GLDT_SEL].sd.sd_lobase = (int)ldt; gdt[GLDT_SEL].sd.sd_hibase = (u_int)ldt >> 24; ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < nitems(ldt_segs); x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT); PCPU_SET(copyout_buf, copyout_buf); copyout_init_tramp(); } SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_SECOND, machdep_init_trampoline, NULL); #ifdef COMPAT_43 static void i386_setup_lcall_gate(void) { struct sysentvec *sv; struct user_segment_descriptor desc; u_int lcall_addr; sv = &elf32_freebsd_sysvec; lcall_addr = (uintptr_t)sv->sv_psstrings - sz_lcall_tramp; bzero(&desc, sizeof(desc)); desc.sd_type = SDT_MEMERA; desc.sd_dpl = SEL_UPL; desc.sd_p = 1; desc.sd_def32 = 1; desc.sd_gran = 1; desc.sd_lolimit = 0xffff; desc.sd_hilimit = 0xf; desc.sd_lobase = lcall_addr; desc.sd_hibase = lcall_addr >> 24; bcopy(&desc, &ldt[LSYS5CALLS_SEL], sizeof(desc)); } SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, i386_setup_lcall_gate, NULL); #endif void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct region_descriptor r_idt; struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = (vm_offset_t)pmap_trm_alloc(PAGE_SIZE * 3, M_NOWAIT | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); tmp = round_page(tmp); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor *) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; r_idt.rd_limit = sizeof(idt0) - 1; lidt(&r_idt); /* SMP machines do not need the F00F hack. */ idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; pcb->pcb_gs = rgs(); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if ((td->td_frame->tf_eflags & PSL_T) == 0) { td->td_frame->tf_eflags |= PSL_T; td->td_dbgflags |= TDB_STEP; } return (0); } int ptrace_clear_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); td->td_frame->tf_eflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; regs->r_err = 0; regs->r_trapno = 0; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); npxgetregs(td); if (cpu_fxsr) npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm, (struct save87 *)fpregs); else bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { critical_enter(); if (cpu_fxsr) npx_set_fpregs_xmm((struct save87 *)fpregs, &get_pcb_user_save_td(td)->sv_xmm); else bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87, sizeof(*fpregs)); npxuserinited(td); critical_exit(); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; char *xfpustate; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; mcp->mc_ownedfp = npxgetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(union savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { error = npxsetregs(td, (union savefpu *)&mcp->mc_fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); if (PCPU_GET(fpcurthread) == td) npxdrop(); /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(register_t dr6) { u_int32_t dr7; u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; bp = dr6 & DBREG_DR6_BMASK; if (bp == 0) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ Index: head/sys/i386/i386/mem.c =================================================================== --- head/sys/i386/i386/mem.c (revision 343566) +++ head/sys/i386/i386/mem.c (revision 343567) @@ -1,233 +1,232 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988 University of Utah. * Copyright (c) 1982, 1986, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and code derived from software contributed to * Berkeley by William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: mem.c 1.13 89/10/08$ * from: @(#)mem.c 7.2 (Berkeley) 5/9/91 */ #include __FBSDID("$FreeBSD$"); /* * Memory special file */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Used in /dev/mem drivers and elsewhere */ MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors"); static struct sx memsxlock; SX_SYSINIT(memsxlockinit, &memsxlock, "/dev/mem lock"); /* ARGSUSED */ int memrw(struct cdev *dev, struct uio *uio, int flags) { int o; u_int c = 0; vm_paddr_t pa; struct iovec *iov; int error = 0; vm_offset_t addr; if (dev2unit(dev) != CDEV_MINOR_MEM && dev2unit(dev) != CDEV_MINOR_KMEM) return EIO; if (dev2unit(dev) == CDEV_MINOR_KMEM && uio->uio_resid > 0) { if (!kernacc((caddr_t)(int)uio->uio_offset, uio->uio_resid, uio->uio_rw == UIO_READ ? VM_PROT_READ : VM_PROT_WRITE)) return (EFAULT); } while (uio->uio_resid > 0 && error == 0) { iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iov++; uio->uio_iovcnt--; if (uio->uio_iovcnt < 0) panic("memrw"); continue; } if (dev2unit(dev) == CDEV_MINOR_MEM) { if (uio->uio_offset > cpu_getmaxphyaddr()) { error = EFAULT; break; } pa = trunc_page(uio->uio_offset); } else { /* * Extract the physical page since the mapping may * change at any time. This avoids panics on page * fault in this case but will cause reading/writing * to the wrong page. * Hopefully an application will notice the wrong * data on read access and refrain from writing. * This should be replaced by a special uiomove * type function that just returns an error if there * is a page fault on a kernel page. */ addr = trunc_page(uio->uio_offset); pa = pmap_extract(kernel_pmap, addr); if (pa == 0) return EFAULT; } /* * XXX UPS This should just use sf_buf_alloc. * Unfortunately sf_buf_alloc needs a vm_page * and we may want to look at memory not covered * by the page array. */ sx_xlock(&memsxlock); pmap_kenter((vm_offset_t)ptvmmap, pa); pmap_invalidate_page(kernel_pmap,(vm_offset_t)ptvmmap); o = (int)uio->uio_offset & PAGE_MASK; c = PAGE_SIZE - o; c = min(c, (u_int)iov->iov_len); error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio); pmap_qremove((vm_offset_t)ptvmmap, 1); sx_xunlock(&memsxlock); - } return (error); } /* * allow user processes to MMAP some memory sections * instead of going through read/write */ /* ARGSUSED */ int memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int prot __unused, vm_memattr_t *memattr __unused) { if (dev2unit(dev) == CDEV_MINOR_MEM) { if (offset > cpu_getmaxphyaddr()) return (-1); *paddr = offset; return (0); } return (-1); } /* * Operations for changing memory attributes. * * This is basically just an ioctl shim for mem_range_attr_get * and mem_range_attr_set. */ /* ARGSUSED */ int memioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, int flags, struct thread *td) { int nd, error = 0; struct mem_range_op *mo = (struct mem_range_op *)data; struct mem_range_desc *md; /* is this for us? */ if ((cmd != MEMRANGE_GET) && (cmd != MEMRANGE_SET)) return (ENOTTY); /* any chance we can handle this? */ if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); /* do we have any descriptors? */ if (mem_range_softc.mr_ndesc == 0) return (ENXIO); switch (cmd) { case MEMRANGE_GET: nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc); if (nd > 0) { md = (struct mem_range_desc *) malloc(nd * sizeof(struct mem_range_desc), M_MEMDESC, M_WAITOK); error = mem_range_attr_get(md, &nd); if (!error) error = copyout(md, mo->mo_desc, nd * sizeof(struct mem_range_desc)); free(md, M_MEMDESC); } else nd = mem_range_softc.mr_ndesc; mo->mo_arg[0] = nd; break; case MEMRANGE_SET: md = (struct mem_range_desc *)malloc(sizeof(struct mem_range_desc), M_MEMDESC, M_WAITOK); error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc)); /* clamp description string */ md->mr_owner[sizeof(md->mr_owner) - 1] = 0; if (error == 0) error = mem_range_attr_set(md, &mo->mo_arg[0]); free(md, M_MEMDESC); break; } return (error); } Index: head/sys/i386/i386/minidump_machdep.c =================================================================== --- head/sys/i386/i386/minidump_machdep.c (revision 343566) +++ head/sys/i386/i386/minidump_machdep.c (revision 343567) @@ -1,377 +1,85 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); -#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) -#define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) - uint32_t *vm_page_dump; int vm_page_dump_size; -static struct kerneldumpheader kdh; - -/* Handle chunked writes. */ -static size_t fragsz; -static void *dump_va; -static uint64_t counter, progress; - CTASSERT(sizeof(*vm_page_dump) == 4); - -static int -is_dumpable(vm_paddr_t pa) -{ - int i; - - for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { - if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) - return (1); - } - return (0); -} - -#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) - -static int -blk_flush(struct dumperinfo *di) -{ - int error; - - if (fragsz == 0) - return (0); - - error = dump_append(di, dump_va, 0, fragsz); - fragsz = 0; - return (error); -} - -static int -blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) -{ - size_t len; - int error, i, c; - u_int maxdumpsz; - - maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); - if (maxdumpsz == 0) /* seatbelt */ - maxdumpsz = PAGE_SIZE; - error = 0; - if ((sz % PAGE_SIZE) != 0) { - printf("size not page aligned\n"); - return (EINVAL); - } - if (ptr != NULL && pa != 0) { - printf("cant have both va and pa!\n"); - return (EINVAL); - } - if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { - printf("address not page aligned\n"); - return (EINVAL); - } - if (ptr != NULL) { - /* If we're doing a virtual dump, flush any pre-existing pa pages */ - error = blk_flush(di); - if (error) - return (error); - } - while (sz) { - len = maxdumpsz - fragsz; - if (len > sz) - len = sz; - counter += len; - progress -= len; - if (counter >> 24) { - printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); - counter &= (1<<24) - 1; - } - - wdog_kern_pat(WD_LASTVAL); - - if (ptr) { - error = dump_append(di, ptr, 0, len); - if (error) - return (error); - ptr += len; - sz -= len; - } else { - for (i = 0; i < len; i += PAGE_SIZE) - dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); - fragsz += len; - pa += len; - sz -= len; - if (fragsz == maxdumpsz) { - error = blk_flush(di); - if (error) - return (error); - } - } - - /* Check for user abort. */ - c = cncheckc(); - if (c == 0x03) - return (ECANCELED); - if (c != -1) - printf(" (CTRL-C to abort) "); - } - - return (0); -} - -/* A fake page table page, to avoid having to handle both 4K and 2M pages */ -static pt_entry_t fakept[NPTEPG]; - -int -minidumpsys(struct dumperinfo *di) -{ - uint64_t dumpsize; - uint32_t ptesize; - vm_offset_t va; - int error; - uint32_t bits; - uint64_t pa; - pd_entry_t *pd; - pt_entry_t *pt; - int i, j, k, bit; - struct minidumphdr mdhdr; - - counter = 0; - /* Walk page table pages, set bits in vm_page_dump */ - ptesize = 0; - for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { - /* - * We always write a page, even if it is zero. Each - * page written corresponds to 2MB of space - */ - ptesize += PAGE_SIZE; - pd = IdlePTD; /* always mapped! */ - j = va >> PDRSHIFT; - if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { - /* This is an entire 2M page. */ - pa = pd[j] & PG_PS_FRAME; - for (k = 0; k < NPTEPG; k++) { - if (is_dumpable(pa)) - dump_add_page(pa); - pa += PAGE_SIZE; - } - continue; - } - if ((pd[j] & PG_V) == PG_V) { - /* set bit for each valid page in this 2MB block */ - pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0); - for (k = 0; k < NPTEPG; k++) { - if ((pt[k] & PG_V) == PG_V) { - pa = pt[k] & PG_FRAME; - if (is_dumpable(pa)) - dump_add_page(pa); - } - } - } else { - /* nothing, we're going to dump a null page */ - } - } - - /* Calculate dump size. */ - dumpsize = ptesize; - dumpsize += round_page(msgbufp->msg_size); - dumpsize += round_page(vm_page_dump_size); - for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { - bits = vm_page_dump[i]; - while (bits) { - bit = bsfl(bits); - pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; - /* Clear out undumpable pages now if needed */ - if (is_dumpable(pa)) { - dumpsize += PAGE_SIZE; - } else { - dump_drop_page(pa); - } - bits &= ~(1ul << bit); - } - } - dumpsize += PAGE_SIZE; - - progress = dumpsize; - - /* Initialize mdhdr */ - bzero(&mdhdr, sizeof(mdhdr)); - strcpy(mdhdr.magic, MINIDUMP_MAGIC); - mdhdr.version = MINIDUMP_VERSION; - mdhdr.msgbufsize = msgbufp->msg_size; - mdhdr.bitmapsize = vm_page_dump_size; - mdhdr.ptesize = ptesize; - mdhdr.kernbase = KERNBASE; -#if defined(PAE) || defined(PAE_TABLES) - mdhdr.paemode = 1; -#endif - - dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, - dumpsize); - - error = dump_start(di, &kdh); - if (error != 0) - goto fail; - - printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); - printf("Dumping %llu MB:", (long long)dumpsize >> 20); - - /* Dump my header */ - bzero(&fakept, sizeof(fakept)); - bcopy(&mdhdr, &fakept, sizeof(mdhdr)); - error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); - if (error) - goto fail; - - /* Dump msgbuf up front */ - error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); - if (error) - goto fail; - - /* Dump bitmap */ - error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); - if (error) - goto fail; - - /* Dump kernel page table pages */ - for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { - /* We always write a page, even if it is zero */ - pd = IdlePTD; /* always mapped! */ - j = va >> PDRSHIFT; - if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { - /* This is a single 2M block. Generate a fake PTP */ - pa = pd[j] & PG_PS_FRAME; - for (k = 0; k < NPTEPG; k++) { - fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; - } - error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); - if (error) - goto fail; - /* flush, in case we reuse fakept in the same block */ - error = blk_flush(di); - if (error) - goto fail; - continue; - } - if ((pd[j] & PG_V) == PG_V) { - pa = pd[j] & PG_FRAME; - error = blk_write(di, 0, pa, PAGE_SIZE); - if (error) - goto fail; - } else { - bzero(fakept, sizeof(fakept)); - error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); - if (error) - goto fail; - /* flush, in case we reuse fakept in the same block */ - error = blk_flush(di); - if (error) - goto fail; - } - } - - /* Dump memory chunks */ - /* XXX cluster it up and use blk_dump() */ - for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { - bits = vm_page_dump[i]; - while (bits) { - bit = bsfl(bits); - pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; - error = blk_write(di, 0, pa, PAGE_SIZE); - if (error) - goto fail; - bits &= ~(1ul << bit); - } - } - - error = blk_flush(di); - if (error) - goto fail; - - error = dump_finish(di, &kdh); - if (error != 0) - goto fail; - - printf("\nDump complete\n"); - return (0); - - fail: - if (error < 0) - error = -error; - - if (error == ECANCELED) - printf("\nDump aborted\n"); - else if (error == E2BIG || error == ENOSPC) - printf("\nDump failed. Partition too small.\n"); - else - printf("\n** DUMP FAILED (ERROR %d) **\n", error); - return (error); -} - void dump_add_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 5; /* 2^5 = 32 */ bit = pa & 31; atomic_set_int(&vm_page_dump[idx], 1ul << bit); } void dump_drop_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 5; /* 2^5 = 32 */ bit = pa & 31; atomic_clear_int(&vm_page_dump[idx], 1ul << bit); +} + +int +minidumpsys(struct dumperinfo *di) +{ + + return (pae_mode ? minidumpsys_pae(di) : minidumpsys_nopae(di)); } Index: head/sys/i386/i386/minidump_machdep_base.c =================================================================== --- head/sys/i386/i386/minidump_machdep_base.c (nonexistent) +++ head/sys/i386/i386/minidump_machdep_base.c (revision 343567) @@ -0,0 +1,360 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2006 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_watchdog.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CTASSERT(sizeof(struct kerneldumpheader) == 512); + +#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) +#define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) + +extern uint32_t *vm_page_dump; +extern int vm_page_dump_size; + +static struct kerneldumpheader kdh; + +/* Handle chunked writes. */ +static size_t fragsz; +static void *dump_va; +static uint64_t counter, progress; + +CTASSERT(sizeof(*vm_page_dump) == 4); + + +static int +is_dumpable(vm_paddr_t pa) +{ + int i; + + for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { + if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) + return (1); + } + return (0); +} + +#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) + +static int +blk_flush(struct dumperinfo *di) +{ + int error; + + if (fragsz == 0) + return (0); + + error = dump_append(di, dump_va, 0, fragsz); + fragsz = 0; + return (error); +} + +static int +blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) +{ + size_t len; + int error, i, c; + u_int maxdumpsz; + + maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); + if (maxdumpsz == 0) /* seatbelt */ + maxdumpsz = PAGE_SIZE; + error = 0; + if ((sz % PAGE_SIZE) != 0) { + printf("size not page aligned\n"); + return (EINVAL); + } + if (ptr != NULL && pa != 0) { + printf("cant have both va and pa!\n"); + return (EINVAL); + } + if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { + printf("address not page aligned\n"); + return (EINVAL); + } + if (ptr != NULL) { + /* If we're doing a virtual dump, flush any pre-existing pa pages */ + error = blk_flush(di); + if (error) + return (error); + } + while (sz) { + len = maxdumpsz - fragsz; + if (len > sz) + len = sz; + counter += len; + progress -= len; + if (counter >> 24) { + printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); + counter &= (1<<24) - 1; + } + + wdog_kern_pat(WD_LASTVAL); + + if (ptr) { + error = dump_append(di, ptr, 0, len); + if (error) + return (error); + ptr += len; + sz -= len; + } else { + for (i = 0; i < len; i += PAGE_SIZE) + dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); + fragsz += len; + pa += len; + sz -= len; + if (fragsz == maxdumpsz) { + error = blk_flush(di); + if (error) + return (error); + } + } + + /* Check for user abort. */ + c = cncheckc(); + if (c == 0x03) + return (ECANCELED); + if (c != -1) + printf(" (CTRL-C to abort) "); + } + + return (0); +} + +/* A fake page table page, to avoid having to handle both 4K and 2M pages */ +static pt_entry_t fakept[NPTEPG]; + +#ifdef PMAP_PAE_COMP +#define minidumpsys minidumpsys_pae +#define IdlePTD IdlePTD_pae +#else +#define minidumpsys minidumpsys_nopae +#define IdlePTD IdlePTD_nopae +#endif + +int +minidumpsys(struct dumperinfo *di) +{ + uint64_t dumpsize; + uint32_t ptesize; + vm_offset_t va; + int error; + uint32_t bits; + uint64_t pa; + pd_entry_t *pd; + pt_entry_t *pt; + int i, j, k, bit; + struct minidumphdr mdhdr; + + counter = 0; + /* Walk page table pages, set bits in vm_page_dump */ + ptesize = 0; + for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { + /* + * We always write a page, even if it is zero. Each + * page written corresponds to 2MB of space + */ + ptesize += PAGE_SIZE; + pd = IdlePTD; /* always mapped! */ + j = va >> PDRSHIFT; + if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { + /* This is an entire 2M page. */ + pa = pd[j] & PG_PS_FRAME; + for (k = 0; k < NPTEPG; k++) { + if (is_dumpable(pa)) + dump_add_page(pa); + pa += PAGE_SIZE; + } + continue; + } + if ((pd[j] & PG_V) == PG_V) { + /* set bit for each valid page in this 2MB block */ + pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0); + for (k = 0; k < NPTEPG; k++) { + if ((pt[k] & PG_V) == PG_V) { + pa = pt[k] & PG_FRAME; + if (is_dumpable(pa)) + dump_add_page(pa); + } + } + } else { + /* nothing, we're going to dump a null page */ + } + } + + /* Calculate dump size. */ + dumpsize = ptesize; + dumpsize += round_page(msgbufp->msg_size); + dumpsize += round_page(vm_page_dump_size); + for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { + bits = vm_page_dump[i]; + while (bits) { + bit = bsfl(bits); + pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; + /* Clear out undumpable pages now if needed */ + if (is_dumpable(pa)) { + dumpsize += PAGE_SIZE; + } else { + dump_drop_page(pa); + } + bits &= ~(1ul << bit); + } + } + dumpsize += PAGE_SIZE; + + progress = dumpsize; + + /* Initialize mdhdr */ + bzero(&mdhdr, sizeof(mdhdr)); + strcpy(mdhdr.magic, MINIDUMP_MAGIC); + mdhdr.version = MINIDUMP_VERSION; + mdhdr.msgbufsize = msgbufp->msg_size; + mdhdr.bitmapsize = vm_page_dump_size; + mdhdr.ptesize = ptesize; + mdhdr.kernbase = KERNBASE; + mdhdr.paemode = pae_mode; + + dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, + dumpsize); + + error = dump_start(di, &kdh); + if (error != 0) + goto fail; + + printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); + printf("Dumping %llu MB:", (long long)dumpsize >> 20); + + /* Dump my header */ + bzero(&fakept, sizeof(fakept)); + bcopy(&mdhdr, &fakept, sizeof(mdhdr)); + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + + /* Dump msgbuf up front */ + error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); + if (error) + goto fail; + + /* Dump bitmap */ + error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); + if (error) + goto fail; + + /* Dump kernel page table pages */ + for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { + /* We always write a page, even if it is zero */ + pd = IdlePTD; /* always mapped! */ + j = va >> PDRSHIFT; + if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { + /* This is a single 2M block. Generate a fake PTP */ + pa = pd[j] & PG_PS_FRAME; + for (k = 0; k < NPTEPG; k++) { + fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; + } + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + /* flush, in case we reuse fakept in the same block */ + error = blk_flush(di); + if (error) + goto fail; + continue; + } + if ((pd[j] & PG_V) == PG_V) { + pa = pd[j] & PG_FRAME; + error = blk_write(di, 0, pa, PAGE_SIZE); + if (error) + goto fail; + } else { + bzero(fakept, sizeof(fakept)); + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + /* flush, in case we reuse fakept in the same block */ + error = blk_flush(di); + if (error) + goto fail; + } + } + + /* Dump memory chunks */ + /* XXX cluster it up and use blk_dump() */ + for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { + bits = vm_page_dump[i]; + while (bits) { + bit = bsfl(bits); + pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; + error = blk_write(di, 0, pa, PAGE_SIZE); + if (error) + goto fail; + bits &= ~(1ul << bit); + } + } + + error = blk_flush(di); + if (error) + goto fail; + + error = dump_finish(di, &kdh); + if (error != 0) + goto fail; + + printf("\nDump complete\n"); + return (0); + + fail: + if (error < 0) + error = -error; + + if (error == ECANCELED) + printf("\nDump aborted\n"); + else if (error == E2BIG || error == ENOSPC) + printf("\nDump failed. Partition too small.\n"); + else + printf("\n** DUMP FAILED (ERROR %d) **\n", error); + return (error); +} Property changes on: head/sys/i386/i386/minidump_machdep_base.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/sys/i386/i386/minidump_machdep_nopae.c =================================================================== --- head/sys/i386/i386/minidump_machdep_nopae.c (nonexistent) +++ head/sys/i386/i386/minidump_machdep_nopae.c (revision 343567) @@ -0,0 +1,40 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include "minidump_machdep_base.c" Property changes on: head/sys/i386/i386/minidump_machdep_nopae.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/sys/i386/i386/minidump_machdep_pae.c =================================================================== --- head/sys/i386/i386/minidump_machdep_pae.c (nonexistent) +++ head/sys/i386/i386/minidump_machdep_pae.c (revision 343567) @@ -0,0 +1,41 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#define PMAP_PAE_COMP +#include +#include +#include +#include +#include +#include "minidump_machdep_base.c" Property changes on: head/sys/i386/i386/minidump_machdep_pae.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/sys/i386/i386/mp_machdep.c =================================================================== --- head/sys/i386/i386/mp_machdep.c (revision 343566) +++ head/sys/i386/i386/mp_machdep.c (revision 343567) @@ -1,466 +1,462 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" #if !defined(lint) #if !defined(SMP) #error How did you get here? #endif #ifndef DEV_APIC #error The apic device is required for SMP, add "device apic" to your config file. #endif #endif /* not lint */ #include #include #include #include /* cngetc() */ #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467) #define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) /* * this code MUST be enabled here and in mpboot.s. * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) #define CHECK_INIT(D); \ CHECK_WRITE(0x34, (D)); \ CHECK_WRITE(0x35, (D)); \ CHECK_WRITE(0x36, (D)); \ CHECK_WRITE(0x37, (D)); \ CHECK_WRITE(0x38, (D)); \ CHECK_WRITE(0x39, (D)); #define CHECK_PRINT(S); \ printf("%s: %d, %d, %d, %d, %d, %d\n", \ (S), \ CHECK_READ(0x34), \ CHECK_READ(0x35), \ CHECK_READ(0x36), \ CHECK_READ(0x37), \ CHECK_READ(0x38), \ CHECK_READ(0x39)); #else /* CHECK_POINTS */ #define CHECK_INIT(D) #define CHECK_PRINT(S) #define CHECK_WRITE(A, D) #endif /* CHECK_POINTS */ extern struct pcpu __pcpu[]; /* * Local data and functions. */ static void install_ap_tramp(void); static int start_all_aps(void); static int start_ap(int apic_id); static char *ap_copyout_buf; static char *ap_tramp_stack_base; /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. */ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); set_interrupt_apic_ids(); } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; struct i386tss *common_tssp; struct region_descriptor r_gdt, r_idt; int gsel_tss, myid, x; u_int cr0; /* bootAP is set in start_ap() to our ID. */ myid = bootAP; /* Update microcode before doing anything else. */ ucode_load_ap(myid); /* Get per-cpu data */ pc = &__pcpu[myid]; /* prime data page for it to use */ pcpu_init(pc, myid, sizeof(struct pcpu)); dpcpu_init(dpcpu, myid); pc->pc_apic_id = cpu_apic_ids[myid]; pc->pc_prvspace = pc; pc->pc_curthread = 0; pc->pc_common_tssp = common_tssp = &(__pcpu[0].pc_common_tssp)[myid]; fix_cpuid(); gdt_segs[GPRIV_SEL].ssd_base = (int)pc; gdt_segs[GPROC0_SEL].ssd_base = (int)common_tssp; gdt_segs[GLDT_SEL].ssd_base = (int)ldt; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) &gdt[myid * NGDT]; lgdt(&r_gdt); /* does magic intra-segment return */ r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1; r_idt.rd_base = (int)idt; lidt(&r_idt); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); PCPU_SET(trampstk, (uintptr_t)ap_tramp_stack_base + TRAMP_STACK_SZ - VM86_STACK_SPACE); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; common_tssp->tss_esp0 = PCPU_GET(trampstk); common_tssp->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); common_tssp->tss_ioopt = sizeof(struct i386tss) << 16; PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); PCPU_SET(copyout_buf, ap_copyout_buf); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); CHECK_WRITE(0x38, 5); /* signal our startup to the BSP. */ mp_naps++; CHECK_WRITE(0x39, 6); /* Spin until the BSP releases the AP's. */ while (atomic_load_acq_int(&aps_ready) == 0) ia32_pause(); /* BSP may have changed PTD while we were waiting */ invltlb(); #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif init_secondary_tail(); } /* * start each AP in our list */ #define TMPMAP_START 1 static int start_all_aps(void) { u_char mpbiosreason; u_int32_t mpbioswarmvec; int apic_id, cpu; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); - /* Remap lowest 1MB */ - IdlePTD[0] = IdlePTD[1]; - load_cr3(rcr3()); /* invalidate TLB */ + pmap_remap_lower(true); /* install the AP 1st level boot code */ install_ap_tramp(); /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* take advantage of the P==V mapping for PTD[0] for AP boot */ /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up a boot stack data page */ bootstacks[cpu] = (char *)kmem_malloc(kstack_pages * PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 4; bootAP = cpu; ap_tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT); ap_copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT); /* attempt to start the Application Processor */ CHECK_INIT(99); /* setup checkpoints */ if (!start_ap(apic_id)) { printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); CHECK_PRINT("trace"); /* show checkpoints */ /* better panic as the AP may be running loose */ printf("panic y/n? [y] "); if (cngetc() != 'n') panic("bye-bye"); } CHECK_PRINT("trace"); /* show checkpoints */ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } - /* Unmap lowest 1MB again */ - IdlePTD[0] = 0; - load_cr3(rcr3()); + pmap_remap_lower(false); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); /* number of APs actually started */ return mp_naps; } /* * load the 1st level AP boot code into base memory. */ /* targets for relocation */ extern void bigJump(void); extern void bootCodeSeg(void); extern void bootDataSeg(void); extern void MPentry(void); extern u_int MP_GDT; extern u_int mp_gdtbase; static void install_ap_tramp(void) { int x; int size = *(int *) ((u_long) & bootMP_size); vm_offset_t va = boot_address; u_char *src = (u_char *) ((u_long) bootMP); u_char *dst = (u_char *) va; u_int boot_base = (u_int) bootMP; u_int8_t *dst8; u_int16_t *dst16; u_int32_t *dst32; KASSERT (size <= PAGE_SIZE, ("'size' do not fit into PAGE_SIZE, as expected.")); pmap_kenter(va, boot_address); pmap_invalidate_page (kernel_pmap, va); for (x = 0; x < size; ++x) *dst++ = *src++; /* * modify addresses in code we just moved to basemem. unfortunately we * need fairly detailed info about mpboot.s for this to work. changes * to mpboot.s might require changes here. */ /* boot code is located in KERNEL space */ dst = (u_char *) va; /* modify the lgdt arg */ dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); /* modify the ljmp target for MPentry() */ dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); *dst32 = (u_int)MPentry; /* modify the target for boot code segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; /* modify the target for boot data segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } Index: head/sys/i386/i386/mpboot.s =================================================================== --- head/sys/i386/i386/mpboot.s (revision 343566) +++ head/sys/i386/i386/mpboot.s (revision 343567) @@ -1,287 +1,283 @@ /*- * Copyright (c) 1995 Jack F. Vogel * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * mpboot.s: FreeBSD machine support for the Intel MP Spec * multiprocessor systems. * * $FreeBSD$ */ #include "opt_pmap.h" #include /* miscellaneous asm macros */ #include #include #include "assym.inc" /* * this code MUST be enabled here and in mp_machdep.c * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define CHECKPOINT(A,D) \ movb $(A),%al ; \ outb %al,$CMOS_REG ; \ movb $(D),%al ; \ outb %al,$CMOS_DATA #else #define CHECKPOINT(A,D) #endif /* CHECK_POINTS */ /* * the APs enter here from their trampoline code (bootMP, below) */ .p2align 4 NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) /* * Enable features on this processor. We don't support SMP on * CPUs older than a Pentium, so we know that we can use the cpuid * instruction. */ movl $1,%eax cpuid /* Retrieve features */ movl %cr4,%eax testl $CPUID_PSE,%edx jz 1f orl $CR4_PSE,%eax /* Enable PSE */ -1: - testl $CPUID_PGE,%edx - jz 1f +1: testl $CPUID_PGE,%edx + jz 2f orl $CR4_PGE,%eax /* Enable PGE */ -1: - testl $CPUID_VME,%edx - jz 1f +2: testl $CPUID_VME,%edx + jz 3f orl $CR4_VME,%eax /* Enable VME */ -1: - movl %eax,%cr4 +3: movl %eax,%cr4 /* Now enable paging mode */ -#if defined(PAE) || defined(PAE_TABLES) + cmpl $0, pae_mode + je 4f movl IdlePDPT, %eax movl %eax, %cr3 movl %cr4, %eax orl $CR4_PAE, %eax movl %eax, %cr4 movl $0x80000000, %eax cpuid movl $0x80000001, %ebx cmpl %ebx, %eax - jb 1f + jb 5f movl %ebx, %eax cpuid testl $AMDID_NX, %edx - je 1f + je 5f movl $MSR_EFER, %ecx rdmsr orl $EFER_NXE,%eax wrmsr -1: -#else - movl IdlePTD, %eax + jmp 5f +4: movl IdlePTD_nopae, %eax movl %eax,%cr3 -#endif - movl %cr0,%eax +5: movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ movl bootSTK,%esp /* boot stack end loc. */ pushl $mp_begin /* jump to high mem */ ret /* * Wait for the booting CPU to signal startup */ mp_begin: /* now running relocated at KERNBASE */ CHECKPOINT(0x37, 4) call init_secondary /* load i386 tables */ /* * This is the embedded trampoline or bootstrap that is * copied into 'real-mode' low memory, it is where the * secondary processor "wakes up". When it is executed * the processor will eventually jump into the routine * MPentry, which resides in normal kernel text above * 1Meg. -jackv */ .data ALIGN_DATA /* just to be sure */ BOOTMP1: NON_GPROF_ENTRY(bootMP) .code16 cli CHECKPOINT(0x34, 1) /* First guarantee a 'clean slate' */ xorl %eax, %eax movl %eax, %ebx movl %eax, %ecx movl %eax, %edx movl %eax, %esi movl %eax, %edi /* set up data segments */ mov %cs, %ax mov %ax, %ds mov %ax, %es mov %ax, %fs mov %ax, %gs mov %ax, %ss mov $(boot_stk-bootMP), %esp /* Now load the global descriptor table */ lgdt MP_GDTptr-bootMP /* Enable protected mode */ movl %cr0, %eax orl $CR0_PE, %eax movl %eax, %cr0 /* * make intrasegment jump to flush the processor pipeline and * reload CS register */ pushl $0x18 pushl $(protmode-bootMP) lretl .code32 protmode: CHECKPOINT(0x35, 2) /* * we are NOW running for the first time with %eip * having the full physical address, BUT we still * are using a segment descriptor with the origin * not matching the booting kernel. * * SO NOW... for the BIG Jump into kernel's segment * and physical text above 1 Meg. */ mov $0x10, %ebx movw %bx, %ds movw %bx, %es movw %bx, %fs movw %bx, %gs movw %bx, %ss .globl bigJump bigJump: /* this will be modified by mpInstallTramp() */ ljmp $0x08, $0 /* far jmp to MPentry() */ dead: hlt /* We should never get here */ jmp dead /* * MP boot strap Global Descriptor Table */ .p2align 4 .globl MP_GDT .globl bootCodeSeg .globl bootDataSeg MP_GDT: nulldesc: /* offset = 0x0 */ .word 0x0 .word 0x0 .byte 0x0 .byte 0x0 .byte 0x0 .byte 0x0 kernelcode: /* offset = 0x08 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0K */ .byte 0x9f /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..32 */ kerneldata: /* offset = 0x10 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0k */ .byte 0x93 /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..32 */ bootcode: /* offset = 0x18 */ .word 0xffff /* segment limit 0..15 */ bootCodeSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ .byte 0x9e /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /*segment base 24..32 */ bootdata: /* offset = 0x20 */ .word 0xffff bootDataSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ .byte 0x92 .byte 0xcf .byte 0x0 /* * GDT pointer for the lgdt call */ .globl mp_gdtbase MP_GDTptr: mp_gdtlimit: .word 0x0028 mp_gdtbase: /* this will be modified by mpInstallTramp() */ .long 0 .space 0x100 /* space for boot_stk - 1st temporary stack */ boot_stk: BOOTMP2: .globl bootMP_size bootMP_size: .long BOOTMP2 - BOOTMP1 Index: head/sys/i386/i386/pmap.c =================================================================== --- head/sys/i386/i386/pmap.c (revision 343566) +++ head/sys/i386/i386/pmap.c (revision 343567) @@ -1,6051 +1,6200 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * Copyright (c) 2018 The FreeBSD Foundation * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Portions of this software were developed by * Konstantin Belousov under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_apic.h" #include "opt_cpu.h" #include "opt_pmap.h" #include "opt_smp.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include #include #include #endif #include #include #include #include #include #include #include #ifdef SMP #include #endif +#include -#ifndef PMAP_SHPGPERPROC -#define PMAP_SHPGPERPROC 200 -#endif - #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) /* + * PTmap is recursive pagemap at top of virtual address space. + * Within PTmap, the page directory can be found (third indirection). + */ +#define PTmap ((pt_entry_t *)(PTDPTDI << PDRSHIFT)) +#define PTD ((pd_entry_t *)((PTDPTDI << PDRSHIFT) + (PTDPTDI * PAGE_SIZE))) +#define PTDpde ((pd_entry_t *)((PTDPTDI << PDRSHIFT) + (PTDPTDI * PAGE_SIZE) + \ + (PTDPTDI * PDESIZE))) + +/* + * Translate a virtual address to the kernel virtual address of its page table + * entry (PTE). This can be used recursively. If the address of a PTE as + * previously returned by this macro is itself given as the argument, then the + * address of the page directory entry (PDE) that maps the PTE will be + * returned. + * + * This macro may be used before pmap_bootstrap() is called. + */ +#define vtopte(va) (PTmap + i386_btop(va)) + +/* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ atomic_clear_int((u_int *)(pte), PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) -struct pmap kernel_pmap_store; +_Static_assert(sizeof(struct pmap) <= sizeof(struct pmap_KBI), + "pmap_KBI"); -vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ -vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static int pgeflag = 0; /* PG_G or-in */ static int pseflag = 0; /* PG_PS or-in */ static int nkpt = NKPT; -vm_offset_t kernel_vm_end = /* 0 + */ NKPT * NBPDR; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pt_entry_t pg_nx; static uma_zone_t pdptzone; #endif -static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); +_Static_assert(VM_MAXUSER_ADDRESS == VADDR(TRPTDI, 0), "VM_MAXUSER_ADDRESS"); +_Static_assert(VM_MAX_KERNEL_ADDRESS <= VADDR(PTDPTDI, 0), + "VM_MAX_KERNEL_ADDRESS"); +_Static_assert(PMAP_MAP_LOW == VADDR(LOWPTDI, 0), "PMAP_MAP_LOW"); +_Static_assert(KERNLOAD == (KERNPTDI << PDRSHIFT), "KERNLOAD"); -static int pat_works = 1; -SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, - "Is page attribute table fully functional?"); +extern int pat_works; +extern int pg_ps_enabled; -static int pg_ps_enabled = 1; -SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &pg_ps_enabled, 0, "Are large page mappings enabled?"); +extern int elf32_nxstack; #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ /* * pmap_mapdev support pre initialization (i.e. console) */ #define PMAP_PREINIT_MAPPING_COUNT 8 static struct pmap_preinit_mapping { vm_paddr_t pa; vm_offset_t va; vm_size_t sz; int mode; } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; static int pmap_initialized; static struct rwlock_padalign pvh_global_lock; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); -static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; +extern int pv_entry_max, pv_entry_count; +static int pv_entry_high_water = 0; static struct md_page *pv_table; -static int shpgperproc = PMAP_SHPGPERPROC; +extern int shpgperproc; -struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ -int pv_maxchunks; /* How many chunks we have KVA for */ -vm_offset_t pv_vafree; /* freelist stored in the PTE */ +static struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ +static int pv_maxchunks; /* How many chunks we have KVA for */ +static vm_offset_t pv_vafree; /* freelist stored in the PTE */ /* * All those kernel PT submaps that BSD is so fond of */ -pt_entry_t *CMAP3; +static pt_entry_t *CMAP3; static pd_entry_t *KPTD; -caddr_t ptvmmap = 0; -caddr_t CADDR3; +static caddr_t CADDR3; /* * Crashdump maps. */ static caddr_t crashdumpmap; static pt_entry_t *PMAP1 = NULL, *PMAP2, *PMAP3; static pt_entry_t *PADDR1 = NULL, *PADDR2, *PADDR3; #ifdef SMP static int PMAP1cpu, PMAP3cpu; -static int PMAP1changedcpu; -SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, - &PMAP1changedcpu, 0, - "Number of times pmap_pte_quick changed CPU with same PMAP1"); +extern int PMAP1changedcpu; #endif -static int PMAP1changed; -SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, - &PMAP1changed, 0, - "Number of times pmap_pte_quick changed PMAP1"); -static int PMAP1unchanged; -SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, - &PMAP1unchanged, 0, - "Number of times pmap_pte_quick didn't change PMAP1"); +extern int PMAP1changed; +extern int PMAP1unchanged; static struct mtx PMAP2mutex; -int pti; - /* * Internal flags for pmap_enter()'s helper functions. */ #define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ #define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags); #if VM_NRESERVLEVEL > 0 static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); #endif static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_pvh_wired_mappings(struct md_page *pvh, int count); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static bool pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, vm_page_t m); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); -static void pmap_flush_page(vm_page_t m); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); -static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, - vm_offset_t eva); -static void pmap_invalidate_cache_range_all(vm_offset_t sva, - vm_offset_t eva); static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); #if VM_NRESERVLEVEL > 0 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); #endif static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, struct spglist *free); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, struct spglist *free); static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, struct spglist *free); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags); static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags); static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free); static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, int wait); #endif static void pmap_init_trm(void); +static void pmap_invalidate_all_int(pmap_t pmap); static __inline void pagezero(void *page); CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); -void pmap_cold(void); extern char _end[]; -u_long physfree; /* phys addr of next free page */ -u_long vm86phystk; /* PA of vm86/bios stack */ -u_long vm86paddr; /* address of vm86 region */ -int vm86pa; /* phys addr of vm86 region */ -u_long KERNend; /* phys addr end of kernel (just after bss) */ -pd_entry_t *IdlePTD; /* phys addr of kernel PTD */ -#if defined(PAE) || defined(PAE_TABLES) +extern u_long physfree; /* phys addr of next free page */ +extern u_long vm86phystk;/* PA of vm86/bios stack */ +extern u_long vm86paddr;/* address of vm86 region */ +extern int vm86pa; /* phys addr of vm86 region */ +extern u_long KERNend; /* phys addr end of kernel (just after bss) */ +#ifdef PMAP_PAE_COMP +pd_entry_t *IdlePTD_pae; /* phys addr of kernel PTD */ pdpt_entry_t *IdlePDPT; /* phys addr of kernel PDPT */ +pt_entry_t *KPTmap_pae; /* address of kernel page tables */ +#define IdlePTD IdlePTD_pae +#define KPTmap KPTmap_pae +#else +pd_entry_t *IdlePTD_nopae; +pt_entry_t *KPTmap_nopae; +#define IdlePTD IdlePTD_nopae +#define KPTmap KPTmap_nopae #endif -pt_entry_t *KPTmap; /* address of kernel page tables */ -u_long KPTphys; /* phys addr of kernel page tables */ +extern u_long KPTphys; /* phys addr of kernel page tables */ extern u_long tramp_idleptd; static u_long allocpages(u_int cnt, u_long *physfree) { u_long res; res = *physfree; *physfree += PAGE_SIZE * cnt; bzero((void *)res, PAGE_SIZE * cnt); return (res); } static void pmap_cold_map(u_long pa, u_long va, u_long cnt) { pt_entry_t *pt; for (pt = (pt_entry_t *)KPTphys + atop(va); cnt > 0; cnt--, pt++, va += PAGE_SIZE, pa += PAGE_SIZE) *pt = pa | PG_V | PG_RW | PG_A | PG_M; } static void pmap_cold_mapident(u_long pa, u_long cnt) { pmap_cold_map(pa, pa, cnt); } -_Static_assert(2 * NBPDR == KERNBASE, "Broken double-map of zero PTD"); +_Static_assert(LOWPTDI * 2 * NBPDR == KERNBASE, + "Broken double-map of zero PTD"); +static void +__CONCAT(PMTYPE, remap_lower)(bool enable) +{ + int i; + + for (i = 0; i < LOWPTDI; i++) + IdlePTD[i] = enable ? IdlePTD[LOWPTDI + i] : 0; + load_cr3(rcr3()); /* invalidate TLB */ +} + /* * Called from locore.s before paging is enabled. Sets up the first * kernel page table. Since kernel is mapped with PA == VA, this code * does not require relocations. */ void -pmap_cold(void) +__CONCAT(PMTYPE, cold)(void) { pt_entry_t *pt; u_long a; u_int cr3, ncr4; physfree = (u_long)&_end; if (bootinfo.bi_esymtab != 0) physfree = bootinfo.bi_esymtab; if (bootinfo.bi_kernend != 0) physfree = bootinfo.bi_kernend; physfree = roundup2(physfree, NBPDR); KERNend = physfree; /* Allocate Kernel Page Tables */ KPTphys = allocpages(NKPT, &physfree); KPTmap = (pt_entry_t *)KPTphys; /* Allocate Page Table Directory */ -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP /* XXX only need 32 bytes (easier for now) */ IdlePDPT = (pdpt_entry_t *)allocpages(1, &physfree); #endif IdlePTD = (pd_entry_t *)allocpages(NPGPTD, &physfree); /* * Allocate KSTACK. Leave a guard page between IdlePTD and * proc0kstack, to control stack overflow for thread0 and * prevent corruption of the page table. We leak the guard * physical memory due to 1:1 mappings. */ allocpages(1, &physfree); proc0kstack = allocpages(TD0_KSTACK_PAGES, &physfree); /* vm86/bios stack */ vm86phystk = allocpages(1, &physfree); /* pgtable + ext + IOPAGES */ vm86paddr = vm86pa = allocpages(3, &physfree); /* Install page tables into PTD. Page table page 1 is wasted. */ for (a = 0; a < NKPT; a++) IdlePTD[a] = (KPTphys + ptoa(a)) | PG_V | PG_RW | PG_A | PG_M; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP /* PAE install PTD pointers into PDPT */ for (a = 0; a < NPGPTD; a++) IdlePDPT[a] = ((u_int)IdlePTD + ptoa(a)) | PG_V; #endif /* * Install recursive mapping for kernel page tables into * itself. */ for (a = 0; a < NPGPTD; a++) IdlePTD[PTDPTDI + a] = ((u_int)IdlePTD + ptoa(a)) | PG_V | PG_RW; /* * Initialize page table pages mapping physical address zero * through the (physical) end of the kernel. Many of these * pages must be reserved, and we reserve them all and map * them linearly for convenience. We do this even if we've * enabled PSE above; we'll just switch the corresponding * kernel PDEs before we turn on paging. * * This and all other page table entries allow read and write * access for various reasons. Kernel mappings never have any * access restrictions. */ - pmap_cold_mapident(0, atop(NBPDR)); - pmap_cold_map(0, NBPDR, atop(NBPDR)); + pmap_cold_mapident(0, atop(NBPDR) * LOWPTDI); + pmap_cold_map(0, NBPDR * LOWPTDI, atop(NBPDR) * LOWPTDI); pmap_cold_mapident(KERNBASE, atop(KERNend - KERNBASE)); /* Map page table directory */ -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pmap_cold_mapident((u_long)IdlePDPT, 1); #endif pmap_cold_mapident((u_long)IdlePTD, NPGPTD); /* Map early KPTmap. It is really pmap_cold_mapident. */ pmap_cold_map(KPTphys, (u_long)KPTmap, NKPT); /* Map proc0kstack */ pmap_cold_mapident(proc0kstack, TD0_KSTACK_PAGES); /* ISA hole already mapped */ pmap_cold_mapident(vm86phystk, 1); pmap_cold_mapident(vm86pa, 3); /* Map page 0 into the vm86 page table */ *(pt_entry_t *)vm86pa = 0 | PG_RW | PG_U | PG_A | PG_M | PG_V; /* ...likewise for the ISA hole for vm86 */ for (pt = (pt_entry_t *)vm86pa + atop(ISA_HOLE_START), a = 0; a < atop(ISA_HOLE_LENGTH); a++, pt++) *pt = (ISA_HOLE_START + ptoa(a)) | PG_RW | PG_U | PG_A | PG_M | PG_V; /* Enable PSE, PGE, VME, and PAE if configured. */ ncr4 = 0; if ((cpu_feature & CPUID_PSE) != 0) { ncr4 |= CR4_PSE; pseflag = PG_PS; /* * Superpage mapping of the kernel text. Existing 4k * page table pages are wasted. */ for (a = KERNBASE; a < KERNend; a += NBPDR) IdlePTD[a >> PDRSHIFT] = a | PG_PS | PG_A | PG_M | PG_RW | PG_V; } if ((cpu_feature & CPUID_PGE) != 0) { ncr4 |= CR4_PGE; pgeflag = PG_G; } ncr4 |= (cpu_feature & CPUID_VME) != 0 ? CR4_VME : 0; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP ncr4 |= CR4_PAE; #endif if (ncr4 != 0) load_cr4(rcr4() | ncr4); /* Now enable paging */ -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP cr3 = (u_int)IdlePDPT; #else cr3 = (u_int)IdlePTD; #endif tramp_idleptd = cr3; load_cr3(cr3); load_cr0(rcr0() | CR0_PG); /* * Now running relocated at KERNBASE where the system is * linked to run. */ /* * Remove the lowest part of the double mapping of low memory * to get some null pointer checks. */ - IdlePTD[0] = 0; - load_cr3(cr3); /* invalidate TLB */ + __CONCAT(PMTYPE, remap_lower)(false); + + kernel_vm_end = /* 0 + */ NKPT * NBPDR; +#ifdef PMAP_PAE_COMP + i386_pmap_VM_NFREEORDER = VM_NFREEORDER_PAE; + i386_pmap_VM_LEVEL_0_ORDER = VM_LEVEL_0_ORDER_PAE; + i386_pmap_PDRSHIFT = PDRSHIFT_PAE; +#else + i386_pmap_VM_NFREEORDER = VM_NFREEORDER_NOPAE; + i386_pmap_VM_LEVEL_0_ORDER = VM_LEVEL_0_ORDER_NOPAE; + i386_pmap_PDRSHIFT = PDRSHIFT_NOPAE; +#endif } +static void +__CONCAT(PMTYPE, set_nx)(void) +{ + +#ifdef PMAP_PAE_COMP + if ((amd_feature & AMDID_NX) == 0) + return; + pg_nx = PG_NX; + elf32_nxstack = 1; + /* EFER.EFER_NXE is set in initializecpu(). */ +#endif +} + /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after pmap_cold() created initial * kernel page table and enabled paging, and just syncs the pmap * module with what has already been done. */ -void -pmap_bootstrap(vm_paddr_t firstaddr) +static void +__CONCAT(PMTYPE, bootstrap)(vm_paddr_t firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; struct pcpu *pc; u_long res; int i; res = atop(firstaddr - (vm_paddr_t)KERNLOAD); /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated kernel page table pages so that vm_page structures * representing these pages will be created. The vm_page structures * are required for promotion of the corresponding kernel virtual * addresses to superpage mappings. */ vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt)); /* * Initialize the first available kernel virtual address. * However, using "firstaddr" may waste a few pages of the * kernel virtual address space, because pmap_cold() may not * have mapped every physical page that it allocated. * Preferably, pmap_cold() would provide a first unused * virtual address in addition to "firstaddr". */ virtual_avail = (vm_offset_t)firstaddr; virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize the kernel pmap (which is statically allocated). * Count bootstrap data as being resident in case any of this data is * later unmapped (using pmap_remove()) and freed. */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pdir = IdlePTD; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP kernel_pmap->pm_pdpt = IdlePDPT; #endif CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ kernel_pmap->pm_stats.resident_count = res; TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * Initialize temporary map objects on the current CPU for use * during early boot. * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the boot-time memory test. */ pc = get_pcpu(); mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF); SYSMAP(caddr_t, pc->pc_cmap_pte1, pc->pc_cmap_addr1, 1) SYSMAP(caddr_t, pc->pc_cmap_pte2, pc->pc_cmap_addr2, 1) SYSMAP(vm_offset_t, pte, pc->pc_qmap_addr, 1) SYSMAP(caddr_t, CMAP3, CADDR3, 1); /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, unused, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. */ SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize))) /* * KPTmap is used by pmap_kextract(). * * KPTmap is first initialized by pmap_cold(). However, that initial * KPTmap can only support NKPT page table pages. Here, a larger * KPTmap is created that can support KVA_PAGES page table pages. */ SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES) for (i = 0; i < NKPT; i++) KPTD[i] = (KPTphys + ptoa(i)) | PG_RW | PG_V; /* * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(), * respectively. */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1) SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1) SYSMAP(pt_entry_t *, PMAP3, PADDR3, 1) mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); virtual_avail = va; /* * Initialize the PAT MSR if present. * pmap_init_pat() clears and sets CR4_PGE, which, as a * side-effect, invalidates stale PG_G TLB entries that might * have been created in our pre-boot environment. We assume * that PAT support implies PGE and in reverse, PGE presence * comes with PAT. Both features were added for Pentium Pro. */ pmap_init_pat(); } static void pmap_init_reserved_pages(void) { struct pcpu *pc; vm_offset_t pages; int i; +#ifdef PMAP_PAE_COMP + if (!pae_mode) + return; +#else + if (pae_mode) + return; +#endif CPU_FOREACH(i) { pc = pcpu_find(i); mtx_init(&pc->pc_copyout_mlock, "cpmlk", NULL, MTX_DEF | MTX_NEW); pc->pc_copyout_maddr = kva_alloc(ptoa(2)); if (pc->pc_copyout_maddr == 0) panic("unable to allocate non-sleepable copyout KVA"); sx_init(&pc->pc_copyout_slock, "cpslk"); pc->pc_copyout_saddr = kva_alloc(ptoa(2)); if (pc->pc_copyout_saddr == 0) panic("unable to allocate sleepable copyout KVA"); pc->pc_pmap_eh_va = kva_alloc(ptoa(1)); if (pc->pc_pmap_eh_va == 0) panic("unable to allocate pmap_extract_and_hold KVA"); pc->pc_pmap_eh_ptep = (char *)vtopte(pc->pc_pmap_eh_va); /* * Skip if the mappings have already been initialized, * i.e. this is the BSP. */ if (pc->pc_cmap_addr1 != 0) continue; mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF); pages = kva_alloc(PAGE_SIZE * 3); if (pages == 0) panic("unable to allocate CMAP KVA"); pc->pc_cmap_pte1 = vtopte(pages); pc->pc_cmap_pte2 = vtopte(pages + PAGE_SIZE); pc->pc_cmap_addr1 = (caddr_t)pages; pc->pc_cmap_addr2 = (caddr_t)(pages + PAGE_SIZE); pc->pc_qmap_addr = pages + ptoa(2); } } SYSINIT(rpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_reserved_pages, NULL); /* * Setup the PAT MSR. */ -void -pmap_init_pat(void) +static void +__CONCAT(PMTYPE, init_pat)(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* * Bail if this CPU doesn't implement PAT. * We assume that PAT support implies PGE. */ if ((cpu_feature & CPUID_PAT) == 0) { for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; pat_works = 0; return; } /* * Due to some Intel errata, we can only safely use the lower 4 * PAT entries. * * Intel Pentium III Processor Specification Update * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B * or Mode C Paging) * * Intel Pentium IV Processor Specification Update * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) */ if (cpu_vendor_id == CPU_VENDOR_INTEL && !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) pat_works = 0; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. */ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } -/* - * Initialize a vm_page's machine-dependent fields. - */ -void -pmap_page_init(vm_page_t m) -{ - - TAILQ_INIT(&m->md.pv_list); - m->md.pat_mode = PAT_WRITE_BACK; -} - -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP static void * pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ *flags = UMA_SLAB_KERNEL; return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain), bytes, wait, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); } #endif /* * Abuse the pte nodes for unmapped kva to thread a kva freelist through. * Requirements: * - Must deal with pages in order to ensure that none of the PG_* bits * are ever set, PG_V in particular. * - Assumes we can write to ptes without pte_store() atomic ops, even * on PAE systems. This should be ok. * - Assumes nothing will ever test these addresses for 0 to indicate * no mapping instead of correctly checking PG_V. * - Assumes a vm_offset_t will fit in a pte (true for i386). * Because PG_V is never set, there can be no mappings to invalidate. */ static vm_offset_t pmap_ptelist_alloc(vm_offset_t *head) { pt_entry_t *pte; vm_offset_t va; va = *head; if (va == 0) panic("pmap_ptelist_alloc: exhausted ptelist KVA"); pte = vtopte(va); *head = *pte; if (*head & PG_V) panic("pmap_ptelist_alloc: va with PG_V set!"); *pte = 0; return (va); } static void pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) { pt_entry_t *pte; if (va & PG_V) panic("pmap_ptelist_free: freeing va with PG_V set!"); pte = vtopte(va); *pte = *head; /* virtual! PG_V is 0 though */ *head = va; } static void pmap_ptelist_init(vm_offset_t *head, void *base, int npages) { int i; vm_offset_t va; *head = 0; for (i = npages - 1; i >= 0; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_ptelist_free(head, va); } } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ -void -pmap_init(void) +static void +__CONCAT(PMTYPE, init)(void) { struct pmap_preinit_mapping *ppim; vm_page_t mpte; vm_size_t s; int i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ PMAP_LOCK(kernel_pmap); for (i = 0; i < NKPT; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + ptoa(i)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = i + KPTDI; mpte->phys_addr = KPTphys + ptoa(i); mpte->wire_count = 1; if (pseflag != 0 && KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend && pmap_insert_pt_page(kernel_pmap, mpte)) panic("pmap_init: pmap_insert_pt_page failed"); } PMAP_UNLOCK(kernel_pmap); vm_wire_add(NKPT); /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * If the kernel is running on a virtual machine, then it must assume * that MCA is enabled by the hypervisor. Moreover, the kernel must * be prepared for the hypervisor changing the vendor and family that * are reported by CPUID. Consequently, the workaround for AMD Family * 10h Erratum 383 is enabled if the processor's feature set does not * include at least one feature that is only supported by older Intel * or newer AMD processors. */ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 && (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI | CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP | AMDID2_FMA4)) == 0) workaround_erratum383 = 1; /* * Are large page mappings supported and enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pseflag == 0) pg_ps_enabled = 0; else if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Calculate the size of the pv head table for superpages. * Handle the possibility that "vm_phys_segs[...].end" is zero. */ pv_npg = trunc_4mpage(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE) / NBPDR + 1; /* * Allocate memory for the pv head table for superpages. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("pmap_init: not enough kvm for pv chunks"); pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, UMA_ZONE_VM | UMA_ZONE_NOFREE); uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); #endif pmap_initialized = 1; pmap_init_trm(); if (!bootverbose) return; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) continue; printf("PPIM %u: PA=%#jx, VA=%#x, size=%#x, mode=%#x\n", i, (uintmax_t)ppim->pa, ppim->va, ppim->sz, ppim->mode); } } +extern u_long pmap_pde_demotions; +extern u_long pmap_pde_mappings; +extern u_long pmap_pde_p_failures; +extern u_long pmap_pde_promotions; -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, - "Max number of PV entries"); -SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, - "Page share factor per proc"); - -static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, - "2/4MB page mapping counters"); - -static u_long pmap_pde_demotions; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, - &pmap_pde_demotions, 0, "2/4MB page demotions"); - -static u_long pmap_pde_mappings; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, - &pmap_pde_mappings, 0, "2/4MB page mappings"); - -static u_long pmap_pde_p_failures; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, - &pmap_pde_p_failures, 0, "2/4MB page promotion failures"); - -static u_long pmap_pde_promotions; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, - &pmap_pde_promotions, 0, "2/4MB page promotions"); - /*************************************************** * Low level helper routines..... ***************************************************/ -boolean_t -pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode) +static boolean_t +__CONCAT(PMTYPE, is_valid_memattr)(pmap_t pmap __unused, vm_memattr_t mode) { return (mode >= 0 && mode < PAT_INDEX_SIZE && pat_index[(int)mode] >= 0); } /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ -int -pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) +static int +__CONCAT(PMTYPE, cache_bits)(pmap_t pmap, int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (!pmap_is_valid_memattr(pmap, mode)) panic("Unknown caching mode %d\n", mode); /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; return (cache_bits); } -bool -pmap_ps_enabled(pmap_t pmap __unused) +static bool +__CONCAT(PMTYPE, ps_enabled)(pmap_t pmap __unused) { return (pg_ps_enabled); } /* * The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde) { pd_entry_t *pde; pde = pmap_pde(kernel_pmap, va); pde_store(pde, newpde); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. */ static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) { if ((newpde & PG_PS) == 0) /* Demotion: flush a specific 2MB page mapping. */ invlpg(va); else /* if ((newpde & PG_G) == 0) */ /* * Promotion: flush every 4KB page mapping from the TLB * because there are too many to flush individually. */ invltlb(); } -void -invltlb_glob(void) -{ - - invltlb(); -} - - #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions stills reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. */ -void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +static void +pmap_invalidate_page_int(pmap_t pmap, vm_offset_t va) { cpuset_t *mask, other_cpus; u_int cpuid; sched_pin(); if (pmap == kernel_pmap) { invlpg(va); mask = &all_cpus; } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) { mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invlpg(*mask, va, pmap); sched_unpin(); } /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ #define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) -void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +static void +pmap_invalidate_range_int(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask, other_cpus; vm_offset_t addr; u_int cpuid; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); return; } sched_pin(); if (pmap == kernel_pmap) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); mask = &all_cpus; } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) { mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invlpg_range(*mask, sva, eva, pmap); sched_unpin(); } -void -pmap_invalidate_all(pmap_t pmap) +static void +pmap_invalidate_all_int(pmap_t pmap) { cpuset_t *mask, other_cpus; u_int cpuid; sched_pin(); if (pmap == kernel_pmap) { invltlb(); mask = &all_cpus; } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) { mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invltlb(*mask, pmap); sched_unpin(); } -void -pmap_invalidate_cache(void) +static void +__CONCAT(PMTYPE, invalidate_cache)(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; u_int store; /* processor that updates the PDE */ }; static void pmap_update_pde_kernel(void *arg) { struct pde_action *act = arg; pd_entry_t *pde; if (act->store == PCPU_GET(cpuid)) { pde = pmap_pde(kernel_pmap, act->va); pde_store(pde, act->newpde); } } static void pmap_update_pde_user(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpuid)) pde_store(act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate)) pmap_update_pde_invalidate(act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. */ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpuset_t active, other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpuid; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; CPU_SET(cpuid, &active); smp_rendezvous_cpus(active, smp_no_rendezvous_barrier, pmap == kernel_pmap ? pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); } else { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (CPU_ISSET(cpuid, &active)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. */ -PMAP_INLINE void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +static void +pmap_invalidate_page_int(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap) invlpg(va); } -PMAP_INLINE void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +static void +pmap_invalidate_range_int(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } -PMAP_INLINE void -pmap_invalidate_all(pmap_t pmap) +static void +pmap_invalidate_all_int(pmap_t pmap) { if (pmap == kernel_pmap) invltlb(); } -PMAP_INLINE void -pmap_invalidate_cache(void) +static void +__CONCAT(PMTYPE, invalidate_cache)(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ static void -pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde) +__CONCAT(PMTYPE, invalidate_page)(pmap_t pmap, vm_offset_t va) { - /* - * When the PDE has PG_PROMOTED set, the 2- or 4MB page mapping was - * created by a promotion that did not invalidate the 512 or 1024 4KB - * page mappings that might exist in the TLB. Consequently, at this - * point, the TLB may hold both 4KB and 2- or 4MB page mappings for - * the address range [va, va + NBPDR). Therefore, the entire range - * must be invalidated here. In contrast, when PG_PROMOTED is clear, - * the TLB will not hold any 4KB page mappings for the address range - * [va, va + NBPDR), and so a single INVLPG suffices to invalidate the - * 2- or 4MB page mapping from the TLB. - */ - if ((pde & PG_PROMOTED) != 0) - pmap_invalidate_range(pmap, va, va + NBPDR - 1); - else - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); } -DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t), - static) +static void +__CONCAT(PMTYPE, invalidate_range)(pmap_t pmap, vm_offset_t sva, + vm_offset_t eva) { - if ((cpu_feature & CPUID_SS) != 0) - return (pmap_invalidate_cache_range_selfsnoop); - if ((cpu_feature & CPUID_CLFSH) != 0) - return (pmap_force_invalidate_cache_range); - return (pmap_invalidate_cache_range_all); + pmap_invalidate_range_int(pmap, sva, eva); } -#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) - static void -pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva) +__CONCAT(PMTYPE, invalidate_all)(pmap_t pmap) { - KASSERT((sva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: sva not page-aligned")); - KASSERT((eva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: eva not page-aligned")); + pmap_invalidate_all_int(pmap); } static void -pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva) +pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde) { - pmap_invalidate_cache_range_check_align(sva, eva); -} - -void -pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) -{ - - sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); - if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) { - /* - * The supplied range is bigger than 2MB. - * Globally invalidate cache. - */ - pmap_invalidate_cache(); - return; - } - -#ifdef DEV_APIC /* - * XXX: Some CPUs fault, hang, or trash the local APIC - * registers if we use CLFLUSH on the local APIC - * range. The local APIC is always uncached, so we - * don't need to flush for that range anyway. + * When the PDE has PG_PROMOTED set, the 2- or 4MB page mapping was + * created by a promotion that did not invalidate the 512 or 1024 4KB + * page mappings that might exist in the TLB. Consequently, at this + * point, the TLB may hold both 4KB and 2- or 4MB page mappings for + * the address range [va, va + NBPDR). Therefore, the entire range + * must be invalidated here. In contrast, when PG_PROMOTED is clear, + * the TLB will not hold any 4KB page mappings for the address range + * [va, va + NBPDR), and so a single INVLPG suffices to invalidate the + * 2- or 4MB page mapping from the TLB. */ - if (pmap_kextract(sva) == lapic_paddr) - return; -#endif - - if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) { - /* - * Do per-cache line flush. Use the sfence - * instruction to insure that previous stores are - * included in the write-back. The processor - * propagates flush to other processors in the cache - * coherence domain. - */ - sfence(); - for (; sva < eva; sva += cpu_clflush_line_size) - clflushopt(sva); - sfence(); - } else { - /* - * Writes are ordered by CLFLUSH on Intel CPUs. - */ - if (cpu_vendor_id != CPU_VENDOR_INTEL) - mfence(); - for (; sva < eva; sva += cpu_clflush_line_size) - clflush(sva); - if (cpu_vendor_id != CPU_VENDOR_INTEL) - mfence(); - } + if ((pde & PG_PROMOTED) != 0) + pmap_invalidate_range_int(pmap, va, va + NBPDR - 1); + else + pmap_invalidate_page_int(pmap, va); } -static void -pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva) -{ - - pmap_invalidate_cache_range_check_align(sva, eva); - pmap_invalidate_cache(); -} - -void -pmap_invalidate_cache_pages(vm_page_t *pages, int count) -{ - int i; - - if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || - (cpu_feature & CPUID_CLFSH) == 0) { - pmap_invalidate_cache(); - } else { - for (i = 0; i < count; i++) - pmap_flush_page(pages[i]); - } -} - /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap); } /* * If the given pmap is not the current or kernel pmap, the returned pte must * be released by passing it to pmap_pte_release(). */ -pt_entry_t * -pmap_pte(pmap_t pmap, vm_offset_t va) +static pt_entry_t * +__CONCAT(PMTYPE, pte)(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); mtx_lock(&PMAP2mutex); newpf = *pde & PG_FRAME; if ((*PMAP2 & PG_FRAME) != newpf) { *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; - pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); + pmap_invalidate_page_int(kernel_pmap, + (vm_offset_t)PADDR2); } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } return (NULL); } /* * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte * being NULL. */ static __inline void pmap_pte_release(pt_entry_t *pte) { if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) mtx_unlock(&PMAP2mutex); } /* * NB: The sequence of updating a page table followed by accesses to the * corresponding pages is subject to the situation described in the "AMD64 * Architecture Programmer's Manual Volume 2: System Programming" rev. 3.23, * "7.3.1 Special Coherency Considerations". Therefore, issuing the INVLPG * right after modifying the PTE bits is crucial. */ static __inline void invlcaddr(void *caddr) { invlpg((u_int)caddr); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. * * If the given pmap is not the current pmap, pvh_global_lock * must be held and curthread pinned to a CPU. */ static pt_entry_t * pmap_pte_quick(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); newpf = *pde & PG_FRAME; if ((*PMAP1 & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } static pt_entry_t * pmap_pte_quick3(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); newpf = *pde & PG_FRAME; if ((*PMAP3 & PG_FRAME) != newpf) { *PMAP3 = newpf | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP3cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR3); PMAP1changed++; } else #ifdef SMP if (PMAP3cpu != PCPU_GET(cpuid)) { PMAP3cpu = PCPU_GET(cpuid); invlcaddr(PADDR3); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR3 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } static pt_entry_t pmap_pte_ufast(pmap_t pmap, vm_offset_t va, pd_entry_t pde) { pt_entry_t *eh_ptep, pte, *ptep; PMAP_LOCK_ASSERT(pmap, MA_OWNED); pde &= PG_FRAME; critical_enter(); eh_ptep = (pt_entry_t *)PCPU_GET(pmap_eh_ptep); if ((*eh_ptep & PG_FRAME) != pde) { *eh_ptep = pde | PG_RW | PG_V | PG_A | PG_M; invlcaddr((void *)PCPU_GET(pmap_eh_va)); } ptep = (pt_entry_t *)PCPU_GET(pmap_eh_va) + (i386_btop(va) & (NPTEPG - 1)); pte = *ptep; critical_exit(); return (pte); } /* + * Extract from the kernel page table the physical address that is mapped by + * the given virtual address "va". + * + * This function may be used before pmap_bootstrap() is called. + */ +static vm_paddr_t +__CONCAT(PMTYPE, kextract)(vm_offset_t va) +{ + vm_paddr_t pa; + + if ((pa = pte_load(&PTD[va >> PDRSHIFT])) & PG_PS) { + pa = (pa & PG_PS_FRAME) | (va & PDRMASK); + } else { + /* + * Beware of a concurrent promotion that changes the PDE at + * this point! For example, vtopte() must not be used to + * access the PTE because it would use the new PDE. It is, + * however, safe to use the old PDE because the page table + * page is preserved by the promotion. + */ + pa = KPTmap[i386_btop(va)]; + pa = (pa & PG_FRAME) | (va & PAGE_MASK); + } + return (pa); +} + +/* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ -vm_paddr_t -pmap_extract(pmap_t pmap, vm_offset_t va) +static vm_paddr_t +__CONCAT(PMTYPE, extract)(pmap_t pmap, vm_offset_t va) { vm_paddr_t rtval; pt_entry_t pte; pd_entry_t pde; rtval = 0; PMAP_LOCK(pmap); pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); else { pte = pmap_pte_ufast(pmap, va, pde); rtval = (pte & PG_FRAME) | (va & PAGE_MASK); } } PMAP_UNLOCK(pmap); return (rtval); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ -vm_page_t -pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +static vm_page_t +__CONCAT(PMTYPE, extract_and_hold)(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; pt_entry_t pte; vm_page_t m; vm_paddr_t pa; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE(pa); } } else { pte = pmap_pte_ufast(pmap, va, pde); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pa); } } if (m != NULL) vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ -PMAP_INLINE void -pmap_kenter(vm_offset_t va, vm_paddr_t pa) +static void +__CONCAT(PMTYPE, kenter)(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pmap_cache_bits(kernel_pmap, mode, 0)); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ -PMAP_INLINE void -pmap_kremove(vm_offset_t va) +static void +__CONCAT(PMTYPE, kremove)(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ -vm_offset_t -pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) +static vm_offset_t +__CONCAT(PMTYPE, map)(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, + int prot) { vm_offset_t va, sva; vm_paddr_t superpage_offset; pd_entry_t newpde; va = *virt; /* * Does the physical address range's size and alignment permit at * least one superpage mapping to be created? */ superpage_offset = start & PDRMASK; if ((end - start) - ((NBPDR - superpage_offset) & PDRMASK) >= NBPDR) { /* * Increase the starting virtual address so that its alignment * does not preclude the use of superpage mappings. */ if ((va & PDRMASK) < superpage_offset) va = (va & ~PDRMASK) + superpage_offset; else if ((va & PDRMASK) > superpage_offset) va = ((va + PDRMASK) & ~PDRMASK) + superpage_offset; } sva = va; while (start < end) { if ((start & PDRMASK) == 0 && end - start >= NBPDR && pseflag != 0) { KASSERT((va & PDRMASK) == 0, ("pmap_map: misaligned va %#x", va)); newpde = start | PG_PS | PG_RW | PG_V; pmap_kenter_pde(va, newpde); va += NBPDR; start += NBPDR; } else { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } } - pmap_invalidate_range(kernel_pmap, sva, va); + pmap_invalidate_range_int(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ -void -pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) +static void +__CONCAT(PMTYPE, qenter)(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0); if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) { oldpte |= *pte; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pte_store(pte, pa | pg_nx | PG_RW | PG_V); #else pte_store(pte, pa | PG_RW | PG_V); #endif } pte++; } if (__predict_false((oldpte & PG_V) != 0)) - pmap_invalidate_range(kernel_pmap, sva, sva + count * + pmap_invalidate_range_int(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ -void -pmap_qremove(vm_offset_t sva, int count) +static void +__CONCAT(PMTYPE, qremove)(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } - pmap_invalidate_range(kernel_pmap, sva, va); + pmap_invalidate_range_int(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_insert(&pmap->pm_root, mpte)); } /* * Removes the page table page mapping the specified virtual address from the * specified pmap's collection of idle page table pages, and returns it. * Otherwise, returns NULL if there is no page table page corresponding to the * specified virtual address. */ static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_remove(&pmap->pm_root, va >> PDRSHIFT)); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_ptp(pmap, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free) { /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; /* * There is not need to invalidate the recursive mapping since * we never instantiate such mapping for the usermode pmaps, * and never remove page table pages from the kernel pmap. * Put page on a list so that it is released since all TLB * shootdown is done. */ MPASS(pmap != kernel_pmap); pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, struct spglist *free) { pd_entry_t ptepde; vm_page_t mpte; if (pmap == kernel_pmap) return (0); ptepde = *pmap_pde(pmap, va); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_ptp(pmap, mpte, free)); } /* * Initialize the pmap for the swapper process. */ -void -pmap_pinit0(pmap_t pmap) +static void +__CONCAT(PMTYPE, pinit0)(pmap_t pmap) { PMAP_LOCK_INIT(pmap); pmap->pm_pdir = IdlePTD; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pmap->pm_pdpt = IdlePDPT; #endif pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap_activate_boot(pmap); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ -int -pmap_pinit(pmap_t pmap) +static int +__CONCAT(PMTYPE, pinit)(pmap_t pmap) { vm_page_t m; int i; /* * No need to allocate page table space yet but we do need a valid * page directory table. */ if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD); if (pmap->pm_pdir == NULL) return (0); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); KASSERT(((vm_offset_t)pmap->pm_pdpt & ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, ("pmap_pinit: pdpt misaligned")); KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), ("pmap_pinit: pdpt above 4g")); #endif pmap->pm_root.rt_root = 0; } KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_pinit: pmap has reserved page table page(s)")); /* * allocate the page directory page(s) */ - for (i = 0; i < NPGPTD;) { + for (i = 0; i < NPGPTD; i++) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | - VM_ALLOC_WIRED | VM_ALLOC_ZERO); - if (m == NULL) { - vm_wait(NULL); - } else { - pmap->pm_ptdpg[i] = m; -#if defined(PAE) || defined(PAE_TABLES) - pmap->pm_pdpt[i] = VM_PAGE_TO_PHYS(m) | PG_V; + VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK); + pmap->pm_ptdpg[i] = m; +#ifdef PMAP_PAE_COMP + pmap->pm_pdpt[i] = VM_PAGE_TO_PHYS(m) | PG_V; #endif - i++; - } } pmap_qenter((vm_offset_t)pmap->pm_pdir, pmap->pm_ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) if ((pmap->pm_ptdpg[i]->flags & PG_ZERO) == 0) pagezero(pmap->pm_pdir + (i * NPDEPG)); /* Install the trampoline mapping. */ pmap->pm_pdir[TRPTDI] = PTD[TRPTDI]; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags) { vm_paddr_t ptepa; vm_page_t m; /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); vm_wait(NULL); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); return (m); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags) { u_int ptepindex; pd_entry_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va); ptepa = pmap->pm_pdir[ptepindex]; } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has * been deallocated. */ m = _pmap_allocpte(pmap, ptepindex, flags); if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ -void -pmap_release(pmap_t pmap) +static void +__CONCAT(PMTYPE, release)(pmap_t pmap) { vm_page_t m; int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); for (i = 0; i < NPGPTD; i++) { m = pmap->pm_ptdpg[i]; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), ("pmap_release: got wrong ptd page")); #endif vm_page_unwire_noq(m); vm_page_free(m); } } -static int -kvm_size(SYSCTL_HANDLER_ARGS) -{ - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - - return (sysctl_handle_long(oidp, &ksize, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_size, "IU", "Size of KVM"); - -static int -kvm_free(SYSCTL_HANDLER_ARGS) -{ - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - - return (sysctl_handle_long(oidp, &kfree, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_free, "IU", "Amount of KVM free"); - /* * grow the number of kernel page table entries, if needed */ -void -pmap_growkernel(vm_offset_t addr) +static void +__CONCAT(PMTYPE, growkernel)(vm_offset_t addr) { vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, NBPDR); if (addr - 1 >= vm_map_max(kernel_map)) addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { kernel_vm_end = vm_map_max(kernel_map); break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(KPTD, kernel_vm_end) = newpdir; pmap_kenter_pde(kernel_vm_end, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { kernel_vm_end = vm_map_max(kernel_map); break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 11); CTASSERT(_NPCPV == 336); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ #define PC_FREE10 0x0000fffful /* Free values for index 10 */ static const uint32_t pc_freemask[_NPCM] = { PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE10 }; -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, - "Current number of pv entries"); - #ifdef PV_STATS -static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; - -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, - "Current number of pv entry chunks"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, - "Current number of pv entry chunks allocated"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, - "Current number of pv entry chunks frees"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, - "Number of times tried to get a chunk page but failed."); - -static long pv_entry_frees, pv_entry_allocs; -static int pv_entry_spare; - -SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, - "Current number of pv entry frees"); -SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, - "Current number of pv entry allocs"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, - "Current number of spare pv entries"); +extern int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; +extern long pv_entry_frees, pv_entry_allocs; +extern int pv_entry_spare; #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. */ static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap) { struct pch newtail; struct pv_chunk *pc; struct md_page *pvh; pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint32_t inuse; int bit, field, freed; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); pmap = NULL; m_pc = NULL; SLIST_INIT(&free); TAILQ_INIT(&newtail); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || SLIST_EMPTY(&free))) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); if (pmap != pc->pc_pmap) { if (pmap != NULL) { - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } } /* * Destroy every non-wired, 4 KB page mapping in the chunk. */ freed = 0; for (field = 0; field < _NPCM; field++) { for (inuse = ~pc->pc_map[field] & pc_freemask[field]; inuse != 0; inuse &= ~(1UL << bit)) { bit = bsfl(inuse); pv = &pc->pc_pventry[field * 32 + bit]; va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) continue; - pte = pmap_pte(pmap, va); + pte = __CONCAT(PMTYPE, pte)(pmap, va); tpte = *pte; if ((tpte & PG_W) == 0) tpte = pte_load_clear(pte); pmap_pte_release(pte); if ((tpte & PG_W) != 0) continue; KASSERT(tpte != 0, ("pmap_pv_reclaim: pmap %p va %x zero pte", pmap, va)); if ((tpte & PG_G) != 0) - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) { vm_page_aflag_clear(m, PGA_WRITEABLE); } } pc->pc_map[field] |= 1UL << bit; pmap_unuse_pt(pmap, va, &free); freed++; } } if (freed == 0) { TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } /* Every freed mapping is for a 4 KB page. */ pmap->pm_stats.resident_count -= freed; PV_STAT(pv_entry_frees += freed); PV_STAT(pv_entry_spare += freed); pv_entry_count -= freed; TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != pc_freemask[field]) { TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); /* * One freed pv entry in locked_pmap is * sufficient. */ if (pmap == locked_pmap) goto out; break; } if (field == _NPCM) { PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* Entire chunk is free; return it. */ m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); break; } } out: TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); if (pmap != NULL) { - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. */ m_pc->wire_count = 1; } vm_page_free_pages_toq(&free, true); return (m_pc); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 32; bit = idx % 32; pc->pc_map[field] |= 1ul << bit; for (idx = 0; idx < _NPCM; idx++) if (pc->pc_map[idx] != pc_freemask[idx]) { /* * 98% of the time, pc is already at the head of the * list. If it isn't already, move it to the head. */ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != pc)) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; TAILQ_REMOVE(&pv_chunks, pc, pc_lru); PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, PQ_NONE); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } /* * get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entries tunable.\n"); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfl(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 32 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the ptelist "pv_vafree" is synchronized by the pvh * global lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_ptelist_alloc() completes. */ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } m = pmap_pv_reclaim(pmap); if (m == NULL) goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } return (pv); } static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 4mpage aligned")); /* * Transfer the 4mpage's pv entry for this mapping to the first * page's pv list. */ pvh = pa_to_pvh(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); va += PAGE_SIZE; pmap_insert_entry(pmap, va, m); } while (va < va_last); } #if VM_NRESERVLEVEL > 0 static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 4mpage aligned")); /* * Transfer the first page's pv entry for this mapping to the * 4mpage's pv list. Aside from avoiding the cost of a call * to get_pv_entry(), a transfer avoids the possibility that * get_pv_entry() calls pmap_collect() and that pmap_collect() * removes one of the mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } #endif /* VM_NRESERVLEVEL > 0 */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { struct md_page *pvh; rw_assert(&pvh_global_lock, RA_WLOCKED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Create the pv entries for each of the pages within a superpage. */ static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags) { struct md_page *pvh; pv_entry_t pv; bool noreclaim; rw_assert(&pvh_global_lock, RA_WLOCKED); noreclaim = (flags & PMAP_ENTER_NORECLAIM) != 0; if ((noreclaim && pv_entry_count >= pv_entry_high_water) || (pv = get_pv_entry(pmap, noreclaim)) == NULL) return (false); pv->pv_va = va; pvh = pa_to_pvh(pde & PG_PS_FRAME); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (true); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2- or 4MB page mapping. If demotion fails, the * 2- or 4MB page mapping is invalidated. */ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; vm_offset_t sva; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == NULL) { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2- or 4MB page mapping and return * "failure" if the mapping was never accessed or the * allocation of the new page table page fails. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); sva = trunc_4mpage(va); pmap_remove_pde(pmap, pde, sva, &free); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, sva, oldpde); vm_page_free_pages_toq(&free, true); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" " in pmap %p", va, pmap); return (FALSE); } if (pmap != kernel_pmap) pmap->pm_stats.resident_count++; } mptepa = VM_PAGE_TO_PHYS(mpte); /* * If the page mapping is in the kernel's address space, then the * KPTmap can provide access to the page table page. Otherwise, * temporarily map the page table page (mpte) into the kernel's * address space at either PADDR1 or PADDR2. */ if (pmap == kernel_pmap) firstpte = &KPTmap[i386_btop(trunc_4mpage(va))]; else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) { if ((*PMAP1 & PG_FRAME) != mptepa) { *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; firstpte = PADDR1; } else { mtx_lock(&PMAP2mutex); if ((*PMAP2 & PG_FRAME) != mptepa) { *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M; - pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); + pmap_invalidate_page_int(kernel_pmap, + (vm_offset_t)PADDR2); } firstpte = PADDR2; } newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; if ((newpte & PG_PDE_PAT) != 0) newpte ^= PG_PDE_PAT | PG_PTE_PAT; /* * If the page table page is new, initialize it. */ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (firstpte == PADDR2) mtx_unlock(&PMAP2mutex); /* * Invalidate the recursive mapping of the page table page. */ - pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); + pmap_invalidate_page_int(pmap, (vm_offset_t)vtopte(va)); /* * Demote the pv entry. This depends on the earlier demotion * of the mapping. Specifically, the (re)creation of a per- * page pv entry might trigger the execution of pmap_collect(), * which might reclaim a newly (re)created per-page pv entry * and destroy the associated mapping. In order to destroy * the mapping, the PDE must have already changed from mapping * the 2mpage to referencing the page table page. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); pmap_pde_demotions++; CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x" " in pmap %p", va, pmap); return (TRUE); } /* * Removes a 2- or 4MB page mapping from the kernel pmap. */ static void pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mpte = pmap_remove_pt_page(pmap, va); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; /* * Initialize the page table page. */ pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]); /* * Remove the mapping. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pmap_kenter_pde(va, newpde); /* * Invalidate the recursive mapping of the page table page. */ - pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); + pmap_invalidate_page_int(pmap, (vm_offset_t)vtopte(va)); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 4mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* * Machines that don't support invlpg, also don't support * PG_G. */ if ((oldpde & PG_G) != 0) pmap_invalidate_pde_page(kernel_pmap, sva, oldpde); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); } } } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, struct spglist *free) { pt_entry_t oldpte; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); KASSERT(oldpte != 0, ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va)); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) - pmap_invalidate_page(kernel_pmap, va); + pmap_invalidate_page_int(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); pmap_remove_entry(pmap, m, va); } return (pmap_unuse_pt(pmap, va, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free) { pt_entry_t *pte; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va, free); - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); } /* * Removes the specified range of addresses from the page table page. */ static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, struct spglist *free) { pt_entry_t *pte; bool anyvalid; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); anyvalid = false; for (pte = pmap_pte_quick(pmap, sva); sva != eva; pte++, sva += PAGE_SIZE) { if (*pte == 0) continue; /* * The TLB entry for a PG_G mapping is invalidated by * pmap_remove_pte(). */ if ((*pte & PG_G) == 0) anyvalid = true; if (pmap_remove_pte(pmap, pte, sva, free)) break; } return (anyvalid); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ -void -pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +static void +__CONCAT(PMTYPE, remove)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); PMAP_LOCK(pmap); /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva, &free); goto out; } for (; sva < eva; sva = pdnxt) { u_int pdirindex; /* * Calculate index for next page table. */ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, &pmap->pm_pdir[pdirindex], sva, &free); continue; } else if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* The large page mapping was destroyed. */ continue; } } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; if (pmap_remove_ptes(pmap, sva, pdnxt, &free)) anyvalid = 1; } out: sched_unpin(); if (anyvalid) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, true); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ -void -pmap_remove_all(vm_page_t m) +static void +__CONCAT(PMTYPE, remove_all)(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt_entry_t *pte, tpte; pd_entry_t *pde; vm_offset_t va; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } small_mappings: while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); tpte = pte_load_clear(pte); KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte", pmap, pv->pv_va)); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); vm_page_free_pages_toq(&free, true); } /* * pmap_protect_pde: do the things to protect a 4mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 4mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if ((oldpde & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (newpde != oldpde) { /* * As an optimization to future operations on this PDE, clear * PG_PROMOTED. The impending invalidation will remove any * lingering 4KB page mappings from the TLB. */ if (!pde_cmpset(pde, oldpde, newpde & ~PG_PROMOTED)) goto retry; if ((oldpde & PG_G) != 0) pmap_invalidate_pde_page(kernel_pmap, sva, oldpde); else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. */ -void -pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +static void +__CONCAT(PMTYPE, protect)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; boolean_t anychanged, pv_lists_locked; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } -#if defined(PAE) || defined(PAE_TABLES) - if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == - (VM_PROT_WRITE|VM_PROT_EXECUTE)) +#ifdef PMAP_PAE_COMP + if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == + (VM_PROT_WRITE | VM_PROT_EXECUTE)) return; #else if (prot & VM_PROT_WRITE) return; #endif if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pt_entry_t obits, pbits; u_int pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, &pmap->pm_pdir[pdirindex], sva, prot)) anychanged = TRUE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) - pmap_invalidate_all( + pmap_invalidate_all_int( pmap); PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* * The large page mapping was * destroyed. */ continue; } } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { vm_page_t m; retry: /* * Regardless of whether a pte is 32 or 64 bits in * size, PG_RW, PG_A, and PG_M are among the least * significant 32 bits. */ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; #endif if (pbits != obits) { -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if (!atomic_cmpset_64(pte, obits, pbits)) goto retry; #else if (!atomic_cmpset_int((u_int *)pte, obits, pbits)) goto retry; #endif if (obits & PG_G) - pmap_invalidate_page(pmap, sva); + pmap_invalidate_page_int(pmap, sva); else anychanged = TRUE; } } } if (anychanged) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } #if VM_NRESERVLEVEL > 0 /* * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are * within a single page table page (PTP) to a single 2- or 4MB page mapping. * For promotion to occur, two conditions must be met: (1) the 4KB page * mappings must map aligned, contiguous physical memory and (2) the 4KB page * mappings must have identical characteristics. * * Managed (PG_MANAGED) mappings within the kernel address space are not * promoted. The reason is that kernel PDEs are replicated in each pmap but * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel * pmap. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; vm_offset_t oldpteva; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2- or 4MB page. */ firstpte = pmap_pte_quick(pmap, trunc_4mpage(va)); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. */ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; oldpteva = (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK); CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x" " in pmap %p", oldpteva, pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == va >> PDRSHIFT, ("pmap_promote_pde: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, mpte)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x in pmap %p", va, pmap); return; } /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); /* * Propagate the PAT index to its proper position. */ if ((newpde & PG_PTE_PAT) != 0) newpde ^= PG_PDE_PAT | PG_PTE_PAT; /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, PG_PROMOTED | PG_PS | newpde); else pde_store(pde, PG_PROMOTED | PG_PS | newpde); pmap_pde_promotions++; CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" " in pmap %p", va, pmap); } #endif /* VM_NRESERVLEVEL > 0 */ /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ -int -pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - u_int flags, int8_t psind) +static int +__CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, u_int flags, int8_t psind) { pd_entry_t *pde; pt_entry_t *pte; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; int rv; va = trunc_page(va); KASSERT((pmap == kernel_pmap && va < VM_MAX_KERNEL_ADDRESS) || (pmap != kernel_pmap && va < VM_MAXUSER_ADDRESS), ("pmap_enter: toobig k%d %#x", pmap == kernel_pmap, va)); KASSERT(va < PMAP_TRM_MIN_ADDRESS, ("pmap_enter: invalid to pmap_enter into trampoline (va: 0x%x)", va)); KASSERT(pmap != kernel_pmap || (m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); KASSERT((flags & PMAP_ENTER_RESERVED) == 0, ("pmap_enter: flags %u has reserved bits set", flags)); pa = VM_PAGE_TO_PHYS(m); newpte = (pt_entry_t)(pa | PG_A | PG_V); if ((flags & VM_PROT_WRITE) != 0) newpte |= PG_M; if ((prot & VM_PROT_WRITE) != 0) newpte |= PG_RW; KASSERT((newpte & (PG_M | PG_RW)) != PG_M, ("pmap_enter: flags includes VM_PROT_WRITE but prot doesn't")); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif if ((flags & PMAP_ENTER_WIRED) != 0) newpte |= PG_W; if (pmap != kernel_pmap) newpte |= PG_U; newpte |= pmap_cache_bits(pmap, m->md.pat_mode, psind > 0); if ((m->oflags & VPO_UNMANAGED) == 0) newpte |= PG_MANAGED; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); if (psind == 1) { /* Assert the required virtual and physical alignment. */ KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned")); KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m); goto out; } pde = pmap_pde(pmap, va); if (pmap != kernel_pmap) { /* * va is for UVA. * In the case that a page table page is not resident, * we are creating it here. pmap_allocpte() handles * demotion. */ mpte = pmap_allocpte(pmap, va, flags); if (mpte == NULL) { KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, ("pmap_allocpte failed with sleep allowed")); rv = KERN_RESOURCE_SHORTAGE; goto out; } } else { /* * va is for KVA, so pmap_demote_pde() will never fail * to install a page table page. PG_V is also * asserted by pmap_demote_pde(). */ mpte = NULL; KASSERT(pde != NULL && (*pde & PG_V) != 0, ("KVA %#x invalid pde pdir %#jx", va, (uintmax_t)pmap->pm_pdir[PTDPTDI])); if ((*pde & PG_PS) != 0) pmap_demote_pde(pmap, pde, va); } pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry is not valid, which should not * happen. We should have either allocated the page table * page or demoted the existing mapping above. */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } origpte = *pte; pv = NULL; /* * Is the specified virtual address already mapped? */ if ((origpte & PG_V) != 0) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0) pmap->pm_stats.wired_count++; else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%x", va)); } /* * Has the physical page changed? */ opa = origpte & PG_FRAME; if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((origpte & PG_MANAGED) != 0 && (newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); if (((origpte ^ newpte) & ~(PG_M | PG_A)) == 0) goto unchanged; goto validate; } /* * The physical page has changed. Temporarily invalidate * the mapping. This ensures that all threads sharing the * pmap keep a consistent view of the mapping, which is * necessary for the correct handling of COW faults. It * also permits reuse of the old mapping's PV entry, * avoiding an allocation. * * For consistency, handle unmanaged mappings the same way. */ origpte = pte_load_clear(pte); KASSERT((origpte & PG_FRAME) == opa, ("pmap_enter: unexpected pa update for %#x", va)); if ((origpte & PG_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); /* * The pmap lock is sufficient to synchronize with * concurrent calls to pmap_page_test_mappings() and * pmap_ts_referenced(). */ if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(om); if ((origpte & PG_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); pv = pmap_pvh_remove(&om->md, pmap, va); KASSERT(pv != NULL, ("pmap_enter: no PV entry for %#x", va)); if ((newpte & PG_MANAGED) == 0) free_pv_entry(pmap, pv); if ((om->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); } if ((origpte & PG_A) != 0) - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); origpte = 0; } else { /* * Increment the counters. */ if ((newpte & PG_W) != 0) pmap->pm_stats.wired_count++; pmap->pm_stats.resident_count++; } /* * Enter on the PV list if part of our managed memory. */ if ((newpte & PG_MANAGED) != 0) { if (pv == NULL) { pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; } TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); if ((newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the PTE. */ if ((origpte & PG_V) != 0) { validate: origpte = pte_load_store(pte, newpte); KASSERT((origpte & PG_FRAME) == pa, ("pmap_enter: unexpected pa update for %#x", va)); if ((newpte & PG_M) == 0 && (origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(m); /* * Although the PTE may still have PG_RW set, TLB * invalidation may nonetheless be required because * the PTE no longer has PG_M set. */ } -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) { /* * This PTE change does not require TLB invalidation. */ goto unchanged; } #endif if ((origpte & PG_A) != 0) - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); } else pte_store(pte, newpte); unchanged: #if VM_NRESERVLEVEL > 0 /* * If both the page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va); #endif rv = KERN_SUCCESS; out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (rv); } /* * Tries to create a read- and/or execute-only 2 or 4 MB page mapping. Returns * true if successful. Returns false if (1) a mapping already exists at the * specified virtual address or (2) a PV entry cannot be allocated without * reclaiming another PV entry. */ static bool pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pd_entry_t newpde; PMAP_LOCK_ASSERT(pmap, MA_OWNED); newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->oflags & VPO_UNMANAGED) == 0) newpde |= PG_MANAGED; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (pmap != kernel_pmap) newpde |= PG_U; return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP | PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL) == KERN_SUCCESS); } /* * Tries to create the specified 2 or 4 MB page mapping. Returns KERN_SUCCESS * if the mapping was created, and either KERN_FAILURE or * KERN_RESOURCE_SHORTAGE otherwise. Returns KERN_FAILURE if * PMAP_ENTER_NOREPLACE was specified and a mapping already exists at the * specified virtual address. Returns KERN_RESOURCE_SHORTAGE if * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. * * The parameter "m" is only used when creating a managed, writeable mapping. */ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, vm_page_t m) { struct spglist free; pd_entry_t oldpde, *pde; vm_page_t mt; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((newpde & (PG_M | PG_RW)) != PG_RW, ("pmap_enter_pde: newpde is missing PG_M")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_V) != 0) { if ((flags & PMAP_ENTER_NOREPLACE) != 0) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (KERN_FAILURE); } /* Break the existing mapping(s). */ SLIST_INIT(&free); if ((oldpde & PG_PS) != 0) { /* * If the PDE resulted from a promotion, then a * reserved PT page could be freed. */ (void)pmap_remove_pde(pmap, pde, va, &free); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { if (pmap_remove_ptes(pmap, va, va + NBPDR, &free)) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); } vm_page_free_pages_toq(&free, true); if (pmap == kernel_pmap) { mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); if (pmap_insert_pt_page(pmap, mt)) { /* * XXX Currently, this can't happen because * we do not perform pmap_enter(psind == 1) * on the kernel pmap. */ panic("pmap_enter_pde: trie insert failed"); } } else KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p", pde)); } if ((newpde & PG_MANAGED) != 0) { /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pde(pmap, va, newpde, flags)) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (KERN_RESOURCE_SHORTAGE); } if ((newpde & PG_RW) != 0) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_aflag_set(mt, PGA_WRITEABLE); } } /* * Increment counters. */ if ((newpde & PG_W) != 0) pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE; pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; /* * Map the superpage. (This is not a promoted mapping; there will not * be any lingering 4KB page mappings in the TLB.) */ pde_store(pde, newpde); pmap_pde_mappings++; CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ -void -pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, +static void +__CONCAT(PMTYPE, enter_object)(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pg_ps_enabled && pmap_enter_4mpage(pmap, va, m, prot)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ -void -pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +static void +__CONCAT(PMTYPE, enter_quick)(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte) { pt_entry_t newpte, *pte; struct spglist free; KASSERT(pmap != kernel_pmap || va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (pmap != kernel_pmap) { u_int ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); mpte->wire_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex, PMAP_ENTER_NOSLEEP); if (mpte == NULL) return (mpte); } } } else { mpte = NULL; } sched_pin(); pte = pmap_pte_quick(pmap, va); if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } sched_unpin(); return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, mpte, &free)) { - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); vm_page_free_pages_toq(&free, true); } mpte = NULL; } sched_unpin(); return (mpte); } /* * Increment counters */ pmap->pm_stats.resident_count++; newpte = VM_PAGE_TO_PHYS(m) | PG_V | pmap_cache_bits(pmap, m->md.pat_mode, 0); if ((m->oflags & VPO_UNMANAGED) == 0) newpte |= PG_MANAGED; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif if (pmap != kernel_pmap) newpte |= PG_U; pte_store(pte, newpte); sched_unpin(); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ -void * -pmap_kenter_temporary(vm_paddr_t pa, int i) +static void * +__CONCAT(PMTYPE, kenter_temporary)(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ -void -pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, - vm_pindex_t pindex, vm_size_t size) +static void +__CONCAT(PMTYPE, object_init_pt)(pmap_t pmap, vm_offset_t addr, + vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; vm_page_t p; int pat_mode; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if (pg_ps_enabled && (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2/4MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and * "size" is a multiple of 2/4M, adding the PAT setting to * "pa" will not affect the termination of this loop. */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pmap, pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pde = pmap_pde(pmap, addr); if (*pde == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } /* Else continue on if the PDE is already valid. */ addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ -void -pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +static void +__CONCAT(PMTYPE, unwire)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t *pde; pt_entry_t *pte; boolean_t pv_lists_locked; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pde = pmap_pde(pmap, sva); if ((*pde & PG_V) == 0) continue; if ((*pde & PG_PS) != 0) { if ((*pde & PG_W) == 0) panic("pmap_unwire: pde %#jx is missing PG_W", (uintmax_t)*pde); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * Regardless of whether a pde (or pte) is 32 * or 64 bits in size, PG_W is among the least * significant 32 bits. */ atomic_clear_int((u_int *)pde, PG_W); pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); /* Repeat sva. */ goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, pde, sva)) panic("pmap_unwire: demotion failed"); } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & PG_V) == 0) continue; if ((*pte & PG_W) == 0) panic("pmap_unwire: pte %#jx is missing PG_W", (uintmax_t)*pte); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. * * PG_W is among the least significant 32 bits. */ atomic_clear_int((u_int *)pte, PG_W); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. Since * current pmap is always the kernel pmap when executing in * kernel, and we do not copy from the kernel pmap to a user * pmap, this optimization is not usable in 4/4G full split i386 * world. */ -void -pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, - vm_offset_t src_addr) +static void +__CONCAT(PMTYPE, copy)(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, + vm_size_t len, vm_offset_t src_addr) { struct spglist free; pt_entry_t *src_pte, *dst_pte, ptetemp; pd_entry_t srcptepaddr; vm_page_t dstmpte, srcmpte; vm_offset_t addr, end_addr, pdnxt; u_int ptepindex; if (dst_addr != src_addr) return; end_addr = src_addr + len; rw_wlock(&pvh_global_lock); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } sched_pin(); for (addr = src_addr; addr < end_addr; addr = pdnxt) { KASSERT(addr < PMAP_TRM_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy the trampoline")); pdnxt = (addr + NBPDR) & ~PDRMASK; if (pdnxt < addr) pdnxt = end_addr; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr) continue; if (dst_pmap->pm_pdir[ptepindex] == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr, PMAP_ENTER_NORECLAIM))) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr & ~PG_W; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } continue; } srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (pdnxt > end_addr) pdnxt = end_addr; src_pte = pmap_pte_quick3(src_pmap, addr); while (addr < pdnxt) { ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { dstmpte = pmap_allocpte(dst_pmap, addr, PMAP_ENTER_NOSLEEP); if (dstmpte == NULL) goto out; dst_pte = pmap_pte_quick(dst_pmap, addr); if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); dst_pmap->pm_stats.resident_count++; } else { SLIST_INIT(&free); if (pmap_unwire_ptp(dst_pmap, dstmpte, &free)) { - pmap_invalidate_page(dst_pmap, - addr); + pmap_invalidate_page_int( + dst_pmap, addr); vm_page_free_pages_toq(&free, true); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } /* * Zero 1 page of virtual memory mapped from a hardware page by the caller. */ static __inline void pagezero(void *page) { #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) { if (cpu_feature & CPUID_SSE2) sse2_pagezero(page); else i686_pagezero(page); } else #endif bzero(page, PAGE_SIZE); } /* * Zero the specified hardware page. */ -void -pmap_zero_page(vm_page_t m) +static void +__CONCAT(PMTYPE, zero_page)(vm_page_t m) { pt_entry_t *cmap_pte2; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte2) panic("pmap_zero_page: CMAP2 busy"); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); pagezero(pc->pc_cmap_addr2); *cmap_pte2 = 0; /* * Unpin the thread before releasing the lock. Otherwise the thread * could be rescheduled while still bound to the current CPU, only * to unpin itself immediately upon resuming execution. */ sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * Zero an an area within a single hardware page. off and size must not * cover an area beyond a single hardware page. */ -void -pmap_zero_page_area(vm_page_t m, int off, int size) +static void +__CONCAT(PMTYPE, zero_page_area)(vm_page_t m, int off, int size) { pt_entry_t *cmap_pte2; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte2) panic("pmap_zero_page_area: CMAP2 busy"); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); if (off == 0 && size == PAGE_SIZE) pagezero(pc->pc_cmap_addr2); else bzero(pc->pc_cmap_addr2 + off, size); *cmap_pte2 = 0; sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * Copy 1 specified hardware page to another. */ -void -pmap_copy_page(vm_page_t src, vm_page_t dst) +static void +__CONCAT(PMTYPE, copy_page)(vm_page_t src, vm_page_t dst) { pt_entry_t *cmap_pte1, *cmap_pte2; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap_pte1 = pc->pc_cmap_pte1; cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte1) panic("pmap_copy_page: CMAP1 busy"); if (*cmap_pte2) panic("pmap_copy_page: CMAP2 busy"); *cmap_pte1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A | pmap_cache_bits(kernel_pmap, src->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr1); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M | pmap_cache_bits(kernel_pmap, dst->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); bcopy(pc->pc_cmap_addr1, pc->pc_cmap_addr2, PAGE_SIZE); *cmap_pte1 = 0; *cmap_pte2 = 0; sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } -int unmapped_buf_allowed = 1; - -void -pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], - vm_offset_t b_offset, int xfersize) +static void +__CONCAT(PMTYPE, copy_pages)(vm_page_t ma[], vm_offset_t a_offset, + vm_page_t mb[], vm_offset_t b_offset, int xfersize) { vm_page_t a_pg, b_pg; char *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; pt_entry_t *cmap_pte1, *cmap_pte2; struct pcpu *pc; int cnt; sched_pin(); pc = get_pcpu(); cmap_pte1 = pc->pc_cmap_pte1; cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte1 != 0) panic("pmap_copy_pages: CMAP1 busy"); if (*cmap_pte2 != 0) panic("pmap_copy_pages: CMAP2 busy"); while (xfersize > 0) { a_pg = ma[a_offset >> PAGE_SHIFT]; a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); b_pg = mb[b_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); *cmap_pte1 = PG_V | VM_PAGE_TO_PHYS(a_pg) | PG_A | pmap_cache_bits(kernel_pmap, a_pg->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr1); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(b_pg) | PG_A | PG_M | pmap_cache_bits(kernel_pmap, b_pg->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); a_cp = pc->pc_cmap_addr1 + a_pg_offset; b_cp = pc->pc_cmap_addr2 + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } *cmap_pte1 = 0; *cmap_pte2 = 0; sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ -boolean_t -pmap_page_exists_quick(pmap_t pmap, vm_page_t m) +static boolean_t +__CONCAT(PMTYPE, page_exists_quick)(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ -int -pmap_page_wired_mappings(vm_page_t m) +static int +__CONCAT(PMTYPE, page_wired_mappings)(vm_page_t m) { int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); count = pmap_pvh_wired_mappings(&m->md, count); if ((m->flags & PG_FICTITIOUS) == 0) { count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); } rw_wunlock(&pvh_global_lock); return (count); } /* * pmap_pvh_wired_mappings: * * Return the updated number "count" of managed mappings that are wired. */ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count) { pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } sched_unpin(); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 4mpage. Otherwise, returns FALSE. */ -boolean_t -pmap_page_is_mapped(vm_page_t m) +static boolean_t +__CONCAT(PMTYPE, page_is_mapped)(vm_page_t m) { boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_wunlock(&pvh_global_lock); return (rv); } /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ -void -pmap_remove_pages(pmap_t pmap) +static void +__CONCAT(PMTYPE, remove_pages)(pmap_t pmap) { pt_entry_t *pte, tpte; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; struct spglist free; int field, idx; int32_t bit; uint32_t inuse, bitmask; int allfree; if (pmap != PCPU_GET(curpmap)) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } SLIST_INIT(&free); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap, pc->pc_pmap)); allfree = 1; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = bsfl(inuse); bitmask = 1UL << bit; idx = field * 32 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pde(pmap, pv->pv_va); tpte = *pte; if ((tpte & PG_PS) == 0) { pte = pmap_pte_quick(pmap, pv->pv_va); tpte = *pte & ~PG_PTE_PAT; } if (tpte == 0) { printf( "TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((tpte & PG_PS) != 0) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; if ((tpte & PG_PS) != 0) { pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); } } else { pmap->pm_stats.resident_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_unuse_pt(pmap, pv->pv_va, &free); } } } if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } sched_unpin(); - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, true); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ -boolean_t -pmap_is_modified(vm_page_t m) +static boolean_t +__CONCAT(PMTYPE, is_modified)(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns TRUE if any of the given mappings were used to modify * physical memory. Otherwise, returns FALSE. Both page and 2mpage * mappings are supported. */ static boolean_t pmap_is_modified_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is elgible * for prefault. */ -boolean_t -pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +static boolean_t +__CONCAT(PMTYPE, is_prefaultable)(pmap_t pmap, vm_offset_t addr) { pd_entry_t pde; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); pde = *pmap_pde(pmap, addr); if (pde != 0 && (pde & PG_PS) == 0) rv = pmap_pte_ufast(pmap, addr, pde) == 0; PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ -boolean_t -pmap_is_referenced(vm_page_t m) +static boolean_t +__CONCAT(PMTYPE, is_referenced)(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); rw_wlock(&pvh_global_lock); rv = pmap_is_referenced_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns TRUE if any of the given mappings were referenced and FALSE * otherwise. Both page and 4mpage mappings are supported. */ static boolean_t pmap_is_referenced_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * Clear the write and modified bits in each of the given page's mappings. */ -void -pmap_remove_write(vm_page_t m) +static void +__CONCAT(PMTYPE, remove_write)(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); retry: oldpte = *pte; if ((oldpte & PG_RW) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * As an optimization, update the page's dirty field if a modified bit is * found while counting reference bits. This opportunistic update can be * performed at low cost and can eliminate the need for some future calls * to pmap_is_modified(). However, since this function stops after * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). */ -int -pmap_ts_referenced(vm_page_t m) +static int +__CONCAT(PMTYPE, ts_referenced)(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; pd_entry_t *pde; pt_entry_t *pte; vm_paddr_t pa; int rtval = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); pa = VM_PAGE_TO_PHYS(m); pvh = pa_to_pvh(pa); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0 || (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); if ((*pde & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Although "*pde" is mapping a 2/4MB page, because * this function is called at a 4KB page granularity, * we only update the 4KB page under test. */ vm_page_dirty(m); } if ((*pde & PG_A) != 0) { /* * Since this reference bit is shared by either 1024 * or 512 4KB pages, it should not be cleared every * time it is tested. Apply a simple "hash" function * on the physical page number, the virtual superpage * number, and the pmap address to select one 4KB page * out of the 1024 or 512 on which testing the * reference bit will result in clearing that bit. * This function is designed to avoid the selection of * the same 4KB page for every 2- or 4MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && (*pde & PG_W) == 0) { atomic_clear_int((u_int *)pde, PG_A); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); } rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); } if (rtval >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced: found a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if ((*pte & PG_A) != 0) { atomic_clear_int((u_int *)pte, PG_A); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval < PMAP_TS_REFERENCED_MAX); out: sched_unpin(); rw_wunlock(&pvh_global_lock); return (rtval); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ -void -pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) +static void +__CONCAT(PMTYPE, advise)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + int advice) { pd_entry_t oldpde, *pde; pt_entry_t *pte; vm_offset_t va, pdnxt; vm_page_t m; boolean_t anychanged, pv_lists_locked; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pde = pmap_pde(pmap, sva); oldpde = *pde; if ((oldpde & PG_V) == 0) continue; else if ((oldpde & PG_PS) != 0) { if ((oldpde & PG_MANAGED) == 0) continue; if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, pde, sva)) { /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying page * table page is fully populated, this removal never * frees a page table page. */ if ((oldpde & PG_W) == 0) { pte = pmap_pte_quick(pmap, sva); KASSERT((*pte & PG_V) != 0, ("pmap_advise: invalid PTE")); pmap_remove_pte(pmap, pte, sva, NULL); anychanged = TRUE; } } if (pdnxt > eva) pdnxt = eva; va = pdnxt; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) goto maybe_invlrng; else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. */ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); vm_page_dirty(m); } atomic_clear_int((u_int *)pte, PG_M | PG_A); } else if ((*pte & PG_A) != 0) atomic_clear_int((u_int *)pte, PG_A); else goto maybe_invlrng; if ((*pte & PG_G) != 0) { if (va == pdnxt) va = sva; } else anychanged = TRUE; continue; maybe_invlrng: if (va != pdnxt) { - pmap_invalidate_range(pmap, va, sva); + pmap_invalidate_range_int(pmap, va, sva); va = pdnxt; } } if (va != pdnxt) - pmap_invalidate_range(pmap, va, sva); + pmap_invalidate_range_int(pmap, va, sva); } if (anychanged) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Clear the modify bits on the specified physical page. */ -void -pmap_clear_modify(vm_page_t m) +static void +__CONCAT(PMTYPE, clear_modify)(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde(pmap, pde, va)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pte_quick(pmap, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ while (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, + va); } } } } PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_M is among the least significant * 32 bits. */ atomic_clear_int((u_int *)pte, PG_M); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~PG_PTE_CACHE; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2/4MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. */ do { opde = *(u_int *)pde; npde = opde & ~PG_PDE_CACHE; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ -void * -pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) +static void * +__CONCAT(PMTYPE, mapdev_attr)(vm_paddr_t pa, vm_size_t size, int mode) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; vm_size_t tmpsize; int i; offset = pa & PAGE_MASK; size = round_page(offset + size); pa = pa & PG_FRAME; if (pa < PMAP_MAP_LOW && pa + size <= PMAP_MAP_LOW) va = pa + PMAP_MAP_LOW; else if (!pmap_initialized) { va = 0; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) { ppim->pa = pa; ppim->sz = size; ppim->mode = mode; ppim->va = virtual_avail; virtual_avail += size; va = ppim->va; break; } } if (va == 0) panic("%s: too many preinit mappings", __func__); } else { /* * If we have a preinit mapping, re-use it. */ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->pa == pa && ppim->sz == size && ppim->mode == mode) return ((void *)(ppim->va + offset)); } va = kva_alloc(size); if (va == 0) panic("%s: Couldn't allocate KVA", __func__); } for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); - pmap_invalidate_range(kernel_pmap, va, va + tmpsize); + pmap_invalidate_range_int(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + size); return ((void *)(va + offset)); } -void * -pmap_mapdev(vm_paddr_t pa, vm_size_t size) +static void +__CONCAT(PMTYPE, unmapdev)(vm_offset_t va, vm_size_t size) { - - return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); -} - -void * -pmap_mapbios(vm_paddr_t pa, vm_size_t size) -{ - - return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); -} - -void -pmap_unmapdev(vm_offset_t va, vm_size_t size) -{ struct pmap_preinit_mapping *ppim; vm_offset_t offset; int i; if (va >= PMAP_MAP_LOW && va <= KERNBASE && va + size <= KERNBASE) return; offset = va & PAGE_MASK; size = round_page(offset + size); va = trunc_page(va); for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == va && ppim->sz == size) { if (pmap_initialized) return; ppim->pa = 0; ppim->va = 0; ppim->sz = 0; ppim->mode = 0; if (va + size == virtual_avail) virtual_avail = va; return; } } if (pmap_initialized) kva_free(va, size); } /* * Sets the memory attribute for the specified page. */ -void -pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) +static void +__CONCAT(PMTYPE, page_set_memattr)(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) return; /* * If "m" is a normal page, flush it from the cache. * See pmap_invalidate_cache_range(). * * First, try to find an existing mapping of the page by sf * buffer. sf_buf_invalidate_cache() modifies mapping and * flushes the cache. */ if (sf_buf_invalidate_cache(m)) return; /* * If page is not mapped by sf buffer, but CPU does not * support self snoop, map the page transient and do * invalidation. In the worst case, whole cache is flushed by * pmap_invalidate_cache_range(). */ if ((cpu_feature & CPUID_SS) == 0) pmap_flush_page(m); } static void -pmap_flush_page(vm_page_t m) +__CONCAT(PMTYPE, flush_page)(vm_page_t m) { pt_entry_t *cmap_pte2; struct pcpu *pc; vm_offset_t sva, eva; bool useclflushopt; useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0; if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) { sched_pin(); pc = get_pcpu(); cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte2) panic("pmap_flush_page: CMAP2 busy"); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); sva = (vm_offset_t)pc->pc_cmap_addr2; eva = sva + PAGE_SIZE; /* * Use mfence or sfence despite the ordering implied by * mtx_{un,}lock() because clflush on non-Intel CPUs * and clflushopt are not guaranteed to be ordered by * any other instruction. */ if (useclflushopt) sfence(); else if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) { if (useclflushopt) clflushopt(sva); else clflush(sva); } if (useclflushopt) sfence(); else if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); *cmap_pte2 = 0; sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } else pmap_invalidate_cache(); } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within either the kernel map. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. */ -int -pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +static int +__CONCAT(PMTYPE, change_attr)(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde; boolean_t changed; base = trunc_page(va); offset = va & PAGE_MASK; size = round_page(offset + size); /* * Only supported on kernel virtual addresses above the recursive map. */ if (base < VM_MIN_KERNEL_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(kernel_pmap, mode, 1); cache_bits_pte = pmap_cache_bits(kernel_pmap, mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down * 2/4MB pages into 4KB pages if required. */ PMAP_LOCK(kernel_pmap); for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } if (*pde & PG_PS) { /* * If the current 2/4MB page already has * the required memory type, then we need not * demote this page. Just increment tmpva to * the next 2/4MB page frame. */ if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_4mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2/4MB * page frame and there is at least 2/4MB left * within the range, then we need not break * down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) { PMAP_UNLOCK(kernel_pmap); return (ENOMEM); } } pte = vtopte(tmpva); if (*pte == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } tmpva += PAGE_SIZE; } PMAP_UNLOCK(kernel_pmap); /* * Ok, all the pages exist, so run through them updating their * cache mode if required. */ for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde & PG_PS) { if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde); changed = TRUE; } tmpva = trunc_4mpage(tmpva) + NBPDR; } else { pte = vtopte(tmpva); if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte); changed = TRUE; } tmpva += PAGE_SIZE; } } /* * Flush CPU caches to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { - pmap_invalidate_range(kernel_pmap, base, tmpva); + pmap_invalidate_range_int(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva); } return (0); } /* * perform the pmap work for mincore */ -int -pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) +static int +__CONCAT(PMTYPE, mincore)(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t pde; pt_entry_t pte; vm_paddr_t pa; int val; PMAP_LOCK(pmap); retry: pde = *pmap_pde(pmap, addr); if (pde != 0) { if ((pde & PG_PS) != 0) { pte = pde; /* Compute the physical address of the 4KB page. */ pa = ((pde & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { pte = pmap_pte_ufast(pmap, addr, pde); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } -void -pmap_activate(struct thread *td) +static void +__CONCAT(PMTYPE, activate)(struct thread *td) { pmap_t pmap, oldpmap; u_int cpuid; u_int32_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); cpuid = PCPU_GET(cpuid); #if defined(SMP) CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); CPU_SET(cpuid, &pmap->pm_active); #endif -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP cr3 = vtophys(pmap->pm_pdpt); #else cr3 = vtophys(pmap->pm_pdir); #endif /* * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_cr3 = cr3; PCPU_SET(curpmap, pmap); critical_exit(); } -void -pmap_activate_boot(pmap_t pmap) +static void +__CONCAT(PMTYPE, activate_boot)(pmap_t pmap) { u_int cpuid; cpuid = PCPU_GET(cpuid); #if defined(SMP) CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_SET(cpuid, &pmap->pm_active); #endif PCPU_SET(curpmap, pmap); } -void -pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) -{ -} - /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ -void -pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, +static void +__CONCAT(PMTYPE, align_superpage)(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } -vm_offset_t -pmap_quick_enter_page(vm_page_t m) +static vm_offset_t +__CONCAT(PMTYPE, quick_enter_page)(vm_page_t m) { vm_offset_t qaddr; pt_entry_t *pte; critical_enter(); qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); - KASSERT(*pte == 0, ("pmap_quick_enter_page: PTE busy")); + KASSERT(*pte == 0, + ("pmap_quick_enter_page: PTE busy %#jx", (uintmax_t)*pte)); *pte = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(kernel_pmap, pmap_page_get_memattr(m), 0); invlpg(qaddr); return (qaddr); } -void -pmap_quick_remove_page(vm_offset_t addr) +static void +__CONCAT(PMTYPE, quick_remove_page)(vm_offset_t addr) { vm_offset_t qaddr; pt_entry_t *pte; qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); KASSERT(*pte != 0, ("pmap_quick_remove_page: PTE not in use")); KASSERT(addr == qaddr, ("pmap_quick_remove_page: invalid address")); *pte = 0; critical_exit(); } static vmem_t *pmap_trm_arena; static vmem_addr_t pmap_trm_arena_last = PMAP_TRM_MIN_ADDRESS; static int trm_guard = PAGE_SIZE; static int pmap_trm_import(void *unused __unused, vmem_size_t size, int flags, vmem_addr_t *addrp) { vm_page_t m; vmem_addr_t af, addr, prev_addr; pt_entry_t *trm_pte; prev_addr = atomic_load_long(&pmap_trm_arena_last); size = round_page(size) + trm_guard; for (;;) { if (prev_addr + size < prev_addr || prev_addr + size < size || prev_addr + size > PMAP_TRM_MAX_ADDRESS) return (ENOMEM); addr = prev_addr + size; if (atomic_fcmpset_int(&pmap_trm_arena_last, &prev_addr, addr)) break; } prev_addr += trm_guard; trm_pte = PTmap + atop(prev_addr); for (af = prev_addr; af < addr; af += PAGE_SIZE) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_WAITOK); pte_store(&trm_pte[atop(af - prev_addr)], VM_PAGE_TO_PHYS(m) | PG_M | PG_A | PG_RW | PG_V | pgeflag | pmap_cache_bits(kernel_pmap, VM_MEMATTR_DEFAULT, FALSE)); } *addrp = prev_addr; return (0); } -static -void pmap_init_trm(void) +void +pmap_init_trm(void) { vm_page_t pd_m; TUNABLE_INT_FETCH("machdep.trm_guard", &trm_guard); if ((trm_guard & PAGE_MASK) != 0) trm_guard = 0; pmap_trm_arena = vmem_create("i386trampoline", 0, 0, 1, 0, M_WAITOK); vmem_set_import(pmap_trm_arena, pmap_trm_import, NULL, NULL, PAGE_SIZE); pd_m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_WAITOK | VM_ALLOC_ZERO); if ((pd_m->flags & PG_ZERO) == 0) pmap_zero_page(pd_m); PTD[TRPTDI] = VM_PAGE_TO_PHYS(pd_m) | PG_M | PG_A | PG_RW | PG_V | pmap_cache_bits(kernel_pmap, VM_MEMATTR_DEFAULT, TRUE); } -void * -pmap_trm_alloc(size_t size, int flags) +static void * +__CONCAT(PMTYPE, trm_alloc)(size_t size, int flags) { vmem_addr_t res; int error; MPASS((flags & ~(M_WAITOK | M_NOWAIT | M_ZERO)) == 0); error = vmem_xalloc(pmap_trm_arena, roundup2(size, 4), sizeof(int), 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags | M_FIRSTFIT, &res); if (error != 0) return (NULL); if ((flags & M_ZERO) != 0) bzero((void *)res, size); return ((void *)res); } -void -pmap_trm_free(void *addr, size_t size) +static void +__CONCAT(PMTYPE, trm_free)(void *addr, size_t size) { vmem_free(pmap_trm_arena, (uintptr_t)addr, roundup2(size, 4)); } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return (npte); } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa & PG_FRAME); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return (npte); } #endif + +static void +__CONCAT(PMTYPE, ksetrw)(vm_offset_t va) +{ + + *vtopte(va) |= PG_RW; +} + +static void +__CONCAT(PMTYPE, remap_lowptdi)(bool enable) +{ + + PTD[KPTDI] = enable ? PTD[LOWPTDI] : 0; + invltlb_glob(); +} + +static vm_offset_t +__CONCAT(PMTYPE, get_map_low)(void) +{ + + return (PMAP_MAP_LOW); +} + +static vm_offset_t +__CONCAT(PMTYPE, get_vm_maxuser_address)(void) +{ + + return (VM_MAXUSER_ADDRESS); +} + +static vm_paddr_t +__CONCAT(PMTYPE, pg_frame)(vm_paddr_t pa) +{ + + return (pa & PG_FRAME); +} + +static void +__CONCAT(PMTYPE, sf_buf_map)(struct sf_buf *sf) +{ + pt_entry_t opte, *ptep; + + /* + * Update the sf_buf's virtual-to-physical mapping, flushing the + * virtual address from the TLB. Since the reference count for + * the sf_buf's old mapping was zero, that mapping is not + * currently in use. Consequently, there is no need to exchange + * the old and new PTEs atomically, even under PAE. + */ + ptep = vtopte(sf->kva); + opte = *ptep; + *ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V | + pmap_cache_bits(kernel_pmap, sf->m->md.pat_mode, 0); + + /* + * Avoid unnecessary TLB invalidations: If the sf_buf's old + * virtual-to-physical mapping was not used, then any processor + * that has invalidated the sf_buf's virtual address from its TLB + * since the last used mapping need not invalidate again. + */ +#ifdef SMP + if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) + CPU_ZERO(&sf->cpumask); +#else + if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) + pmap_invalidate_page_int(kernel_pmap, sf->kva); +#endif +} + +static void +__CONCAT(PMTYPE, cp_slow0_map)(vm_offset_t kaddr, int plen, vm_page_t *ma) +{ + pt_entry_t *pte; + int i; + + for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) { + *pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(ma[i]) | + pmap_cache_bits(kernel_pmap, pmap_page_get_memattr(ma[i]), + FALSE); + invlpg(kaddr + ptoa(i)); + } +} + +static u_int +__CONCAT(PMTYPE, get_kcr3)(void) +{ + +#ifdef PMAP_PAE_COMP + return ((u_int)IdlePDPT); +#else + return ((u_int)IdlePTD); +#endif +} + +static u_int +__CONCAT(PMTYPE, get_cr3)(pmap_t pmap) +{ + +#ifdef PMAP_PAE_COMP + return ((u_int)vtophys(pmap->pm_pdpt)); +#else + return ((u_int)vtophys(pmap->pm_pdir)); +#endif +} + +static caddr_t +__CONCAT(PMTYPE, cmap3)(vm_paddr_t pa, u_int pte_bits) +{ + pt_entry_t *pte; + + pte = CMAP3; + *pte = pa | pte_bits; + invltlb(); + return (CADDR3); +} + +static void +__CONCAT(PMTYPE, basemem_setup)(u_int basemem) +{ + pt_entry_t *pte; + int i; + + /* + * Map pages between basemem and ISA_HOLE_START, if any, r/w into + * the vm86 page table so that vm86 can scribble on them using + * the vm86 map too. XXX: why 2 ways for this and only 1 way for + * page 0, at least as initialized here? + */ + pte = (pt_entry_t *)vm86paddr; + for (i = basemem / 4; i < 160; i++) + pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; +} + +struct bios16_pmap_handle { + pt_entry_t *pte; + pd_entry_t *ptd; + pt_entry_t orig_ptd; +}; + +static void * +__CONCAT(PMTYPE, bios16_enter)(void) +{ + struct bios16_pmap_handle *h; + + /* + * no page table, so create one and install it. + */ + h = malloc(sizeof(struct bios16_pmap_handle), M_TEMP, M_WAITOK); + h->pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); + h->ptd = IdlePTD; + *h->pte = vm86phystk | PG_RW | PG_V; + h->orig_ptd = *h->ptd; + *h->ptd = vtophys(h->pte) | PG_RW | PG_V; + pmap_invalidate_all_int(kernel_pmap); /* XXX insurance for now */ + return (h); +} + +static void +__CONCAT(PMTYPE, bios16_leave)(void *arg) +{ + struct bios16_pmap_handle *h; + + h = arg; + *h->ptd = h->orig_ptd; /* remove page table */ + /* + * XXX only needs to be invlpg(0) but that doesn't work on the 386 + */ + pmap_invalidate_all_int(kernel_pmap); + free(h->pte, M_TEMP); /* ... and free it */ +} + +#define PMM(a) \ + .pm_##a = __CONCAT(PMTYPE, a), + +struct pmap_methods __CONCAT(PMTYPE, methods) = { + PMM(ksetrw) + PMM(remap_lower) + PMM(remap_lowptdi) + PMM(align_superpage) + PMM(quick_enter_page) + PMM(quick_remove_page) + PMM(trm_alloc) + PMM(trm_free) + PMM(get_map_low) + PMM(get_vm_maxuser_address) + PMM(kextract) + PMM(pg_frame) + PMM(sf_buf_map) + PMM(cp_slow0_map) + PMM(get_kcr3) + PMM(get_cr3) + PMM(cmap3) + PMM(basemem_setup) + PMM(set_nx) + PMM(bios16_enter) + PMM(bios16_leave) + PMM(bootstrap) + PMM(is_valid_memattr) + PMM(cache_bits) + PMM(ps_enabled) + PMM(pinit0) + PMM(pinit) + PMM(activate) + PMM(activate_boot) + PMM(advise) + PMM(clear_modify) + PMM(change_attr) + PMM(mincore) + PMM(copy) + PMM(copy_page) + PMM(copy_pages) + PMM(zero_page) + PMM(zero_page_area) + PMM(enter) + PMM(enter_object) + PMM(enter_quick) + PMM(kenter_temporary) + PMM(object_init_pt) + PMM(unwire) + PMM(page_exists_quick) + PMM(page_wired_mappings) + PMM(page_is_mapped) + PMM(remove_pages) + PMM(is_modified) + PMM(is_prefaultable) + PMM(is_referenced) + PMM(remove_write) + PMM(ts_referenced) + PMM(mapdev_attr) + PMM(unmapdev) + PMM(page_set_memattr) + PMM(extract) + PMM(extract_and_hold) + PMM(map) + PMM(qenter) + PMM(qremove) + PMM(release) + PMM(remove) + PMM(protect) + PMM(remove_all) + PMM(init) + PMM(init_pat) + PMM(growkernel) + PMM(invalidate_page) + PMM(invalidate_range) + PMM(invalidate_all) + PMM(invalidate_cache) + PMM(flush_page) + PMM(kenter) + PMM(kremove) +}; Index: head/sys/i386/i386/pmap_base.c =================================================================== --- head/sys/i386/i386/pmap_base.c (nonexistent) +++ head/sys/i386/i386/pmap_base.c (revision 343567) @@ -0,0 +1,954 @@ +/*- + * SPDX-License-Identifier: BSD-4-Clause + * + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 1994 David Greenman + * All rights reserved. + * Copyright (c) 2005-2010 Alan L. Cox + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 + */ +/*- + * Copyright (c) 2003 Networks Associates Technology, Inc. + * All rights reserved. + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Jake Burkholder, + * Safeport Network Services, and Network Associates Laboratories, the + * Security Research Division of Network Associates, Inc. under + * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA + * CHATS research program. + * + * Portions of this software were developed by + * Konstantin Belousov under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_apic.h" +#include "opt_cpu.h" +#include "opt_pmap.h" +#include "opt_smp.h" +#include "opt_vm.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef DEV_APIC +#include +#include +#include +#endif +#include + +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); + +#include +#include +#include +#include +#include + +vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ + +int unmapped_buf_allowed = 1; + +int pti; + +u_long physfree; /* phys addr of next free page */ +u_long vm86phystk; /* PA of vm86/bios stack */ +u_long vm86paddr; /* address of vm86 region */ +int vm86pa; /* phys addr of vm86 region */ +u_long KERNend; /* phys addr end of kernel (just after bss) */ +u_long KPTphys; /* phys addr of kernel page tables */ +caddr_t ptvmmap = 0; +vm_offset_t kernel_vm_end; + +int i386_pmap_VM_NFREEORDER; +int i386_pmap_VM_LEVEL_0_ORDER; +int i386_pmap_PDRSHIFT; + +int pat_works = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, + &pat_works, 1, + "Is page attribute table fully functional?"); + +int pg_ps_enabled = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &pg_ps_enabled, 0, + "Are large page mappings enabled?"); + +int pv_entry_max = 0; +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, + &pv_entry_max, 0, + "Max number of PV entries"); + +int pv_entry_count = 0; +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, + &pv_entry_count, 0, + "Current number of pv entries"); + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +int shpgperproc = PMAP_SHPGPERPROC; +SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, + &shpgperproc, 0, + "Page share factor per proc"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, + "2/4MB page mapping counters"); + +u_long pmap_pde_demotions; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_pde_demotions, 0, + "2/4MB page demotions"); + +u_long pmap_pde_mappings; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_pde_mappings, 0, + "2/4MB page mappings"); + +u_long pmap_pde_p_failures; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_pde_p_failures, 0, + "2/4MB page promotion failures"); + +u_long pmap_pde_promotions; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_pde_promotions, 0, + "2/4MB page promotions"); + +#ifdef SMP +int PMAP1changedcpu; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, + &PMAP1changedcpu, 0, + "Number of times pmap_pte_quick changed CPU with same PMAP1"); +#endif + +int PMAP1changed; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, + &PMAP1changed, 0, + "Number of times pmap_pte_quick changed PMAP1"); +int PMAP1unchanged; +SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, + &PMAP1unchanged, 0, + "Number of times pmap_pte_quick didn't change PMAP1"); + +static int +kvm_size(SYSCTL_HANDLER_ARGS) +{ + unsigned long ksize; + + ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; + return (sysctl_handle_long(oidp, &ksize, 0, req)); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, + 0, 0, kvm_size, "IU", + "Size of KVM"); + +static int +kvm_free(SYSCTL_HANDLER_ARGS) +{ + unsigned long kfree; + + kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; + return (sysctl_handle_long(oidp, &kfree, 0, req)); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, + 0, 0, kvm_free, "IU", + "Amount of KVM free"); + +#ifdef PV_STATS +int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; +long pv_entry_frees, pv_entry_allocs; +int pv_entry_spare; + +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, + &pc_chunk_count, 0, + "Current number of pv entry chunks"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, + &pc_chunk_allocs, 0, + "Current number of pv entry chunks allocated"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, + &pc_chunk_frees, 0, + "Current number of pv entry chunks frees"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, + &pc_chunk_tryfail, 0, + "Number of times tried to get a chunk page but failed."); +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, + &pv_entry_frees, 0, + "Current number of pv entry frees"); +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, + &pv_entry_allocs, 0, + "Current number of pv entry allocs"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, + &pv_entry_spare, 0, + "Current number of spare pv entries"); +#endif + +struct pmap kernel_pmap_store; +static struct pmap_methods *pmap_methods_ptr; + +/* + * Initialize a vm_page's machine-dependent fields. + */ +void +pmap_page_init(vm_page_t m) +{ + + TAILQ_INIT(&m->md.pv_list); + m->md.pat_mode = PAT_WRITE_BACK; +} + +void +invltlb_glob(void) +{ + + invltlb(); +} + +static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, + vm_offset_t eva); +static void pmap_invalidate_cache_range_all(vm_offset_t sva, + vm_offset_t eva); + +void +pmap_flush_page(vm_page_t m) +{ + + pmap_methods_ptr->pm_flush_page(m); +} + +DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t), + static) +{ + + if ((cpu_feature & CPUID_SS) != 0) + return (pmap_invalidate_cache_range_selfsnoop); + if ((cpu_feature & CPUID_CLFSH) != 0) + return (pmap_force_invalidate_cache_range); + return (pmap_invalidate_cache_range_all); +} + +#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) + +static void +pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva) +{ + + KASSERT((sva & PAGE_MASK) == 0, + ("pmap_invalidate_cache_range: sva not page-aligned")); + KASSERT((eva & PAGE_MASK) == 0, + ("pmap_invalidate_cache_range: eva not page-aligned")); +} + +static void +pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva) +{ + + pmap_invalidate_cache_range_check_align(sva, eva); +} + +void +pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) +{ + + sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); + if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) { + /* + * The supplied range is bigger than 2MB. + * Globally invalidate cache. + */ + pmap_invalidate_cache(); + return; + } + +#ifdef DEV_APIC + /* + * XXX: Some CPUs fault, hang, or trash the local APIC + * registers if we use CLFLUSH on the local APIC + * range. The local APIC is always uncached, so we + * don't need to flush for that range anyway. + */ + if (pmap_kextract(sva) == lapic_paddr) + return; +#endif + + if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) { + /* + * Do per-cache line flush. Use the sfence + * instruction to insure that previous stores are + * included in the write-back. The processor + * propagates flush to other processors in the cache + * coherence domain. + */ + sfence(); + for (; sva < eva; sva += cpu_clflush_line_size) + clflushopt(sva); + sfence(); + } else { + /* + * Writes are ordered by CLFLUSH on Intel CPUs. + */ + if (cpu_vendor_id != CPU_VENDOR_INTEL) + mfence(); + for (; sva < eva; sva += cpu_clflush_line_size) + clflush(sva); + if (cpu_vendor_id != CPU_VENDOR_INTEL) + mfence(); + } +} + +static void +pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva) +{ + + pmap_invalidate_cache_range_check_align(sva, eva); + pmap_invalidate_cache(); +} + +void +pmap_invalidate_cache_pages(vm_page_t *pages, int count) +{ + int i; + + if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || + (cpu_feature & CPUID_CLFSH) == 0) { + pmap_invalidate_cache(); + } else { + for (i = 0; i < count; i++) + pmap_flush_page(pages[i]); + } +} + +void +pmap_ksetrw(vm_offset_t va) +{ + + pmap_methods_ptr->pm_ksetrw(va); +} + +void +pmap_remap_lower(bool enable) +{ + + pmap_methods_ptr->pm_remap_lower(enable); +} + +void +pmap_remap_lowptdi(bool enable) +{ + + pmap_methods_ptr->pm_remap_lowptdi(enable); +} + +void +pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, + vm_offset_t *addr, vm_size_t size) +{ + + return (pmap_methods_ptr->pm_align_superpage(object, offset, + addr, size)); +} + +vm_offset_t +pmap_quick_enter_page(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_quick_enter_page(m)); +} + +void +pmap_quick_remove_page(vm_offset_t addr) +{ + + return (pmap_methods_ptr->pm_quick_remove_page(addr)); +} + +void * +pmap_trm_alloc(size_t size, int flags) +{ + + return (pmap_methods_ptr->pm_trm_alloc(size, flags)); +} + +void +pmap_trm_free(void *addr, size_t size) +{ + + pmap_methods_ptr->pm_trm_free(addr, size); +} + +void +pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) +{ +} + +vm_offset_t +pmap_get_map_low(void) +{ + + return (pmap_methods_ptr->pm_get_map_low()); +} + +vm_offset_t +pmap_get_vm_maxuser_address(void) +{ + + return (pmap_methods_ptr->pm_get_vm_maxuser_address()); +} + +vm_paddr_t +pmap_kextract(vm_offset_t va) +{ + + return (pmap_methods_ptr->pm_kextract(va)); +} + +vm_paddr_t +pmap_pg_frame(vm_paddr_t pa) +{ + + return (pmap_methods_ptr->pm_pg_frame(pa)); +} + +void +pmap_sf_buf_map(struct sf_buf *sf) +{ + + pmap_methods_ptr->pm_sf_buf_map(sf); +} + +void +pmap_cp_slow0_map(vm_offset_t kaddr, int plen, vm_page_t *ma) +{ + + pmap_methods_ptr->pm_cp_slow0_map(kaddr, plen, ma); +} + +u_int +pmap_get_kcr3(void) +{ + + return (pmap_methods_ptr->pm_get_kcr3()); +} + +u_int +pmap_get_cr3(pmap_t pmap) +{ + + return (pmap_methods_ptr->pm_get_cr3(pmap)); +} + +caddr_t +pmap_cmap3(vm_paddr_t pa, u_int pte_flags) +{ + + return (pmap_methods_ptr->pm_cmap3(pa, pte_flags)); +} + +void +pmap_basemem_setup(u_int basemem) +{ + + pmap_methods_ptr->pm_basemem_setup(basemem); +} + +void +pmap_set_nx(void) +{ + + pmap_methods_ptr->pm_set_nx(); +} + +void * +pmap_bios16_enter(void) +{ + + return (pmap_methods_ptr->pm_bios16_enter()); +} + +void +pmap_bios16_leave(void *handle) +{ + + pmap_methods_ptr->pm_bios16_leave(handle); +} + +void +pmap_bootstrap(vm_paddr_t firstaddr) +{ + + pmap_methods_ptr->pm_bootstrap(firstaddr); +} + +boolean_t +pmap_is_valid_memattr(pmap_t pmap, vm_memattr_t mode) +{ + + return (pmap_methods_ptr->pm_is_valid_memattr(pmap, mode)); +} + +int +pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) +{ + + return (pmap_methods_ptr->pm_cache_bits(pmap, mode, is_pde)); +} + +bool +pmap_ps_enabled(pmap_t pmap) +{ + + return (pmap_methods_ptr->pm_ps_enabled(pmap)); +} + +void +pmap_pinit0(pmap_t pmap) +{ + + pmap_methods_ptr->pm_pinit0(pmap); +} + +int +pmap_pinit(pmap_t pmap) +{ + + return (pmap_methods_ptr->pm_pinit(pmap)); +} + +void +pmap_activate(struct thread *td) +{ + + pmap_methods_ptr->pm_activate(td); +} + +void +pmap_activate_boot(pmap_t pmap) +{ + + pmap_methods_ptr->pm_activate_boot(pmap); +} + +void +pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) +{ + + pmap_methods_ptr->pm_advise(pmap, sva, eva, advice); +} + +void +pmap_clear_modify(vm_page_t m) +{ + + pmap_methods_ptr->pm_clear_modify(m); +} + +int +pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +{ + + return (pmap_methods_ptr->pm_change_attr(va, size, mode)); +} + +int +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) +{ + + return (pmap_methods_ptr->pm_mincore(pmap, addr, locked_pa)); +} + +void +pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, + vm_offset_t src_addr) +{ + + pmap_methods_ptr->pm_copy(dst_pmap, src_pmap, dst_addr, len, src_addr); +} + +void +pmap_copy_page(vm_page_t src, vm_page_t dst) +{ + + pmap_methods_ptr->pm_copy_page(src, dst); +} + +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + + pmap_methods_ptr->pm_copy_pages(ma, a_offset, mb, b_offset, xfersize); +} + +void +pmap_zero_page(vm_page_t m) +{ + + pmap_methods_ptr->pm_zero_page(m); +} + +void +pmap_zero_page_area(vm_page_t m, int off, int size) +{ + + pmap_methods_ptr->pm_zero_page_area(m, off, size); +} + +int +pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + u_int flags, int8_t psind) +{ + + return (pmap_methods_ptr->pm_enter(pmap, va, m, prot, flags, psind)); +} + +void +pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, + vm_page_t m_start, vm_prot_t prot) +{ + + pmap_methods_ptr->pm_enter_object(pmap, start, end, m_start, prot); +} + +void +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + + pmap_methods_ptr->pm_enter_quick(pmap, va, m, prot); +} + +void * +pmap_kenter_temporary(vm_paddr_t pa, int i) +{ + + return (pmap_methods_ptr->pm_kenter_temporary(pa, i)); +} + +void +pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, + vm_pindex_t pindex, vm_size_t size) +{ + + pmap_methods_ptr->pm_object_init_pt(pmap, addr, object, pindex, size); +} + +void +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + + pmap_methods_ptr->pm_unwire(pmap, sva, eva); +} + +boolean_t +pmap_page_exists_quick(pmap_t pmap, vm_page_t m) +{ + + return (pmap_methods_ptr->pm_page_exists_quick(pmap, m)); +} + +int +pmap_page_wired_mappings(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_page_wired_mappings(m)); +} + +boolean_t +pmap_page_is_mapped(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_page_is_mapped(m)); +} + +void +pmap_remove_pages(pmap_t pmap) +{ + + pmap_methods_ptr->pm_remove_pages(pmap); +} + +boolean_t +pmap_is_modified(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_is_modified(m)); +} + +boolean_t +pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + + return (pmap_methods_ptr->pm_is_prefaultable(pmap, addr)); +} + +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_is_referenced(m)); +} + +void +pmap_remove_write(vm_page_t m) +{ + + pmap_methods_ptr->pm_remove_write(m); +} + +int +pmap_ts_referenced(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_ts_referenced(m)); +} + +void * +pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) +{ + + return (pmap_methods_ptr->pm_mapdev_attr(pa, size, mode)); +} + +void * +pmap_mapdev(vm_paddr_t pa, vm_size_t size) +{ + + return (pmap_methods_ptr->pm_mapdev_attr(pa, size, PAT_UNCACHEABLE)); +} + +void * +pmap_mapbios(vm_paddr_t pa, vm_size_t size) +{ + + return (pmap_methods_ptr->pm_mapdev_attr(pa, size, PAT_WRITE_BACK)); +} + +void +pmap_unmapdev(vm_offset_t va, vm_size_t size) +{ + + pmap_methods_ptr->pm_unmapdev(va, size); +} + +void +pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) +{ + + pmap_methods_ptr->pm_page_set_memattr(m, ma); +} + +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + + return (pmap_methods_ptr->pm_extract(pmap, va)); +} + +vm_page_t +pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +{ + + return (pmap_methods_ptr->pm_extract_and_hold(pmap, va, prot)); +} + +vm_offset_t +pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) +{ + + return (pmap_methods_ptr->pm_map(virt, start, end, prot)); +} + +void +pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) +{ + + pmap_methods_ptr->pm_qenter(sva, ma, count); +} + +void +pmap_qremove(vm_offset_t sva, int count) +{ + + pmap_methods_ptr->pm_qremove(sva, count); +} + +void +pmap_release(pmap_t pmap) +{ + + pmap_methods_ptr->pm_release(pmap); +} + +void +pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + + pmap_methods_ptr->pm_remove(pmap, sva, eva); +} + +void +pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + + pmap_methods_ptr->pm_protect(pmap, sva, eva, prot); +} + +void +pmap_remove_all(vm_page_t m) +{ + + pmap_methods_ptr->pm_remove_all(m); +} + +void +pmap_init(void) +{ + + pmap_methods_ptr->pm_init(); +} + +void +pmap_init_pat(void) +{ + + pmap_methods_ptr->pm_init_pat(); +} + +void +pmap_growkernel(vm_offset_t addr) +{ + + pmap_methods_ptr->pm_growkernel(addr); +} + +void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + + pmap_methods_ptr->pm_invalidate_page(pmap, va); +} + +void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + + pmap_methods_ptr->pm_invalidate_range(pmap, sva, eva); +} + +void +pmap_invalidate_all(pmap_t pmap) +{ + + pmap_methods_ptr->pm_invalidate_all(pmap); +} + +void +pmap_invalidate_cache(void) +{ + + pmap_methods_ptr->pm_invalidate_cache(); +} + +void +pmap_kenter(vm_offset_t va, vm_paddr_t pa) +{ + + pmap_methods_ptr->pm_kenter(va, pa); +} + +void +pmap_kremove(vm_offset_t va) +{ + + pmap_methods_ptr->pm_kremove(va); +} + +extern struct pmap_methods pmap_pae_methods, pmap_nopae_methods; +int pae_mode; +SYSCTL_INT(_vm_pmap, OID_AUTO, pae_mode, CTLFLAG_RD, + &pae_mode, 1, + "PAE"); + +void +pmap_cold(void) +{ + + if ((cpu_feature & CPUID_PAE) != 0) { + pae_mode = 1; + pmap_methods_ptr = &pmap_pae_methods; + pmap_pae_cold(); + } else { + pmap_methods_ptr = &pmap_nopae_methods; + pmap_nopae_cold(); + } +} Property changes on: head/sys/i386/i386/pmap_base.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/i386/i386/pmap_nopae.c =================================================================== --- head/sys/i386/i386/pmap_nopae.c (nonexistent) +++ head/sys/i386/i386/pmap_nopae.c (revision 343567) @@ -0,0 +1,48 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_apic.h" +#include "opt_cpu.h" +#include "opt_pmap.h" +#include "opt_smp.h" +#include "opt_vm.h" + +#include +#include +#include +#define PMTYPE pmap_nopae_ +#include +#include +_Static_assert(sizeof(struct pmap_KBI) >= sizeof(struct pmap), "pmap KBI"); +#include "pmap.c" Property changes on: head/sys/i386/i386/pmap_nopae.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/i386/i386/pmap_pae.c =================================================================== --- head/sys/i386/i386/pmap_pae.c (nonexistent) +++ head/sys/i386/i386/pmap_pae.c (revision 343567) @@ -0,0 +1,49 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_apic.h" +#include "opt_cpu.h" +#include "opt_pmap.h" +#include "opt_smp.h" +#include "opt_vm.h" + +#define PMAP_PAE_COMP +#include +#include +#include +#define PMTYPE pmap_pae_ +#include +#include +_Static_assert(sizeof(struct pmap_KBI) >= sizeof(struct pmap), "pmap KBI"); +#include "pmap.c" Property changes on: head/sys/i386/i386/pmap_pae.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/i386/i386/trap.c =================================================================== --- head/sys/i386/i386/trap.c (revision 343566) +++ head/sys/i386/i386/trap.c (revision 343567) @@ -1,1166 +1,1163 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include "opt_trap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef KDTRACE_HOOKS #include #endif void trap(struct trapframe *frame); void syscall(struct trapframe *frame); static int trap_pfault(struct trapframe *, int, vm_offset_t); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(void); extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall); +extern uint64_t pg_nx; #define MAX_TRAP_MSG 32 struct trap_data { bool ei; const char *msg; }; static const struct trap_data trap_data[] = { [T_PRIVINFLT] = { .ei = true, .msg = "privileged instruction fault" }, [T_BPTFLT] = { .ei = false, .msg = "breakpoint instruction fault" }, [T_ARITHTRAP] = { .ei = true, .msg = "arithmetic trap" }, [T_PROTFLT] = { .ei = true, .msg = "general protection fault" }, [T_TRCTRAP] = { .ei = false, .msg = "debug exception" }, [T_PAGEFLT] = { .ei = true, .msg = "page fault" }, [T_ALIGNFLT] = { .ei = true, .msg = "alignment fault" }, [T_DIVIDE] = { .ei = true, .msg = "integer divide fault" }, [T_NMI] = { .ei = false, .msg = "non-maskable interrupt trap" }, [T_OFLOW] = { .ei = true, .msg = "overflow trap" }, [T_BOUND] = { .ei = true, .msg = "FPU bounds check fault" }, [T_DNA] = { .ei = true, .msg = "FPU device not available" }, [T_DOUBLEFLT] = { .ei = false, .msg = "double fault" }, [T_FPOPFLT] = { .ei = true, .msg = "FPU operand fetch fault" }, [T_TSSFLT] = { .ei = true, .msg = "invalid TSS fault" }, [T_SEGNPFLT] = { .ei = true, .msg = "segment not present fault" }, [T_STKFLT] = { .ei = true, .msg = "stack fault" }, [T_MCHK] = { .ei = true, .msg = "machine check trap" }, [T_XMMFLT] = { .ei = true, .msg = "SIMD floating-point exception" }, [T_DTRACE_RET] ={ .ei = true, .msg = "DTrace pid return trap" }, }; static bool trap_enable_intr(int trapno) { MPASS(trapno > 0); if (trapno < nitems(trap_data) && trap_data[trapno].msg != NULL) return (trap_data[trapno].ei); return (false); } static const char * trap_msg(int trapno) { const char *res; static const char unkn[] = "UNKNOWN"; res = NULL; if (trapno < nitems(trap_data)) res = trap_data[trapno].msg; if (res == NULL) res = unkn; return (res); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif static int prot_fault_translation = 0; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { ksiginfo_t ksi; struct thread *td; struct proc *p; int signo, ucode; u_int type; register_t addr, dr6; vm_offset_t eva; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif td = curthread; p = td->td_proc; signo = 0; ucode = 0; addr = 0; dr6 = 0; VM_CNT_INC(v_trap); type = frame->tf_trapno; KASSERT((read_eflags() & PSL_I) == 0, ("trap: interrupts enabled, type %d frame %p", type, frame)); #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A non-zero return value from the * hook means that the NMI was consumed by it and that we can * return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(frame) != 0) return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) return; #endif } if (type == T_MCHK) { mca_intr(); return; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) return; #endif /* * We must not allow context switches until %cr2 is read. * Also, for some Cyrix CPUs, %cr2 is clobbered by interrupts. * All faults use interrupt gates, so %cr2 can be safely read * now, before optional enable of the interrupts below. */ if (type == T_PAGEFLT) eva = rcr2(); /* * Buggy application or kernel code has disabled interrupts * and then trapped. Enabling interrupts now is wrong, but it * is better than running with interrupts disabled until they * are accidentally enabled later. */ if ((frame->tf_eflags & PSL_I) == 0 && TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) uprintf("pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); /* * Conditionally reenable interrupts. If we hold a spin lock, * then we must not reenable interrupts. This might be a * spurious page fault. */ if (trap_enable_intr(type) && td->td_md.md_spinlock_count == 0 && frame->tf_eip != (int)cpu_switch_load_gs) enable_intr(); if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ enable_intr(); #ifdef KDTRACE_HOOKS if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(frame) == 0) return; #endif signo = SIGTRAP; ucode = TRAP_BRKPT; break; case T_TRCTRAP: /* debug exception */ enable_intr(); user_trctrap_out: signo = SIGTRAP; ucode = TRAP_TRACE; dr6 = rdr6(); if ((dr6 & DBREG_DR6_BS) != 0) { PROC_LOCK(td->td_proc); if ((td->td_dbgflags & TDB_STEP) != 0) { td->td_frame->tf_eflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; } PROC_UNLOCK(td->td_proc); } break; case T_ARITHTRAP: /* arithmetic trap */ ucode = npxtrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; /* * The following two traps can happen in vm86 mode, * and, if so, we want to handle them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { load_dr6(rdr6() | 0x4000); goto user_trctrap_out; } if (signo == 0) goto user; break; } signo = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ signo = trap_pfault(frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (signo == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; /* Proceed as in that case. */ ucode = ILL_PRVOPC; signo = SIGILL; break; } #endif if (signo == -1) return; if (signo == 0) goto user; if (signo == SIGSEGV) ucode = SEGV_MAPERR; else if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { signo = SIGSEGV; ucode = SEGV_ACCERR; } else { signo = SIGBUS; ucode = T_PAGEFLT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ signo = SIGBUS; ucode = T_PAGEFLT; } else { /* * Always SIGSEGV mode. */ signo = SIGSEGV; ucode = SEGV_ACCERR; } addr = eva; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) return; uprintf("pid %d killed due to lack of floating point\n", p->p_pid); signo = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = npxtrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); if (dtrace_return_probe_ptr != NULL) dtrace_return_probe_ptr(frame); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE, eva); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); if (npxdna()) return; break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); return; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto kernel_trctrap; } if (signo != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); return; } /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (curpcb->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif return; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. * * N.B. Comparing to long mode, 32-bit mode * does not push %esp on the trap frame, * because iretl faulted while in ring 0. As * the consequence, there is no need to fixup * the stack pointer for doreti_iret_fault, * the fixup and the complimentary trap() call * are executed on the main thread stack, not * on the trampoline stack. */ if (frame->tf_eip == (int)doreti_iret + setidt_disp) { frame->tf_eip = (int)doreti_iret_fault + setidt_disp; return; } if (type == T_STKFLT) break; if (frame->tf_eip == (int)doreti_popl_ds + setidt_disp) { frame->tf_eip = (int)doreti_popl_ds_fault + setidt_disp; return; } if (frame->tf_eip == (int)doreti_popl_es + setidt_disp) { frame->tf_eip = (int)doreti_popl_es_fault + setidt_disp; return; } if (frame->tf_eip == (int)doreti_popl_fs + setidt_disp) { frame->tf_eip = (int)doreti_popl_fs_fault + setidt_disp; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* debug exception */ kernel_trctrap: /* Clear any pending debug events. */ dr6 = rdr6(); load_dr6(0); /* * Ignore debug register exceptions due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap(dr6) && !(curpcb->pcb_flags & PCB_VM86CALL)) return; /* * Malicious user code can configure a debug * register watchpoint to trap on data access * to the top of stack and then execute 'pop * %ss; int 3'. Due to exception deferral for * 'pop %ss', the CPU will not interrupt 'int * 3' to raise the DB# exception for the debug * register but will postpone the DB# until * execution of the first instruction of the * BP# handler (in kernel mode). Normally the * previous check would ignore DB# exceptions * for watchpoints on user addresses raised in * kernel mode. However, some CPU errata * include cases where DB# exceptions do not * properly set bits in %dr6, e.g. Haswell * HSD23 and Skylake-X SKZ24. * * A deferred DB# can also be raised on the * first instructions of system call entry * points or single-step traps via similar use * of 'pop %ss' or 'mov xxx, %ss'. */ if (frame->tf_eip == (uintptr_t)IDTVEC(int0x80_syscall) + setidt_disp || frame->tf_eip == (uintptr_t)IDTVEC(bpt) + setidt_disp || frame->tf_eip == (uintptr_t)IDTVEC(dbg) + setidt_disp) return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB if (kdb_trap(type, dr6, frame)) return; #endif break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } trap_fatal(frame, eva); return; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %x code %d type %d " "addr 0x%x ss 0x%04x esp 0x%08x cs 0x%04x eip 0x%08x " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_ss, frame->tf_esp, frame->tf_cs, frame->tf_eip, fubyte((void *)(frame->tf_eip + 0)), fubyte((void *)(frame->tf_eip + 1)), fubyte((void *)(frame->tf_eip + 2)), fubyte((void *)(frame->tf_eip + 3)), fubyte((void *)(frame->tf_eip + 4)), fubyte((void *)(frame->tf_eip + 5)), fubyte((void *)(frame->tf_eip + 6)), fubyte((void *)(frame->tf_eip + 7))); } KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } static int trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) { struct thread *td; struct proc *p; vm_offset_t va; vm_map_t map; int rv; vm_prot_t ftype; td = curthread; p = td->td_proc; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= PMAP_TRM_MIN_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return (-2); #endif if (usermode) return (SIGSEGV); trap_fatal(frame, eva); return (-1); } else { map = usermode ? &p->p_vmspace->vm_map : kernel_map; /* * Kernel cannot access a user-space address directly * because user pages are not mapped. Also, page * faults must not be caused during the interrupts. */ if (!usermode && td->td_intr_nesting_level != 0) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; -#if defined(PAE) || defined(PAE_TABLES) else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; -#endif else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss, esp; u_int type; struct soft_segment_descriptor softseg; #ifdef KDB bool handled; #endif code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg(type), frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", -#if defined(PAE) || defined(PAE_TABLES) pg_nx != 0 ? (code & PGEX_I ? " instruction" : " data") : -#endif "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); } else { printf("error code = %#x\n", code); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_trap) { kdb_why = KDB_WHY_TRAP; frame->tf_err = eva; /* smuggle fault address to ddb */ handled = kdb_trap(type, 0, frame); frame->tf_err = code; /* restore error code */ kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif printf("trap number = %d\n", type); if (trap_msg(type) != NULL) panic("%s", trap_msg(type)); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). * * XXX Note that the current PTD gets replaced by IdlePTD when the * task switch occurs. This means that the stack that was active at * the time of the double fault is not available at unless * the machine was idle when the double fault occurred. The downside * of this is that "trace " in ddb won't work. */ void dblfault_handler(void) { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault:\n"); printf("eip = 0x%x\n", PCPU_GET(common_tssp)->tss_eip); printf("esp = 0x%x\n", PCPU_GET(common_tssp)->tss_esp); printf("ebp = 0x%x\n", PCPU_GET(common_tssp)->tss_ebp); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; long tmp; int error; #ifdef COMPAT_43 u_int32_t eip; int cs; #endif p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; #ifdef COMPAT_43 if (__predict_false(frame->tf_cs == 7 && frame->tf_eip == 2)) { /* * In lcall $7,$0 after int $0x80. Convert the user * frame to what it would be for a direct int 0x80 instead * of lcall $7,$0, by popping the lcall return address. */ error = fueword32((void *)frame->tf_esp, &eip); if (error == -1) return (EFAULT); cs = fuword16((void *)(frame->tf_esp + sizeof(u_int32_t))); if (cs == -1) return (EFAULT); /* * Unwind in-kernel frame after all stack frame pieces * were successfully read. */ frame->tf_eip = eip; frame->tf_cs = cs; frame->tf_esp += 2 * sizeof(u_int32_t); frame->tf_err = 7; /* size of lcall $7,$0 */ } #endif sa->code = frame->tf_eax; params = (caddr_t)frame->tf_esp + sizeof(uint32_t); /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(uint32_t); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (params != NULL && sa->narg != 0) error = copyin(params, (caddr_t)sa->args, (u_int)(sa->narg * sizeof(uint32_t))); else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; } return (error); } #include "../../kern/subr_syscall.c" /* * syscall - system call request C handler. A system call is * essentially treated as a trap by reusing the frame layout. */ void syscall(struct trapframe *frame) { struct thread *td; register_t orig_tf_eflags; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!(TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0)) { panic("syscall"); /* NOT REACHED */ } #endif orig_tf_eflags = frame->tf_eflags; td = curthread; td->td_frame = frame; error = syscallenter(td); /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { frame->tf_eflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); syscallret(td, error); } Index: head/sys/i386/i386/vm86.c =================================================================== --- head/sys/i386/i386/vm86.c (revision 343566) +++ head/sys/i386/i386/vm86.c (revision 343567) @@ -1,782 +1,885 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int vm86pa; extern struct pcb *vm86pcb; static struct mtx vm86_lock; extern int vm86_bioscall(struct vm86frame *); extern void vm86_biosret(struct vm86frame *); void vm86_prepcall(struct vm86frame *); struct system_map { int type; vm_offset_t start; vm_offset_t end; }; #define HLT 0xf4 #define CLI 0xfa #define STI 0xfb #define PUSHF 0x9c #define POPF 0x9d #define INTn 0xcd #define IRET 0xcf #define CALLm 0xff #define OPERAND_SIZE_PREFIX 0x66 #define ADDRESS_SIZE_PREFIX 0x67 #define PUSH_MASK ~(PSL_VM | PSL_RF | PSL_I) #define POP_MASK ~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL) static int vm86_suword16(volatile void *base, int word) { if (curthread->td_critnest != 0) { *(volatile uint16_t *)base = word; return (0); } return (suword16(base, word)); } static int vm86_suword(volatile void *base, long word) { if (curthread->td_critnest != 0) { *(volatile long *)base = word; return (0); } return (suword(base, word)); } static int vm86_fubyte(volatile const void *base) { if (curthread->td_critnest != 0) return (*(volatile const u_char *)base); return (fubyte(base)); } static int vm86_fuword16(volatile const void *base) { if (curthread->td_critnest != 0) return (*(volatile const uint16_t *)base); return (fuword16(base)); } static long vm86_fuword(volatile const void *base) { if (curthread->td_critnest != 0) return (*(volatile const long *)base); return (fuword(base)); } static __inline caddr_t MAKE_ADDR(u_short sel, u_short off) { return ((caddr_t)((sel << 4) + off)); } static __inline void GET_VEC(u_int vec, u_short *sel, u_short *off) { *sel = vec >> 16; *off = vec & 0xffff; } static __inline u_int MAKE_VEC(u_short sel, u_short off) { return ((sel << 16) | off); } static __inline void PUSH(u_short x, struct vm86frame *vmf) { vmf->vmf_sp -= 2; vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x); } static __inline void PUSHL(u_int x, struct vm86frame *vmf) { vmf->vmf_sp -= 4; vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x); } static __inline u_short POP(struct vm86frame *vmf) { u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp)); vmf->vmf_sp += 2; return (x); } static __inline u_int POPL(struct vm86frame *vmf) { u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp)); vmf->vmf_sp += 4; return (x); } int vm86_emulate(struct vm86frame *vmf) { struct vm86_kernel *vm86; caddr_t addr; u_char i_byte; u_int temp_flags; int inc_ip = 1; int retcode = 0; /* * pcb_ext contains the address of the extension area, or zero if * the extension is not present. (This check should not be needed, * as we can't enter vm86 mode until we set up an extension area) */ if (curpcb->pcb_ext == 0) return (SIGBUS); vm86 = &curpcb->pcb_ext->ext_vm86; if (vmf->vmf_eflags & PSL_T) retcode = SIGTRAP; addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip); i_byte = vm86_fubyte(addr); if (i_byte == ADDRESS_SIZE_PREFIX) { i_byte = vm86_fubyte(++addr); inc_ip++; } if (vm86->vm86_has_vme) { switch (i_byte) { case OPERAND_SIZE_PREFIX: i_byte = vm86_fubyte(++addr); inc_ip++; switch (i_byte) { case PUSHF: if (vmf->vmf_eflags & PSL_VIF) PUSHL((vmf->vmf_eflags & PUSH_MASK) | PSL_IOPL | PSL_I, vmf); else PUSHL((vmf->vmf_eflags & PUSH_MASK) | PSL_IOPL, vmf); vmf->vmf_ip += inc_ip; return (retcode); case POPF: temp_flags = POPL(vmf) & POP_MASK; vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK) | temp_flags | PSL_VM | PSL_I; vmf->vmf_ip += inc_ip; if (temp_flags & PSL_I) { vmf->vmf_eflags |= PSL_VIF; if (vmf->vmf_eflags & PSL_VIP) break; } else { vmf->vmf_eflags &= ~PSL_VIF; } return (retcode); } break; /* VME faults here if VIP is set, but does not set VIF. */ case STI: vmf->vmf_eflags |= PSL_VIF; vmf->vmf_ip += inc_ip; if ((vmf->vmf_eflags & PSL_VIP) == 0) { uprintf("fatal sti\n"); return (SIGKILL); } break; /* VME if no redirection support */ case INTn: break; /* VME if trying to set PSL_T, or PSL_I when VIP is set */ case POPF: temp_flags = POP(vmf) & POP_MASK; vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK) | temp_flags | PSL_VM | PSL_I; vmf->vmf_ip += inc_ip; if (temp_flags & PSL_I) { vmf->vmf_eflags |= PSL_VIF; if (vmf->vmf_eflags & PSL_VIP) break; } else { vmf->vmf_eflags &= ~PSL_VIF; } return (retcode); /* VME if trying to set PSL_T, or PSL_I when VIP is set */ case IRET: vmf->vmf_ip = POP(vmf); vmf->vmf_cs = POP(vmf); temp_flags = POP(vmf) & POP_MASK; vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK) | temp_flags | PSL_VM | PSL_I; if (temp_flags & PSL_I) { vmf->vmf_eflags |= PSL_VIF; if (vmf->vmf_eflags & PSL_VIP) break; } else { vmf->vmf_eflags &= ~PSL_VIF; } return (retcode); } return (SIGBUS); } switch (i_byte) { case OPERAND_SIZE_PREFIX: i_byte = vm86_fubyte(++addr); inc_ip++; switch (i_byte) { case PUSHF: if (vm86->vm86_eflags & PSL_VIF) PUSHL((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL | PSL_I, vmf); else PUSHL((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf); vmf->vmf_ip += inc_ip; return (retcode); case POPF: temp_flags = POPL(vmf) & POP_MASK; vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK) | temp_flags | PSL_VM | PSL_I; vmf->vmf_ip += inc_ip; if (temp_flags & PSL_I) { vm86->vm86_eflags |= PSL_VIF; if (vm86->vm86_eflags & PSL_VIP) break; } else { vm86->vm86_eflags &= ~PSL_VIF; } return (retcode); } return (SIGBUS); case CLI: vm86->vm86_eflags &= ~PSL_VIF; vmf->vmf_ip += inc_ip; return (retcode); case STI: /* if there is a pending interrupt, go to the emulator */ vm86->vm86_eflags |= PSL_VIF; vmf->vmf_ip += inc_ip; if (vm86->vm86_eflags & PSL_VIP) break; return (retcode); case PUSHF: if (vm86->vm86_eflags & PSL_VIF) PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL | PSL_I, vmf); else PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf); vmf->vmf_ip += inc_ip; return (retcode); case INTn: i_byte = vm86_fubyte(addr + 1); if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0) break; if (vm86->vm86_eflags & PSL_VIF) PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL | PSL_I, vmf); else PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf); PUSH(vmf->vmf_cs, vmf); PUSH(vmf->vmf_ip + inc_ip + 1, vmf); /* increment IP */ GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)), &vmf->vmf_cs, &vmf->vmf_ip); vmf->vmf_flags &= ~PSL_T; vm86->vm86_eflags &= ~PSL_VIF; return (retcode); case IRET: vmf->vmf_ip = POP(vmf); vmf->vmf_cs = POP(vmf); temp_flags = POP(vmf) & POP_MASK; vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK) | temp_flags | PSL_VM | PSL_I; if (temp_flags & PSL_I) { vm86->vm86_eflags |= PSL_VIF; if (vm86->vm86_eflags & PSL_VIP) break; } else { vm86->vm86_eflags &= ~PSL_VIF; } return (retcode); case POPF: temp_flags = POP(vmf) & POP_MASK; vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK) | temp_flags | PSL_VM | PSL_I; vmf->vmf_ip += inc_ip; if (temp_flags & PSL_I) { vm86->vm86_eflags |= PSL_VIF; if (vm86->vm86_eflags & PSL_VIP) break; } else { vm86->vm86_eflags &= ~PSL_VIF; } return (retcode); } return (SIGBUS); } #define PGTABLE_SIZE ((1024 + 64) * 1024 / PAGE_SIZE) #define INTMAP_SIZE 32 #define IOMAP_SIZE ctob(IOPAGES) #define TSS_SIZE \ (sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \ INTMAP_SIZE + IOMAP_SIZE + 1) -struct vm86_layout { - pt_entry_t vml_pgtbl[PGTABLE_SIZE]; +struct vm86_layout_pae { + uint64_t vml_pgtbl[PGTABLE_SIZE]; struct pcb vml_pcb; struct pcb_ext vml_ext; char vml_intmap[INTMAP_SIZE]; char vml_iomap[IOMAP_SIZE]; char vml_iomap_trailer; }; -void -vm86_initialize(void) +struct vm86_layout_nopae { + uint32_t vml_pgtbl[PGTABLE_SIZE]; + struct pcb vml_pcb; + struct pcb_ext vml_ext; + char vml_intmap[INTMAP_SIZE]; + char vml_iomap[IOMAP_SIZE]; + char vml_iomap_trailer; +}; + +_Static_assert(sizeof(struct vm86_layout_pae) <= ctob(3), + "struct vm86_layout_pae exceeds space allocated in locore.s"); +_Static_assert(sizeof(struct vm86_layout_nopae) <= ctob(3), + "struct vm86_layout_nopae exceeds space allocated in locore.s"); + +static void +vm86_initialize_pae(void) { int i; u_int *addr; - struct vm86_layout *vml = (struct vm86_layout *)vm86paddr; + struct vm86_layout_pae *vml; struct pcb *pcb; struct pcb_ext *ext; struct soft_segment_descriptor ssd = { 0, /* segment base address (overwritten) */ 0, /* length (overwritten) */ SDT_SYS386TSS, /* segment type */ 0, /* priority level */ 1, /* descriptor present */ 0, 0, 0, /* default 16 size */ 0 /* granularity */ }; /* - * this should be a compile time error, but cpp doesn't grok sizeof(). - */ - if (sizeof(struct vm86_layout) > ctob(3)) - panic("struct vm86_layout exceeds space allocated in locore.s"); - - /* * Below is the memory layout that we use for the vm86 region. * * +--------+ * | | * | | * | page 0 | * | | +--------+ * | | | stack | * +--------+ +--------+ <--------- vm86paddr * | | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes * | | +--------+ * | | | PCB | size: ~240 bytes * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS) * | | +--------+ * | | |int map | * | | +--------+ * +--------+ | | * | page 2 | | I/O | * +--------+ | bitmap | * | page 3 | | | * | | +--------+ * +--------+ */ /* * A rudimentary PCB must be installed, in order to get to the * PCB extension area. We use the PCB area as a scratchpad for * data storage, the layout of which is shown below. * * pcb_esi = new PTD entry 0 * pcb_ebp = pointer to frame on vm86 stack * pcb_esp = stack frame pointer at time of switch * pcb_ebx = va of vm86 page table * pcb_eip = argument pointer to initial call * pcb_vm86[0] = saved TSS descriptor, word 0 * pcb_vm86[1] = saved TSS descriptor, word 1 */ #define new_ptd pcb_esi #define vm86_frame pcb_ebp #define pgtable_va pcb_ebx + vml = (struct vm86_layout_pae *)vm86paddr; pcb = &vml->vml_pcb; ext = &vml->vml_ext; mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF); bzero(pcb, sizeof(struct pcb)); pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U; pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame); pcb->pgtable_va = vm86paddr; - pcb->pcb_flags = PCB_VM86CALL; + pcb->pcb_flags = PCB_VM86CALL; pcb->pcb_ext = ext; - bzero(ext, sizeof(struct pcb_ext)); + bzero(ext, sizeof(struct pcb_ext)); ext->ext_tss.tss_esp0 = vm86paddr; ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - ext->ext_tss.tss_ioopt = + ext->ext_tss.tss_ioopt = ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16; ext->ext_iomap = vml->vml_iomap; ext->ext_vm86.vm86_intmap = vml->vml_intmap; if (cpu_feature & CPUID_VME) ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0); addr = (u_int *)ext->ext_vm86.vm86_intmap; for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++) *addr++ = 0; vml->vml_iomap_trailer = 0xff; ssd.ssd_base = (u_int)&ext->ext_tss; - ssd.ssd_limit = TSS_SIZE - 1; + ssd.ssd_limit = TSS_SIZE - 1; ssdtosd(&ssd, &ext->ext_tssd); vm86pcb = pcb; #if 0 /* * use whatever is leftover of the vm86 page layout as a * message buffer so we can capture early output. */ msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout), ctob(3) - sizeof(struct vm86_layout)); #endif } +static void +vm86_initialize_nopae(void) +{ + int i; + u_int *addr; + struct vm86_layout_nopae *vml; + struct pcb *pcb; + struct pcb_ext *ext; + struct soft_segment_descriptor ssd = { + 0, /* segment base address (overwritten) */ + 0, /* length (overwritten) */ + SDT_SYS386TSS, /* segment type */ + 0, /* priority level */ + 1, /* descriptor present */ + 0, 0, + 0, /* default 16 size */ + 0 /* granularity */ + }; + + vml = (struct vm86_layout_nopae *)vm86paddr; + pcb = &vml->vml_pcb; + ext = &vml->vml_ext; + + mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF); + + bzero(pcb, sizeof(struct pcb)); + pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U; + pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame); + pcb->pgtable_va = vm86paddr; + pcb->pcb_flags = PCB_VM86CALL; + pcb->pcb_ext = ext; + + bzero(ext, sizeof(struct pcb_ext)); + ext->ext_tss.tss_esp0 = vm86paddr; + ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); + ext->ext_tss.tss_ioopt = + ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16; + ext->ext_iomap = vml->vml_iomap; + ext->ext_vm86.vm86_intmap = vml->vml_intmap; + + if (cpu_feature & CPUID_VME) + ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0); + + addr = (u_int *)ext->ext_vm86.vm86_intmap; + for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++) + *addr++ = 0; + vml->vml_iomap_trailer = 0xff; + + ssd.ssd_base = (u_int)&ext->ext_tss; + ssd.ssd_limit = TSS_SIZE - 1; + ssdtosd(&ssd, &ext->ext_tssd); + + vm86pcb = pcb; + +#if 0 + /* + * use whatever is leftover of the vm86 page layout as a + * message buffer so we can capture early output. + */ + msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout), + ctob(3) - sizeof(struct vm86_layout)); +#endif +} + +void +vm86_initialize(void) +{ + + if (pae_mode) + vm86_initialize_pae(); + else + vm86_initialize_nopae(); +} + vm_offset_t vm86_getpage(struct vm86context *vmc, int pagenum) { int i; for (i = 0; i < vmc->npages; i++) if (vmc->pmap[i].pte_num == pagenum) return (vmc->pmap[i].kva); return (0); } vm_offset_t vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva) { int i, flags = 0; for (i = 0; i < vmc->npages; i++) if (vmc->pmap[i].pte_num == pagenum) goto overlap; if (vmc->npages == VM86_PMAPSIZE) goto full; /* XXX grow map? */ if (kva == 0) { kva = (vm_offset_t)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); flags = VMAP_MALLOC; } i = vmc->npages++; vmc->pmap[i].flags = flags; vmc->pmap[i].kva = kva; vmc->pmap[i].pte_num = pagenum; return (kva); overlap: panic("vm86_addpage: overlap"); full: panic("vm86_addpage: not enough room"); } /* * called from vm86_bioscall, while in vm86 address space, to finalize setup. */ void vm86_prepcall(struct vm86frame *vmf) { struct vm86_kernel *vm86; uint32_t *stack; uint8_t *code; code = (void *)0xa00; stack = (void *)(0x1000 - 2); /* keep aligned */ if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) { /* interrupt call requested */ code[0] = INTn; code[1] = vmf->vmf_trapno & 0xff; code[2] = HLT; vmf->vmf_ip = (uintptr_t)code; vmf->vmf_cs = 0; } else { code[0] = HLT; stack--; stack[0] = MAKE_VEC(0, (uintptr_t)code); } vmf->vmf_sp = (uintptr_t)stack; vmf->vmf_ss = 0; vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = 0; vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER; vm86 = &curpcb->pcb_ext->ext_vm86; if (!vm86->vm86_has_vme) vm86->vm86_eflags = vmf->vmf_eflags; /* save VIF, VIP */ } /* * vm86 trap handler; determines whether routine succeeded or not. * Called while in vm86 space, returns to calling process. */ void vm86_trap(struct vm86frame *vmf) { void (*p)(struct vm86frame *); caddr_t addr; /* "should not happen" */ if ((vmf->vmf_eflags & PSL_VM) == 0) panic("vm86_trap called, but not in vm86 mode"); addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip); if (*(u_char *)addr == HLT) vmf->vmf_trapno = vmf->vmf_eflags & PSL_C; else vmf->vmf_trapno = vmf->vmf_trapno << 16; p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret + setidt_disp); p(vmf); } int vm86_intcall(int intnum, struct vm86frame *vmf) { int (*p)(struct vm86frame *); int retval; if (intnum < 0 || intnum > 0xff) return (EINVAL); vmf->vmf_trapno = intnum; p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall + setidt_disp); mtx_lock(&vm86_lock); critical_enter(); retval = p(vmf); critical_exit(); mtx_unlock(&vm86_lock); return (retval); } /* * struct vm86context contains the page table to use when making * vm86 calls. If intnum is a valid interrupt number (0-255), then * the "interrupt trampoline" will be used, otherwise we use the * caller's cs:ip routine. */ int vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc) { - pt_entry_t *pte; + uint64_t *pte_pae; + uint32_t *pte_nopae; int (*p)(struct vm86frame *); vm_paddr_t page; int i, entry, retval; - pte = (pt_entry_t *)vm86paddr; mtx_lock(&vm86_lock); - for (i = 0; i < vmc->npages; i++) { - page = vtophys(vmc->pmap[i].kva & PG_FRAME); - entry = vmc->pmap[i].pte_num; - vmc->pmap[i].old_pte = pte[entry]; - pte[entry] = page | PG_V | PG_RW | PG_U; - pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + if (pae_mode) { + pte_pae = (uint64_t *)vm86paddr; + for (i = 0; i < vmc->npages; i++) { + page = vtophys(vmc->pmap[i].kva & PG_FRAME_PAE); + entry = vmc->pmap[i].pte_num; + vmc->pmap[i].old_pte = pte_pae[entry]; + pte_pae[entry] = page | PG_V | PG_RW | PG_U; + pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + } + } else { + pte_nopae = (uint32_t *)vm86paddr; + for (i = 0; i < vmc->npages; i++) { + page = vtophys(vmc->pmap[i].kva & PG_FRAME_NOPAE); + entry = vmc->pmap[i].pte_num; + vmc->pmap[i].old_pte = pte_nopae[entry]; + pte_nopae[entry] = page | PG_V | PG_RW | PG_U; + pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + } } vmf->vmf_trapno = intnum; p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall + setidt_disp); critical_enter(); retval = p(vmf); critical_exit(); - for (i = 0; i < vmc->npages; i++) { - entry = vmc->pmap[i].pte_num; - pte[entry] = vmc->pmap[i].old_pte; - pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + if (pae_mode) { + for (i = 0; i < vmc->npages; i++) { + entry = vmc->pmap[i].pte_num; + pte_pae[entry] = vmc->pmap[i].old_pte; + pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + } + } else { + for (i = 0; i < vmc->npages; i++) { + entry = vmc->pmap[i].pte_num; + pte_nopae[entry] = vmc->pmap[i].old_pte; + pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + } } mtx_unlock(&vm86_lock); return (retval); } vm_offset_t vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off) { int i, page; vm_offset_t addr; addr = (vm_offset_t)MAKE_ADDR(sel, off); page = addr >> PAGE_SHIFT; for (i = 0; i < vmc->npages; i++) if (page == vmc->pmap[i].pte_num) return (vmc->pmap[i].kva + (addr & PAGE_MASK)); return (0); } int vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel, u_short *off) { int i; for (i = 0; i < vmc->npages; i++) if (kva >= vmc->pmap[i].kva && kva < vmc->pmap[i].kva + PAGE_SIZE) { *off = kva - vmc->pmap[i].kva; *sel = vmc->pmap[i].pte_num << 8; return (1); } return (0); } int vm86_sysarch(struct thread *td, char *args) { int error = 0; struct i386_vm86_args ua; struct vm86_kernel *vm86; if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0) return (error); if (td->td_pcb->pcb_ext == 0) if ((error = i386_extend_pcb(td)) != 0) return (error); vm86 = &td->td_pcb->pcb_ext->ext_vm86; switch (ua.sub_op) { case VM86_INIT: { struct vm86_init_args sa; if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0) return (error); if (cpu_feature & CPUID_VME) vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0); else vm86->vm86_has_vme = 0; vm86->vm86_inited = 1; vm86->vm86_debug = sa.debug; bcopy(&sa.int_map, vm86->vm86_intmap, 32); } break; #if 0 case VM86_SET_VME: { struct vm86_vme_args sa; if ((cpu_feature & CPUID_VME) == 0) return (ENODEV); if (error = copyin(ua.sub_args, &sa, sizeof(sa))) return (error); if (sa.state) load_cr4(rcr4() | CR4_VME); else load_cr4(rcr4() & ~CR4_VME); } break; #endif case VM86_GET_VME: { struct vm86_vme_args sa; sa.state = (rcr4() & CR4_VME ? 1 : 0); error = copyout(&sa, ua.sub_args, sizeof(sa)); } break; case VM86_INTCALL: { struct vm86_intcall_args sa; if ((error = priv_check(td, PRIV_VM86_INTCALL))) return (error); if ((error = copyin(ua.sub_args, &sa, sizeof(sa)))) return (error); if ((error = vm86_intcall(sa.intnum, &sa.vmf))) return (error); error = copyout(&sa, ua.sub_args, sizeof(sa)); } break; default: error = EINVAL; } return (error); } Index: head/sys/i386/i386/vm86bios.s =================================================================== --- head/sys/i386/i386/vm86bios.s (revision 343566) +++ head/sys/i386/i386/vm86bios.s (revision 343567) @@ -1,170 +1,174 @@ /*- * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include /* miscellaneous asm macros */ #include #include "assym.inc" #define SCR_NEWPTD PCB_ESI /* readability macros */ #define SCR_VMFRAME PCB_EBP /* see vm86.c for explanation */ #define SCR_STACK PCB_ESP #define SCR_PGTABLE PCB_EBX #define SCR_ARGFRAME PCB_EIP #define SCR_TSS0 PCB_VM86 #define SCR_TSS1 (PCB_VM86+4) .data ALIGN_DATA .globl vm86pcb vm86pcb: .long 0 .text /* * vm86_bioscall(struct trapframe_vm86 *vm86) */ ENTRY(vm86_bioscall) movl vm86pcb,%edx /* scratch data area */ movl 4(%esp),%eax movl %eax,SCR_ARGFRAME(%edx) /* save argument pointer */ pushl %ebx pushl %ebp pushl %esi pushl %edi pushl %gs movl PCPU(CURTHREAD),%ecx cmpl %ecx,PCPU(FPCURTHREAD) /* do we need to save fp? */ jne 1f pushl %edx movl TD_PCB(%ecx),%ecx pushl PCB_SAVEFPU(%ecx) movl $npxsave,%eax call *%eax addl $4,%esp popl %edx /* recover our pcb */ 1: movl SCR_VMFRAME(%edx),%ebx /* target frame location */ movl %ebx,%edi /* destination */ movl SCR_ARGFRAME(%edx),%esi /* source (set on entry) */ movl $VM86_FRAMESIZE/4,%ecx /* sizeof(struct vm86frame)/4 */ cld rep movsl /* copy frame to new stack */ movl PCPU(CURPCB),%eax pushl %eax /* save curpcb */ movl %edx,PCPU(CURPCB) /* set curpcb to vm86pcb */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl 0(%ebx),%eax movl %eax,SCR_TSS0(%edx) /* save first word */ movl 4(%ebx),%eax andl $~0x200, %eax /* flip 386BSY -> 386TSS */ movl %eax,SCR_TSS1(%edx) /* save second word */ movl PCB_EXT(%edx),%edi /* vm86 tssd entry */ movl 0(%edi),%eax movl %eax,0(%ebx) movl 4(%edi),%eax movl %eax,4(%ebx) movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si movl %cr3,%eax pushl %eax /* save address space */ - movl IdlePTD,%ecx /* va (and pa) of Idle PTD */ - movl %ecx,%ebx + cmpb $0,pae_mode + jne 2f + movl IdlePTD_nopae,%ecx /* va (and pa) of Idle PTD */ + jmp 3f +2: movl IdlePTD_pae,%ecx +3: movl %ecx,%ebx movl 0(%ebx),%eax pushl %eax /* old ptde != 0 when booting */ pushl %ebx /* keep for reuse */ movl %esp,SCR_STACK(%edx) /* save current stack location */ movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */ movl %eax,0(%ebx) /* ... install as PTD entry 0 */ -#if defined(PAE) || defined(PAE_TABLES) + cmpb $0,pae_mode + je 4f movl IdlePDPT,%ecx -#endif - movl %ecx,%cr3 /* new page tables */ +4: movl %ecx,%cr3 /* new page tables */ movl SCR_VMFRAME(%edx),%esp /* switch to new stack */ pushl %esp movl $vm86_prepcall, %eax call *%eax /* finish setup */ add $4, %esp /* * Return via doreti */ MEXITCOUNT jmp doreti /* * vm86_biosret(struct trapframe_vm86 *vm86) */ ENTRY(vm86_biosret) movl vm86pcb,%edx /* data area */ movl 4(%esp),%esi /* source */ movl SCR_ARGFRAME(%edx),%edi /* destination */ movl $VM86_FRAMESIZE/4,%ecx /* size */ cld rep movsl /* copy frame to original frame */ movl SCR_STACK(%edx),%esp /* back to old stack */ popl %ebx /* saved va of Idle PTD */ popl %eax movl %eax,0(%ebx) /* restore old pte */ popl %eax movl %eax,%cr3 /* install old page table */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl SCR_TSS0(%edx),%eax movl %eax,0(%ebx) /* restore first word */ movl SCR_TSS1(%edx),%eax movl %eax,4(%ebx) /* restore second word */ movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si popl PCPU(CURPCB) /* restore curpcb/curproc */ movl SCR_ARGFRAME(%edx),%edx /* original stack frame */ movl TF_TRAPNO(%edx),%eax /* return (trapno) */ popl %gs popl %edi popl %esi popl %ebp popl %ebx ret /* back to our normal program */ Index: head/sys/i386/i386/vm_machdep.c =================================================================== --- head/sys/i386/i386/vm_machdep.c (revision 343566) +++ head/sys/i386/i386/vm_machdep.c (revision 343567) @@ -1,701 +1,673 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_npx.h" #include "opt_reset.h" #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NSFBUFS #define NSFBUFS (512 + maxusers * 16) #endif _Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread), "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread."); _Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb), "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb."); _Static_assert(__OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "__OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf."); union savefpu * get_pcb_user_save_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area")); return ((union savefpu *)p); } union savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { vm_offset_t p; p = (vm_offset_t)(pcb + 1); return ((union savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) - sizeof(struct pcb); return ((struct pcb *)p); } void * alloc_fpusave(int flags) { void *res; struct savefpu_ymm *sf; res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } return (res); } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt, *pldt1; mtx_lock_spin(&dt_lock); if ((pldt1 = mdp1->md_ldt) != NULL && pldt1->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt1->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_deref(pldt1); } else mtx_unlock_spin(&dt_lock); } return; } /* Ensure that td1's pcb is up to date. */ if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); critical_enter(); if (PCPU_GET(fpcurthread) == td1) npxsave(td1->td_pcb->pcb_save); critical_exit(); /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe * if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - VM86_STACK_SPACE) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * If the parent process has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame unless the debugger had set PF_FORK * on the parent. Otherwise, the child will receive a (likely * unexpected) SIGTRAP when it executes the first instruction after * returning to userland. */ if ((p1->p_pfsflags & PF_FORK) == 0) td2->td_frame->tf_eflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ -#if defined(PAE) || defined(PAE_TABLES) - pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); -#else - pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); -#endif + pcb2->pcb_cr3 = pmap_get_cr3(vmspace_pmap(p2->p_vmspace)); pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline + setidt_disp; /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = 0; /* Copy the LDT, if necessary. */ mtx_lock_spin(&dt_lock); if (mdp2->md_ldt != NULL) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&dt_lock); /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. Reset pc->pcb_gs * and %gs before we free it in case they refer to an LDT entry. */ mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) { td->td_pcb->pcb_gs = _udatasel; load_gs(_udatasel); user_ldt_free(td); } else mtx_unlock_spin(&dt_lock); } void cpu_thread_exit(struct thread *td) { critical_enter(); if (td == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); /* Disable any hardware breakpoints. */ if (td->td_pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); td->td_pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != NULL) { /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ pmap_trm_free(pcb->pcb_ext, ctob(IOPAGES + 1)); pcb->pcb_ext = NULL; } } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; struct xstate_hdr *xhdr; td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)((caddr_t)pcb - VM86_STACK_SPACE) - 1; pcb->pcb_ext = NULL; pcb->pcb_save = get_pcb_user_save_pcb(pcb); if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); } void cpu_set_syscall_retval(struct thread *td, int error) { switch (error) { case 0: td->td_frame->tf_eax = td->td_retval[0]; td->td_frame->tf_edx = td->td_retval[1]; td->td_frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, int * 0x80 is 2 bytes. We saved this in tf_err. */ td->td_frame->tf_eip -= td->td_frame->tf_err; break; case EJUSTRETURN: break; default: td->td_frame->tf_eax = SV_ABI_ERRNO(td->td_proc, error); td->td_frame->tf_eflags |= PSL_C; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE | PCB_KERNNPX); pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); /* * Create a new fresh stack for the new thread. */ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); /* If the current thread has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame. Otherwise, the new thread will * receive a (likely unexpected) SIGTRAP when it executes the first * instruction after returning to userland. */ td->td_frame->tf_eflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline + setidt_disp; pcb2->pcb_gs = rgs(); /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_cr3: cloned above. * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = PSL_KERNEL | PSL_I; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the entry * function. */ td->td_frame->tf_ebp = 0; td->td_frame->tf_esp = (((int)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_eip = (int)entry; /* Return address sentinel value to stop stack unwinding. */ suword((void *)td->td_frame->tf_esp, 0); /* Pass the argument to the entry point. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)arg); } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct segment_descriptor sd; uint32_t base; /* * Construct a descriptor and store it in the pcb for * the next context switch. Also store it in the gdt * so that the load of tf_fs into %fs will activate it * at return to userland. */ base = (uint32_t)tls_base; sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; sd.sd_p = 1; sd.sd_xx = 0; sd.sd_def32 = 1; sd.sd_gran = 1; critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; if (td == curthread) { PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); } critical_exit(); return (0); } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } /* * Get an sf_buf from the freelist. May block if none are available. */ void sf_buf_map(struct sf_buf *sf, int flags) { - pt_entry_t opte, *ptep; - /* - * Update the sf_buf's virtual-to-physical mapping, flushing the - * virtual address from the TLB. Since the reference count for - * the sf_buf's old mapping was zero, that mapping is not - * currently in use. Consequently, there is no need to exchange - * the old and new PTEs atomically, even under PAE. - */ - ptep = vtopte(sf->kva); - opte = *ptep; - *ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V | - pmap_cache_bits(kernel_pmap, sf->m->md.pat_mode, 0); - - /* - * Avoid unnecessary TLB invalidations: If the sf_buf's old - * virtual-to-physical mapping was not used, then any processor - * that has invalidated the sf_buf's virtual address from its TLB - * since the last used mapping need not invalidate again. - */ + pmap_sf_buf_map(sf); #ifdef SMP - if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) - CPU_ZERO(&sf->cpumask); - sf_buf_shootdown(sf, flags); -#else - if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) - pmap_invalidate_page(kernel_pmap, sf->kva); #endif } #ifdef SMP void sf_buf_shootdown(struct sf_buf *sf, int flags) { cpuset_t other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sf->cpumask)) { CPU_SET(cpuid, &sf->cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_NAND(&other_cpus, &sf->cpumask); if (!CPU_EMPTY(&other_cpus)) { CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap); } } sched_unpin(); } #endif /* * MD part of sf_buf_free(). */ int sf_buf_unmap(struct sf_buf *sf) { return (0); } static void sf_buf_invalidate(struct sf_buf *sf) { vm_page_t m = sf->m; /* * Use pmap_qenter to update the pte for * existing mapping, in particular, the PAT * settings are recalculated. */ pmap_qenter(sf->kva, &m, 1); pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE); } /* * Invalidate the cache lines that may belong to the page, if * (possibly old) mapping of the page by sf buffer exists. Returns * TRUE when mapping was found and cache invalidated. */ boolean_t sf_buf_invalidate_cache(vm_page_t m) { return (sf_buf_process_page(m, sf_buf_invalidate)); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. * Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(vm_paddr_t addr) { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } Index: head/sys/i386/include/md_var.h =================================================================== --- head/sys/i386/include/md_var.h (revision 343566) +++ head/sys/i386/include/md_var.h (revision 343567) @@ -1,83 +1,85 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ #include extern u_int cyrix_did; #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif #ifdef COMPAT_FREEBSD4 extern int szfreebsd4_sigcode; #endif #ifdef COMPAT_43 extern int szosigcode; extern int sz_lcall_tramp; #endif extern uint32_t *vm_page_dump; extern vm_offset_t proc0kstack; extern uintptr_t setidt_disp; struct segment_descriptor; union savefpu; int cp_slow0(vm_offset_t uva, size_t len, bool write, void (*f)(vm_offset_t, void *), void *arg); void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs)); void copyout_init_tramp(void); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds)); void doreti_popl_ds_fault(void) __asm(__STRING(doreti_popl_ds_fault)); void doreti_popl_es(void) __asm(__STRING(doreti_popl_es)); void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault)); void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs)); void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault)); void fill_based_sd(struct segment_descriptor *sdp, uint32_t base); void i686_pagezero(void *addr); void sse2_pagezero(void *addr); +int minidumpsys_nopae(struct dumperinfo *); +int minidumpsys_pae(struct dumperinfo *); void init_AMD_Elan_sc520(void); vm_paddr_t kvtop(void *addr); void panicifcpuunsupported(void); void ppro_reenable_apic(void); void set_fsbase(struct thread *td, uint32_t base); void set_gsbase(struct thread *td, uint32_t base); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec); void setidt_nodisp(int idx, uintptr_t func, int typ, int dpl, int selec); union savefpu *get_pcb_user_save_td(struct thread *td); union savefpu *get_pcb_user_save_pcb(struct pcb *pcb); #endif /* !_MACHINE_MD_VAR_H_ */ Index: head/sys/i386/include/param.h =================================================================== --- head/sys/i386/include/param.h (revision 343566) +++ head/sys/i386/include/param.h (revision 343567) @@ -1,169 +1,167 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)param.h 5.8 (Berkeley) 6/28/91 * $FreeBSD$ */ #ifndef _I386_INCLUDE_PARAM_H_ #define _I386_INCLUDE_PARAM_H_ #include /* * Machine dependent constants for Intel 386. */ #define __HAVE_ACPI #define __HAVE_PIR #define __PCI_REROUTE_INTERRUPT #ifndef MACHINE #define MACHINE "i386" #endif #ifndef MACHINE_ARCH #define MACHINE_ARCH "i386" #endif #define MID_MACHINE MID_I386 #if defined(SMP) || defined(KLD_MODULE) #ifndef MAXCPU #define MAXCPU 32 #endif #else #define MAXCPU 1 #endif /* SMP || KLD_MODULE */ #ifndef MAXMEMDOM #define MAXMEMDOM 1 #endif #define ALIGNBYTES _ALIGNBYTES #define ALIGN(p) _ALIGN(p) /* * ALIGNED_POINTER is a boolean macro that checks whether an address * is valid to fetch data elements of type t from on this architecture. * This does not reflect the optimal alignment, just the possibility * (within reasonable limits). */ #define ALIGNED_POINTER(p, t) 1 /* * CACHE_LINE_SIZE is the compile-time maximum cache line size for an * architecture. It should be used with appropriate caution. */ #define CACHE_LINE_SHIFT 6 #define CACHE_LINE_SIZE (1 << CACHE_LINE_SHIFT) #define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ -#define PAGE_SIZE (1<> PAGE_SHIFT) #define ptoa(x) ((x) << PAGE_SHIFT) #define i386_btop(x) ((x) >> PAGE_SHIFT) #define i386_ptob(x) ((x) << PAGE_SHIFT) #define pgtok(x) ((x) * (PAGE_SIZE / 1024)) #define INKERNEL(va) (TRUE) #endif /* !_I386_INCLUDE_PARAM_H_ */ Index: head/sys/i386/include/pmap.h =================================================================== --- head/sys/i386/include/pmap.h (revision 343566) +++ head/sys/i386/include/pmap.h (revision 343567) @@ -1,412 +1,309 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ /* * Page-directory and page-table entries follow this format, with a few * of the fields not present here and there, depending on a lot of things. */ /* ---- Intel Nomenclature ---- */ #define PG_V 0x001 /* P Valid */ #define PG_RW 0x002 /* R/W Read/Write */ #define PG_U 0x004 /* U/S User/Supervisor */ #define PG_NC_PWT 0x008 /* PWT Write through */ #define PG_NC_PCD 0x010 /* PCD Cache disable */ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ #define PG_PTE_PAT 0x080 /* PAT PAT index */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ #define PG_PDE_PAT 0x1000 /* PAT PAT index */ -#if defined(PAE) || defined(PAE_TABLES) #define PG_NX (1ull<<63) /* No-execute */ -#endif /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ #define PG_MANAGED PG_AVAIL2 #define PG_PROMOTED PG_AVAIL3 /* PDE only */ -#if defined(PAE) || defined(PAE_TABLES) -#define PG_FRAME (0x000ffffffffff000ull) -#define PG_PS_FRAME (0x000fffffffe00000ull) -#else -#define PG_FRAME (~PAGE_MASK) -#define PG_PS_FRAME (0xffc00000) -#endif + #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* Page level cache control fields used to determine the PAT type */ #define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD) #define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD) /* * Promotion to a 2 or 4MB (PDE) page mapping requires that the corresponding * 4KB (PTE) page mappings have identical settings for the following fields: */ #define PG_PTE_PROMOTE (PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \ PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V) /* * Page Protection Exception bits */ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ #define PGEX_RSV 0x08 /* reserved PTE field is non-zero */ #define PGEX_I 0x10 /* during an instruction fetch */ /* - * Size of Kernel address space. This is the number of page table pages - * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. - * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). - * For PAE, the page table page unit size is 2MB. This means that 512 pages - * is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE. - */ -#if defined(PAE) || defined(PAE_TABLES) -#define KVA_PAGES (512*4) -#else -#define KVA_PAGES (256*4) -#endif - -/* * Pte related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< #include #include #include #include -#if defined(PAE) || defined(PAE_TABLES) - -typedef uint64_t pdpt_entry_t; -typedef uint64_t pd_entry_t; -typedef uint64_t pt_entry_t; - -#define PTESHIFT (3) -#define PDESHIFT (3) - -#else - -typedef uint32_t pd_entry_t; -typedef uint32_t pt_entry_t; - -#define PTESHIFT (2) -#define PDESHIFT (2) - -#endif - /* * Address of current address space page table maps and directories. */ #ifdef _KERNEL -#include -extern pt_entry_t PTmap[]; -extern pd_entry_t PTD[]; -extern pd_entry_t PTDpde[]; - -#if defined(PAE) || defined(PAE_TABLES) -extern pdpt_entry_t *IdlePDPT; -#endif -extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ - /* - * Translate a virtual address to the kernel virtual address of its page table - * entry (PTE). This can be used recursively. If the address of a PTE as - * previously returned by this macro is itself given as the argument, then the - * address of the page directory entry (PDE) that maps the PTE will be - * returned. - * - * This macro may be used before pmap_bootstrap() is called. - */ -#define vtopte(va) (PTmap + i386_btop(va)) - -/* * Translate a virtual address to its physical address. * * This macro may be used before pmap_bootstrap() is called. */ #define vtophys(va) pmap_kextract((vm_offset_t)(va)) -/* - * KPTmap is a linear mapping of the kernel page table. It differs from the - * recursive mapping in two ways: (1) it only provides access to kernel page - * table pages, and not user page table pages, and (2) it provides access to - * a kernel page table page after the corresponding virtual addresses have - * been promoted to a 2/4MB page mapping. - * - * KPTmap is first initialized by pmap_cold() to support just NPKT page table - * pages. Later, it is reinitialized by pmap_bootstrap() to allow for - * expansion of the kernel page table. - */ -extern pt_entry_t *KPTmap; - -#if (defined(PAE) || defined(PAE_TABLES)) - -#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new) -#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte) -#define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0) -#define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte) -#define pte_load(ptep) atomic_load_acq_64_i586(ptep) - -extern pt_entry_t pg_nx; - -#else /* !(PAE || PAE_TABLES) */ - -#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new) -#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte) -#define pte_load_clear(ptep) atomic_swap_int(ptep, 0) -#define pte_store(ptep, pte) do { \ - *(u_int *)(ptep) = (u_int)(pte); \ -} while (0) -#define pte_load(ptep) atomic_load_acq_int(ptep) - -#endif /* !(PAE || PAE_TABLES) */ - #define pte_clear(ptep) pte_store(ptep, 0) #define pde_store(pdep, pde) pte_store(pdep, pde) -/* - * Extract from the kernel page table the physical address that is mapped by - * the given virtual address "va". - * - * This function may be used before pmap_bootstrap() is called. - */ -static __inline vm_paddr_t -pmap_kextract(vm_offset_t va) -{ - vm_paddr_t pa; - - if ((pa = pte_load(&PTD[va >> PDRSHIFT])) & PG_PS) { - pa = (pa & PG_PS_FRAME) | (va & PDRMASK); - } else { - /* - * Beware of a concurrent promotion that changes the PDE at - * this point! For example, vtopte() must not be used to - * access the PTE because it would use the new PDE. It is, - * however, safe to use the old PDE because the page table - * page is preserved by the promotion. - */ - pa = KPTmap[i386_btop(va)]; - pa = (pa & PG_FRAME) | (va & PAGE_MASK); - } - return (pa); -} - #endif /* _KERNEL */ /* * Pmap stuff */ struct pv_entry; struct pv_chunk; struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pat_mode; }; +#define PMAP_EXTERN_FIELDS \ + cpuset_t pm_active; /* active on cpus */ \ + struct mtx pm_mtx; \ + struct pmap_statistics pm_stats; /* pmap statistics */ + +struct pmap_KBI { + PMAP_EXTERN_FIELDS + int32_t pm_fill[32]; +}; + +#ifdef PMTYPE struct pmap { - struct mtx pm_mtx; + PMAP_EXTERN_FIELDS pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpuset_t pm_active; /* active on cpus */ - struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ -#if defined(PAE) || defined(PAE_TABLES) pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer table */ -#endif struct vm_radix pm_root; /* spare page table pages */ vm_page_t pm_ptdpg[NPGPTD]; }; +#else +#define pmap pmap_KBI +#endif typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) #define PMAP_LOCK_ASSERT(pmap, type) \ mtx_assert(&(pmap)->pm_mtx, (type)) #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) #define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ NULL, MTX_DEF | MTX_DUPOK) #define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx) #define PMAP_MTX(pmap) (&(pmap)->pm_mtx) #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) #endif /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_list. */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* * pv_entries are allocated in chunks per-process. This avoids the * need to track per-pmap assignments. */ #define _NPCM 11 #define _NPCPV 336 struct pv_chunk { pmap_t pc_pmap; TAILQ_ENTRY(pv_chunk) pc_list; uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */ TAILQ_ENTRY(pv_chunk) pc_lru; struct pv_entry pc_pventry[_NPCPV]; }; #ifdef _KERNEL -extern caddr_t CADDR3; -extern pt_entry_t *CMAP3; extern vm_paddr_t phys_avail[]; extern vm_paddr_t dump_avail[]; extern char *ptvmmap; /* poor name! */ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; #define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) #define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz)) +struct sf_buf; + /* * Only the following functions or macros may be used before pmap_bootstrap() * is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and * vtopte(). */ void pmap_activate_boot(pmap_t pmap); +void pmap_basemem_setup(u_int basemem); +void *pmap_bios16_enter(void); +void pmap_bios16_leave(void *handle); void pmap_bootstrap(vm_paddr_t); int pmap_cache_bits(pmap_t, int mode, boolean_t is_pde); int pmap_change_attr(vm_offset_t, vm_size_t, int); +caddr_t pmap_cmap3(vm_paddr_t pa, u_int pte_bits); +void pmap_cp_slow0_map(vm_offset_t kaddr, int plen, vm_page_t *ma); +void pmap_flush_page(vm_page_t m); +u_int pmap_get_kcr3(void); +u_int pmap_get_cr3(pmap_t); +vm_offset_t pmap_get_map_low(void); +vm_offset_t pmap_get_vm_maxuser_address(void); void pmap_init_pat(void); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void *pmap_kenter_temporary(vm_paddr_t pa, int i); +vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); +void pmap_ksetrw(vm_offset_t va); void *pmap_mapbios(vm_paddr_t, vm_size_t); void *pmap_mapdev(vm_paddr_t, vm_size_t); void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); boolean_t pmap_page_is_mapped(vm_page_t m); void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); +vm_paddr_t pmap_pg_frame(vm_paddr_t pa); bool pmap_ps_enabled(pmap_t pmap); +void pmap_remap_lower(bool); +void pmap_remap_lowptdi(bool); +void pmap_set_nx(void); +void pmap_sf_buf_map(struct sf_buf *sf); void pmap_unmapdev(vm_offset_t, vm_size_t); -pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2; void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); void pmap_invalidate_cache(void); void pmap_invalidate_cache_pages(vm_page_t *pages, int count); void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); void *pmap_trm_alloc(size_t size, int flags); void pmap_trm_free(void *addr, size_t size); void invltlb_glob(void); + +struct thread; + +extern int pae_mode; +extern int i386_pmap_VM_NFREEORDER; +extern int i386_pmap_VM_LEVEL_0_ORDER; +extern int i386_pmap_PDRSHIFT; #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/i386/include/pmap_base.h =================================================================== --- head/sys/i386/include/pmap_base.h (nonexistent) +++ head/sys/i386/include/pmap_base.h (revision 343567) @@ -0,0 +1,124 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_PMAP_BASE_H_ +#define _MACHINE_PMAP_BASE_H_ + +struct pmap_methods { + void (*pm_ksetrw)(vm_offset_t); + void (*pm_remap_lower)(bool); + void (*pm_remap_lowptdi)(bool); + void (*pm_align_superpage)(vm_object_t object, vm_ooffset_t offset, + vm_offset_t *addr, vm_size_t size); + vm_offset_t (*pm_quick_enter_page)(vm_page_t m); + void (*pm_quick_remove_page)(vm_offset_t addr); + void *(*pm_trm_alloc)(size_t size, int flags); + void (*pm_trm_free)(void *addr, size_t size); + vm_offset_t (*pm_get_map_low)(void); + vm_offset_t (*pm_get_vm_maxuser_address)(void); + vm_paddr_t (*pm_kextract)(vm_offset_t va); + vm_paddr_t (*pm_pg_frame)(vm_paddr_t pa); + void (*pm_sf_buf_map)(struct sf_buf *sf); + void (*pm_cp_slow0_map)(vm_offset_t kaddr, int plen, vm_page_t *ma); + u_int (*pm_get_kcr3)(void); + u_int (*pm_get_cr3)(pmap_t); + caddr_t (*pm_cmap3)(vm_paddr_t pa, u_int pte_flags); + void (*pm_basemem_setup)(u_int basemem); + void (*pm_set_nx)(void); + void *(*pm_bios16_enter)(void); + void (*pm_bios16_leave)(void *handle); + void (*pm_bootstrap)(vm_paddr_t firstaddr); + boolean_t (*pm_is_valid_memattr)(pmap_t, vm_memattr_t); + int (*pm_cache_bits)(pmap_t, int, boolean_t); + bool (*pm_ps_enabled)(pmap_t); + void (*pm_pinit0)(pmap_t); + int (*pm_pinit)(pmap_t); + void (*pm_activate)(struct thread *); + void (*pm_activate_boot)(pmap_t); + void (*pm_advise)(pmap_t, vm_offset_t, vm_offset_t, int); + void (*pm_clear_modify)(vm_page_t); + int (*pm_change_attr)(vm_offset_t, vm_size_t, int); + int (*pm_mincore)(pmap_t, vm_offset_t, vm_paddr_t *); + void (*pm_copy)(pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); + void (*pm_copy_page)(vm_page_t, vm_page_t); + void (*pm_copy_pages)(vm_page_t [], vm_offset_t, vm_page_t [], + vm_offset_t, int); + void (*pm_zero_page)(vm_page_t); + void (*pm_zero_page_area)(vm_page_t, int, int); + int (*pm_enter)(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int, + int8_t); + void (*pm_enter_object)(pmap_t, vm_offset_t, vm_offset_t, + vm_page_t, vm_prot_t); + void (*pm_enter_quick)(pmap_t, vm_offset_t, vm_page_t, vm_prot_t); + void *(*pm_kenter_temporary)(vm_paddr_t pa, int); + void (*pm_object_init_pt)(pmap_t, vm_offset_t, vm_object_t, + vm_pindex_t, vm_size_t); + void (*pm_unwire)(pmap_t, vm_offset_t, vm_offset_t); + boolean_t (*pm_page_exists_quick)(pmap_t, vm_page_t); + int (*pm_page_wired_mappings)(vm_page_t); + boolean_t (*pm_page_is_mapped)(vm_page_t); + void (*pm_remove_pages)(pmap_t); + boolean_t (*pm_is_modified)(vm_page_t); + boolean_t (*pm_is_prefaultable)(pmap_t, vm_offset_t); + boolean_t (*pm_is_referenced)(vm_page_t); + void (*pm_remove_write)(vm_page_t); + int (*pm_ts_referenced)(vm_page_t); + void *(*pm_mapdev_attr)(vm_paddr_t, vm_size_t, int); + void (*pm_unmapdev)(vm_offset_t, vm_size_t); + void (*pm_page_set_memattr)(vm_page_t, vm_memattr_t); + vm_paddr_t (*pm_extract)(pmap_t, vm_offset_t); + vm_page_t (*pm_extract_and_hold)(pmap_t, vm_offset_t, vm_prot_t); + vm_offset_t (*pm_map)(vm_offset_t *, vm_paddr_t, vm_paddr_t, int); + void (*pm_qenter)(vm_offset_t sva, vm_page_t *, int); + void (*pm_qremove)(vm_offset_t, int); + void (*pm_release)(pmap_t); + void (*pm_protect)(pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); + void (*pm_remove)(pmap_t, vm_offset_t, vm_offset_t); + void (*pm_remove_all)(vm_page_t); + void (*pm_init)(void); + void (*pm_init_pat)(void); + void (*pm_growkernel)(vm_offset_t); + void (*pm_invalidate_page)(pmap_t, vm_offset_t); + void (*pm_invalidate_range)(pmap_t, vm_offset_t, vm_offset_t); + void (*pm_invalidate_all)(pmap_t); + void (*pm_invalidate_cache)(void); + void (*pm_flush_page)(vm_page_t); + void (*pm_kenter)(vm_offset_t, vm_paddr_t); + void (*pm_kremove)(vm_offset_t); +}; + +void pmap_cold(void); +void pmap_pae_cold(void); +void pmap_nopae_cold(void); + +#endif Property changes on: head/sys/i386/include/pmap_base.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/i386/include/pmap_nopae.h =================================================================== --- head/sys/i386/include/pmap_nopae.h (nonexistent) +++ head/sys/i386/include/pmap_nopae.h (revision 343567) @@ -0,0 +1,100 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Portions of this software were developed by + * Konstantin Belousov under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. + * + * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 + * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 + * $FreeBSD$ + */ + +#ifndef _MACHINE_PMAP_NOPAE_H +#define _MACHINE_PMAP_NOPAE_H + +#define NTRPPTD 1 +#define LOWPTDI 1 +#define KERNPTDI 2 + +#define NPGPTD 1 +#define NPGPTD_SHIFT 10 +#undef PDRSHIFT +#define PDRSHIFT PDRSHIFT_NOPAE +#undef NBPDR +#define NBPDR (1 << PDRSHIFT_NOPAE) /* bytes/page dir */ + +#define PG_FRAME PG_FRAME_NOPAE +#define PG_PS_FRAME PG_PS_FRAME_NOPAE + +#define KVA_PAGES (256*4) + +#ifndef NKPT +#define NKPT 30 +#endif + +typedef uint32_t pd_entry_t; +typedef uint32_t pt_entry_t; +typedef uint32_t pdpt_entry_t; /* Only to keep struct pmap layout. */ + +#define PTESHIFT (2) +#define PDESHIFT (2) + +#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new) +#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte) +#define pte_load_clear(ptep) atomic_swap_int(ptep, 0) +#define pte_store(ptep, pte) do { \ + *(u_int *)(ptep) = (u_int)(pte); \ +} while (0) +#define pte_load(ptep) atomic_load_int(ptep) + +extern pt_entry_t PTmap[]; +extern pd_entry_t PTD[]; +extern pd_entry_t PTDpde[]; +extern pd_entry_t *IdlePTD_nopae; +extern pt_entry_t *KPTmap_nopae; + +struct pmap; +pt_entry_t *__CONCAT(PMTYPE, pmap_pte)(struct pmap *, vm_offset_t) __pure2; + +#endif Property changes on: head/sys/i386/include/pmap_nopae.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/i386/include/pmap_pae.h =================================================================== --- head/sys/i386/include/pmap_pae.h (nonexistent) +++ head/sys/i386/include/pmap_pae.h (revision 343567) @@ -0,0 +1,123 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Portions of this software were developed by + * Konstantin Belousov under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. + * + * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 + * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 + * $FreeBSD$ + */ + +#ifndef _MACHINE_PMAP_PAE_H +#define _MACHINE_PMAP_PAE_H + +#define NTRPPTD 2 /* Number of PTDs for trampoline + mapping */ +#define LOWPTDI 2 /* low memory map pde */ +#define KERNPTDI 4 /* start of kernel text pde */ + +#define NPGPTD 4 /* Num of pages for page directory */ +#define NPGPTD_SHIFT 9 +#undef PDRSHIFT +#define PDRSHIFT PDRSHIFT_PAE +#undef NBPDR +#define NBPDR (1 << PDRSHIFT_PAE) /* bytes/page dir */ + +#define PG_FRAME PG_FRAME_PAE +#define PG_PS_FRAME PG_PS_FRAME_PAE + +/* + * Size of Kernel address space. This is the number of page table pages + * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. + * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). + * For PAE, the page table page unit size is 2MB. This means that 512 pages + * is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE. + */ +#define KVA_PAGES (512*4) + +/* + * The initial number of kernel page table pages that are constructed + * by pmap_cold() must be sufficient to map vm_page_array. That number can + * be calculated as follows: + * max_phys / PAGE_SIZE * sizeof(struct vm_page) / NBPDR + * PAE: max_phys 16G, sizeof(vm_page) 76, NBPDR 2M, 152 page table pages. + * PAE_TABLES: max_phys 4G, sizeof(vm_page) 68, NBPDR 2M, 36 page table pages. + * Non-PAE: max_phys 4G, sizeof(vm_page) 68, NBPDR 4M, 18 page table pages. + */ +#ifndef NKPT +#define NKPT 240 +#endif + +typedef uint64_t pdpt_entry_t; +typedef uint64_t pd_entry_t; +typedef uint64_t pt_entry_t; + +#define PTESHIFT (3) +#define PDESHIFT (3) + +#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new) +#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte) +#define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0) +#define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte) +#define pte_load(ptep) atomic_load_acq_64_i586(ptep) + +extern pdpt_entry_t *IdlePDPT; +extern pt_entry_t pg_nx; +extern pd_entry_t *IdlePTD_pae; /* physical address of "Idle" state directory */ + +/* + * KPTmap is a linear mapping of the kernel page table. It differs from the + * recursive mapping in two ways: (1) it only provides access to kernel page + * table pages, and not user page table pages, and (2) it provides access to + * a kernel page table page after the corresponding virtual addresses have + * been promoted to a 2/4MB page mapping. + * + * KPTmap is first initialized by pmap_cold() to support just NPKT page table + * pages. Later, it is reinitialized by pmap_bootstrap() to allow for + * expansion of the kernel page table. + */ +extern pt_entry_t *KPTmap_pae; + +#endif Property changes on: head/sys/i386/include/pmap_pae.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/i386/include/vm86.h =================================================================== --- head/sys/i386/include/vm86.h (revision 343566) +++ head/sys/i386/include/vm86.h (revision 343567) @@ -1,167 +1,167 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997 Jonathan Lemon * All rights reserved. * * Derived from register.h, which is * Copyright (c) 1996 Michael Smith. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_VM86_H_ #define _MACHINE_VM86_H_ 1 /* standard register representation */ typedef union { u_int r_ex; struct { u_short r_x; u_int :16; } r_w; struct { u_char r_l; u_char r_h; u_int :16; } r_b; } reg86_t; /* layout must match definition of struct trapframe_vm86 in */ struct vm86frame { int kernel_fs; int kernel_es; int kernel_ds; reg86_t edi; reg86_t esi; reg86_t ebp; reg86_t isp; reg86_t ebx; reg86_t edx; reg86_t ecx; reg86_t eax; int vmf_trapno; int vmf_err; reg86_t eip; reg86_t cs; reg86_t eflags; reg86_t esp; reg86_t ss; reg86_t es; reg86_t ds; reg86_t fs; reg86_t gs; #define vmf_ah eax.r_b.r_h #define vmf_al eax.r_b.r_l #define vmf_ax eax.r_w.r_x #define vmf_eax eax.r_ex #define vmf_bh ebx.r_b.r_h #define vmf_bl ebx.r_b.r_l #define vmf_bx ebx.r_w.r_x #define vmf_ebx ebx.r_ex #define vmf_ch ecx.r_b.r_h #define vmf_cl ecx.r_b.r_l #define vmf_cx ecx.r_w.r_x #define vmf_ecx ecx.r_ex #define vmf_dh edx.r_b.r_h #define vmf_dl edx.r_b.r_l #define vmf_dx edx.r_w.r_x #define vmf_edx edx.r_ex #define vmf_si esi.r_w.r_x #define vmf_di edi.r_w.r_x #define vmf_cs cs.r_w.r_x #define vmf_ds ds.r_w.r_x #define vmf_es es.r_w.r_x #define vmf_ss ss.r_w.r_x #define vmf_bp ebp.r_w.r_x #define vmf_sp esp.r_w.r_x #define vmf_ip eip.r_w.r_x #define vmf_flags eflags.r_w.r_x #define vmf_eflags eflags.r_ex }; #define VM86_PMAPSIZE 24 #define VMAP_MALLOC 1 /* page was malloced by us */ struct vm86context { int npages; struct vm86pmap { int flags; int pte_num; vm_offset_t kva; - u_int old_pte; + uint64_t old_pte; } pmap[VM86_PMAPSIZE]; }; #define VM_USERCHANGE (PSL_USERCHANGE) #define VME_USERCHANGE (VM_USERCHANGE | PSL_VIP | PSL_VIF) struct vm86_kernel { caddr_t vm86_intmap; /* interrupt map */ u_int vm86_eflags; /* emulated flags */ int vm86_has_vme; /* VME support */ int vm86_inited; /* we were initialized */ int vm86_debug; caddr_t vm86_sproc; /* address of sproc */ }; #define VM86_INIT 1 #define VM86_SET_VME 2 #define VM86_GET_VME 3 #define VM86_INTCALL 4 struct vm86_init_args { int debug; /* debug flag */ int cpu_type; /* cpu type to emulate */ u_char int_map[32]; /* interrupt map */ }; struct vm86_vme_args { int state; /* status */ }; struct vm86_intcall_args { int intnum; struct vm86frame vmf; }; #ifdef _KERNEL extern int vm86paddr; struct thread; extern int vm86_emulate(struct vm86frame *); extern int vm86_sysarch(struct thread *, char *); extern void vm86_trap(struct vm86frame *); extern int vm86_intcall(int, struct vm86frame *); extern int vm86_datacall(int, struct vm86frame *, struct vm86context *); extern void vm86_initialize(void); extern vm_offset_t vm86_getpage(struct vm86context *, int); extern vm_offset_t vm86_addpage(struct vm86context *, int, vm_offset_t); extern int vm86_getptr(struct vm86context *, vm_offset_t, u_short *, u_short *); extern vm_offset_t vm86_getaddr(struct vm86context *, u_short, u_short); #endif /* _KERNEL */ #endif /* _MACHINE_VM86_H_ */ Index: head/sys/i386/include/vmparam.h =================================================================== --- head/sys/i386/include/vmparam.h (revision 343566) +++ head/sys/i386/include/vmparam.h (revision 343567) @@ -1,235 +1,244 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ 1 /* * Machine dependent constants for 386. */ /* * Virtual memory related constants, all in bytes */ #define MAXTSIZ (128UL*1024*1024) /* max text size */ #ifndef DFLDSIZ #define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (512UL*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (64UL*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ #endif /* * Choose between DENSE and SPARSE based on whether lower execution time or * lower kernel address space consumption is desired. Under PAE, kernel * address space is often in short supply. */ #ifdef PAE #define VM_PHYSSEG_SPARSE #else #define VM_PHYSSEG_DENSE #endif /* * The number of PHYSSEG entries must be one greater than the number * of phys_avail entries because the phys_avail entry that spans the * largest physical address that is accessible by ISA DMA is split * into two PHYSSEG entries. */ #define VM_PHYSSEG_MAX 17 /* * Create one free page pool. Since the i386 kernel virtual address * space does not include a mapping onto the machine's entire physical * memory, VM_FREEPOOL_DIRECT is defined as an alias for the default * pool, VM_FREEPOOL_DEFAULT. */ #define VM_NFREEPOOL 1 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 0 /* - * Create two free page lists: VM_FREELIST_DEFAULT is for physical - * pages that are above the largest physical address that is - * accessible by ISA DMA and VM_FREELIST_LOWMEM is for physical pages - * that are below that address. + * Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages + * that have physical addresses below 4G but are not accessible by ISA DMA, + * and VM_FREELIST_ISADMA is for physical pages that are accessible by ISA + * DMA. */ -#define VM_NFREELIST 2 +#define VM_NFREELIST 3 #define VM_FREELIST_DEFAULT 0 -#define VM_FREELIST_LOWMEM 1 +#define VM_FREELIST_DMA32 1 +#define VM_FREELIST_LOWMEM 2 #define VM_LOWMEM_BOUNDARY (16 << 20) /* 16MB ISA DMA limit */ /* + * Always create DMA32 freelist if there is any memory above 4G. + * Bounce dma is extremely fragile and simultaneously intensively + * used. + */ +#define VM_DMA32_NPAGES_THRESHOLD 1 + +/* * The largest allocation size is 2MB under PAE and 4MB otherwise. */ -#ifdef PAE -#define VM_NFREEORDER 10 -#else -#define VM_NFREEORDER 11 -#endif +#define VM_NFREEORDER_PAE 10 +#define VM_NFREEORDER_NOPAE 11 +#define VM_NFREEORDER_MAX VM_NFREEORDER_NOPAE +#define VM_NFREEORDER i386_pmap_VM_NFREEORDER /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 512 pages when PAE pagetables are * used, and 1024 pages otherwise. */ #ifndef VM_LEVEL_0_ORDER -#if defined(PAE) || defined(PAE_TABLES) -#define VM_LEVEL_0_ORDER 9 +#define VM_LEVEL_0_ORDER_PAE 9 +#define VM_LEVEL_0_ORDER_NOPAE 10 +#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER_NOPAE +#define VM_LEVEL_0_ORDER i386_pmap_VM_LEVEL_0_ORDER #else -#define VM_LEVEL_0_ORDER 10 +#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER #endif -#endif /* * Kernel physical load address. */ #ifndef KERNLOAD -#define KERNLOAD (KERNPTDI << PDRSHIFT) +#define KERNLOAD (8 * 1024 * 1024) #endif /* !defined(KERNLOAD) */ /* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. * Because of the page that is both a PD and PT, it looks a little * messy at times, but hey, we'll do anything to save a page :-) */ -#define VM_MAX_KERNEL_ADDRESS VADDR(PTDPTDI, 0) +#define VM_MAX_KERNEL_ADDRESS (0xffffffffU - 16 * 1024 * 1024 + 1) #define VM_MIN_KERNEL_ADDRESS 0 #define KERNBASE KERNLOAD #define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) #define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0) -#define VM_MAXUSER_ADDRESS VADDR(TRPTDI, 0) +#define VM_MAXUSER_ADDRESS (0xffffffff - 4 * 1024 * 1024 + 1) #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE #define VM_MAX_ADDRESS VADDR(PTDPTDI, 0) #define VM_MIN_ADDRESS ((vm_offset_t)0) #define PMAP_TRM_MIN_ADDRESS VM_MAXUSER_ADDRESS #define PMAP_TRM_MAX_ADDRESS 0xffffffff -#define PMAP_MAP_LOW VADDR(LOWPTDI, 0) +#define PMAP_MAP_LOW (4 * 1024 * 1024) /* * KVA layout. The unit of the system allocation is single PDE, which * represents NBPDR bytes, aligned to NBPDR. NBPDR is 4M for non-PAE - * page tables, and 2M for PAE. Addresses below are shown for non-PAE. + * page tables, and 2M for PAE, so PAE mode requires twice as many PTDs + * to create the same memory map as non-PAE. * * 0x00000000 - 0x003fffff Transient identity map of low memory (0-4M), * normally disabled to catch NULL derefs. * 0x00400000 - 0x007fffff Fixed mapping of the low memory (4-8M). * 0x00800000 - 0xffbfffff KERNBASE (VA) == KERNLOAD (PA), kernel * text + data and all kernel maps. Managed * by MI VM. * 0xffc00000 - 0xffdfffff Recursive kernel page table mapping, pointed * to by PTmap. PTD[] recursively points * into PTmap. * 0xffe00000 - 0xffffffff Kernel/User mode shared PDE, contains GDT, * IDT, TSS, LDT, trampoline code and stacks. * Managed by pmap_trm_alloc(). */ /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE -#define VM_KMEM_SIZE_SCALE (3) +#define VM_KMEM_SIZE_SCALE (1) #endif /* * Optional floor (in bytes) on the size of the kmem arena. */ #ifndef VM_KMEM_SIZE_MIN #define VM_KMEM_SIZE_MIN (12 * 1024 * 1024) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 40% of the * kernel map rounded to the nearest multiple of the superpage size. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX (((((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS) >> (PDRSHIFT - 2)) + 5) / 10) << PDRSHIFT) #endif /* initial pagein size of beginning of executable file */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #ifndef VM_MAX_AUTOTUNE_MAXUSERS #define VM_MAX_AUTOTUNE_MAXUSERS 384 #endif #define SFBUF #define SFBUF_MAP #define SFBUF_CPUSET #define SFBUF_PROCESS_PAGE #define PMAP_HAS_DMAP 0 #define PHYS_TO_DMAP(x) ({ panic("No direct map exists"); 0; }) #define DMAP_TO_PHYS(x) ({ panic("No direct map exists"); 0; }) #endif /* _MACHINE_VMPARAM_H_ */ Index: head/sys/i386/pci/pci_cfgreg.c =================================================================== --- head/sys/i386/pci/pci_cfgreg.c (revision 343566) +++ head/sys/i386/pci/pci_cfgreg.c (revision 343567) @@ -1,680 +1,678 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi * Copyright (c) 2004, Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PRVERB(a) do { \ if (bootverbose) \ printf a ; \ } while(0) #define PCIE_CACHE 8 struct pcie_cfg_elem { TAILQ_ENTRY(pcie_cfg_elem) elem; vm_offset_t vapage; vm_paddr_t papage; }; enum { CFGMECH_NONE = 0, CFGMECH_1, CFGMECH_2, CFGMECH_PCIE, }; SYSCTL_DECL(_hw_pci); static TAILQ_HEAD(pcie_cfg_list, pcie_cfg_elem) pcie_list[MAXCPU]; static uint64_t pcie_base; static int pcie_minbus, pcie_maxbus; static uint32_t pcie_badslots; static int cfgmech; static int devmax; static struct mtx pcicfg_mtx; static int mcfg_enable = 1; SYSCTL_INT(_hw_pci, OID_AUTO, mcfg, CTLFLAG_RDTUN, &mcfg_enable, 0, "Enable support for PCI-e memory mapped config access"); static uint32_t pci_docfgregread(int bus, int slot, int func, int reg, int bytes); static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes); static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes); static int pcireg_cfgopen(void); static int pciereg_cfgread(int bus, unsigned slot, unsigned func, unsigned reg, unsigned bytes); static void pciereg_cfgwrite(int bus, unsigned slot, unsigned func, unsigned reg, int data, unsigned bytes); /* * Some BIOS writers seem to want to ignore the spec and put * 0 in the intline rather than 255 to indicate none. Some use * numbers in the range 128-254 to indicate something strange and * apparently undocumented anywhere. Assume these are completely bogus * and map them to 255, which means "none". */ static __inline int pci_i386_map_intline(int line) { if (line == 0 || line >= 128) return (PCI_INVALID_IRQ); return (line); } static u_int16_t pcibios_get_version(void) { struct bios_regs args; if (PCIbios.ventry == 0) { PRVERB(("pcibios: No call entry point\n")); return (0); } args.eax = PCIBIOS_BIOS_PRESENT; if (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))) { PRVERB(("pcibios: BIOS_PRESENT call failed\n")); return (0); } if (args.edx != 0x20494350) { PRVERB(("pcibios: BIOS_PRESENT didn't return 'PCI ' in edx\n")); return (0); } return (args.ebx & 0xffff); } /* * Initialise access to PCI configuration space */ int pci_cfgregopen(void) { static int opened = 0; uint64_t pciebar; u_int16_t vid, did; u_int16_t v; if (opened) return (1); if (cfgmech == CFGMECH_NONE && pcireg_cfgopen() == 0) return (0); v = pcibios_get_version(); if (v > 0) PRVERB(("pcibios: BIOS version %x.%02x\n", (v & 0xff00) >> 8, v & 0xff)); mtx_init(&pcicfg_mtx, "pcicfg", NULL, MTX_SPIN); opened = 1; /* $PIR requires PCI BIOS 2.10 or greater. */ if (v >= 0x0210) pci_pir_open(); if (cfgmech == CFGMECH_PCIE) return (1); /* * Grope around in the PCI config space to see if this is a * chipset that is capable of doing memory-mapped config cycles. * This also implies that it can do PCIe extended config cycles. */ /* Check for supported chipsets */ vid = pci_cfgregread(0, 0, 0, PCIR_VENDOR, 2); did = pci_cfgregread(0, 0, 0, PCIR_DEVICE, 2); switch (vid) { case 0x8086: switch (did) { case 0x3590: case 0x3592: /* Intel 7520 or 7320 */ pciebar = pci_cfgregread(0, 0, 0, 0xce, 2) << 16; pcie_cfgregopen(pciebar, 0, 255); break; case 0x2580: case 0x2584: case 0x2590: /* Intel 915, 925, or 915GM */ pciebar = pci_cfgregread(0, 0, 0, 0x48, 4); pcie_cfgregopen(pciebar, 0, 255); break; } } return(1); } static uint32_t pci_docfgregread(int bus, int slot, int func, int reg, int bytes) { if (cfgmech == CFGMECH_PCIE && (bus >= pcie_minbus && bus <= pcie_maxbus) && (bus != 0 || !(1 << slot & pcie_badslots))) return (pciereg_cfgread(bus, slot, func, reg, bytes)); else return (pcireg_cfgread(bus, slot, func, reg, bytes)); } /* * Read configuration space register */ u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes) { uint32_t line; /* * Some BIOS writers seem to want to ignore the spec and put * 0 in the intline rather than 255 to indicate none. The rest of * the code uses 255 as an invalid IRQ. */ if (reg == PCIR_INTLINE && bytes == 1) { line = pci_docfgregread(bus, slot, func, PCIR_INTLINE, 1); return (pci_i386_map_intline(line)); } return (pci_docfgregread(bus, slot, func, reg, bytes)); } /* * Write configuration space register */ void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes) { if (cfgmech == CFGMECH_PCIE && (bus >= pcie_minbus && bus <= pcie_maxbus) && (bus != 0 || !(1 << slot & pcie_badslots))) pciereg_cfgwrite(bus, slot, func, reg, data, bytes); else pcireg_cfgwrite(bus, slot, func, reg, data, bytes); } /* * Configuration space access using direct register operations */ /* enable configuration space accesses and return data port address */ static int pci_cfgenable(unsigned bus, unsigned slot, unsigned func, int reg, int bytes) { int dataport = 0; if (bus <= PCI_BUSMAX && slot < devmax && func <= PCI_FUNCMAX && (unsigned)reg <= PCI_REGMAX && bytes != 3 && (unsigned)bytes <= 4 && (reg & (bytes - 1)) == 0) { switch (cfgmech) { case CFGMECH_PCIE: case CFGMECH_1: outl(CONF1_ADDR_PORT, (1U << 31) | (bus << 16) | (slot << 11) | (func << 8) | (reg & ~0x03)); dataport = CONF1_DATA_PORT + (reg & 0x03); break; case CFGMECH_2: outb(CONF2_ENABLE_PORT, 0xf0 | (func << 1)); outb(CONF2_FORWARD_PORT, bus); dataport = 0xc000 | (slot << 8) | reg; break; } } return (dataport); } /* disable configuration space accesses */ static void pci_cfgdisable(void) { switch (cfgmech) { case CFGMECH_PCIE: case CFGMECH_1: /* * Do nothing for the config mechanism 1 case. * Writing a 0 to the address port can apparently * confuse some bridges and cause spurious * access failures. */ break; case CFGMECH_2: outb(CONF2_ENABLE_PORT, 0); break; } } static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes) { int data = -1; int port; mtx_lock_spin(&pcicfg_mtx); port = pci_cfgenable(bus, slot, func, reg, bytes); if (port != 0) { switch (bytes) { case 1: data = inb(port); break; case 2: data = inw(port); break; case 4: data = inl(port); break; } pci_cfgdisable(); } mtx_unlock_spin(&pcicfg_mtx); return (data); } static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes) { int port; mtx_lock_spin(&pcicfg_mtx); port = pci_cfgenable(bus, slot, func, reg, bytes); if (port != 0) { switch (bytes) { case 1: outb(port, data); break; case 2: outw(port, data); break; case 4: outl(port, data); break; } pci_cfgdisable(); } mtx_unlock_spin(&pcicfg_mtx); } /* check whether the configuration mechanism has been correctly identified */ static int pci_cfgcheck(int maxdev) { uint32_t id, class; uint8_t header; uint8_t device; int port; if (bootverbose) printf("pci_cfgcheck:\tdevice "); for (device = 0; device < maxdev; device++) { if (bootverbose) printf("%d ", device); port = pci_cfgenable(0, device, 0, 0, 4); id = inl(port); if (id == 0 || id == 0xffffffff) continue; port = pci_cfgenable(0, device, 0, 8, 4); class = inl(port) >> 8; if (bootverbose) printf("[class=%06x] ", class); if (class == 0 || (class & 0xf870ff) != 0) continue; port = pci_cfgenable(0, device, 0, 14, 1); header = inb(port); if (bootverbose) printf("[hdr=%02x] ", header); if ((header & 0x7e) != 0) continue; if (bootverbose) printf("is there (id=%08x)\n", id); pci_cfgdisable(); return (1); } if (bootverbose) printf("-- nothing found\n"); pci_cfgdisable(); return (0); } static int pcireg_cfgopen(void) { uint32_t mode1res, oldval1; uint8_t mode2res, oldval2; /* Check for type #1 first. */ oldval1 = inl(CONF1_ADDR_PORT); if (bootverbose) { printf("pci_open(1):\tmode 1 addr port (0x0cf8) is 0x%08x\n", oldval1); } cfgmech = CFGMECH_1; devmax = 32; outl(CONF1_ADDR_PORT, CONF1_ENABLE_CHK); DELAY(1); mode1res = inl(CONF1_ADDR_PORT); outl(CONF1_ADDR_PORT, oldval1); if (bootverbose) printf("pci_open(1a):\tmode1res=0x%08x (0x%08lx)\n", mode1res, CONF1_ENABLE_CHK); if (mode1res) { if (pci_cfgcheck(32)) return (cfgmech); } outl(CONF1_ADDR_PORT, CONF1_ENABLE_CHK1); mode1res = inl(CONF1_ADDR_PORT); outl(CONF1_ADDR_PORT, oldval1); if (bootverbose) printf("pci_open(1b):\tmode1res=0x%08x (0x%08lx)\n", mode1res, CONF1_ENABLE_CHK1); if ((mode1res & CONF1_ENABLE_MSK1) == CONF1_ENABLE_RES1) { if (pci_cfgcheck(32)) return (cfgmech); } /* Type #1 didn't work, so try type #2. */ oldval2 = inb(CONF2_ENABLE_PORT); if (bootverbose) { printf("pci_open(2):\tmode 2 enable port (0x0cf8) is 0x%02x\n", oldval2); } if ((oldval2 & 0xf0) == 0) { cfgmech = CFGMECH_2; devmax = 16; outb(CONF2_ENABLE_PORT, CONF2_ENABLE_CHK); mode2res = inb(CONF2_ENABLE_PORT); outb(CONF2_ENABLE_PORT, oldval2); if (bootverbose) printf("pci_open(2a):\tmode2res=0x%02x (0x%02x)\n", mode2res, CONF2_ENABLE_CHK); if (mode2res == CONF2_ENABLE_RES) { if (bootverbose) printf("pci_open(2a):\tnow trying mechanism 2\n"); if (pci_cfgcheck(16)) return (cfgmech); } } /* Nothing worked, so punt. */ cfgmech = CFGMECH_NONE; devmax = 0; return (cfgmech); } int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus) { struct pcie_cfg_list *pcielist; struct pcie_cfg_elem *pcie_array, *elem; #ifdef SMP struct pcpu *pc; #endif vm_offset_t va; uint32_t val1, val2; int i, slot; if (!mcfg_enable) return (0); if (minbus != 0) return (0); -#ifndef PAE - if (base >= 0x100000000) { + if (!pae_mode && base >= 0x100000000) { if (bootverbose) printf( "PCI: Memory Mapped PCI configuration area base 0x%jx too high\n", (uintmax_t)base); return (0); } -#endif if (bootverbose) printf("PCIe: Memory Mapped configuration base @ 0x%jx\n", (uintmax_t)base); #ifdef SMP STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) #endif { pcie_array = malloc(sizeof(struct pcie_cfg_elem) * PCIE_CACHE, M_DEVBUF, M_NOWAIT); if (pcie_array == NULL) return (0); va = kva_alloc(PCIE_CACHE * PAGE_SIZE); if (va == 0) { free(pcie_array, M_DEVBUF); return (0); } #ifdef SMP pcielist = &pcie_list[pc->pc_cpuid]; #else pcielist = &pcie_list[0]; #endif TAILQ_INIT(pcielist); for (i = 0; i < PCIE_CACHE; i++) { elem = &pcie_array[i]; elem->vapage = va + (i * PAGE_SIZE); elem->papage = 0; TAILQ_INSERT_HEAD(pcielist, elem, elem); } } pcie_base = base; pcie_minbus = minbus; pcie_maxbus = maxbus; cfgmech = CFGMECH_PCIE; devmax = 32; /* * On some AMD systems, some of the devices on bus 0 are * inaccessible using memory-mapped PCI config access. Walk * bus 0 looking for such devices. For these devices, we will * fall back to using type 1 config access instead. */ if (pci_cfgregopen() != 0) { for (slot = 0; slot <= PCI_SLOTMAX; slot++) { val1 = pcireg_cfgread(0, slot, 0, 0, 4); if (val1 == 0xffffffff) continue; val2 = pciereg_cfgread(0, slot, 0, 0, 4); if (val2 != val1) pcie_badslots |= (1 << slot); } } return (1); } #define PCIE_PADDR(base, reg, bus, slot, func) \ ((base) + \ ((((bus) & 0xff) << 20) | \ (((slot) & 0x1f) << 15) | \ (((func) & 0x7) << 12) | \ ((reg) & 0xfff))) static __inline vm_offset_t pciereg_findaddr(int bus, unsigned slot, unsigned func, unsigned reg) { struct pcie_cfg_list *pcielist; struct pcie_cfg_elem *elem; vm_paddr_t pa, papage; pa = PCIE_PADDR(pcie_base, reg, bus, slot, func); papage = pa & ~PAGE_MASK; /* * Find an element in the cache that matches the physical page desired, * or create a new mapping from the least recently used element. * A very simple LRU algorithm is used here, does it need to be more * efficient? */ pcielist = &pcie_list[PCPU_GET(cpuid)]; TAILQ_FOREACH(elem, pcielist, elem) { if (elem->papage == papage) break; } if (elem == NULL) { elem = TAILQ_LAST(pcielist, pcie_cfg_list); if (elem->papage != 0) { pmap_kremove(elem->vapage); invlpg(elem->vapage); } pmap_kenter(elem->vapage, papage); elem->papage = papage; } if (elem != TAILQ_FIRST(pcielist)) { TAILQ_REMOVE(pcielist, elem, elem); TAILQ_INSERT_HEAD(pcielist, elem, elem); } return (elem->vapage | (pa & PAGE_MASK)); } /* * AMD BIOS And Kernel Developer's Guides for CPU families starting with 10h * have a requirement that all accesses to the memory mapped PCI configuration * space are done using AX class of registers. * Since other vendors do not currently have any contradicting requirements * the AMD access pattern is applied universally. */ static int pciereg_cfgread(int bus, unsigned slot, unsigned func, unsigned reg, unsigned bytes) { vm_offset_t va; int data = -1; if (bus < pcie_minbus || bus > pcie_maxbus || slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX) return (-1); critical_enter(); va = pciereg_findaddr(bus, slot, func, reg); switch (bytes) { case 4: __asm("movl %1, %0" : "=a" (data) : "m" (*(volatile uint32_t *)va)); break; case 2: __asm("movzwl %1, %0" : "=a" (data) : "m" (*(volatile uint16_t *)va)); break; case 1: __asm("movzbl %1, %0" : "=a" (data) : "m" (*(volatile uint8_t *)va)); break; } critical_exit(); return (data); } static void pciereg_cfgwrite(int bus, unsigned slot, unsigned func, unsigned reg, int data, unsigned bytes) { vm_offset_t va; if (bus < pcie_minbus || bus > pcie_maxbus || slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX) return; critical_enter(); va = pciereg_findaddr(bus, slot, func, reg); switch (bytes) { case 4: __asm("movl %1, %0" : "=m" (*(volatile uint32_t *)va) : "a" (data)); break; case 2: __asm("movw %1, %0" : "=m" (*(volatile uint16_t *)va) : "a" ((uint16_t)data)); break; case 1: __asm("movb %1, %0" : "=m" (*(volatile uint8_t *)va) : "a" ((uint8_t)data)); break; } critical_exit(); } Index: head/sys/x86/acpica/acpi_wakeup.c =================================================================== --- head/sys/x86/acpica/acpi_wakeup.c (revision 343566) +++ head/sys/x86/acpica/acpi_wakeup.c (revision 343567) @@ -1,484 +1,478 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 Takanori Watanabe * Copyright (c) 2001-2012 Mitsuru IWASAKI * Copyright (c) 2003 Peter Wemm * Copyright (c) 2008-2012 Jung-uk Kim * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #if defined(__amd64__) #define DEV_APIC #else #include "opt_apic.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include #include #endif #ifdef SMP #include #include #endif #include #include #include "acpi_wakecode.h" #include "acpi_wakedata.h" /* Make sure the code is less than a page and leave room for the stack. */ CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024); extern int acpi_resume_beep; extern int acpi_reset_video; extern int acpi_susp_bounce; #ifdef SMP extern struct susppcb **susppcbs; static cpuset_t suspcpus; #else static struct susppcb **susppcbs; #endif static void *acpi_alloc_wakeup_handler(void **); static void acpi_stop_beep(void *); #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *, int); static void acpi_wakeup_cpus(struct acpi_softc *); #endif #ifdef __amd64__ #define ACPI_WAKEPAGES 4 #else #define ACPI_WAKEPAGES 1 #endif #define WAKECODE_FIXUP(offset, type, val) do { \ type *addr; \ addr = (type *)(sc->acpi_wakeaddr + (offset)); \ *addr = val; \ } while (0) static void acpi_stop_beep(void *arg) { if (acpi_resume_beep != 0) timer_spkr_release(); } #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *sc, int cpu) { struct pcb *pcb; int vector = (sc->acpi_wakephys >> 12) & 0xff; int apic_id = cpu_apic_ids[cpu]; int ms; pcb = &susppcbs[cpu]->sp_pcb; WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb); WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base); ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to resume. */ for (ms = 0; ms < 5000; ms++) { if (!CPU_ISSET(cpu, &suspended_cpus)) return (1); /* return SUCCESS */ DELAY(1000); } return (0); /* return FAILURE */ } #define WARMBOOT_TARGET 0 #ifdef __amd64__ #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #else /* __i386__ */ #define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467) #define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469) #endif #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) static void acpi_wakeup_cpus(struct acpi_softc *sc) { uint32_t mpbioswarmvec; int cpu; u_char mpbiosreason; /* save the current value of the warm-start vector */ mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* setup a vector to our boot code */ *((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *)WARMBOOT_SEG) = sc->acpi_wakephys >> 4; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* Wake up each AP. */ for (cpu = 1; cpu < mp_ncpus; cpu++) { if (!CPU_ISSET(cpu, &suspcpus)) continue; if (acpi_wakeup_ap(sc, cpu) == 0) { /* restore the warmstart vector */ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)", cpu, cpu_apic_ids[cpu]); } } #ifdef __i386__ /* * Remove the identity mapping of low memory for all CPUs and sync * the TLB for the BSP. The APs are now spinning in * cpususpend_handler() and we will release them soon. Then each * will invalidate its TLB. */ - PTD[KPTDI] = 0; - invltlb_glob(); + pmap_remap_lowptdi(false); #endif /* restore the warmstart vector */ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); } #endif int acpi_sleep_machdep(struct acpi_softc *sc, int state) { ACPI_STATUS status; struct pcb *pcb; #ifdef __amd64__ struct pcpu *pc; int i; #endif if (sc->acpi_wakeaddr == 0ul) return (-1); /* couldn't alloc wake memory */ #ifdef SMP suspcpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &suspcpus); #endif if (acpi_resume_beep != 0) timer_spkr_acquire(); AcpiSetFirmwareWakingVector(sc->acpi_wakephys, 0); intr_suspend(); pcb = &susppcbs[0]->sp_pcb; if (savectx(pcb)) { #ifdef __amd64__ fpususpend(susppcbs[0]->sp_fpususpend); #else npxsuspend(susppcbs[0]->sp_fpususpend); #endif #ifdef SMP if (!CPU_EMPTY(&suspcpus) && suspend_cpus(suspcpus) == 0) { device_printf(sc->acpi_dev, "Failed to suspend APs\n"); return (0); /* couldn't sleep */ } #endif #ifdef __amd64__ hw_ibrs_active = 0; hw_ssb_active = 0; cpu_stdext_feature3 = 0; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_ibpb_set = 0; } #endif WAKECODE_FIXUP(resume_beep, uint8_t, (acpi_resume_beep != 0)); WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0)); #ifdef __amd64__ WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER) & ~(EFER_LMA)); #else WAKECODE_FIXUP(wakeup_cr4, register_t, pcb->pcb_cr4); #endif WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb); WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base); #ifdef __i386__ /* * Map some low memory with virt == phys for ACPI wakecode * to use to jump to high memory after enabling paging. This * is the same as for similar jump in locore, except the * jump is a single instruction, and we know its address * more precisely so only need a single PTD, and we have to * be careful to use the kernel map (PTD[0] is for curthread * which may be a user thread in deprecated APIs). */ - PTD[KPTDI] = PTD[LOWPTDI]; + pmap_remap_lowptdi(true); #endif /* Call ACPICA to enter the desired sleep state */ if (state == ACPI_STATE_S4 && sc->acpi_s4bios) status = AcpiEnterSleepStateS4bios(); else status = AcpiEnterSleepState(state); if (ACPI_FAILURE(status)) { device_printf(sc->acpi_dev, "AcpiEnterSleepState failed - %s\n", AcpiFormatException(status)); return (0); /* couldn't sleep */ } if (acpi_susp_bounce) resumectx(pcb); for (;;) ia32_pause(); } else { /* * Re-initialize console hardware as soon as possibe. * No console output (e.g. printf) is allowed before * this point. */ cnresume(); #ifdef __amd64__ fpuresume(susppcbs[0]->sp_fpususpend); #else npxresume(susppcbs[0]->sp_fpususpend); #endif } return (1); /* wakeup successfully */ } int acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result, int intr_enabled) { if (sleep_result == -1) return (sleep_result); if (!intr_enabled) { /* Wakeup MD procedures in interrupt disabled context */ if (sleep_result == 1) { ucode_reload(); pmap_init_pat(); initializecpu(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); #ifdef DEV_APIC lapic_xapic_mode(); #endif #ifdef SMP if (!CPU_EMPTY(&suspcpus)) acpi_wakeup_cpus(sc); #endif } #ifdef SMP if (!CPU_EMPTY(&suspcpus)) resume_cpus(suspcpus); #endif mca_resume(); #ifdef __amd64__ if (vmm_resume_p != NULL) vmm_resume_p(); #endif intr_resume(/*suspend_cancelled*/false); AcpiSetFirmwareWakingVector(0, 0); } else { /* Wakeup MD procedures in interrupt enabled context */ if (sleep_result == 1 && mem_range_softc.mr_op != NULL && mem_range_softc.mr_op->reinit != NULL) mem_range_softc.mr_op->reinit(&mem_range_softc); } return (sleep_result); } static void * acpi_alloc_wakeup_handler(void *wakepages[ACPI_WAKEPAGES]) { int i; memset(wakepages, 0, ACPI_WAKEPAGES * sizeof(*wakepages)); /* * Specify the region for our wakeup code. We want it in the low 1 MB * region, excluding real mode IVT (0-0x3ff), BDA (0x400-0x4ff), EBDA * (less than 128KB, below 0xa0000, must be excluded by SMAP and DSDT), * and ROM area (0xa0000 and above). The temporary page tables must be * page-aligned. */ for (i = 0; i < ACPI_WAKEPAGES; i++) { wakepages[i] = contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0x500, 0xa0000, PAGE_SIZE, 0ul); if (wakepages[i] == NULL) { printf("%s: can't alloc wake memory\n", __func__); goto freepages; } } if (EVENTHANDLER_REGISTER(power_resume, acpi_stop_beep, NULL, EVENTHANDLER_PRI_LAST) == NULL) { printf("%s: can't register event handler\n", __func__); goto freepages; } susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK); for (i = 0; i < mp_ncpus; i++) { susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK); susppcbs[i]->sp_fpususpend = alloc_fpusave(M_WAITOK); } return (wakepages); freepages: for (i = 0; i < ACPI_WAKEPAGES; i++) if (wakepages[i] != NULL) contigfree(wakepages[i], PAGE_SIZE, M_DEVBUF); return (NULL); } void acpi_install_wakeup_handler(struct acpi_softc *sc) { static void *wakeaddr; void *wakepages[ACPI_WAKEPAGES]; #ifdef __amd64__ uint64_t *pt4, *pt3, *pt2; vm_paddr_t pt4pa, pt3pa, pt2pa; int i; #endif if (wakeaddr != NULL) return; if (acpi_alloc_wakeup_handler(wakepages) == NULL) return; wakeaddr = wakepages[0]; sc->acpi_wakeaddr = (vm_offset_t)wakeaddr; sc->acpi_wakephys = vtophys(wakeaddr); #ifdef __amd64__ pt4 = wakepages[1]; pt3 = wakepages[2]; pt2 = wakepages[3]; pt4pa = vtophys(pt4); pt3pa = vtophys(pt3); pt2pa = vtophys(pt2); #endif bcopy(wakecode, (void *)sc->acpi_wakeaddr, sizeof(wakecode)); /* Patch GDT base address, ljmp targets. */ WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t, sc->acpi_wakephys + bootgdt); WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t, sc->acpi_wakephys + wakeup_32); #ifdef __amd64__ WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t, sc->acpi_wakephys + wakeup_64); WAKECODE_FIXUP(wakeup_pagetables, uint32_t, pt4pa); #endif /* Save pointers to some global data. */ WAKECODE_FIXUP(wakeup_ret, void *, resumectx); #ifndef __amd64__ -#if defined(PAE) || defined(PAE_TABLES) - WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdpt)); -#else - WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdir)); -#endif - + WAKECODE_FIXUP(wakeup_cr3, register_t, pmap_get_kcr3()); #else /* __amd64__ */ /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { /* * Each slot of the level 4 pages points * to the same level 3 page */ pt4[i] = (uint64_t)pt3pa; pt4[i] |= PG_V | PG_RW | PG_U; /* * Each slot of the level 3 pages points * to the same level 2 page */ pt3[i] = (uint64_t)pt2pa; pt3[i] |= PG_V | PG_RW | PG_U; /* The level 2 page slots are mapped with 2MB pages for 1GB. */ pt2[i] = i * (2 * 1024 * 1024); pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; } #endif /* !__amd64__ */ if (bootverbose) device_printf(sc->acpi_dev, "wakeup code va %#jx pa %#jx\n", (uintmax_t)sc->acpi_wakeaddr, (uintmax_t)sc->acpi_wakephys); } Index: head/sys/x86/include/_types.h =================================================================== --- head/sys/x86/include/_types.h (revision 343566) +++ head/sys/x86/include/_types.h (revision 343567) @@ -1,150 +1,146 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2002 Mike Barcroft * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)ansi.h 8.2 (Berkeley) 1/4/94 * From: @(#)types.h 8.3 (Berkeley) 1/5/94 * $FreeBSD$ */ #ifndef _MACHINE__TYPES_H_ #define _MACHINE__TYPES_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif #include #define __NO_STRICT_ALIGNMENT /* * Basic types upon which most other types are built. */ typedef signed char __int8_t; typedef unsigned char __uint8_t; typedef short __int16_t; typedef unsigned short __uint16_t; typedef int __int32_t; typedef unsigned int __uint32_t; #ifdef __LP64__ typedef long __int64_t; typedef unsigned long __uint64_t; #else __extension__ typedef long long __int64_t; __extension__ typedef unsigned long long __uint64_t; #endif /* * Standard type definitions. */ #ifdef __LP64__ typedef __int32_t __clock_t; /* clock()... */ typedef __int64_t __critical_t; #ifndef _STANDALONE typedef double __double_t; typedef float __float_t; #endif typedef __int64_t __intfptr_t; typedef __int64_t __intptr_t; #else typedef unsigned long __clock_t; typedef __int32_t __critical_t; #ifndef _STANDALONE typedef long double __double_t; typedef long double __float_t; #endif typedef __int32_t __intfptr_t; typedef __int32_t __intptr_t; #endif typedef __int64_t __intmax_t; typedef __int32_t __int_fast8_t; typedef __int32_t __int_fast16_t; typedef __int32_t __int_fast32_t; typedef __int64_t __int_fast64_t; typedef __int8_t __int_least8_t; typedef __int16_t __int_least16_t; typedef __int32_t __int_least32_t; typedef __int64_t __int_least64_t; #ifdef __LP64__ typedef __int64_t __ptrdiff_t; /* ptr1 - ptr2 */ typedef __int64_t __register_t; typedef __int64_t __segsz_t; /* segment size (in pages) */ typedef __uint64_t __size_t; /* sizeof() */ typedef __int64_t __ssize_t; /* byte count or error */ typedef __int64_t __time_t; /* time()... */ typedef __uint64_t __uintfptr_t; typedef __uint64_t __uintptr_t; #else typedef __int32_t __ptrdiff_t; typedef __int32_t __register_t; typedef __int32_t __segsz_t; typedef __uint32_t __size_t; typedef __int32_t __ssize_t; typedef __int32_t __time_t; typedef __uint32_t __uintfptr_t; typedef __uint32_t __uintptr_t; #endif typedef __uint64_t __uintmax_t; typedef __uint32_t __uint_fast8_t; typedef __uint32_t __uint_fast16_t; typedef __uint32_t __uint_fast32_t; typedef __uint64_t __uint_fast64_t; typedef __uint8_t __uint_least8_t; typedef __uint16_t __uint_least16_t; typedef __uint32_t __uint_least32_t; typedef __uint64_t __uint_least64_t; #ifdef __LP64__ typedef __uint64_t __u_register_t; typedef __uint64_t __vm_offset_t; typedef __uint64_t __vm_paddr_t; typedef __uint64_t __vm_size_t; #else typedef __uint32_t __u_register_t; typedef __uint32_t __vm_offset_t; -#ifdef PAE typedef __uint64_t __vm_paddr_t; -#else -typedef __uint32_t __vm_paddr_t; -#endif typedef __uint32_t __vm_size_t; #endif typedef int ___wchar_t; #define __WCHAR_MIN __INT_MIN /* min value for a wchar_t */ #define __WCHAR_MAX __INT_MAX /* max value for a wchar_t */ #endif /* !_MACHINE__TYPES_H_ */ Index: head/sys/x86/include/x86_var.h =================================================================== --- head/sys/x86/include/x86_var.h (revision 343566) +++ head/sys/x86/include/x86_var.h (revision 343567) @@ -1,154 +1,141 @@ /*- * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_X86_VAR_H_ #define _X86_X86_VAR_H_ /* * Miscellaneous machine-dependent declarations. */ extern long Maxmem; extern u_int basemem; extern int busdma_swi_pending; extern u_int cpu_exthigh; extern u_int cpu_feature; extern u_int cpu_feature2; extern u_int amd_feature; extern u_int amd_feature2; extern u_int amd_rascap; extern u_int amd_pminfo; extern u_int amd_extended_feature_extensions; extern u_int via_feature_rng; extern u_int via_feature_xcrypt; extern u_int cpu_clflush_line_size; extern u_int cpu_stdext_feature; extern u_int cpu_stdext_feature2; extern u_int cpu_stdext_feature3; extern uint64_t cpu_ia32_arch_caps; extern u_int cpu_fxsr; extern u_int cpu_high; extern u_int cpu_id; extern u_int cpu_max_ext_state_size; extern u_int cpu_mxcsr_mask; extern u_int cpu_procinfo; extern u_int cpu_procinfo2; extern char cpu_vendor[]; extern u_int cpu_vendor_id; extern u_int cpu_mon_mwait_flags; extern u_int cpu_mon_min_size; extern u_int cpu_mon_max_size; extern u_int cpu_maxphyaddr; extern char ctx_switch_xsave[]; extern u_int hv_high; extern char hv_vendor[]; extern char kstack[]; extern char sigcode[]; extern int szsigcode; extern int vm_page_dump_size; extern int workaround_erratum383; extern int _udatasel; extern int _ucodesel; extern int _ucode32sel; extern int _ufssel; extern int _ugssel; extern int use_xsave; extern uint64_t xsave_mask; extern u_int max_apic_id; extern int pti; extern int hw_ibrs_active; extern int hw_ssb_active; struct pcb; struct thread; struct reg; struct fpreg; struct dbreg; struct dumperinfo; struct trapframe; /* * The interface type of the interrupt handler entry point cannot be * expressed in C. Use simplest non-variadic function type as an * approximation. */ typedef void alias_for_inthand_t(void); -/* - * Returns the maximum physical address that can be used with the - * current system. - */ -static __inline vm_paddr_t -cpu_getmaxphyaddr(void) -{ -#if defined(__i386__) && !defined(PAE) - return (0xffffffff); -#else - return ((1ULL << cpu_maxphyaddr) - 1); -#endif -} - bool acpi_get_fadt_bootflags(uint16_t *flagsp); void *alloc_fpusave(int flags); void busdma_swi(void); +vm_paddr_t cpu_getmaxphyaddr(void); bool cpu_mwait_usable(void); void cpu_probe_amdc1e(void); void cpu_setregs(void); bool disable_wp(void); void restore_wp(bool old_wp); void dump_add_page(vm_paddr_t); void dump_drop_page(vm_paddr_t); void finishidentcpu(void); void identify_cpu1(void); void identify_cpu2(void); void identify_cpu_fixup_bsp(void); void identify_hypervisor(void); void initializecpu(void); void initializecpucache(void); bool fix_cpuid(void); void fillw(int /*u_short*/ pat, void *base, size_t cnt); int is_physical_memory(vm_paddr_t addr); int isa_nmi(int cd); void handle_ibrs_entry(void); void handle_ibrs_exit(void); void hw_ibrs_recalculate(void); void hw_ssb_recalculate(bool all_cpus); void nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame); void nmi_call_kdb_smp(u_int type, struct trapframe *frame); void nmi_handle_intr(u_int type, struct trapframe *frame); void pagecopy(void *from, void *to); void printcpuinfo(void); int pti_get_default(void); int user_dbreg_trap(register_t dr6); int minidumpsys(struct dumperinfo *); struct pcb *get_pcb_td(struct thread *td); #endif Index: head/sys/x86/x86/identcpu.c =================================================================== --- head/sys/x86/x86/identcpu.c (revision 343566) +++ head/sys/x86/x86/identcpu.c (revision 343567) @@ -1,2535 +1,2553 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * Copyright (c) 1997 KATO Takenori. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include +#include +#include + #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define IDENTBLUE_CYRIX486 0 #define IDENTBLUE_IBMCPU 1 #define IDENTBLUE_CYRIXM2 2 static void identifycyrix(void); static void print_transmeta_info(void); #endif static u_int find_cpu_vendor_id(void); static void print_AMD_info(void); static void print_INTEL_info(void); static void print_INTEL_TLB(u_int data); static void print_hypervisor_info(void); static void print_svm_info(void); static void print_via_padlock_info(void); static void print_vmx_info(void); #ifdef __i386__ int cpu; /* Are we 386, 386sx, 486, etc? */ int cpu_class; #endif u_int cpu_feature; /* Feature flags */ u_int cpu_feature2; /* Feature flags */ u_int amd_feature; /* AMD feature flags */ u_int amd_feature2; /* AMD feature flags */ u_int amd_rascap; /* AMD RAS capabilities */ u_int amd_pminfo; /* AMD advanced power management info */ u_int amd_extended_feature_extensions; u_int via_feature_rng; /* VIA RNG features */ u_int via_feature_xcrypt; /* VIA ACE features */ u_int cpu_high; /* Highest arg to CPUID */ u_int cpu_exthigh; /* Highest arg to extended CPUID */ u_int cpu_id; /* Stepping ID */ u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFUSH */ u_int cpu_procinfo2; /* Multicore info */ char cpu_vendor[20]; /* CPU Origin code */ u_int cpu_vendor_id; /* CPU vendor ID */ u_int cpu_fxsr; /* SSE enabled */ u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ u_int cpu_clflush_line_size = 32; u_int cpu_stdext_feature; /* %ebx */ u_int cpu_stdext_feature2; /* %ecx */ u_int cpu_stdext_feature3; /* %edx */ uint64_t cpu_ia32_arch_caps; u_int cpu_max_ext_state_size; u_int cpu_mon_mwait_flags; /* MONITOR/MWAIT flags (CPUID.05H.ECX) */ u_int cpu_mon_min_size; /* MONITOR minimum range size, bytes */ u_int cpu_mon_max_size; /* MONITOR minimum range size, bytes */ u_int cpu_maxphyaddr; /* Max phys addr width in bits */ char machine[] = MACHINE; SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD, &via_feature_rng, 0, "VIA RNG feature available in CPU"); SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD, &via_feature_xcrypt, 0, "VIA xcrypt feature available in CPU"); #ifdef __amd64__ #ifdef SCTL_MASK32 extern int adaptive_machine_arch; #endif static int sysctl_hw_machine(SYSCTL_HANDLER_ARGS) { #ifdef SCTL_MASK32 static const char machine32[] = "i386"; #endif int error; #ifdef SCTL_MASK32 if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch) error = SYSCTL_OUT(req, machine32, sizeof(machine32)); else #endif error = SYSCTL_OUT(req, machine, sizeof(machine)); return (error); } SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class"); #else SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); #endif static char cpu_model[128]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_MPSAFE, cpu_model, 0, "Machine model"); static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); u_int hv_high; char hv_vendor[16]; SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD | CTLFLAG_MPSAFE, hv_vendor, 0, "Hypervisor vendor"); static eventhandler_tag tsc_post_tag; static char cpu_brand[48]; #ifdef __i386__ #define MAX_BRAND_INDEX 8 static const char *cpu_brandtable[MAX_BRAND_INDEX + 1] = { NULL, /* No brand */ "Intel Celeron", "Intel Pentium III", "Intel Pentium III Xeon", NULL, NULL, NULL, NULL, "Intel Pentium 4" }; static struct { char *cpu_name; int cpu_class; } cpus[] = { { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ { "i386DX", CPUCLASS_386 }, /* CPU_386 */ { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ { "i486DX", CPUCLASS_486 }, /* CPU_486 */ { "Pentium", CPUCLASS_586 }, /* CPU_586 */ { "Cyrix 486", CPUCLASS_486 }, /* CPU_486DLC */ { "Pentium Pro", CPUCLASS_686 }, /* CPU_686 */ { "Cyrix 5x86", CPUCLASS_486 }, /* CPU_M1SC */ { "Cyrix 6x86", CPUCLASS_486 }, /* CPU_M1 */ { "Blue Lightning", CPUCLASS_486 }, /* CPU_BLUE */ { "Cyrix 6x86MX", CPUCLASS_686 }, /* CPU_M2 */ { "NexGen 586", CPUCLASS_386 }, /* CPU_NX586 (XXX) */ { "Cyrix 486S/DX", CPUCLASS_486 }, /* CPU_CY486DX */ { "Pentium II", CPUCLASS_686 }, /* CPU_PII */ { "Pentium III", CPUCLASS_686 }, /* CPU_PIII */ { "Pentium 4", CPUCLASS_686 }, /* CPU_P4 */ }; #endif static struct { char *vendor; u_int vendor_id; } cpu_vendors[] = { { INTEL_VENDOR_ID, CPU_VENDOR_INTEL }, /* GenuineIntel */ { AMD_VENDOR_ID, CPU_VENDOR_AMD }, /* AuthenticAMD */ { CENTAUR_VENDOR_ID, CPU_VENDOR_CENTAUR }, /* CentaurHauls */ #ifdef __i386__ { NSC_VENDOR_ID, CPU_VENDOR_NSC }, /* Geode by NSC */ { CYRIX_VENDOR_ID, CPU_VENDOR_CYRIX }, /* CyrixInstead */ { TRANSMETA_VENDOR_ID, CPU_VENDOR_TRANSMETA }, /* GenuineTMx86 */ { SIS_VENDOR_ID, CPU_VENDOR_SIS }, /* SiS SiS SiS */ { UMC_VENDOR_ID, CPU_VENDOR_UMC }, /* UMC UMC UMC */ { NEXGEN_VENDOR_ID, CPU_VENDOR_NEXGEN }, /* NexGenDriven */ { RISE_VENDOR_ID, CPU_VENDOR_RISE }, /* RiseRiseRise */ #if 0 /* XXX CPUID 8000_0000h and 8086_0000h, not 0000_0000h */ { "TransmetaCPU", CPU_VENDOR_TRANSMETA }, #endif #endif }; void printcpuinfo(void) { u_int regs[4], i; char *brand; printf("CPU: "); #ifdef __i386__ cpu_class = cpus[cpu].cpu_class; strncpy(cpu_model, cpus[cpu].cpu_name, sizeof (cpu_model)); #else strncpy(cpu_model, "Hammer", sizeof (cpu_model)); #endif /* Check for extended CPUID information and a processor name. */ if (cpu_exthigh >= 0x80000004) { brand = cpu_brand; for (i = 0x80000002; i < 0x80000005; i++) { do_cpuid(i, regs); memcpy(brand, regs, sizeof(regs)); brand += sizeof(regs); } } switch (cpu_vendor_id) { case CPU_VENDOR_INTEL: #ifdef __i386__ if ((cpu_id & 0xf00) > 0x300) { u_int brand_index; cpu_model[0] = '\0'; switch (cpu_id & 0x3000) { case 0x1000: strcpy(cpu_model, "Overdrive "); break; case 0x2000: strcpy(cpu_model, "Dual "); break; } switch (cpu_id & 0xf00) { case 0x400: strcat(cpu_model, "i486 "); /* Check the particular flavor of 486 */ switch (cpu_id & 0xf0) { case 0x00: case 0x10: strcat(cpu_model, "DX"); break; case 0x20: strcat(cpu_model, "SX"); break; case 0x30: strcat(cpu_model, "DX2"); break; case 0x40: strcat(cpu_model, "SL"); break; case 0x50: strcat(cpu_model, "SX2"); break; case 0x70: strcat(cpu_model, "DX2 Write-Back Enhanced"); break; case 0x80: strcat(cpu_model, "DX4"); break; } break; case 0x500: /* Check the particular flavor of 586 */ strcat(cpu_model, "Pentium"); switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, " A-step"); break; case 0x10: strcat(cpu_model, "/P5"); break; case 0x20: strcat(cpu_model, "/P54C"); break; case 0x30: strcat(cpu_model, "/P24T"); break; case 0x40: strcat(cpu_model, "/P55C"); break; case 0x70: strcat(cpu_model, "/P54C"); break; case 0x80: strcat(cpu_model, "/P55C (quarter-micron)"); break; default: /* nothing */ break; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) /* * XXX - If/when Intel fixes the bug, this * should also check the version of the * CPU, not just that it's a Pentium. */ has_f00f_bug = 1; #endif break; case 0x600: /* Check the particular flavor of 686 */ switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, "Pentium Pro A-step"); break; case 0x10: strcat(cpu_model, "Pentium Pro"); break; case 0x30: case 0x50: case 0x60: strcat(cpu_model, "Pentium II/Pentium II Xeon/Celeron"); cpu = CPU_PII; break; case 0x70: case 0x80: case 0xa0: case 0xb0: strcat(cpu_model, "Pentium III/Pentium III Xeon/Celeron"); cpu = CPU_PIII; break; default: strcat(cpu_model, "Unknown 80686"); break; } break; case 0xf00: strcat(cpu_model, "Pentium 4"); cpu = CPU_P4; break; default: strcat(cpu_model, "unknown"); break; } /* * If we didn't get a brand name from the extended * CPUID, try to look it up in the brand table. */ if (cpu_high > 0 && *cpu_brand == '\0') { brand_index = cpu_procinfo & CPUID_BRAND_INDEX; if (brand_index <= MAX_BRAND_INDEX && cpu_brandtable[brand_index] != NULL) strcpy(cpu_brand, cpu_brandtable[brand_index]); } } #else /* Please make up your mind folks! */ strcat(cpu_model, "EM64T"); #endif break; case CPU_VENDOR_AMD: /* * Values taken from AMD Processor Recognition * http://www.amd.com/K6/k6docs/pdf/20734g.pdf * (also describes ``Features'' encodings. */ strcpy(cpu_model, "AMD "); #ifdef __i386__ switch (cpu_id & 0xFF0) { case 0x410: strcat(cpu_model, "Standard Am486DX"); break; case 0x430: strcat(cpu_model, "Enhanced Am486DX2 Write-Through"); break; case 0x470: strcat(cpu_model, "Enhanced Am486DX2 Write-Back"); break; case 0x480: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Through"); break; case 0x490: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Back"); break; case 0x4E0: strcat(cpu_model, "Am5x86 Write-Through"); break; case 0x4F0: strcat(cpu_model, "Am5x86 Write-Back"); break; case 0x500: strcat(cpu_model, "K5 model 0"); break; case 0x510: strcat(cpu_model, "K5 model 1"); break; case 0x520: strcat(cpu_model, "K5 PR166 (model 2)"); break; case 0x530: strcat(cpu_model, "K5 PR200 (model 3)"); break; case 0x560: strcat(cpu_model, "K6"); break; case 0x570: strcat(cpu_model, "K6 266 (model 1)"); break; case 0x580: strcat(cpu_model, "K6-2"); break; case 0x590: strcat(cpu_model, "K6-III"); break; case 0x5a0: strcat(cpu_model, "Geode LX"); break; default: strcat(cpu_model, "Unknown"); break; } #else if ((cpu_id & 0xf00) == 0xf00) strcat(cpu_model, "AMD64 Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_CYRIX: strcpy(cpu_model, "Cyrix "); switch (cpu_id & 0xff0) { case 0x440: strcat(cpu_model, "MediaGX"); break; case 0x520: strcat(cpu_model, "6x86"); break; case 0x540: cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); break; case 0x600: strcat(cpu_model, "6x86MX"); break; default: /* * Even though CPU supports the cpuid * instruction, it can be disabled. * Therefore, this routine supports all Cyrix * CPUs. */ switch (cyrix_did & 0xf0) { case 0x00: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486SLC"); break; case 0x01: strcat(cpu_model, "486DLC"); break; case 0x02: strcat(cpu_model, "486SLC2"); break; case 0x03: strcat(cpu_model, "486DLC2"); break; case 0x04: strcat(cpu_model, "486SRx"); break; case 0x05: strcat(cpu_model, "486DRx"); break; case 0x06: strcat(cpu_model, "486SRx2"); break; case 0x07: strcat(cpu_model, "486DRx2"); break; case 0x08: strcat(cpu_model, "486SRu"); break; case 0x09: strcat(cpu_model, "486DRu"); break; case 0x0a: strcat(cpu_model, "486SRu2"); break; case 0x0b: strcat(cpu_model, "486DRu2"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x10: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486S"); break; case 0x01: strcat(cpu_model, "486S2"); break; case 0x02: strcat(cpu_model, "486Se"); break; case 0x03: strcat(cpu_model, "486S2e"); break; case 0x0a: strcat(cpu_model, "486DX"); break; case 0x0b: strcat(cpu_model, "486DX2"); break; case 0x0f: strcat(cpu_model, "486DX4"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x20: if ((cyrix_did & 0x0f) < 8) strcat(cpu_model, "6x86"); /* Where did you get it? */ else strcat(cpu_model, "5x86"); break; case 0x30: strcat(cpu_model, "6x86"); break; case 0x40: if ((cyrix_did & 0xf000) == 0x3000) { cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); } else strcat(cpu_model, "MediaGX"); break; case 0x50: strcat(cpu_model, "6x86MX"); break; case 0xf0: switch (cyrix_did & 0x0f) { case 0x0d: strcat(cpu_model, "Overdrive CPU"); break; case 0x0e: strcpy(cpu_model, "Texas Instruments 486SXL"); break; case 0x0f: strcat(cpu_model, "486SLC/DLC"); break; default: strcat(cpu_model, "Unknown"); break; } break; default: strcat(cpu_model, "Unknown"); break; } break; } break; case CPU_VENDOR_RISE: strcpy(cpu_model, "Rise "); switch (cpu_id & 0xff0) { case 0x500: /* 6401 and 6441 (Kirin) */ case 0x520: /* 6510 (Lynx) */ strcat(cpu_model, "mP6"); break; default: strcat(cpu_model, "Unknown"); } break; #endif case CPU_VENDOR_CENTAUR: #ifdef __i386__ switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "IDT WinChip C6"); break; case 0x580: strcpy(cpu_model, "IDT WinChip 2"); break; case 0x590: strcpy(cpu_model, "IDT WinChip 3"); break; case 0x660: strcpy(cpu_model, "VIA C3 Samuel"); break; case 0x670: if (cpu_id & 0x8) strcpy(cpu_model, "VIA C3 Ezra"); else strcpy(cpu_model, "VIA C3 Samuel 2"); break; case 0x680: strcpy(cpu_model, "VIA C3 Ezra-T"); break; case 0x690: strcpy(cpu_model, "VIA C3 Nehemiah"); break; case 0x6a0: case 0x6d0: strcpy(cpu_model, "VIA C7 Esther"); break; case 0x6f0: strcpy(cpu_model, "VIA Nano"); break; default: strcpy(cpu_model, "VIA/IDT Unknown"); } #else strcpy(cpu_model, "VIA "); if ((cpu_id & 0xff0) == 0x6f0) strcat(cpu_model, "Nano Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_IBM: strcpy(cpu_model, "Blue Lightning CPU"); break; case CPU_VENDOR_NSC: switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "Geode SC1100"); cpu = CPU_GEODE1100; break; default: strcpy(cpu_model, "Geode/NSC unknown"); break; } break; #endif default: strcat(cpu_model, "Unknown"); break; } /* * Replace cpu_model with cpu_brand minus leading spaces if * we have one. */ brand = cpu_brand; while (*brand == ' ') ++brand; if (*brand != '\0') strcpy(cpu_model, brand); printf("%s (", cpu_model); if (tsc_freq != 0) { hw_clockrate = (tsc_freq + 5000) / 1000000; printf("%jd.%02d-MHz ", (intmax_t)(tsc_freq + 4999) / 1000000, (u_int)((tsc_freq + 4999) / 10000) % 100); } #ifdef __i386__ switch(cpu_class) { case CPUCLASS_286: printf("286"); break; case CPUCLASS_386: printf("386"); break; #if defined(I486_CPU) case CPUCLASS_486: printf("486"); break; #endif #if defined(I586_CPU) case CPUCLASS_586: printf("586"); break; #endif #if defined(I686_CPU) case CPUCLASS_686: printf("686"); break; #endif default: printf("Unknown"); /* will panic below... */ } #else printf("K8"); #endif printf("-class CPU)\n"); if (*cpu_vendor) printf(" Origin=\"%s\"", cpu_vendor); if (cpu_id) printf(" Id=0x%x", cpu_id); if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_CENTAUR || #ifdef __i386__ cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_RISE || cpu_vendor_id == CPU_VENDOR_NSC || (cpu_vendor_id == CPU_VENDOR_CYRIX && ((cpu_id & 0xf00) > 0x500)) || #endif 0) { printf(" Family=0x%x", CPUID_TO_FAMILY(cpu_id)); printf(" Model=0x%x", CPUID_TO_MODEL(cpu_id)); printf(" Stepping=%u", cpu_id & CPUID_STEPPING); #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) printf("\n DIR=0x%04x", cyrix_did); #endif /* * AMD CPUID Specification * http://support.amd.com/us/Embedded_TechDocs/25481.pdf * * Intel Processor Identification and CPUID Instruction * http://www.intel.com/assets/pdf/appnote/241618.pdf */ if (cpu_high > 0) { /* * Here we should probably set up flags indicating * whether or not various features are available. * The interesting ones are probably VME, PSE, PAE, * and PGE. The code already assumes without bothering * to check that all CPUs >= Pentium have a TSC and * MSRs. */ printf("\n Features=0x%b", cpu_feature, "\020" "\001FPU" /* Integral FPU */ "\002VME" /* Extended VM86 mode support */ "\003DE" /* Debugging Extensions (CR4.DE) */ "\004PSE" /* 4MByte page tables */ "\005TSC" /* Timestamp counter */ "\006MSR" /* Machine specific registers */ "\007PAE" /* Physical address extension */ "\010MCE" /* Machine Check support */ "\011CX8" /* CMPEXCH8 instruction */ "\012APIC" /* SMP local APIC */ "\013oldMTRR" /* Previous implementation of MTRR */ "\014SEP" /* Fast System Call */ "\015MTRR" /* Memory Type Range Registers */ "\016PGE" /* PG_G (global bit) support */ "\017MCA" /* Machine Check Architecture */ "\020CMOV" /* CMOV instruction */ "\021PAT" /* Page attributes table */ "\022PSE36" /* 36 bit address space support */ "\023PN" /* Processor Serial number */ "\024CLFLUSH" /* Has the CLFLUSH instruction */ "\025" "\026DTS" /* Debug Trace Store */ "\027ACPI" /* ACPI support */ "\030MMX" /* MMX instructions */ "\031FXSR" /* FXSAVE/FXRSTOR */ "\032SSE" /* Streaming SIMD Extensions */ "\033SSE2" /* Streaming SIMD Extensions #2 */ "\034SS" /* Self snoop */ "\035HTT" /* Hyperthreading (see EBX bit 16-23) */ "\036TM" /* Thermal Monitor clock slowdown */ "\037IA64" /* CPU can execute IA64 instructions */ "\040PBE" /* Pending Break Enable */ ); if (cpu_feature2 != 0) { printf("\n Features2=0x%b", cpu_feature2, "\020" "\001SSE3" /* SSE3 */ "\002PCLMULQDQ" /* Carry-Less Mul Quadword */ "\003DTES64" /* 64-bit Debug Trace */ "\004MON" /* MONITOR/MWAIT Instructions */ "\005DS_CPL" /* CPL Qualified Debug Store */ "\006VMX" /* Virtual Machine Extensions */ "\007SMX" /* Safer Mode Extensions */ "\010EST" /* Enhanced SpeedStep */ "\011TM2" /* Thermal Monitor 2 */ "\012SSSE3" /* SSSE3 */ "\013CNXT-ID" /* L1 context ID available */ "\014SDBG" /* IA32 silicon debug */ "\015FMA" /* Fused Multiply Add */ "\016CX16" /* CMPXCHG16B Instruction */ "\017xTPR" /* Send Task Priority Messages*/ "\020PDCM" /* Perf/Debug Capability MSR */ "\021" "\022PCID" /* Process-context Identifiers*/ "\023DCA" /* Direct Cache Access */ "\024SSE4.1" /* SSE 4.1 */ "\025SSE4.2" /* SSE 4.2 */ "\026x2APIC" /* xAPIC Extensions */ "\027MOVBE" /* MOVBE Instruction */ "\030POPCNT" /* POPCNT Instruction */ "\031TSCDLT" /* TSC-Deadline Timer */ "\032AESNI" /* AES Crypto */ "\033XSAVE" /* XSAVE/XRSTOR States */ "\034OSXSAVE" /* OS-Enabled State Management*/ "\035AVX" /* Advanced Vector Extensions */ "\036F16C" /* Half-precision conversions */ "\037RDRAND" /* RDRAND Instruction */ "\040HV" /* Hypervisor */ ); } if (amd_feature != 0) { printf("\n AMD Features=0x%b", amd_feature, "\020" /* in hex */ "\001" /* Same */ "\002" /* Same */ "\003" /* Same */ "\004" /* Same */ "\005" /* Same */ "\006" /* Same */ "\007" /* Same */ "\010" /* Same */ "\011" /* Same */ "\012" /* Same */ "\013" /* Undefined */ "\014SYSCALL" /* Have SYSCALL/SYSRET */ "\015" /* Same */ "\016" /* Same */ "\017" /* Same */ "\020" /* Same */ "\021" /* Same */ "\022" /* Same */ "\023" /* Reserved, unknown */ "\024MP" /* Multiprocessor Capable */ "\025NX" /* Has EFER.NXE, NX */ "\026" /* Undefined */ "\027MMX+" /* AMD MMX Extensions */ "\030" /* Same */ "\031" /* Same */ "\032FFXSR" /* Fast FXSAVE/FXRSTOR */ "\033Page1GB" /* 1-GB large page support */ "\034RDTSCP" /* RDTSCP */ "\035" /* Undefined */ "\036LM" /* 64 bit long mode */ "\0373DNow!+" /* AMD 3DNow! Extensions */ "\0403DNow!" /* AMD 3DNow! */ ); } if (amd_feature2 != 0) { printf("\n AMD Features2=0x%b", amd_feature2, "\020" "\001LAHF" /* LAHF/SAHF in long mode */ "\002CMP" /* CMP legacy */ "\003SVM" /* Secure Virtual Mode */ "\004ExtAPIC" /* Extended APIC register */ "\005CR8" /* CR8 in legacy mode */ "\006ABM" /* LZCNT instruction */ "\007SSE4A" /* SSE4A */ "\010MAS" /* Misaligned SSE mode */ "\011Prefetch" /* 3DNow! Prefetch/PrefetchW */ "\012OSVW" /* OS visible workaround */ "\013IBS" /* Instruction based sampling */ "\014XOP" /* XOP extended instructions */ "\015SKINIT" /* SKINIT/STGI */ "\016WDT" /* Watchdog timer */ "\017" "\020LWP" /* Lightweight Profiling */ "\021FMA4" /* 4-operand FMA instructions */ "\022TCE" /* Translation Cache Extension */ "\023" "\024NodeId" /* NodeId MSR support */ "\025" "\026TBM" /* Trailing Bit Manipulation */ "\027Topology" /* Topology Extensions */ "\030PCXC" /* Core perf count */ "\031PNXC" /* NB perf count */ "\032" "\033DBE" /* Data Breakpoint extension */ "\034PTSC" /* Performance TSC */ "\035PL2I" /* L2I perf count */ "\036MWAITX" /* MONITORX/MWAITX instructions */ "\037" "\040" ); } if (cpu_stdext_feature != 0) { printf("\n Structured Extended Features=0x%b", cpu_stdext_feature, "\020" /* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ "\001FSGSBASE" "\002TSCADJ" "\003SGX" /* Bit Manipulation Instructions */ "\004BMI1" /* Hardware Lock Elision */ "\005HLE" /* Advanced Vector Instructions 2 */ "\006AVX2" /* FDP_EXCPTN_ONLY */ "\007FDPEXC" /* Supervisor Mode Execution Prot. */ "\010SMEP" /* Bit Manipulation Instructions */ "\011BMI2" "\012ERMS" /* Invalidate Processor Context ID */ "\013INVPCID" /* Restricted Transactional Memory */ "\014RTM" "\015PQM" "\016NFPUSG" /* Intel Memory Protection Extensions */ "\017MPX" "\020PQE" /* AVX512 Foundation */ "\021AVX512F" "\022AVX512DQ" /* Enhanced NRBG */ "\023RDSEED" /* ADCX + ADOX */ "\024ADX" /* Supervisor Mode Access Prevention */ "\025SMAP" "\026AVX512IFMA" "\027PCOMMIT" "\030CLFLUSHOPT" "\031CLWB" "\032PROCTRACE" "\033AVX512PF" "\034AVX512ER" "\035AVX512CD" "\036SHA" "\037AVX512BW" "\040AVX512VL" ); } if (cpu_stdext_feature2 != 0) { printf("\n Structured Extended Features2=0x%b", cpu_stdext_feature2, "\020" "\001PREFETCHWT1" "\002AVX512VBMI" "\003UMIP" "\004PKU" "\005OSPKE" "\027RDPID" "\037SGXLC" ); } if (cpu_stdext_feature3 != 0) { printf("\n Structured Extended Features3=0x%b", cpu_stdext_feature3, "\020" "\033IBPB" "\034STIBP" "\035L1DFL" "\036ARCH_CAP" "\040SSBD" ); } if ((cpu_feature2 & CPUID2_XSAVE) != 0) { cpuid_count(0xd, 0x1, regs); if (regs[0] != 0) { printf("\n XSAVE Features=0x%b", regs[0], "\020" "\001XSAVEOPT" "\002XSAVEC" "\003XINUSE" "\004XSAVES"); } } if (cpu_ia32_arch_caps != 0) { printf("\n IA32_ARCH_CAPS=0x%b", (u_int)cpu_ia32_arch_caps, "\020" "\001RDCL_NO" "\002IBRS_ALL" "\003RSBA" "\004SKIP_L1DFL_VME" "\005SSB_NO" ); } if (amd_extended_feature_extensions != 0) { u_int amd_fe_masked; amd_fe_masked = amd_extended_feature_extensions; if ((amd_fe_masked & AMDFEID_IBRS) == 0) amd_fe_masked &= ~(AMDFEID_IBRS_ALWAYSON | AMDFEID_PREFER_IBRS); if ((amd_fe_masked & AMDFEID_STIBP) == 0) amd_fe_masked &= ~AMDFEID_STIBP_ALWAYSON; printf("\n " "AMD Extended Feature Extensions ID EBX=" "0x%b", amd_fe_masked, "\020" "\001CLZERO" "\002IRPerf" "\003XSaveErPtr" "\015IBPB" "\017IBRS" "\020STIBP" "\021IBRS_ALWAYSON" "\022STIBP_ALWAYSON" "\023PREFER_IBRS" "\031SSBD" "\032VIRT_SSBD" "\033SSB_NO" ); } if (via_feature_rng != 0 || via_feature_xcrypt != 0) print_via_padlock_info(); if (cpu_feature2 & CPUID2_VMX) print_vmx_info(); if (amd_feature2 & AMDID2_SVM) print_svm_info(); if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_AMD) cpu_feature &= ~CPUID_HTT; /* * If this CPU supports P-state invariant TSC then * mention the capability. */ if (tsc_is_invariant) { printf("\n TSC: P-state invariant"); if (tsc_perf_stat) printf(", performance statistics"); } } #ifdef __i386__ } else if (cpu_vendor_id == CPU_VENDOR_CYRIX) { printf(" DIR=0x%04x", cyrix_did); printf(" Stepping=%u", (cyrix_did & 0xf000) >> 12); printf(" Revision=%u", (cyrix_did & 0x0f00) >> 8); #ifndef CYRIX_CACHE_REALLY_WORKS if (cpu == CPU_M1 && (cyrix_did & 0xff00) < 0x1700) printf("\n CPU cache: write-through mode"); #endif #endif } /* Avoid ugly blank lines: only print newline when we have to. */ if (*cpu_vendor || cpu_id) printf("\n"); if (bootverbose) { if (cpu_vendor_id == CPU_VENDOR_AMD) print_AMD_info(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) print_INTEL_info(); #ifdef __i386__ else if (cpu_vendor_id == CPU_VENDOR_TRANSMETA) print_transmeta_info(); #endif } print_hypervisor_info(); } #ifdef __i386__ void panicifcpuunsupported(void) { #if !defined(lint) #if !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU) #error This kernel is not configured for one of the supported CPUs #endif #else /* lint */ #endif /* lint */ /* * Now that we have told the user what they have, * let them know if that machine type isn't configured. */ switch (cpu_class) { case CPUCLASS_286: /* a 286 should not make it this far, anyway */ case CPUCLASS_386: #if !defined(I486_CPU) case CPUCLASS_486: #endif #if !defined(I586_CPU) case CPUCLASS_586: #endif #if !defined(I686_CPU) case CPUCLASS_686: #endif panic("CPU class not configured"); default: break; } } static volatile u_int trap_by_rdmsr; /* * Special exception 6 handler. * The rdmsr instruction generates invalid opcodes fault on 486-class * Cyrix CPU. Stacked eip register points the rdmsr instruction in the * function identblue() when this handler is called. Stacked eip should * be advanced. */ inthand_t bluetrap6; #ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap6)) ",@function \n\ " __XSTRING(CNAME(bluetrap6)) ": \n\ ss \n\ movl $0xa8c1d," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); #endif /* * Special exception 13 handler. * Accessing non-existent MSR generates general protection fault. */ inthand_t bluetrap13; #ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap13)) ",@function \n\ " __XSTRING(CNAME(bluetrap13)) ": \n\ ss \n\ movl $0xa89c4," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ popl %eax /* discard error code */ \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); #endif /* * Distinguish IBM Blue Lightning CPU from Cyrix CPUs that does not * support cpuid instruction. This function should be called after * loading interrupt descriptor table register. * * I don't like this method that handles fault, but I couldn't get * information for any other methods. Does blue giant know? */ static int identblue(void) { trap_by_rdmsr = 0; /* * Cyrix 486-class CPU does not support rdmsr instruction. * The rdmsr instruction generates invalid opcode fault, and exception * will be trapped by bluetrap6() on Cyrix 486-class CPU. The * bluetrap6() set the magic number to trap_by_rdmsr. */ setidt(IDT_UD, bluetrap6, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Certain BIOS disables cpuid instruction of Cyrix 6x86MX CPU. * In this case, rdmsr generates general protection fault, and * exception will be trapped by bluetrap13(). */ setidt(IDT_GP, bluetrap13, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); rdmsr(0x1002); /* Cyrix CPU generates fault. */ if (trap_by_rdmsr == 0xa8c1d) return IDENTBLUE_CYRIX486; else if (trap_by_rdmsr == 0xa89c4) return IDENTBLUE_CYRIXM2; return IDENTBLUE_IBMCPU; } /* * identifycyrix() set lower 16 bits of cyrix_did as follows: * * F E D C B A 9 8 7 6 5 4 3 2 1 0 * +-------+-------+---------------+ * | SID | RID | Device ID | * | (DIR 1) | (DIR 0) | * +-------+-------+---------------+ */ static void identifycyrix(void) { register_t saveintr; int ccr2_test = 0, dir_test = 0; u_char ccr2, ccr3; saveintr = intr_disable(); ccr2 = read_cyrix_reg(CCR2); write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); read_cyrix_reg(CCR2); if (read_cyrix_reg(CCR2) != ccr2) ccr2_test = 1; write_cyrix_reg(CCR2, ccr2); ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, ccr3 ^ CCR3_MAPEN3); read_cyrix_reg(CCR3); if (read_cyrix_reg(CCR3) != ccr3) dir_test = 1; /* CPU supports DIRs. */ write_cyrix_reg(CCR3, ccr3); if (dir_test) { /* Device ID registers are available. */ cyrix_did = read_cyrix_reg(DIR1) << 8; cyrix_did += read_cyrix_reg(DIR0); } else if (ccr2_test) cyrix_did = 0x0010; /* 486S A-step */ else cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ intr_restore(saveintr); } #endif /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; /* Total setting for this level gives the new frequency in MHz. */ hw_clockrate = level->total_set.freq; } static void hook_tsc_freq(void *arg __unused) { if (tsc_is_invariant) return; tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY); } SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL); static const char *const vm_bnames[] = { "QEMU", /* QEMU */ "Plex86", /* Plex86 */ "Bochs", /* Bochs */ "Xen", /* Xen */ "BHYVE", /* bhyve */ "Seabios", /* KVM */ NULL }; static const char *const vm_pnames[] = { "VMware Virtual Platform", /* VMWare VM */ "Virtual Machine", /* Microsoft VirtualPC */ "VirtualBox", /* Sun xVM VirtualBox */ "Parallels Virtual Platform", /* Parallels VM */ "KVM", /* KVM */ NULL }; void identify_hypervisor(void) { u_int regs[4]; char *p; int i; /* * [RFC] CPUID usage for interaction between Hypervisors and Linux. * http://lkml.org/lkml/2008/10/1/246 * * KB1009458: Mechanisms to determine if software is running in * a VMware virtual machine * http://kb.vmware.com/kb/1009458 */ if (cpu_feature2 & CPUID2_HV) { vm_guest = VM_GUEST_VM; do_cpuid(0x40000000, regs); /* * KVM from Linux kernels prior to commit * 57c22e5f35aa4b9b2fe11f73f3e62bbf9ef36190 set %eax * to 0 rather than a valid hv_high value. Check for * the KVM signature bytes and fixup %eax to the * highest supported leaf in that case. */ if (regs[0] == 0 && regs[1] == 0x4b4d564b && regs[2] == 0x564b4d56 && regs[3] == 0x0000004d) regs[0] = 0x40000001; if (regs[0] >= 0x40000000) { hv_high = regs[0]; ((u_int *)&hv_vendor)[0] = regs[1]; ((u_int *)&hv_vendor)[1] = regs[2]; ((u_int *)&hv_vendor)[2] = regs[3]; hv_vendor[12] = '\0'; if (strcmp(hv_vendor, "VMwareVMware") == 0) vm_guest = VM_GUEST_VMWARE; else if (strcmp(hv_vendor, "Microsoft Hv") == 0) vm_guest = VM_GUEST_HV; else if (strcmp(hv_vendor, "KVMKVMKVM") == 0) vm_guest = VM_GUEST_KVM; else if (strcmp(hv_vendor, "bhyve bhyve") == 0) vm_guest = VM_GUEST_BHYVE; } return; } /* * Examine SMBIOS strings for older hypervisors. */ p = kern_getenv("smbios.system.serial"); if (p != NULL) { if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) { vmware_hvcall(VMW_HVCMD_GETVERSION, regs); if (regs[1] == VMW_HVMAGIC) { vm_guest = VM_GUEST_VMWARE; freeenv(p); return; } } freeenv(p); } /* * XXX: Some of these entries may not be needed since they were * added to FreeBSD before the checks above. */ p = kern_getenv("smbios.bios.vendor"); if (p != NULL) { for (i = 0; vm_bnames[i] != NULL; i++) if (strcmp(p, vm_bnames[i]) == 0) { vm_guest = VM_GUEST_VM; freeenv(p); return; } freeenv(p); } p = kern_getenv("smbios.system.product"); if (p != NULL) { for (i = 0; vm_pnames[i] != NULL; i++) if (strcmp(p, vm_pnames[i]) == 0) { vm_guest = VM_GUEST_VM; freeenv(p); return; } freeenv(p); } } bool fix_cpuid(void) { uint64_t msr; /* * Clear "Limit CPUID Maxval" bit and return true if the caller should * get the largest standard CPUID function number again if it is set * from BIOS. It is necessary for probing correct CPU topology later * and for the correct operation of the AVX-aware userspace. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && ((CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x3) || (CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xe))) { msr = rdmsr(MSR_IA32_MISC_ENABLE); if ((msr & IA32_MISC_EN_LIMCPUID) != 0) { msr &= ~IA32_MISC_EN_LIMCPUID; wrmsr(MSR_IA32_MISC_ENABLE, msr); return (true); } } /* * Re-enable AMD Topology Extension that could be disabled by BIOS * on some notebook processors. Without the extension it's really * hard to determine the correct CPU cache topology. * See BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 15h * Models 60h-6Fh Processors, Publication # 50742. */ if (vm_guest == VM_GUEST_NO && cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x15) { msr = rdmsr(MSR_EXTFEATURES); if ((msr & ((uint64_t)1 << 54)) == 0) { msr |= (uint64_t)1 << 54; wrmsr(MSR_EXTFEATURES, msr); return (true); } } return (false); } void identify_cpu1(void) { u_int regs[4]; do_cpuid(0, regs); cpu_high = regs[0]; ((u_int *)&cpu_vendor)[0] = regs[1]; ((u_int *)&cpu_vendor)[1] = regs[3]; ((u_int *)&cpu_vendor)[2] = regs[2]; cpu_vendor[12] = '\0'; do_cpuid(1, regs); cpu_id = regs[0]; cpu_procinfo = regs[1]; cpu_feature = regs[3]; cpu_feature2 = regs[2]; } void identify_cpu2(void) { u_int regs[4], cpu_stdext_disable; if (cpu_high >= 7) { cpuid_count(7, 0, regs); cpu_stdext_feature = regs[1]; /* * Some hypervisors failed to filter out unsupported * extended features. Allow to disable the * extensions, activation of which requires setting a * bit in CR4, and which VM monitors do not support. */ cpu_stdext_disable = 0; TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable); cpu_stdext_feature &= ~cpu_stdext_disable; cpu_stdext_feature2 = regs[2]; cpu_stdext_feature3 = regs[3]; if ((cpu_stdext_feature3 & CPUID_STDEXT3_ARCH_CAP) != 0) cpu_ia32_arch_caps = rdmsr(MSR_IA32_ARCH_CAP); } } void identify_cpu_fixup_bsp(void) { u_int regs[4]; cpu_vendor_id = find_cpu_vendor_id(); if (fix_cpuid()) { do_cpuid(0, regs); cpu_high = regs[0]; } } /* * Final stage of CPU identification. */ void finishidentcpu(void) { u_int regs[4]; #ifdef __i386__ u_char ccr3; #endif identify_cpu_fixup_bsp(); if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) { do_cpuid(5, regs); cpu_mon_mwait_flags = regs[2]; cpu_mon_min_size = regs[0] & CPUID5_MON_MIN_SIZE; cpu_mon_max_size = regs[1] & CPUID5_MON_MAX_SIZE; } identify_cpu2(); #ifdef __i386__ if (cpu_high > 0 && (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_CENTAUR || cpu_vendor_id == CPU_VENDOR_NSC)) { do_cpuid(0x80000000, regs); if (regs[0] >= 0x80000000) cpu_exthigh = regs[0]; } #else if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_CENTAUR) { do_cpuid(0x80000000, regs); cpu_exthigh = regs[0]; } #endif if (cpu_exthigh >= 0x80000001) { do_cpuid(0x80000001, regs); amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff); amd_feature2 = regs[2]; } if (cpu_exthigh >= 0x80000007) { do_cpuid(0x80000007, regs); amd_rascap = regs[1]; amd_pminfo = regs[3]; } if (cpu_exthigh >= 0x80000008) { do_cpuid(0x80000008, regs); cpu_maxphyaddr = regs[0] & 0xff; amd_extended_feature_extensions = regs[1]; cpu_procinfo2 = regs[2]; } else { cpu_maxphyaddr = (cpu_feature & CPUID_PAE) != 0 ? 36 : 32; } #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) { if (cpu == CPU_486) { /* * These conditions are equivalent to: * - CPU does not support cpuid instruction. * - Cyrix/IBM CPU is detected. */ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } switch (cpu_id & 0xf00) { case 0x600: /* * Cyrix's datasheet does not describe DIRs. * Therefor, I assume it does not have them * and use the result of the cpuid instruction. * XXX they seem to have it for now at least. -Peter */ identifycyrix(); cpu = CPU_M2; break; default: identifycyrix(); /* * This routine contains a trick. * Don't check (cpu_id & 0x00f0) == 0x50 to detect M2, now. */ switch (cyrix_did & 0x00f0) { case 0x00: case 0xf0: cpu = CPU_486DLC; break; case 0x10: cpu = CPU_CY486DX; break; case 0x20: if ((cyrix_did & 0x000f) < 8) cpu = CPU_M1; else cpu = CPU_M1SC; break; case 0x30: cpu = CPU_M1; break; case 0x40: /* MediaGX CPU */ cpu = CPU_M1SC; break; default: /* M2 and later CPUs are treated as M2. */ cpu = CPU_M2; /* * enable cpuid instruction. */ ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); write_cyrix_reg(CCR4, read_cyrix_reg(CCR4) | CCR4_CPUID); write_cyrix_reg(CCR3, ccr3); do_cpuid(0, regs); cpu_high = regs[0]; /* eax */ do_cpuid(1, regs); cpu_id = regs[0]; /* eax */ cpu_feature = regs[3]; /* edx */ break; } } } else if (cpu == CPU_486 && *cpu_vendor == '\0') { /* * There are BlueLightning CPUs that do not change * undefined flags by dividing 5 by 2. In this case, * the CPU identification routine in locore.s leaves * cpu_vendor null string and puts CPU_486 into the * cpu. */ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } #endif } int pti_get_default(void) { if (strcmp(cpu_vendor, AMD_VENDOR_ID) == 0) return (0); if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) != 0) return (0); return (1); } static u_int find_cpu_vendor_id(void) { int i; for (i = 0; i < nitems(cpu_vendors); i++) if (strcmp(cpu_vendor, cpu_vendors[i].vendor) == 0) return (cpu_vendors[i].vendor_id); return (0); } static void print_AMD_assoc(int i) { if (i == 255) printf(", fully associative\n"); else printf(", %d-way associative\n", i); } static void print_AMD_l2_assoc(int i) { switch (i & 0x0f) { case 0: printf(", disabled/not present\n"); break; case 1: printf(", direct mapped\n"); break; case 2: printf(", 2-way associative\n"); break; case 4: printf(", 4-way associative\n"); break; case 6: printf(", 8-way associative\n"); break; case 8: printf(", 16-way associative\n"); break; case 15: printf(", fully associative\n"); break; default: printf(", reserved configuration\n"); break; } } static void print_AMD_info(void) { #ifdef __i386__ uint64_t amd_whcr; #endif u_int regs[4]; if (cpu_exthigh >= 0x80000005) { do_cpuid(0x80000005, regs); printf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff); print_AMD_assoc(regs[0] >> 24); printf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff); print_AMD_assoc((regs[0] >> 8) & 0xff); printf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff); print_AMD_assoc(regs[1] >> 24); printf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff); print_AMD_assoc((regs[1] >> 8) & 0xff); printf("L1 data cache: %d kbytes", regs[2] >> 24); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0xff); print_AMD_assoc((regs[2] >> 16) & 0xff); printf("L1 instruction cache: %d kbytes", regs[3] >> 24); printf(", %d bytes/line", regs[3] & 0xff); printf(", %d lines/tag", (regs[3] >> 8) & 0xff); print_AMD_assoc((regs[3] >> 16) & 0xff); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); if ((regs[0] >> 16) != 0) { printf("L2 2MB data TLB: %d entries", (regs[0] >> 16) & 0xfff); print_AMD_l2_assoc(regs[0] >> 28); printf("L2 2MB instruction TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } else { printf("L2 2MB unified TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } if ((regs[1] >> 16) != 0) { printf("L2 4KB data TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc(regs[1] >> 28); printf("L2 4KB instruction TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } else { printf("L2 4KB unified TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } printf("L2 unified cache: %d kbytes", regs[2] >> 16); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0x0f); print_AMD_l2_assoc((regs[2] >> 12) & 0x0f); } #ifdef __i386__ if (((cpu_id & 0xf00) == 0x500) && (((cpu_id & 0x0f0) > 0x80) || (((cpu_id & 0x0f0) == 0x80) && (cpu_id & 0x00f) > 0x07))) { /* K6-2(new core [Stepping 8-F]), K6-III or later */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x3ff << 22))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x3ff << 22)) >> 22) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & (1 << 16)) ? "Enable" : "Disable"); } } else if (((cpu_id & 0xf00) == 0x500) && ((cpu_id & 0x0f0) > 0x50)) { /* K6, K6-2(old core) */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x7f << 1))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x7f << 1)) >> 1) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & 0x0001) ? "Enable" : "Disable"); printf("Hardware Write Allocate Control: %s\n", (amd_whcr & 0x0100) ? "Enable" : "Disable"); } } #endif /* * Opteron Rev E shows a bug as in very rare occasions a read memory * barrier is not performed as expected if it is followed by a * non-atomic read-modify-write instruction. * As long as that bug pops up very rarely (intensive machine usage * on other operating systems generally generates one unexplainable * crash any 2 months) and as long as a model specific fix would be * impractical at this stage, print out a warning string if the broken * model and family are identified. */ if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x20 && CPUID_TO_MODEL(cpu_id) <= 0x3f) printf("WARNING: This architecture revision has known SMP " "hardware bugs which may cause random instability\n"); } static void print_INTEL_info(void) { u_int regs[4]; u_int rounds, regnum; u_int nwaycode, nway; if (cpu_high >= 2) { rounds = 0; do { do_cpuid(0x2, regs); if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0) break; /* we have a buggy CPU */ for (regnum = 0; regnum <= 3; ++regnum) { if (regs[regnum] & (1<<31)) continue; if (regnum != 0) print_INTEL_TLB(regs[regnum] & 0xff); print_INTEL_TLB((regs[regnum] >> 8) & 0xff); print_INTEL_TLB((regs[regnum] >> 16) & 0xff); print_INTEL_TLB((regs[regnum] >> 24) & 0xff); } } while (--rounds > 0); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); nwaycode = (regs[2] >> 12) & 0x0f; if (nwaycode >= 0x02 && nwaycode <= 0x08) nway = 1 << (nwaycode / 2); else nway = 0; printf("L2 cache: %u kbytes, %u-way associative, %u bytes/line\n", (regs[2] >> 16) & 0xffff, nway, regs[2] & 0xff); } } static void print_INTEL_TLB(u_int data) { switch (data) { case 0x0: case 0x40: default: break; case 0x1: printf("Instruction TLB: 4 KB pages, 4-way set associative, 32 entries\n"); break; case 0x2: printf("Instruction TLB: 4 MB pages, fully associative, 2 entries\n"); break; case 0x3: printf("Data TLB: 4 KB pages, 4-way set associative, 64 entries\n"); break; case 0x4: printf("Data TLB: 4 MB Pages, 4-way set associative, 8 entries\n"); break; case 0x6: printf("1st-level instruction cache: 8 KB, 4-way set associative, 32 byte line size\n"); break; case 0x8: printf("1st-level instruction cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0x9: printf("1st-level instruction cache: 32 KB, 4-way set associative, 64 byte line size\n"); break; case 0xa: printf("1st-level data cache: 8 KB, 2-way set associative, 32 byte line size\n"); break; case 0xb: printf("Instruction TLB: 4 MByte pages, 4-way set associative, 4 entries\n"); break; case 0xc: printf("1st-level data cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0xd: printf("1st-level data cache: 16 KBytes, 4-way set associative, 64 byte line size"); break; case 0xe: printf("1st-level data cache: 24 KBytes, 6-way set associative, 64 byte line size\n"); break; case 0x1d: printf("2nd-level cache: 128 KBytes, 2-way set associative, 64 byte line size\n"); break; case 0x21: printf("2nd-level cache: 256 KBytes, 8-way set associative, 64 byte line size\n"); break; case 0x22: printf("3rd-level cache: 512 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x23: printf("3rd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x24: printf("2nd-level cache: 1 MBytes, 16-way set associative, 64 byte line size\n"); break; case 0x25: printf("3rd-level cache: 2 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x29: printf("3rd-level cache: 4 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x2c: printf("1st-level data cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x30: printf("1st-level instruction cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x39: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 128 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3b: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 128 KB, 2-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3c: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 256 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x41: printf("2nd-level cache: 128 KB, 4-way set associative, 32 byte line size\n"); break; case 0x42: printf("2nd-level cache: 256 KB, 4-way set associative, 32 byte line size\n"); break; case 0x43: printf("2nd-level cache: 512 KB, 4-way set associative, 32 byte line size\n"); break; case 0x44: printf("2nd-level cache: 1 MB, 4-way set associative, 32 byte line size\n"); break; case 0x45: printf("2nd-level cache: 2 MB, 4-way set associative, 32 byte line size\n"); break; case 0x46: printf("3rd-level cache: 4 MB, 4-way set associative, 64 byte line size\n"); break; case 0x47: printf("3rd-level cache: 8 MB, 8-way set associative, 64 byte line size\n"); break; case 0x48: printf("2nd-level cache: 3MByte, 12-way set associative, 64 byte line size\n"); break; case 0x49: if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) == 0x6) printf("3rd-level cache: 4MB, 16-way set associative, 64-byte line size\n"); else printf("2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size"); break; case 0x4a: printf("3rd-level cache: 6MByte, 12-way set associative, 64 byte line size\n"); break; case 0x4b: printf("3rd-level cache: 8MByte, 16-way set associative, 64 byte line size\n"); break; case 0x4c: printf("3rd-level cache: 12MByte, 12-way set associative, 64 byte line size\n"); break; case 0x4d: printf("3rd-level cache: 16MByte, 16-way set associative, 64 byte line size\n"); break; case 0x4e: printf("2nd-level cache: 6MByte, 24-way set associative, 64 byte line size\n"); break; case 0x4f: printf("Instruction TLB: 4 KByte pages, 32 entries\n"); break; case 0x50: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x51: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x52: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x55: printf("Instruction TLB: 2-MByte or 4-MByte pages, fully associative, 7 entries\n"); break; case 0x56: printf("Data TLB0: 4 MByte pages, 4-way set associative, 16 entries\n"); break; case 0x57: printf("Data TLB0: 4 KByte pages, 4-way associative, 16 entries\n"); break; case 0x59: printf("Data TLB0: 4 KByte pages, fully associative, 16 entries\n"); break; case 0x5a: printf("Data TLB0: 2-MByte or 4 MByte pages, 4-way set associative, 32 entries\n"); break; case 0x5b: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x5c: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x5d: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x60: printf("1st-level data cache: 16 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x61: printf("Instruction TLB: 4 KByte pages, fully associative, 48 entries\n"); break; case 0x63: printf("Data TLB: 2 MByte or 4 MByte pages, 4-way set associative, 32 entries and a separate array with 1 GByte pages, 4-way set associative, 4 entries\n"); break; case 0x64: printf("Data TLB: 4 KBytes pages, 4-way set associative, 512 entries\n"); break; case 0x66: printf("1st-level data cache: 8 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x67: printf("1st-level data cache: 16 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x68: printf("1st-level data cache: 32 KB, 4 way set associative, sectored cache, 64 byte line size\n"); break; case 0x6a: printf("uTLB: 4KByte pages, 8-way set associative, 64 entries\n"); break; case 0x6b: printf("DTLB: 4KByte pages, 8-way set associative, 256 entries\n"); break; case 0x6c: printf("DTLB: 2M/4M pages, 8-way set associative, 128 entries\n"); break; case 0x6d: printf("DTLB: 1 GByte pages, fully associative, 16 entries\n"); break; case 0x70: printf("Trace cache: 12K-uops, 8-way set associative\n"); break; case 0x71: printf("Trace cache: 16K-uops, 8-way set associative\n"); break; case 0x72: printf("Trace cache: 32K-uops, 8-way set associative\n"); break; case 0x76: printf("Instruction TLB: 2M/4M pages, fully associative, 8 entries\n"); break; case 0x78: printf("2nd-level cache: 1 MB, 4-way set associative, 64-byte line size\n"); break; case 0x79: printf("2nd-level cache: 128 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7a: printf("2nd-level cache: 256 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7b: printf("2nd-level cache: 512 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7c: printf("2nd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7d: printf("2nd-level cache: 2-MB, 8-way set associative, 64-byte line size\n"); break; case 0x7f: printf("2nd-level cache: 512-KB, 2-way set associative, 64-byte line size\n"); break; case 0x80: printf("2nd-level cache: 512 KByte, 8-way set associative, 64-byte line size\n"); break; case 0x82: printf("2nd-level cache: 256 KB, 8-way set associative, 32 byte line size\n"); break; case 0x83: printf("2nd-level cache: 512 KB, 8-way set associative, 32 byte line size\n"); break; case 0x84: printf("2nd-level cache: 1 MB, 8-way set associative, 32 byte line size\n"); break; case 0x85: printf("2nd-level cache: 2 MB, 8-way set associative, 32 byte line size\n"); break; case 0x86: printf("2nd-level cache: 512 KB, 4-way set associative, 64 byte line size\n"); break; case 0x87: printf("2nd-level cache: 1 MB, 8-way set associative, 64 byte line size\n"); break; case 0xa0: printf("DTLB: 4k pages, fully associative, 32 entries\n"); break; case 0xb0: printf("Instruction TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb1: printf("Instruction TLB: 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries\n"); break; case 0xb2: printf("Instruction TLB: 4KByte pages, 4-way set associative, 64 entries\n"); break; case 0xb3: printf("Data TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb4: printf("Data TLB1: 4 KByte pages, 4-way associative, 256 entries\n"); break; case 0xb5: printf("Instruction TLB: 4KByte pages, 8-way set associative, 64 entries\n"); break; case 0xb6: printf("Instruction TLB: 4KByte pages, 8-way set associative, 128 entries\n"); break; case 0xba: printf("Data TLB1: 4 KByte pages, 4-way associative, 64 entries\n"); break; case 0xc0: printf("Data TLB: 4 KByte and 4 MByte pages, 4-way associative, 8 entries\n"); break; case 0xc1: printf("Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries\n"); break; case 0xc2: printf("DTLB: 4 KByte/2 MByte pages, 4-way associative, 16 entries\n"); break; case 0xc3: printf("Shared 2nd-Level TLB: 4 KByte /2 MByte pages, 6-way associative, 1536 entries. Also 1GBbyte pages, 4-way, 16 entries\n"); break; case 0xc4: printf("DTLB: 2M/4M Byte pages, 4-way associative, 32 entries\n"); break; case 0xca: printf("Shared 2nd-Level TLB: 4 KByte pages, 4-way associative, 512 entries\n"); break; case 0xd0: printf("3rd-level cache: 512 KByte, 4-way set associative, 64 byte line size\n"); break; case 0xd1: printf("3rd-level cache: 1 MByte, 4-way set associative, 64 byte line size\n"); break; case 0xd2: printf("3rd-level cache: 2 MByte, 4-way set associative, 64 byte line size\n"); break; case 0xd6: printf("3rd-level cache: 1 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xd7: printf("3rd-level cache: 2 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xd8: printf("3rd-level cache: 4 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xdc: printf("3rd-level cache: 1.5 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xdd: printf("3rd-level cache: 3 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xde: printf("3rd-level cache: 6 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xe2: printf("3rd-level cache: 2 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xe3: printf("3rd-level cache: 4 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xe4: printf("3rd-level cache: 8 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xea: printf("3rd-level cache: 12MByte, 24-way set associative, 64 byte line size\n"); break; case 0xeb: printf("3rd-level cache: 18MByte, 24-way set associative, 64 byte line size\n"); break; case 0xec: printf("3rd-level cache: 24MByte, 24-way set associative, 64 byte line size\n"); break; case 0xf0: printf("64-Byte prefetching\n"); break; case 0xf1: printf("128-Byte prefetching\n"); break; } } static void print_svm_info(void) { u_int features, regs[4]; uint64_t msr; int comma; printf("\n SVM: "); do_cpuid(0x8000000A, regs); features = regs[3]; msr = rdmsr(MSR_VM_CR); if ((msr & VM_CR_SVMDIS) == VM_CR_SVMDIS) printf("(disabled in BIOS) "); if (!bootverbose) { comma = 0; if (features & (1 << 0)) { printf("%sNP", comma ? "," : ""); comma = 1; } if (features & (1 << 3)) { printf("%sNRIP", comma ? "," : ""); comma = 1; } if (features & (1 << 5)) { printf("%sVClean", comma ? "," : ""); comma = 1; } if (features & (1 << 6)) { printf("%sAFlush", comma ? "," : ""); comma = 1; } if (features & (1 << 7)) { printf("%sDAssist", comma ? "," : ""); comma = 1; } printf("%sNAsids=%d", comma ? "," : "", regs[1]); return; } printf("Features=0x%b", features, "\020" "\001NP" /* Nested paging */ "\002LbrVirt" /* LBR virtualization */ "\003SVML" /* SVM lock */ "\004NRIPS" /* NRIP save */ "\005TscRateMsr" /* MSR based TSC rate control */ "\006VmcbClean" /* VMCB clean bits */ "\007FlushByAsid" /* Flush by ASID */ "\010DecodeAssist" /* Decode assist */ "\011" "\012" "\013PauseFilter" /* PAUSE intercept filter */ "\014EncryptedMcodePatch" "\015PauseFilterThreshold" /* PAUSE filter threshold */ "\016AVIC" /* virtual interrupt controller */ "\017" "\020V_VMSAVE_VMLOAD" "\021vGIF" "\022" "\023" "\024" "\025" "\026" "\027" "\030" "\031" "\032" "\033" "\034" "\035" "\036" "\037" "\040" ); printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]); } #ifdef __i386__ static void print_transmeta_info(void) { u_int regs[4], nreg = 0; do_cpuid(0x80860000, regs); nreg = regs[0]; if (nreg >= 0x80860001) { do_cpuid(0x80860001, regs); printf(" Processor revision %u.%u.%u.%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff); } if (nreg >= 0x80860002) { do_cpuid(0x80860002, regs); printf(" Code Morphing Software revision %u.%u.%u-%u-%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff, regs[2]); } if (nreg >= 0x80860006) { char info[65]; do_cpuid(0x80860003, (u_int*) &info[0]); do_cpuid(0x80860004, (u_int*) &info[16]); do_cpuid(0x80860005, (u_int*) &info[32]); do_cpuid(0x80860006, (u_int*) &info[48]); info[64] = 0; printf(" %s\n", info); } } #endif static void print_via_padlock_info(void) { u_int regs[4]; do_cpuid(0xc0000001, regs); printf("\n VIA Padlock Features=0x%b", regs[3], "\020" "\003RNG" /* RNG */ "\007AES" /* ACE */ "\011AES-CTR" /* ACE2 */ "\013SHA1,SHA256" /* PHE */ "\015RSA" /* PMM */ ); } static uint32_t vmx_settable(uint64_t basic, int msr, int true_msr) { uint64_t val; if (basic & (1ULL << 55)) val = rdmsr(true_msr); else val = rdmsr(msr); /* Just report the controls that can be set to 1. */ return (val >> 32); } static void print_vmx_info(void) { uint64_t basic, msr; uint32_t entry, exit, mask, pin, proc, proc2; int comma; printf("\n VT-x: "); msr = rdmsr(MSR_IA32_FEATURE_CONTROL); if (!(msr & IA32_FEATURE_CONTROL_VMX_EN)) printf("(disabled in BIOS) "); basic = rdmsr(MSR_VMX_BASIC); pin = vmx_settable(basic, MSR_VMX_PINBASED_CTLS, MSR_VMX_TRUE_PINBASED_CTLS); proc = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS); if (proc & PROCBASED_SECONDARY_CONTROLS) proc2 = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2); else proc2 = 0; exit = vmx_settable(basic, MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS); entry = vmx_settable(basic, MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS); if (!bootverbose) { comma = 0; if (exit & VM_EXIT_SAVE_PAT && exit & VM_EXIT_LOAD_PAT && entry & VM_ENTRY_LOAD_PAT) { printf("%sPAT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_HLT_EXITING) { printf("%sHLT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_MTF) { printf("%sMTF", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_PAUSE_EXITING) { printf("%sPAUSE", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_EPT) { printf("%sEPT", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_UNRESTRICTED_GUEST) { printf("%sUG", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_VPID) { printf("%sVPID", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_USE_TPR_SHADOW && proc2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES && proc2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE && proc2 & PROCBASED2_APIC_REGISTER_VIRTUALIZATION && proc2 & PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY) { printf("%sVID", comma ? "," : ""); comma = 1; if (pin & PINBASED_POSTED_INTERRUPT) printf(",PostIntr"); } return; } mask = basic >> 32; printf("Basic Features=0x%b", mask, "\020" "\02132PA" /* 32-bit physical addresses */ "\022SMM" /* SMM dual-monitor */ "\027INS/OUTS" /* VM-exit info for INS and OUTS */ "\030TRUE" /* TRUE_CTLS MSRs */ ); printf("\n Pin-Based Controls=0x%b", pin, "\020" "\001ExtINT" /* External-interrupt exiting */ "\004NMI" /* NMI exiting */ "\006VNMI" /* Virtual NMIs */ "\007PreTmr" /* Activate VMX-preemption timer */ "\010PostIntr" /* Process posted interrupts */ ); printf("\n Primary Processor Controls=0x%b", proc, "\020" "\003INTWIN" /* Interrupt-window exiting */ "\004TSCOff" /* Use TSC offsetting */ "\010HLT" /* HLT exiting */ "\012INVLPG" /* INVLPG exiting */ "\013MWAIT" /* MWAIT exiting */ "\014RDPMC" /* RDPMC exiting */ "\015RDTSC" /* RDTSC exiting */ "\020CR3-LD" /* CR3-load exiting */ "\021CR3-ST" /* CR3-store exiting */ "\024CR8-LD" /* CR8-load exiting */ "\025CR8-ST" /* CR8-store exiting */ "\026TPR" /* Use TPR shadow */ "\027NMIWIN" /* NMI-window exiting */ "\030MOV-DR" /* MOV-DR exiting */ "\031IO" /* Unconditional I/O exiting */ "\032IOmap" /* Use I/O bitmaps */ "\034MTF" /* Monitor trap flag */ "\035MSRmap" /* Use MSR bitmaps */ "\036MONITOR" /* MONITOR exiting */ "\037PAUSE" /* PAUSE exiting */ ); if (proc & PROCBASED_SECONDARY_CONTROLS) printf("\n Secondary Processor Controls=0x%b", proc2, "\020" "\001APIC" /* Virtualize APIC accesses */ "\002EPT" /* Enable EPT */ "\003DT" /* Descriptor-table exiting */ "\004RDTSCP" /* Enable RDTSCP */ "\005x2APIC" /* Virtualize x2APIC mode */ "\006VPID" /* Enable VPID */ "\007WBINVD" /* WBINVD exiting */ "\010UG" /* Unrestricted guest */ "\011APIC-reg" /* APIC-register virtualization */ "\012VID" /* Virtual-interrupt delivery */ "\013PAUSE-loop" /* PAUSE-loop exiting */ "\014RDRAND" /* RDRAND exiting */ "\015INVPCID" /* Enable INVPCID */ "\016VMFUNC" /* Enable VM functions */ "\017VMCS" /* VMCS shadowing */ "\020EPT#VE" /* EPT-violation #VE */ "\021XSAVES" /* Enable XSAVES/XRSTORS */ ); printf("\n Exit Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore Host address-space size */ "\015PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\020AckInt" /* Acknowledge interrupt on exit */ "\023PAT-SV" /* Save MSR_PAT */ "\024PAT-LD" /* Load MSR_PAT */ "\025EFER-SV" /* Save MSR_EFER */ "\026EFER-LD" /* Load MSR_EFER */ "\027PTMR-SV" /* Save VMX-preemption timer value */ ); printf("\n Entry Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore IA-32e mode guest */ /* Ignore Entry to SMM */ /* Ignore Deactivate dual-monitor treatment */ "\016PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\017PAT" /* Load MSR_PAT */ "\020EFER" /* Load MSR_EFER */ ); if (proc & PROCBASED_SECONDARY_CONTROLS && (proc2 & (PROCBASED2_ENABLE_EPT | PROCBASED2_ENABLE_VPID)) != 0) { msr = rdmsr(MSR_VMX_EPT_VPID_CAP); mask = msr; printf("\n EPT Features=0x%b", mask, "\020" "\001XO" /* Execute-only translations */ "\007PW4" /* Page-walk length of 4 */ "\011UC" /* EPT paging-structure mem can be UC */ "\017WB" /* EPT paging-structure mem can be WB */ "\0212M" /* EPT PDE can map a 2-Mbyte page */ "\0221G" /* EPT PDPTE can map a 1-Gbyte page */ "\025INVEPT" /* INVEPT is supported */ "\026AD" /* Accessed and dirty flags for EPT */ "\032single" /* INVEPT single-context type */ "\033all" /* INVEPT all-context type */ ); mask = msr >> 32; printf("\n VPID Features=0x%b", mask, "\020" "\001INVVPID" /* INVVPID is supported */ "\011individual" /* INVVPID individual-address type */ "\012single" /* INVVPID single-context type */ "\013all" /* INVVPID all-context type */ /* INVVPID single-context-retaining-globals type */ "\014single-globals" ); } } static void print_hypervisor_info(void) { if (*hv_vendor) printf("Hypervisor: Origin = \"%s\"\n", hv_vendor); +} + +/* + * Returns the maximum physical address that can be used with the + * current system. + */ +vm_paddr_t +cpu_getmaxphyaddr(void) +{ + +#if defined(__i386__) + if (!pae_mode) + return (0xffffffff); +#endif + return ((1ULL << cpu_maxphyaddr) - 1); }