diff --git a/sys/arm/at91/at91_machdep.c b/sys/arm/at91/at91_machdep.c index 3df922ef3365..27e88447d441 100644 --- a/sys/arm/at91/at91_machdep.c +++ b/sys/arm/at91/at91_machdep.c @@ -1,685 +1,684 @@ /*- * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * machdep.c * * Machine dependant functions for kernel setup * * This file needs a lot of work. * * Created : 17/09/94 */ #include __FBSDID("$FreeBSD$"); #define _ARM32_BUS_DMA_PRIVATE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MAXCPU #define MAXCPU 1 #endif /* Page table for mapping proc0 zero page */ #define KERNEL_PT_SYS 0 #define KERNEL_PT_KERN 1 #define KERNEL_PT_KERN_NUM 22 /* L2 table for mapping after kernel */ #define KERNEL_PT_AFKERNEL KERNEL_PT_KERN + KERNEL_PT_KERN_NUM #define KERNEL_PT_AFKERNEL_NUM 5 /* this should be evenly divisable by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */ #define NUM_KERNEL_PTS (KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM) extern u_int data_abort_handler_address; extern u_int prefetch_abort_handler_address; extern u_int undefined_handler_address; struct pv_addr kernel_pt_table[NUM_KERNEL_PTS]; /* Physical and virtual addresses for some global pages */ vm_paddr_t phys_avail[10]; vm_paddr_t dump_avail[4]; struct pv_addr systempage; struct pv_addr msgbufpv; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; struct pv_addr kernelstack; /* Static device mappings. 
*/ const struct pmap_devmap at91_devmap[] = { /* * Map the on-board devices VA == PA so that we can access them * with the MMU on or off. */ { /* * This at least maps the interrupt controller, the UART * and the timer. Other devices should use newbus to * map their memory anyway. */ 0xdff00000, 0xfff00000, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, /* * We can't just map the OHCI registers VA == PA, because * AT91xx_xxx_BASE belongs to the userland address space. * We could just choose a different virtual address, but a better * solution would probably be to just use pmap_mapdev() to allocate * KVA, as we don't need the OHCI controller before the vm * initialization is done. However, the AT91 resource allocation * system doesn't know how to use pmap_mapdev() yet. * Care must be taken to ensure PA and VM address do not overlap * between entries. */ { /* * Add the ohci controller, and anything else that might be * on this chip select for a VA/PA mapping. */ /* Internal Memory 1MB */ AT91RM92_OHCI_BASE, AT91RM92_OHCI_PA_BASE, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { /* CompactFlash controller. Portion of EBI CS4 1MB */ AT91RM92_CF_BASE, AT91RM92_CF_PA_BASE, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, /* * The next two should be good for the 9260, 9261 and 9G20 since * addresses mapping is the same. */ { /* Internal Memory 1MB */ AT91SAM9G20_OHCI_BASE, AT91SAM9G20_OHCI_PA_BASE, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { /* EBI CS3 256MB */ AT91SAM9G20_NAND_BASE, AT91SAM9G20_NAND_PA_BASE, AT91SAM9G20_NAND_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, /* * The next should be good for the 9G45. */ { /* Internal Memory 1MB */ AT91SAM9G45_OHCI_BASE, AT91SAM9G45_OHCI_PA_BASE, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { 0, 0, 0, 0, 0, } }; #ifdef LINUX_BOOT_ABI extern int membanks; extern int memstart[]; extern int memsize[]; #endif long at91_ramsize(void) { uint32_t cr, mdr, mr, *SDRAMC; int banks, rows, cols, bw; #ifdef LINUX_BOOT_ABI /* * If we found any ATAGs that were for memory, return the first bank. */ if (membanks > 0) return (memsize[0]); #endif if (at91_is_rm92()) { SDRAMC = (uint32_t *)(AT91_BASE + AT91RM92_SDRAMC_BASE); cr = SDRAMC[AT91RM92_SDRAMC_CR / 4]; mr = SDRAMC[AT91RM92_SDRAMC_MR / 4]; banks = (cr & AT91RM92_SDRAMC_CR_NB_4) ? 2 : 1; rows = ((cr & AT91RM92_SDRAMC_CR_NR_MASK) >> 2) + 11; cols = (cr & AT91RM92_SDRAMC_CR_NC_MASK) + 8; bw = (mr & AT91RM92_SDRAMC_MR_DBW_16) ? 1 : 2; } else if (at91_cpu_is(AT91_T_SAM9G45)) { SDRAMC = (uint32_t *)(AT91_BASE + AT91SAM9G45_DDRSDRC0_BASE); cr = SDRAMC[AT91SAM9G45_DDRSDRC_CR / 4]; mdr = SDRAMC[AT91SAM9G45_DDRSDRC_MDR / 4]; banks = 0; rows = ((cr & AT91SAM9G45_DDRSDRC_CR_NR_MASK) >> 2) + 11; cols = (cr & AT91SAM9G45_DDRSDRC_CR_NC_MASK) + 8; bw = (mdr & AT91SAM9G45_DDRSDRC_MDR_DBW_16) ? 1 : 2; /* Fix the calculation for DDR memory */ mdr &= AT91SAM9G45_DDRSDRC_MDR_MASK; if (mdr & AT91SAM9G45_DDRSDRC_MDR_LPDDR1 || mdr & AT91SAM9G45_DDRSDRC_MDR_DDR2) { /* The cols value is 1 higher for DDR */ cols += 1; /* DDR has 4 internal banks. */ banks = 2; } } else { /* * This should be good for the 9260, 9261, 9G20, 9G35 and 9X25 * as addresses and registers are the same. */ SDRAMC = (uint32_t *)(AT91_BASE + AT91SAM9G20_SDRAMC_BASE); cr = SDRAMC[AT91SAM9G20_SDRAMC_CR / 4]; mr = SDRAMC[AT91SAM9G20_SDRAMC_MR / 4]; banks = (cr & AT91SAM9G20_SDRAMC_CR_NB_4) ? 
2 : 1; rows = ((cr & AT91SAM9G20_SDRAMC_CR_NR_MASK) >> 2) + 11; cols = (cr & AT91SAM9G20_SDRAMC_CR_NC_MASK) + 8; bw = (cr & AT91SAM9G20_SDRAMC_CR_DBW_16) ? 1 : 2; } return (1 << (cols + rows + banks + bw)); } static const char *soc_type_name[] = { [AT91_T_CAP9] = "at91cap9", [AT91_T_RM9200] = "at91rm9200", [AT91_T_SAM9260] = "at91sam9260", [AT91_T_SAM9261] = "at91sam9261", [AT91_T_SAM9263] = "at91sam9263", [AT91_T_SAM9G10] = "at91sam9g10", [AT91_T_SAM9G20] = "at91sam9g20", [AT91_T_SAM9G45] = "at91sam9g45", [AT91_T_SAM9N12] = "at91sam9n12", [AT91_T_SAM9RL] = "at91sam9rl", [AT91_T_SAM9X5] = "at91sam9x5", [AT91_T_NONE] = "UNKNOWN" }; static const char *soc_subtype_name[] = { [AT91_ST_NONE] = "UNKNOWN", [AT91_ST_RM9200_BGA] = "at91rm9200_bga", [AT91_ST_RM9200_PQFP] = "at91rm9200_pqfp", [AT91_ST_SAM9XE] = "at91sam9xe", [AT91_ST_SAM9G45] = "at91sam9g45", [AT91_ST_SAM9M10] = "at91sam9m10", [AT91_ST_SAM9G46] = "at91sam9g46", [AT91_ST_SAM9M11] = "at91sam9m11", [AT91_ST_SAM9G15] = "at91sam9g15", [AT91_ST_SAM9G25] = "at91sam9g25", [AT91_ST_SAM9G35] = "at91sam9g35", [AT91_ST_SAM9X25] = "at91sam9x25", [AT91_ST_SAM9X35] = "at91sam9x35", }; struct at91_soc_info soc_info; /* * Read the SoC ID from the CIDR register and try to match it against the * values we know. If we find a good one, we return true. If not, we * return false. When we find a good one, we also find the subtype * and CPU family. */ static int at91_try_id(uint32_t dbgu_base) { uint32_t socid; soc_info.cidr = *(volatile uint32_t *)(AT91_BASE + dbgu_base + DBGU_C1R); socid = soc_info.cidr & ~AT91_CPU_VERSION_MASK; soc_info.type = AT91_T_NONE; soc_info.subtype = AT91_ST_NONE; soc_info.family = (soc_info.cidr & AT91_CPU_FAMILY_MASK) >> 20; soc_info.exid = *(volatile uint32_t *)(AT91_BASE + dbgu_base + DBGU_C2R); switch (socid) { case AT91_CPU_CAP9: soc_info.type = AT91_T_CAP9; break; case AT91_CPU_RM9200: soc_info.type = AT91_T_RM9200; break; case AT91_CPU_SAM9XE128: case AT91_CPU_SAM9XE256: case AT91_CPU_SAM9XE512: case AT91_CPU_SAM9260: soc_info.type = AT91_T_SAM9260; if (soc_info.family == AT91_FAMILY_SAM9XE) soc_info.subtype = AT91_ST_SAM9XE; break; case AT91_CPU_SAM9261: soc_info.type = AT91_T_SAM9261; break; case AT91_CPU_SAM9263: soc_info.type = AT91_T_SAM9263; break; case AT91_CPU_SAM9G10: soc_info.type = AT91_T_SAM9G10; break; case AT91_CPU_SAM9G20: soc_info.type = AT91_T_SAM9G20; break; case AT91_CPU_SAM9G45: soc_info.type = AT91_T_SAM9G45; break; case AT91_CPU_SAM9N12: soc_info.type = AT91_T_SAM9N12; break; case AT91_CPU_SAM9RL64: soc_info.type = AT91_T_SAM9RL; break; case AT91_CPU_SAM9X5: soc_info.type = AT91_T_SAM9X5; break; default: return (0); } switch (soc_info.type) { case AT91_T_SAM9G45: switch (soc_info.exid) { case AT91_EXID_SAM9G45: soc_info.subtype = AT91_ST_SAM9G45; break; case AT91_EXID_SAM9G46: soc_info.subtype = AT91_ST_SAM9G46; break; case AT91_EXID_SAM9M10: soc_info.subtype = AT91_ST_SAM9M10; break; case AT91_EXID_SAM9M11: soc_info.subtype = AT91_ST_SAM9M11; break; } break; case AT91_T_SAM9X5: switch (soc_info.exid) { case AT91_EXID_SAM9G15: soc_info.subtype = AT91_ST_SAM9G15; break; case AT91_EXID_SAM9G25: soc_info.subtype = AT91_ST_SAM9G25; break; case AT91_EXID_SAM9G35: soc_info.subtype = AT91_ST_SAM9G35; break; case AT91_EXID_SAM9X25: soc_info.subtype = AT91_ST_SAM9X25; break; case AT91_EXID_SAM9X35: soc_info.subtype = AT91_ST_SAM9X35; break; } break; default: break; } /* * Disable interrupts in the DBGU unit... 
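 *
 * (Aside on at91_ramsize() above, with illustrative figures that are not
 * taken from this change: a part wired with 9 column bits, 13 row bits,
 * 4 internal banks (banks == 2) and a 32-bit data bus (bw == 2) gives
 * 1 << (9 + 13 + 2 + 2) = 1 << 26 bytes, i.e. 64 MB.)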
*/ *(volatile uint32_t *)(AT91_BASE + dbgu_base + USART_IDR) = 0xffffffff; /* * Save the name for later... */ snprintf(soc_info.name, sizeof(soc_info.name), "%s%s%s", soc_type_name[soc_info.type], soc_info.subtype == AT91_ST_NONE ? "" : " subtype ", soc_info.subtype == AT91_ST_NONE ? "" : soc_subtype_name[soc_info.subtype]); /* * try to get the matching CPU support. */ soc_info.soc_data = at91_match_soc(soc_info.type, soc_info.subtype); soc_info.dbgu_base = AT91_BASE + dbgu_base; return (1); } static void at91_soc_id(void) { if (!at91_try_id(AT91_DBGU0)) at91_try_id(AT91_DBGU1); } #ifdef ARM_MANY_BOARD /* likely belongs in arm/arm/machdep.c, but since board_init is still at91 only... */ SET_DECLARE(arm_board_set, const struct arm_board); /* Not yet fully functional, but enough to build ATMEL config */ static long board_init(void) { return -1; } #endif void * initarm(struct arm_boot_params *abp) { struct pv_addr kernel_l1pt; struct pv_addr dpcpu; int i; u_int l1pagetable; vm_offset_t freemempos; vm_offset_t afterkern; uint32_t memsize; vm_offset_t lastaddr; lastaddr = parse_boot_param(abp); set_cpufuncs(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_va, (np)); \ (var).pv_pa = (var).pv_va + (KERNPHYSADDR - KERNVIRTADDR); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); for (i = 0; i < NUM_KERNEL_PTS; ++i) { if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); } else { kernel_pt_table[i].pv_va = freemempos - (i % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) * L2_TABLE_SIZE_REAL; kernel_pt_table[i].pv_pa = kernel_pt_table[i].pv_va - KERNVIRTADDR + KERNPHYSADDR; } } /* * Allocate a page for the system page mapped to 0x00000000 * or 0xffff0000. This page will just contain the system vectors * and can be shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. * This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_va; /* Map the L2 pages tables in the L1 page table */ pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, &kernel_pt_table[KERNEL_PT_SYS]); for (i = 0; i < KERNEL_PT_KERN_NUM; i++) pmap_link_l2pt(l1pagetable, KERNBASE + i * L1_S_SIZE, &kernel_pt_table[KERNEL_PT_KERN + i]); pmap_map_chunk(l1pagetable, KERNBASE, PHYSADDR, (((uint32_t)lastaddr - KERNBASE) + PAGE_SIZE) & ~(PAGE_SIZE - 1), VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); afterkern = round_page((lastaddr + L1_S_SIZE) & ~(L1_S_SIZE - 1)); for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) { pmap_link_l2pt(l1pagetable, afterkern + i * L1_S_SIZE, &kernel_pt_table[KERNEL_PT_AFKERNEL + i]); } /* Map the vector page. 
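 * (For reference: ARM_VECTORS_HIGH is the 0xffff0000 "high vectors" base,
 * so the single systempage allocated earlier ends up holding the exception
 * vectors shared by all processes; the sa11x0 port further down maps its
 * vector page at ARM_VECTORS_LOW, i.e. address 0, instead.)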
*/ pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the DPCPU pages */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, DPCPU_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the stack pages */ pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa, IRQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa, ABT_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa, UND_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa, KSTACK_PAGES * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); pmap_map_chunk(l1pagetable, msgbufpv.pv_va, msgbufpv.pv_pa, msgbufsize, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); for (i = 0; i < NUM_KERNEL_PTS; ++i) { pmap_map_chunk(l1pagetable, kernel_pt_table[i].pv_va, kernel_pt_table[i].pv_pa, L2_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); } pmap_devmap_bootstrap(l1pagetable, at91_devmap); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); at91_soc_id(); /* Initialize all the clocks, so that the console can work */ at91_pmc_init_clock(); cninit(); if (soc_info.soc_data == NULL) printf("Warning: No soc support for %s found.\n", soc_info.name); memsize = board_init(); physmem = memsize / PAGE_SIZE; /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). 
*/ cpu_idcache_wbinv_all(); /* Set stack for exception handlers */ data_abort_handler_address = (u_int)data_abort_handler; prefetch_abort_handler_address = (u_int)prefetch_abort_handler; undefined_handler_address = (u_int)undefinedinstruction_bounce; undefined_init(); init_proc0(kernelstack.pv_va); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); pmap_curmaxkvaddr = afterkern + L1_S_SIZE * (KERNEL_PT_KERN_NUM - 1); arm_dump_avail_init(memsize, sizeof(dump_avail)/sizeof(dump_avail[0])); vm_max_kernel_address = KERNVIRTADDR + 3 * memsize; pmap_bootstrap(freemempos, &kernel_l1pt); msgbufp = (void*)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); i = 0; #if PHYSADDR != KERNPHYSADDR phys_avail[i++] = PHYSADDR; phys_avail[i++] = KERNPHYSADDR; #endif phys_avail[i++] = virtual_avail - KERNVIRTADDR + KERNPHYSADDR; phys_avail[i++] = PHYSADDR + memsize; phys_avail[i++] = 0; phys_avail[i++] = 0; init_param2(physmem); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } /* * These functions are handled elsewhere, so make them nops here. */ void cpu_startprofclock(void) { } void cpu_stopprofclock(void) { } void cpu_initclocks(void) { } void DELAY(int n) { if (soc_info.soc_data) soc_info.soc_data->soc_delay(n); } void cpu_reset(void) { if (soc_info.soc_data) soc_info.soc_data->soc_reset(); while (1) continue; } diff --git a/sys/arm/sa11x0/assabet_machdep.c b/sys/arm/sa11x0/assabet_machdep.c index 9266939bbdf2..986395340d39 100644 --- a/sys/arm/sa11x0/assabet_machdep.c +++ b/sys/arm/sa11x0/assabet_machdep.c @@ -1,391 +1,390 @@ /* $NetBSD: hpc_machdep.c,v 1.70 2003/09/16 08:18:22 agc Exp $ */ /*- * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * machdep.c * * Machine dependant functions for kernel setup * * This file needs a lot of work. 
* * Created : 17/09/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_md.h" #define _ARM32_BUS_DMA_PRIVATE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #define MDROOT_ADDR 0xd0400000 #define KERNEL_PT_VMEM 0 /* Page table for mapping video memory */ #define KERNEL_PT_SYS 0 /* Page table for mapping proc0 zero page */ #define KERNEL_PT_IO 3 /* Page table for mapping IO */ #define KERNEL_PT_IRQ 2 /* Page table for mapping irq handler */ #define KERNEL_PT_KERNEL 1 /* Page table for mapping kernel */ #define KERNEL_PT_L1 4 /* Page table for mapping l1pt */ #define KERNEL_PT_VMDATA 5 /* Page tables for mapping kernel VM */ #define KERNEL_PT_VMDATA_NUM 7 /* start with 16MB of KVM */ #define NUM_KERNEL_PTS (KERNEL_PT_VMDATA + KERNEL_PT_VMDATA_NUM) #define KERNEL_VM_BASE (KERNBASE + 0x00100000) #define KERNEL_VM_SIZE 0x05000000 extern u_int data_abort_handler_address; extern u_int prefetch_abort_handler_address; extern u_int undefined_handler_address; struct pv_addr kernel_pt_table[NUM_KERNEL_PTS]; extern vm_offset_t sa1110_uart_vaddr; extern vm_offset_t sa1_cache_clean_addr; #ifndef MD_ROOT_SIZE #define MD_ROOT_SIZE 65535 #endif /* Physical and virtual addresses for some global pages */ vm_paddr_t phys_avail[10]; vm_paddr_t dump_avail[4]; vm_paddr_t physical_start; vm_paddr_t physical_end; vm_paddr_t physical_freestart; struct pv_addr systempage; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; struct pv_addr kernelstack; /* Static device mappings. */ static const struct pmap_devmap assabet_devmap[] = { /* * Map the on-board devices VA == PA so that we can access them * with the MMU on or off. 
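 * (The SACOM1 entry below backs the early console: sa1110_uart_vaddr is
 * pointed at SACOM1_VBASE in initarm() before the VM is initialized, so
 * the UART has to be mapped statically here rather than obtained through
 * pmap_mapdev().)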
*/ { SACOM1_VBASE, SACOM1_BASE, SACOM1_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { SAIPIC_BASE, SAIPIC_BASE, SAIPIC_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { 0, 0, 0, 0, 0, } }; struct arm32_dma_range * bus_dma_get_range(void) { return (NULL); } int bus_dma_get_range_nb(void) { return (0); } void cpu_reset() { cpu_halt(); while (1); } #define CPU_SA110_CACHE_CLEAN_SIZE (0x4000 * 2) void * initarm(struct arm_boot_params *abp) { struct pv_addr kernel_l1pt; struct pv_addr md_addr; struct pv_addr md_bla; struct pv_addr dpcpu; int loop; u_int l1pagetable; vm_offset_t freemempos; vm_offset_t lastalloced; vm_offset_t lastaddr; uint32_t memsize = 32 * 1024 * 1024; sa1110_uart_vaddr = SACOM1_VBASE; boothowto = RB_VERBOSE | RB_SINGLE; /* Default value */ lastaddr = parse_boot_param(abp); cninit(); set_cpufuncs(); physmem = memsize / PAGE_SIZE; pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); physical_start = (vm_offset_t) KERNBASE; physical_end = lastaddr; physical_freestart = (((vm_offset_t)physical_end) + PAGE_MASK) & ~PAGE_MASK; md_addr.pv_va = md_addr.pv_pa = MDROOT_ADDR; freemempos = (vm_offset_t)round_page(physical_freestart); memset((void *)freemempos, 0, 256*1024); /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_pa, (np)); \ (var).pv_va = (var).pv_pa; #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += ((np) * PAGE_SIZE);\ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while ((freemempos & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); valloc_pages(md_bla, L2_TABLE_SIZE / PAGE_SIZE); alloc_pages(sa1_cache_clean_addr, CPU_SA110_CACHE_CLEAN_SIZE / PAGE_SIZE); for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) { if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[loop], L2_TABLE_SIZE / PAGE_SIZE); } else { kernel_pt_table[loop].pv_pa = freemempos + (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) * L2_TABLE_SIZE_REAL; kernel_pt_table[loop].pv_va = kernel_pt_table[loop].pv_pa; } } /* * Allocate a page for the system page mapped to V0x00000000 * This page will just contain the system vectors and can be * shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); valloc_pages(undstack, UND_STACK_SIZE); valloc_pages(kernelstack, KSTACK_PAGES); lastalloced = kernelstack.pv_va; /* * Allocate memory for the l1 and l2 page tables. The scheme to avoid * wasting memory by allocating the l1pt on the first 16k memory was * taken from NetBSD rpc_machdep.c. NKPT should be greater than 12 for * this to work (which is supposed to be the case). */ /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. 
* This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_pa; /* Map the L2 pages tables in the L1 page table */ pmap_link_l2pt(l1pagetable, 0x00000000, &kernel_pt_table[KERNEL_PT_SYS]); pmap_link_l2pt(l1pagetable, KERNBASE, &kernel_pt_table[KERNEL_PT_KERNEL]); pmap_link_l2pt(l1pagetable, 0xd0000000, &kernel_pt_table[KERNEL_PT_IO]); pmap_link_l2pt(l1pagetable, lastalloced & ~((L1_S_SIZE * 4) - 1), &kernel_pt_table[KERNEL_PT_L1]); pmap_link_l2pt(l1pagetable, 0x90000000, &kernel_pt_table[KERNEL_PT_IRQ]); pmap_link_l2pt(l1pagetable, MDROOT_ADDR, &md_bla); for (loop = 0; loop < KERNEL_PT_VMDATA_NUM; ++loop) pmap_link_l2pt(l1pagetable, KERNEL_VM_BASE + loop * 0x00100000, &kernel_pt_table[KERNEL_PT_VMDATA + loop]); pmap_map_chunk(l1pagetable, KERNBASE, KERNBASE, ((uint32_t)lastaddr - KERNBASE), VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the DPCPU pages */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, DPCPU_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the stack pages */ pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa, IRQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, md_addr.pv_va, md_addr.pv_pa, MD_ROOT_SIZE * 1024, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa, ABT_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa, UND_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa, KSTACK_PAGES * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) { pmap_map_chunk(l1pagetable, kernel_pt_table[loop].pv_va, kernel_pt_table[loop].pv_pa, L2_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); } pmap_map_chunk(l1pagetable, md_bla.pv_va, md_bla.pv_pa, L2_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); /* Map the vector page. */ pmap_map_entry(l1pagetable, vector_page, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the statically mapped devices. */ pmap_devmap_bootstrap(l1pagetable, assabet_devmap); pmap_map_chunk(l1pagetable, sa1_cache_clean_addr, 0xf0000000, CPU_SA110_CACHE_CLEAN_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); data_abort_handler_address = (u_int)data_abort_handler; prefetch_abort_handler_address = (u_int)prefetch_abort_handler; undefined_handler_address = (u_int)undefinedinstruction_bounce; undefined_init(); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT); setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. 
* After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ cpu_idcache_wbinv_all(); bootverbose = 1; /* Set stack for exception handlers */ init_proc0(kernelstack.pv_va); /* Enable MMU, I-cache, D-cache, write buffer. */ cpufunc_control(0x337f, 0x107d); arm_vector_init(ARM_VECTORS_LOW, ARM_VEC_ALL); pmap_curmaxkvaddr = freemempos + KERNEL_PT_VMDATA_NUM * 0x400000; dump_avail[0] = phys_avail[0] = round_page(virtual_avail); dump_avail[1] = phys_avail[1] = 0xc0000000 + 0x02000000 - 1; dump_avail[2] = phys_avail[2] = 0; dump_avail[3] = phys_avail[3] = 0; mutex_init(); vm_max_kernel_address = 0xd0000000; pmap_bootstrap(freemempos, &kernel_l1pt); init_param2(physmem); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index e5765687145d..06ad6346adbe 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -1,812 +1,810 @@ /* * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_io.h" #define FUSE_DEBUG_MODULE IO #include "fuse_debug.h" static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag); int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) { struct fuse_filehandle *fufh; int err, directio; MPASS(vp->v_type == VREG || vp->v_type == VDIR); err = fuse_filehandle_getrw(vp, (uio->uio_rw == UIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh); if (err) { printf("FUSE: io dispatch: filehandles are closed\n"); return err; } /* * Ideally, when the daemon asks for direct io at open time, the * standard file flag should be set according to this, so that would * just change the default mode, which later on could be changed via * fcntl(2). * But this doesn't work, the O_DIRECT flag gets cleared at some point * (don't know where). So to make any use of the Fuse direct_io option, * we hardwire it into the file's private data (similarly to Linux, * btw.). 
*/ directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); switch (uio->uio_rw) { case UIO_READ: if (directio) { FS_DEBUG("direct read of vnode %ju via file handle %ju\n", (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id); err = fuse_read_directbackend(vp, uio, cred, fufh); } else { FS_DEBUG("buffered read of vnode %ju\n", (uintmax_t)VTOILLU(vp)); err = fuse_read_biobackend(vp, uio, cred, fufh); } break; case UIO_WRITE: if (directio) { FS_DEBUG("direct write of vnode %ju via file handle %ju\n", (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id); err = fuse_write_directbackend(vp, uio, cred, fufh); } else { FS_DEBUG("buffered write of vnode %ju\n", (uintmax_t)VTOILLU(vp)); err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag); } break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); } return (err); } static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct buf *bp; daddr_t lbn; int bcount; int err = 0, n = 0, on = 0; off_t filesize; const int biosize = fuse_iosize(vp); FS_DEBUG("resid=%zx offset=%jx fsize=%jx\n", uio->uio_resid, uio->uio_offset, VTOFUD(vp)->filesize); if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bcount = MIN(MAXBSIZE, biosize); filesize = VTOFUD(vp)->filesize; do { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); FS_DEBUG2G("biosize %d, lbn %d, on %d\n", biosize, (int)lbn, on); /* * Obtain the buffer cache block. Figure out the buffer size * when we are at EOF. If we are modifying the size of the * buffer based on an EOF condition we need to hold * nfs_rslock() through obtaining the buffer to prevent * a potential writer-appender from messing with n_size. * Otherwise we may accidently truncate the buffer and * lose dirty data. * * Note that bcount is *not* DEV_BSIZE aligned. */ if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; } bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (!bp) return (EINTR); /* * If B_CACHE is not set, we must issue the read. If this * fails, we return an error. */ if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); err = fuse_io_strategy(vp, bp); if (err) { brelse(bp); return (err); } } /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount) n = MIN((unsigned)(bcount - on), uio->uio_resid); if (n > 0) { FS_DEBUG2G("feeding buffeater with %d bytes of buffer %p," " saying %d was asked for\n", n, bp->b_data + on, n + (int)bp->b_resid); err = uiomove(bp->b_data + on, n, uio); } brelse(bp); FS_DEBUG2G("end of turn, err %d, uio->uio_resid %zd, n %d\n", err, uio->uio_resid, n); } while (err == 0 && uio->uio_resid > 0 && n > 0); return (err); } static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_dispatcher fdi; struct fuse_read_in *fri; int err = 0; if (uio->uio_resid == 0) return (0); fdisp_init(&fdi, 0); /* * XXX In "normal" case we use an intermediate kernel buffer for * transmitting data from daemon's context to ours. Eventually, we should * get rid of this. 
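 * (Illustration of the loop below, assuming a hypothetical max_read of
 * 32k: a 100k request goes out as FUSE_READ messages of 32k, 32k, 32k and
 * 4k; a reply shorter than the requested fri->size, e.g. at end of file,
 * terminates the loop early.)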
Anyway, if the target uio lives in sysspace (we are * called from pageops), and the input data doesn't need kernel-side * processing (we are not called from readdir) we can already invoke * an optimized, "peer-to-peer" I/O routine. */ while (uio->uio_resid > 0) { fdi.iosize = sizeof(*fri); fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio->uio_offset; fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); FS_DEBUG2G("fri->fh %ju, fri->offset %ju, fri->size %ju\n", (uintmax_t)fri->fh, (uintmax_t)fri->offset, (uintmax_t)fri->size); if ((err = fdisp_wait_answ(&fdi))) goto out; FS_DEBUG2G("complete: got iosize=%d, requested fri.size=%zd; " "resid=%zd offset=%ju\n", fri->size, fdi.iosize, uio->uio_resid, (uintmax_t)uio->uio_offset); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; if (fdi.iosize < fri->size) break; } out: fdisp_destroy(&fdi); return (err); } static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_write_in *fwi; struct fuse_dispatcher fdi; size_t chunksize; int diff; int err = 0; if (!uio->uio_resid) return (0); fdisp_init(&fdi, 0); while (uio->uio_resid > 0) { chunksize = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_write); fdi.iosize = sizeof(*fwi) + chunksize; fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; fwi->fh = fufh->fh_id; fwi->offset = uio->uio_offset; fwi->size = chunksize; if ((err = uiomove((char *)fdi.indata + sizeof(*fwi), chunksize, uio))) break; if ((err = fdisp_wait_answ(&fdi))) break; diff = chunksize - ((struct fuse_write_out *)fdi.answ)->size; if (diff < 0) { err = EINVAL; break; } uio->uio_resid += diff; uio->uio_offset -= diff; if (uio->uio_offset > fvdat->filesize) fuse_vnode_setsize(vp, cred, uio->uio_offset); } fdisp_destroy(&fdi); return (err); } static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; int bcount; int n, on, err = 0; const int biosize = fuse_iosize(vp); KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode")); FS_DEBUG("resid=%zx offset=%jx fsize=%jx\n", uio->uio_resid, uio->uio_offset, fvdat->filesize); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio_setoffset(uio, fvdat->filesize); /* * Find all of this file's B_NEEDCOMMIT buffers. If our writes * would exceed the local maximum per-file write commit size when * combined with those, we must decide whether to flush, * go synchronous, or return err. We don't bother checking * IO_UNIT -- we just make all writes atomic anyway, as there's * no point optimizing for something that really won't ever happen. */ do { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); n = MIN((unsigned)(biosize - on), uio->uio_resid); FS_DEBUG2G("lbn %ju, on %d, n %d, uio offset %ju, uio resid %zd\n", (uintmax_t)lbn, on, n, (uintmax_t)uio->uio_offset, uio->uio_resid); again: /* * Handle direct append and file extension cases, calculate * unaligned buffer size. */ if (uio->uio_offset == fvdat->filesize && n) { /* * Get the buffer (in its pre-append state to maintain * B_CACHE if it was previously set). 
Resize the * nfsnode after we have locked the buffer to prevent * readers from reading garbage. */ bcount = on; FS_DEBUG("getting block from OS, bcount %d\n", bcount); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (bp != NULL) { long save; err = fuse_vnode_setsize(vp, cred, uio->uio_offset + n); if (err) { brelse(bp); break; } save = bp->b_flags & B_CACHE; bcount += n; allocbuf(bp, bcount); bp->b_flags |= save; } } else { /* * Obtain the locked cache block first, and then * adjust the file's size as appropriate. */ bcount = on + n; if ((off_t)lbn * biosize + bcount < fvdat->filesize) { if ((off_t)(lbn + 1) * biosize < fvdat->filesize) bcount = biosize; else bcount = fvdat->filesize - (off_t)lbn *biosize; } FS_DEBUG("getting block from OS, bcount %d\n", bcount); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (bp && uio->uio_offset + n > fvdat->filesize) { err = fuse_vnode_setsize(vp, cred, uio->uio_offset + n); if (err) { brelse(bp); break; } } } if (!bp) { err = EINTR; break; } /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ if (on == 0 && n == bcount) { bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); fuse_io_strategy(vp, bp); if ((err = bp->b_error)) { brelse(bp); break; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { FS_DEBUG("FUSE append race @%lx:%d\n", (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. * * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * as an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* * Yes, we mean it. Write out everything to "storage" * immediatly, without hesitation. (Apart from other * reasons: the only way to know if a write is valid * if its actually written out.) 
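 * (Worked example: with an existing dirty range of [0, 512) in this
 * buffer, a new write at on == 1024 satisfies "on > bp->b_dirtyend", so
 * the old range is bwrite()n here and the write retried via the goto
 * just below; a new write at on == 256 would instead simply extend the
 * range through the MIN()/MAX() update further down.)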
*/ bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; break; } goto again; } err = uiomove((char *)bp->b_data + on, n, uio); /* * Since this block is being modified, it must be written * again and not just committed. Since write clustering does * not work for the stage 1 data write, only the stage 2 * commit rpc, we have to clear B_CLUSTEROK as well. */ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; } /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } err = bwrite(bp); if (err) break; } while (uio->uio_resid > 0 && n > 0); if (fuse_sync_resize && (fvdat->flag & FN_SIZECHANGE) != 0) fuse_vnode_savesize(vp, cred); return (err); } int fuse_io_strategy(struct vnode *vp, struct buf *bp) { struct fuse_filehandle *fufh; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; int error = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); FS_DEBUG("inode=%ju offset=%jd resid=%ld\n", (uintmax_t)VTOI(vp), (intmax_t)(((off_t)bp->b_blkno) * biosize), bp->b_bcount); error = fuse_filehandle_getrw(vp, (bp->b_iocmd == BIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh); if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; return (error); } cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = curthread; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = ((off_t)bp->b_blkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); if ((!error && uiop->uio_resid) || (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO && uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 && uiop->uio_offset >= fvdat->cached_attrs.va_size)) { /* * If we had a short read with no error, we must have * hit a file hole. We should zero-fill the remainder. * This can also occur if the server hits the file EOF. * * Holes used to be able to occur due to pending * writes, but that is not possible any longer. 
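 * (Worked example: a 4096-byte buffer read that the daemon answers with
 * 1500 bytes and no error leaves uiop->uio_resid == 2596, so nread below
 * is 1500 and the trailing 2596 bytes are bzero()ed before the buffer is
 * handed back.)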
*/ int nread = bp->b_bcount - uiop->uio_resid; int left = uiop->uio_resid; if (error != 0) { printf("FUSE: Fix broken io: offset %ju, " " resid %zd, file size %ju/%ju\n", (uintmax_t)uiop->uio_offset, uiop->uio_resid, fvdat->filesize, fvdat->cached_attrs.va_size); error = 0; } if (left > 0) bzero((char *)bp->b_data + nread, left); uiop->uio_resid = 0; } if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * If we only need to commit, try to commit */ if (bp->b_flags & B_NEEDCOMMIT) { FS_DEBUG("write: B_NEEDCOMMIT flags set\n"); } /* * Setup for actual write */ if ((off_t)bp->b_blkno * biosize + bp->b_dirtyend > fvdat->filesize) bp->b_dirtyend = fvdat->filesize - (off_t)bp->b_blkno * biosize; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_blkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; error = fuse_write_directbackend(vp, uiop, cred, fufh); if (error == EINTR || error == ETIMEDOUT || (!error && (bp->b_flags & B_NEEDCOMMIT))) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; bufdone(bp); return (error); } int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) { struct vop_fsync_args a = { .a_vp = vp, .a_waitfor = waitfor, .a_td = td, }; return (vop_stdfsync(&a)); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int fuse_io_invalbuf(struct vnode *vp, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (vp->v_iflag & VI_DOOMED) return 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); while (fvdat->flag & FN_FLUSHINPROG) { struct proc *p = td->td_proc; if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) return EIO; fvdat->flag |= FN_FLUSHWANT; tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); error = 0; if (p != NULL) { PROC_LOCK(p); if (SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) error = EINTR; PROC_UNLOCK(p); } if (error == EINTR) return EINTR; } fvdat->flag |= FN_FLUSHINPROG; if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_LOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object); } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); while (error) { if (error == ERESTART || error == EINTR) { fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return EINTR; } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); } fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return (error); } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 63de37a27de4..e70e60e1997d 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -1,1242 +1,1241 @@ /*- * Copyright (c) 1994, Sean Eric Fagan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #ifdef COMPAT_FREEBSD32 #include #include struct ptrace_io_desc32 { int piod_op; uint32_t piod_offs; uint32_t piod_addr; uint32_t piod_len; }; struct ptrace_vm_entry32 { int pve_entry; int pve_timestamp; uint32_t pve_start; uint32_t pve_end; uint32_t pve_offset; u_int pve_prot; u_int pve_pathlen; int32_t pve_fileid; u_int pve_fsid; uint32_t pve_path; }; struct ptrace_lwpinfo32 { lwpid_t pl_lwpid; /* LWP described. */ int pl_event; /* Event that stopped the LWP. */ int pl_flags; /* LWP flags. */ sigset_t pl_sigmask; /* LWP signal mask */ sigset_t pl_siglist; /* LWP pending signal */ struct siginfo32 pl_siginfo; /* siginfo for signal */ char pl_tdname[MAXCOMLEN + 1]; /* LWP name. */ int pl_child_pid; /* New child pid */ }; #endif /* * Functions implemented using PROC_ACTION(): * * proc_read_regs(proc, regs) * Get the current user-visible register set from the process * and copy it into the regs structure (). * The process is stopped at the time read_regs is called. * * proc_write_regs(proc, regs) * Update the current register set from the passed in regs * structure. Take care to avoid clobbering special CPU * registers or privileged bits in the PSL. * Depending on the architecture this may have fix-up work to do, * especially if the IAR or PCW are modified. * The process is stopped at the time write_regs is called. * * proc_read_fpregs, proc_write_fpregs * deal with the floating point register set, otherwise as above. * * proc_read_dbregs, proc_write_dbregs * deal with the processor debug register set, otherwise as above. * * proc_sstep(proc) * Arrange for the process to trap after executing a single instruction. 
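 *
 * (All of the above are thin wrappers around the PROC_ACTION() macro
 * defined below; proc_read_regs(proc, regs), for instance, boils down to:
 * assert that the proc lock is held, return EIO if the process is not
 * resident in memory (P_INMEM clear), otherwise return the result of
 * fill_regs().)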
*/ #define PROC_ACTION(action) do { \ int error; \ \ PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); \ if ((td->td_proc->p_flag & P_INMEM) == 0) \ error = EIO; \ else \ error = (action); \ return (error); \ } while(0) int proc_read_regs(struct thread *td, struct reg *regs) { PROC_ACTION(fill_regs(td, regs)); } int proc_write_regs(struct thread *td, struct reg *regs) { PROC_ACTION(set_regs(td, regs)); } int proc_read_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(fill_dbregs(td, dbregs)); } int proc_write_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(set_dbregs(td, dbregs)); } /* * Ptrace doesn't support fpregs at all, and there are no security holes * or translations for fpregs, so we can just copy them. */ int proc_read_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(fill_fpregs(td, fpregs)); } int proc_write_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(set_fpregs(td, fpregs)); } #ifdef COMPAT_FREEBSD32 /* For 32 bit binaries, we need to expose the 32 bit regs layouts. */ int proc_read_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(fill_regs32(td, regs32)); } int proc_write_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(set_regs32(td, regs32)); } int proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(fill_dbregs32(td, dbregs32)); } int proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(set_dbregs32(td, dbregs32)); } int proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(fill_fpregs32(td, fpregs32)); } int proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(set_fpregs32(td, fpregs32)); } #endif int proc_sstep(struct thread *td) { PROC_ACTION(ptrace_single_step(td)); } int proc_rwmem(struct proc *p, struct uio *uio) { vm_map_t map; vm_offset_t pageno; /* page number */ vm_prot_t reqprot; int error, fault_flags, page_offset, writing; /* * Assert that someone has locked this vmspace. (Should be * curthread but we can't assert that.) This keeps the process * from exiting out from under us until this operation completes. */ KASSERT(p->p_lock >= 1, ("%s: process %p (pid %d) not held", __func__, p, p->p_pid)); /* * The map we want... */ map = &p->p_vmspace->vm_map; /* * If we are writing, then we request vm_fault() to create a private * copy of each page. Since these copies will not be writeable by the * process, we must explicity request that they be dirtied. */ writing = uio->uio_rw == UIO_WRITE; reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ; fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL; /* * Only map in one page at a time. We don't have to, but it * makes things easier. This way is trivial - right? */ do { vm_offset_t uva; u_int len; vm_page_t m; uva = (vm_offset_t)uio->uio_offset; /* * Get the page number of this segment. */ pageno = trunc_page(uva); page_offset = uva - pageno; /* * How many bytes to copy */ len = min(PAGE_SIZE - page_offset, uio->uio_resid); /* * Fault and hold the page on behalf of the process. */ error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m); if (error != KERN_SUCCESS) { if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; else error = EFAULT; break; } /* * Now do the i/o move. */ error = uiomove_fromphys(&m, page_offset, len, uio); /* Make the I-cache coherent for breakpoints. 
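 * (For example, when a debugger pokes a breakpoint opcode into a text page
 * via PT_IO or PT_WRITE_I, the newly written bytes sit in the data cache,
 * so any executable mapping of the range must have its instruction cache
 * synced before the traced thread runs again; that is what the
 * vm_sync_icache() call below is for.)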
*/ if (writing && error == 0) { vm_map_lock_read(map); if (vm_map_check_protection(map, pageno, pageno + PAGE_SIZE, VM_PROT_EXECUTE)) vm_sync_icache(map, uva, len); vm_map_unlock_read(map); } /* * Release the page. */ vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); return (error); } static int ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve) { struct vattr vattr; vm_map_t map; vm_map_entry_t entry; vm_object_t obj, tobj, lobj; struct vmspace *vm; struct vnode *vp; char *freepath, *fullpath; u_int pathlen; int error, index; error = 0; obj = NULL; vm = vmspace_acquire_ref(p); map = &vm->vm_map; vm_map_lock_read(map); do { entry = map->header.next; index = 0; while (index < pve->pve_entry && entry != &map->header) { entry = entry->next; index++; } if (index != pve->pve_entry) { error = EINVAL; break; } while (entry != &map->header && (entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) { entry = entry->next; index++; } if (entry == &map->header) { error = ENOENT; break; } /* We got an entry. */ pve->pve_entry = index + 1; pve->pve_timestamp = map->timestamp; pve->pve_start = entry->start; pve->pve_end = entry->end - 1; pve->pve_offset = entry->offset; pve->pve_prot = entry->protection; /* Backing object's path needed? */ if (pve->pve_pathlen == 0) break; pathlen = pve->pve_pathlen; pve->pve_pathlen = 0; obj = entry->object.vm_object; if (obj != NULL) VM_OBJECT_LOCK(obj); } while (0); vm_map_unlock_read(map); vmspace_free(vm); pve->pve_fsid = VNOVAL; pve->pve_fileid = VNOVAL; if (error == 0 && obj != NULL) { lobj = obj; for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_LOCK(tobj); if (lobj != obj) VM_OBJECT_UNLOCK(lobj); lobj = tobj; pve->pve_offset += tobj->backing_object_offset; } vp = (lobj->type == OBJT_VNODE) ? 
lobj->handle : NULL; if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_UNLOCK(lobj); VM_OBJECT_UNLOCK(obj); if (vp != NULL) { freepath = NULL; fullpath = NULL; vn_fullpath(td, vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &vattr, td->td_ucred) == 0) { pve->pve_fileid = vattr.va_fileid; pve->pve_fsid = vattr.va_fsid; } vput(vp); if (fullpath != NULL) { pve->pve_pathlen = strlen(fullpath) + 1; if (pve->pve_pathlen <= pathlen) { error = copyout(fullpath, pve->pve_path, pve->pve_pathlen); } else error = ENAMETOOLONG; } if (freepath != NULL) free(freepath, M_TEMP); } } return (error); } #ifdef COMPAT_FREEBSD32 static int ptrace_vm_entry32(struct thread *td, struct proc *p, struct ptrace_vm_entry32 *pve32) { struct ptrace_vm_entry pve; int error; pve.pve_entry = pve32->pve_entry; pve.pve_pathlen = pve32->pve_pathlen; pve.pve_path = (void *)(uintptr_t)pve32->pve_path; error = ptrace_vm_entry(td, p, &pve); if (error == 0) { pve32->pve_entry = pve.pve_entry; pve32->pve_timestamp = pve.pve_timestamp; pve32->pve_start = pve.pve_start; pve32->pve_end = pve.pve_end; pve32->pve_offset = pve.pve_offset; pve32->pve_prot = pve.pve_prot; pve32->pve_fileid = pve.pve_fileid; pve32->pve_fsid = pve.pve_fsid; } pve32->pve_pathlen = pve.pve_pathlen; return (error); } static void ptrace_lwpinfo_to32(const struct ptrace_lwpinfo *pl, struct ptrace_lwpinfo32 *pl32) { pl32->pl_lwpid = pl->pl_lwpid; pl32->pl_event = pl->pl_event; pl32->pl_flags = pl->pl_flags; pl32->pl_sigmask = pl->pl_sigmask; pl32->pl_siglist = pl->pl_siglist; siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo); strcpy(pl32->pl_tdname, pl->pl_tdname); pl32->pl_child_pid = pl->pl_child_pid; } #endif /* COMPAT_FREEBSD32 */ /* * Process debugging system call. */ #ifndef _SYS_SYSPROTO_H_ struct ptrace_args { int req; pid_t pid; caddr_t addr; int data; }; #endif #ifdef COMPAT_FREEBSD32 /* * This CPP subterfuge is to try and reduce the number of ifdefs in * the body of the code. * COPYIN(uap->addr, &r.reg, sizeof r.reg); * becomes either: * copyin(uap->addr, &r.reg, sizeof r.reg); * or * copyin(uap->addr, &r.reg32, sizeof r.reg32); * .. except this is done at runtime. */ #define COPYIN(u, k, s) wrap32 ? \ copyin(u, k ## 32, s ## 32) : \ copyin(u, k, s) #define COPYOUT(k, u, s) wrap32 ? \ copyout(k ## 32, u, s ## 32) : \ copyout(k, u, s) #else #define COPYIN(u, k, s) copyin(u, k, s) #define COPYOUT(k, u, s) copyout(k, u, s) #endif int sys_ptrace(struct thread *td, struct ptrace_args *uap) { /* * XXX this obfuscation is to reduce stack usage, but the register * structs may be too large to put on the stack anyway. 
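 * The union declared just below overlays all of the request-specific
 * structures, so the stack cost is only that of the largest member rather
 * than their sum; under COMPAT_FREEBSD32 the 32-bit variants simply share
 * the same storage.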
*/ union { struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct dbreg dbreg; struct fpreg fpreg; struct reg reg; #ifdef COMPAT_FREEBSD32 struct dbreg32 dbreg32; struct fpreg32 fpreg32; struct reg32 reg32; struct ptrace_io_desc32 piod32; struct ptrace_lwpinfo32 pl32; struct ptrace_vm_entry32 pve32; #endif } r; void *addr; int error = 0; #ifdef COMPAT_FREEBSD32 int wrap32 = 0; if (SV_CURPROC_FLAG(SV_ILP32)) wrap32 = 1; #endif AUDIT_ARG_PID(uap->pid); AUDIT_ARG_CMD(uap->req); AUDIT_ARG_VALUE(uap->data); addr = &r; switch (uap->req) { case PT_GETREGS: case PT_GETFPREGS: case PT_GETDBREGS: case PT_LWPINFO: break; case PT_SETREGS: error = COPYIN(uap->addr, &r.reg, sizeof r.reg); break; case PT_SETFPREGS: error = COPYIN(uap->addr, &r.fpreg, sizeof r.fpreg); break; case PT_SETDBREGS: error = COPYIN(uap->addr, &r.dbreg, sizeof r.dbreg); break; case PT_IO: error = COPYIN(uap->addr, &r.piod, sizeof r.piod); break; case PT_VM_ENTRY: error = COPYIN(uap->addr, &r.pve, sizeof r.pve); break; default: addr = uap->addr; break; } if (error) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data); if (error) return (error); switch (uap->req) { case PT_VM_ENTRY: error = COPYOUT(&r.pve, uap->addr, sizeof r.pve); break; case PT_IO: error = COPYOUT(&r.piod, uap->addr, sizeof r.piod); break; case PT_GETREGS: error = COPYOUT(&r.reg, uap->addr, sizeof r.reg); break; case PT_GETFPREGS: error = COPYOUT(&r.fpreg, uap->addr, sizeof r.fpreg); break; case PT_GETDBREGS: error = COPYOUT(&r.dbreg, uap->addr, sizeof r.dbreg); break; case PT_LWPINFO: error = copyout(&r.pl, uap->addr, uap->data); break; } return (error); } #undef COPYIN #undef COPYOUT #ifdef COMPAT_FREEBSD32 /* * PROC_READ(regs, td2, addr); * becomes either: * proc_read_regs(td2, addr); * or * proc_read_regs32(td2, addr); * .. except this is done at runtime. There is an additional * complication in that PROC_WRITE disallows 32 bit consumers * from writing to 64 bit address space targets. */ #define PROC_READ(w, t, a) wrap32 ? \ proc_read_ ## w ## 32(t, a) : \ proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) wrap32 ? \ (safe ? proc_write_ ## w ## 32(t, a) : EINVAL ) : \ proc_write_ ## w (t, a) #else #define PROC_READ(w, t, a) proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) proc_write_ ## w (t, a) #endif int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) { struct iovec iov; struct uio uio; struct proc *curp, *p, *pp; struct thread *td2 = NULL, *td3; struct ptrace_io_desc *piod = NULL; struct ptrace_lwpinfo *pl; int error, write, tmp, num; int proctree_locked = 0; lwpid_t tid = 0, *buf; #ifdef COMPAT_FREEBSD32 int wrap32 = 0, safe = 0; struct ptrace_io_desc32 *piod32 = NULL; struct ptrace_lwpinfo32 *pl32 = NULL; struct ptrace_lwpinfo plr; #endif curp = td->td_proc; /* Lock proctree before locking the process. 
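 * The requests that may re-parent or resume the target (PT_ATTACH,
 * PT_DETACH, PT_CONTINUE and friends) take their locks in a fixed order,
 * roughly:
 *
 *	sx_xlock(&proctree_lock);	(tree lock first)
 *	PROC_LOCK(p);			(then the per-process lock)
 *	...				(handle the request)
 *	PROC_UNLOCK(p);
 *	sx_xunlock(&proctree_lock);
 *
 * keeping proctree_lock ahead of any process lock, the same order the
 * fork and exit paths use.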
*/ switch (req) { case PT_TRACE_ME: case PT_ATTACH: case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_FOLLOW_FORK: case PT_DETACH: sx_xlock(&proctree_lock); proctree_locked = 1; break; default: break; } write = 0; if (req == PT_TRACE_ME) { p = td->td_proc; PROC_LOCK(p); } else { if (pid <= PID_MAX) { if ((p = pfind(pid)) == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } } else { td2 = tdfind(pid, -1); if (td2 == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } p = td2->td_proc; tid = pid; pid = p->p_pid; } } AUDIT_ARG_PROCESS(p); if ((p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto fail; } if ((error = p_cansee(td, p)) != 0) goto fail; if ((error = p_candebug(td, p)) != 0) goto fail; /* * System processes can't be debugged. */ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } if (tid == 0) { if ((p->p_flag & P_STOPPED_TRACE) != 0) { KASSERT(p->p_xthread != NULL, ("NULL p_xthread")); td2 = p->p_xthread; } else { td2 = FIRST_THREAD_IN_PROC(p); } tid = td2->td_tid; } #ifdef COMPAT_FREEBSD32 /* * Test if we're a 32 bit client and what the target is. * Set the wrap controls accordingly. */ if (SV_CURPROC_FLAG(SV_ILP32)) { if (SV_PROC_FLAG(td2->td_proc, SV_ILP32)) safe = 1; wrap32 = 1; } #endif /* * Permissions check */ switch (req) { case PT_TRACE_ME: /* Always legal. */ break; case PT_ATTACH: /* Self */ if (p->p_pid == td->td_proc->p_pid) { error = EINVAL; goto fail; } /* Already traced */ if (p->p_flag & P_TRACED) { error = EBUSY; goto fail; } /* Can't trace an ancestor if you're being traced. */ if (curp->p_flag & P_TRACED) { for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) { if (pp == p) { error = EINVAL; goto fail; } } } /* OK */ break; case PT_CLEARSTEP: /* Allow thread to clear single step for itself */ if (td->td_tid == tid) break; /* FALLTHROUGH */ default: /* not being traced... */ if ((p->p_flag & P_TRACED) == 0) { error = EPERM; goto fail; } /* not being traced by YOU */ if (p->p_pptr != td->td_proc) { error = EBUSY; goto fail; } /* not currently stopped */ if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) == 0 || p->p_suspcount != p->p_numthreads || (p->p_flag & P_WAITED) == 0) { error = EBUSY; goto fail; } if ((p->p_flag & P_STOPPED_TRACE) == 0) { static int count = 0; if (count++ == 0) printf("P_STOPPED_TRACE not set.\n"); } /* OK */ break; } /* Keep this process around until we finish this request. */ _PHOLD(p); #ifdef FIX_SSTEP /* * Single step fixup ala procfs */ FIX_SSTEP(td2); #endif /* * Actually do the requests */ td->td_retval[0] = 0; switch (req) { case PT_TRACE_ME: /* set my trace flag and "owner" so it can read/write me */ p->p_flag |= P_TRACED; if (p->p_flag & P_PPWAIT) p->p_flag |= P_PPTRACE; p->p_oppid = p->p_pptr->p_pid; break; case PT_ATTACH: /* security check done above */ /* * It would be nice if the tracing relationship was separate * from the parent relationship but that would require * another set of links in the proc struct or for "wait" * to scan the entire proc table. To make life easier, * we just re-parent the process we're trying to trace. * The old parent is remembered so we can put things back * on a "detach". 
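 * In outline (sketch only), the attach/detach bookkeeping is:
 *
 *	attach:	p->p_oppid = p->p_pptr->p_pid;	(remember the old parent)
 *		proc_reparent(p, td->td_proc);	(debugger becomes parent)
 *	detach:	pp = pfind(p->p_oppid);		(look the old parent up)
 *		proc_reparent(p, pp != NULL ? pp : initproc);
 *		p->p_oppid = 0;
 *
 * so the debugger's wait(2) sees the tracee's stops while it is attached,
 * and the original parent gets the child back once tracing ends.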
*/ p->p_flag |= P_TRACED; p->p_oppid = p->p_pptr->p_pid; if (p->p_pptr != td->td_proc) { proc_reparent(p, td->td_proc); } data = SIGSTOP; goto sendsig; /* in PT_CONTINUE below */ case PT_CLEARSTEP: error = ptrace_clear_single_step(td2); break; case PT_SETSTEP: error = ptrace_single_step(td2); break; case PT_SUSPEND: td2->td_dbgflags |= TDB_SUSPEND; thread_lock(td2); td2->td_flags |= TDF_NEEDSUSPCHK; thread_unlock(td2); break; case PT_RESUME: td2->td_dbgflags &= ~TDB_SUSPEND; break; case PT_FOLLOW_FORK: if (data) p->p_flag |= P_FOLLOWFORK; else p->p_flag &= ~P_FOLLOWFORK; break; case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_DETACH: /* Zero means do not send any signal */ if (data < 0 || data > _SIG_MAXSIG) { error = EINVAL; break; } switch (req) { case PT_STEP: error = ptrace_single_step(td2); if (error) goto out; break; case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: if (addr != (void *)1) { error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr); if (error) goto out; } switch (req) { case PT_TO_SCE: p->p_stops |= S_PT_SCE; break; case PT_TO_SCX: p->p_stops |= S_PT_SCX; break; case PT_SYSCALL: p->p_stops |= S_PT_SCE | S_PT_SCX; break; } break; case PT_DETACH: /* reset process parent */ if (p->p_oppid != p->p_pptr->p_pid) { struct proc *pp; PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); PROC_UNLOCK(p); pp = pfind(p->p_oppid); if (pp == NULL) pp = initproc; else PROC_UNLOCK(pp); PROC_LOCK(p); proc_reparent(p, pp); if (pp == initproc) p->p_sigparent = SIGCHLD; } p->p_oppid = 0; p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK); /* should we send SIGCHLD? */ /* childproc_continued(p); */ break; } sendsig: if (proctree_locked) { sx_xunlock(&proctree_lock); proctree_locked = 0; } p->p_xstat = data; p->p_xthread = NULL; if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) != 0) { /* deliver or queue signal */ td2->td_dbgflags &= ~TDB_XSIG; td2->td_xsig = data; if (req == PT_DETACH) { FOREACH_THREAD_IN_PROC(p, td3) td3->td_dbgflags &= ~TDB_SUSPEND; } /* * unsuspend all threads, to not let a thread run, * you should use PT_SUSPEND to suspend it before * continuing process. */ PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); if (req == PT_ATTACH) kern_psignal(p, data); } else { if (data) kern_psignal(p, data); } break; case PT_WRITE_I: case PT_WRITE_D: td2->td_dbgflags |= TDB_USERWR; write = 1; /* FALLTHROUGH */ case PT_READ_I: case PT_READ_D: PROC_UNLOCK(p); tmp = 0; /* write = 0 set above */ iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp; iov.iov_len = sizeof(int); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(uintptr_t)addr; uio.uio_resid = sizeof(int); uio.uio_segflg = UIO_SYSSPACE; /* i.e.: the uap */ uio.uio_rw = write ? UIO_WRITE : UIO_READ; uio.uio_td = td; error = proc_rwmem(p, &uio); if (uio.uio_resid != 0) { /* * XXX proc_rwmem() doesn't currently return ENOSPC, * so I think write() can bogusly return 0. * XXX what happens for short writes? We don't want * to write partial data. * XXX proc_rwmem() returns EPERM for other invalid * addresses. Convert this to EINVAL. Does this * clobber returns of EPERM for other reasons? 
*/ if (error == 0 || error == ENOSPC || error == EPERM) error = EINVAL; /* EOF */ } if (!write) td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_IO: #ifdef COMPAT_FREEBSD32 if (wrap32) { piod32 = addr; iov.iov_base = (void *)(uintptr_t)piod32->piod_addr; iov.iov_len = piod32->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod32->piod_offs; uio.uio_resid = piod32->piod_len; } else #endif { piod = addr; iov.iov_base = piod->piod_addr; iov.iov_len = piod->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs; uio.uio_resid = piod->piod_len; } uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_USERSPACE; uio.uio_td = td; #ifdef COMPAT_FREEBSD32 tmp = wrap32 ? piod32->piod_op : piod->piod_op; #else tmp = piod->piod_op; #endif switch (tmp) { case PIOD_READ_D: case PIOD_READ_I: uio.uio_rw = UIO_READ; break; case PIOD_WRITE_D: case PIOD_WRITE_I: td2->td_dbgflags |= TDB_USERWR; uio.uio_rw = UIO_WRITE; break; default: error = EINVAL; goto out; } PROC_UNLOCK(p); error = proc_rwmem(p, &uio); #ifdef COMPAT_FREEBSD32 if (wrap32) piod32->piod_len -= uio.uio_resid; else #endif piod->piod_len -= uio.uio_resid; PROC_LOCK(p); break; case PT_KILL: data = SIGKILL; goto sendsig; /* in PT_CONTINUE above */ case PT_SETREGS: td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(regs, td2, addr); break; case PT_GETREGS: error = PROC_READ(regs, td2, addr); break; case PT_SETFPREGS: td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(fpregs, td2, addr); break; case PT_GETFPREGS: error = PROC_READ(fpregs, td2, addr); break; case PT_SETDBREGS: td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(dbregs, td2, addr); break; case PT_GETDBREGS: error = PROC_READ(dbregs, td2, addr); break; case PT_LWPINFO: if (data <= 0 || #ifdef COMPAT_FREEBSD32 (!wrap32 && data > sizeof(*pl)) || (wrap32 && data > sizeof(*pl32))) { #else data > sizeof(*pl)) { #endif error = EINVAL; break; } #ifdef COMPAT_FREEBSD32 if (wrap32) { pl = &plr; pl32 = addr; } else #endif pl = addr; pl->pl_lwpid = td2->td_tid; pl->pl_event = PL_EVENT_NONE; pl->pl_flags = 0; if (td2->td_dbgflags & TDB_XSIG) { pl->pl_event = PL_EVENT_SIGNAL; if (td2->td_dbgksi.ksi_signo != 0 && #ifdef COMPAT_FREEBSD32 ((!wrap32 && data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo)) || (wrap32 && data >= offsetof(struct ptrace_lwpinfo32, pl_siginfo) + sizeof(struct siginfo32))) #else data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo) #endif ){ pl->pl_flags |= PL_FLAG_SI; pl->pl_siginfo = td2->td_dbgksi.ksi_info; } } if ((pl->pl_flags & PL_FLAG_SI) == 0) bzero(&pl->pl_siginfo, sizeof(pl->pl_siginfo)); if (td2->td_dbgflags & TDB_SCE) pl->pl_flags |= PL_FLAG_SCE; else if (td2->td_dbgflags & TDB_SCX) pl->pl_flags |= PL_FLAG_SCX; if (td2->td_dbgflags & TDB_EXEC) pl->pl_flags |= PL_FLAG_EXEC; if (td2->td_dbgflags & TDB_FORK) { pl->pl_flags |= PL_FLAG_FORKED; pl->pl_child_pid = td2->td_dbg_forked; } if (td2->td_dbgflags & TDB_CHILD) pl->pl_flags |= PL_FLAG_CHILD; pl->pl_sigmask = td2->td_sigmask; pl->pl_siglist = td2->td_siglist; strcpy(pl->pl_tdname, td2->td_name); #ifdef COMPAT_FREEBSD32 if (wrap32) ptrace_lwpinfo_to32(pl, pl32); #endif break; case PT_GETNUMLWPS: td->td_retval[0] = p->p_numthreads; break; case PT_GETLWPLIST: if (data <= 0) { error = EINVAL; break; } num = imin(p->p_numthreads, data); PROC_UNLOCK(p); buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; buf[tmp++] = td2->td_tid; } PROC_UNLOCK(p); error = copyout(buf, addr, tmp * 
sizeof(lwpid_t)); free(buf, M_TEMP); if (!error) td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_VM_TIMESTAMP: td->td_retval[0] = p->p_vmspace->vm_map.timestamp; break; case PT_VM_ENTRY: PROC_UNLOCK(p); #ifdef COMPAT_FREEBSD32 if (wrap32) error = ptrace_vm_entry32(td, p, addr); else #endif error = ptrace_vm_entry(td, p, addr); PROC_LOCK(p); break; default: #ifdef __HAVE_PTRACE_MACHDEP if (req >= PT_FIRSTMACH) { PROC_UNLOCK(p); error = cpu_ptrace(td2, req, addr, data); PROC_LOCK(p); } else #endif /* Unknown request. */ error = EINVAL; break; } out: /* Drop our hold on this process now that the request has completed. */ _PRELE(p); fail: PROC_UNLOCK(p); if (proctree_locked) sx_xunlock(&proctree_lock); return (error); } #undef PROC_READ #undef PROC_WRITE /* * Stop a process because of a debugging event; * stay stopped until p->p_step is cleared * (cleared by PIOCCONT in procfs). */ void stopevent(struct proc *p, unsigned int event, unsigned int val) { PROC_LOCK_ASSERT(p, MA_OWNED); p->p_step = 1; do { p->p_xstat = val; p->p_xthread = NULL; p->p_stype = event; /* Which event caused the stop? */ wakeup(&p->p_stype); /* Wake up any PIOCWAIT'ing procs */ msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0); } while (p->p_step); } diff --git a/sys/mips/adm5120/adm5120_machdep.c b/sys/mips/adm5120/adm5120_machdep.c index 29079c804cbf..2713618607eb 100644 --- a/sys/mips/adm5120/adm5120_machdep.c +++ b/sys/mips/adm5120/adm5120_machdep.c @@ -1,145 +1,144 @@ /*- * Copyright (C) 2007 by Oleksandr Tymoshenko. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; printf("entry: mips_init()\n"); bootverbose = 1; realmem = btoc(16 << 20); for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { __asm __volatile("li $25, 0xbfc00000"); __asm __volatile("j $25"); } void platform_start(__register_t a0 __unused, __register_t a1 __unused, __register_t a2 __unused, __register_t a3 __unused) { vm_offset_t kernend; uint64_t platform_counter_freq = 175 * 1000 * 1000; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); cninit(); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/alchemy/alchemy_machdep.c b/sys/mips/alchemy/alchemy_machdep.c index 0593f83cddd2..d7f242ae0ad9 100644 --- a/sys/mips/alchemy/alchemy_machdep.c +++ b/sys/mips/alchemy/alchemy_machdep.c @@ -1,149 +1,148 @@ /*- * Copyright (C) 2007 by Oleksandr Tymoshenko. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; printf("entry: mips_init()\n"); bootverbose = 1; realmem = btoc(16 << 20); for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { __asm __volatile("li $25, 0xbfc00000"); __asm __volatile("j $25"); } void platform_start(__register_t a0 __unused, __register_t a1 __unused, __register_t a2 __unused, __register_t a3 __unused) { vm_offset_t kernend; uint64_t platform_counter_freq = 175 * 1000 * 1000; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); cninit(); mips_init(); /* Set counter_freq for tick_init_params() */ platform_counter_freq = 175 * 1000 * 1000; mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/beri/beri_machdep.c b/sys/mips/beri/beri_machdep.c index 98fcb85aa3aa..f36b0ea1bb12 100644 --- a/sys/mips/beri/beri_machdep.c +++ b/sys/mips/beri/beri_machdep.c @@ -1,209 +1,208 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * Copyright (c) 2012 Robert N. M. Watson * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #include #include #include -#include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* * Perform a board-level soft-reset. * * XXXRW: BERI doesn't yet have a board-level soft-reset. */ void platform_reset(void) { panic("%s: not yet", __func__); } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { vm_offset_t kernend; uint64_t platform_counter_freq; int argc = a0; char **argv = (char **)a1; char **envp = (char **)a2; unsigned int memsize = a3; #ifdef FDT vm_offset_t dtbp; void *kmdp; #endif int i; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); mips_pcpu0_init(); #ifdef FDT /* * Find the dtb passed in by the boot loader (currently fictional). */ kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); else dtbp = (vm_offset_t)NULL; #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #else #error "Non-static FDT not yet supported on BERI" #endif if (OF_install(OFW_FDT, 0) == FALSE) while (1); if (OF_init(&fdt_static_dtb) != 0) while (1); #endif /* * XXXRW: We have no way to compare wallclock time to cycle rate on * BERI, so for now assume we run at the MALTA default (100MHz). */ platform_counter_freq = MIPS_DEFAULT_HZ; mips_timer_early_init(platform_counter_freq); cninit(); printf("entry: platform_start()\n"); bootverbose = 1; if (bootverbose) { printf("cmd line: "); for (i = 0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); printf("envp:\n"); for (i = 0; envp[i]; i += 2) printf("\t%s = %s\n", envp[i], envp[i+1]); printf("memsize = %08x\n", memsize); } realmem = btoc(memsize); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/cavium/octeon_machdep.c b/sys/mips/cavium/octeon_machdep.c index fcf3ab66da14..53a84427a0a6 100644 --- a/sys/mips/cavium/octeon_machdep.c +++ b/sys/mips/cavium/octeon_machdep.c @@ -1,663 +1,662 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__mips_n64) #define MAX_APP_DESC_ADDR 0xffffffffafffffff #else #define MAX_APP_DESC_ADDR 0xafffffff #endif struct octeon_feature_description { octeon_feature_t ofd_feature; const char *ofd_string; }; extern int *end; extern char cpu_model[]; extern char cpu_board[]; static const struct octeon_feature_description octeon_feature_descriptions[] = { { OCTEON_FEATURE_SAAD, "SAAD" }, { OCTEON_FEATURE_ZIP, "ZIP" }, { OCTEON_FEATURE_CRYPTO, "CRYPTO" }, { OCTEON_FEATURE_DORM_CRYPTO, "DORM_CRYPTO" }, { OCTEON_FEATURE_PCIE, "PCIE" }, { OCTEON_FEATURE_SRIO, "SRIO" }, { OCTEON_FEATURE_KEY_MEMORY, "KEY_MEMORY" }, { OCTEON_FEATURE_LED_CONTROLLER, "LED_CONTROLLER" }, { OCTEON_FEATURE_TRA, "TRA" }, { OCTEON_FEATURE_MGMT_PORT, "MGMT_PORT" }, { OCTEON_FEATURE_RAID, "RAID" }, { OCTEON_FEATURE_USB, "USB" }, { OCTEON_FEATURE_NO_WPTR, "NO_WPTR" }, { OCTEON_FEATURE_DFA, "DFA" }, { OCTEON_FEATURE_MDIO_CLAUSE_45, "MDIO_CLAUSE_45" }, { OCTEON_FEATURE_NPEI, "NPEI" }, { OCTEON_FEATURE_ILK, "ILK" }, { OCTEON_FEATURE_HFA, "HFA" }, { OCTEON_FEATURE_DFM, "DFM" }, { OCTEON_FEATURE_CIU2, "CIU2" }, { OCTEON_FEATURE_DICI_MODE, "DICI_MODE" }, { OCTEON_FEATURE_BIT_EXTRACTOR, "BIT_EXTRACTOR" }, { OCTEON_FEATURE_NAND, "NAND" }, { OCTEON_FEATURE_MMC, "MMC" }, { OCTEON_FEATURE_PKND, "PKND" }, { OCTEON_FEATURE_CN68XX_WQE, "CN68XX_WQE" }, { 0, NULL } }; static uint64_t octeon_get_ticks(void); static unsigned octeon_get_timecount(struct timecounter *tc); static void octeon_boot_params_init(register_t ptr); static struct timecounter octeon_timecounter = { octeon_get_timecount, /* get_timecount */ 0, /* no poll_pps */ 0xffffffffu, /* octeon_mask */ 0, /* frequency */ "Octeon", /* name */ 900, /* quality (adjusted in code) */ }; void platform_cpu_init() { /* Nothing special yet */ } /* * Perform a board-level soft-reset. */ void platform_reset(void) { cvmx_write_csr(CVMX_CIU_SOFT_RST, 1); } /* * octeon_debug_symbol * * Does nothing. 
* Used to mark the point for simulator to begin tracing */ void octeon_debug_symbol(void) { } /* * octeon_ciu_reset * * Shutdown all CIU to IP2, IP3 mappings */ void octeon_ciu_reset(void) { uint64_t cvmctl; /* Disable all CIU interrupts by default */ cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num()*2), 0); cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num()*2+1), 0); cvmx_write_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num()*2), 0); cvmx_write_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num()*2+1), 0); #ifdef SMP /* Enable the MBOX interrupts. */ cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num()*2+1), (1ull << (OCTEON_IRQ_MBOX0 - 8)) | (1ull << (OCTEON_IRQ_MBOX1 - 8))); #endif /* * Move the Performance Counter interrupt to OCTEON_PMC_IRQ */ cvmctl = mips_rd_cvmctl(); cvmctl &= ~(7 << 7); cvmctl |= (OCTEON_PMC_IRQ + 2) << 7; mips_wr_cvmctl(cvmctl); } static void octeon_memory_init(void) { vm_paddr_t phys_end; int64_t addr; unsigned i, j; phys_end = round_page(MIPS_KSEG0_TO_PHYS((vm_offset_t)&end)); if (cvmx_sysinfo_get()->board_type == CVMX_BOARD_TYPE_SIM) { /* Simulator we limit to 96 meg */ phys_avail[0] = phys_end; phys_avail[1] = 96 << 20; dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; realmem = physmem = btoc(phys_avail[1] - phys_avail[0]); return; } /* * Allocate memory from bootmem 1MB at a time and merge * adjacent entries. */ i = 0; while (i < PHYS_AVAIL_ENTRIES) { /* * If there is less than 2MB of memory available in 128-byte * blocks, do not steal any more memory. We need to leave some * memory for the command queues to be allocated out of. */ if (cvmx_bootmem_available_mem(128) < 2 << 20) break; addr = cvmx_bootmem_phy_alloc(1 << 20, phys_end, ~(vm_paddr_t)0, PAGE_SIZE, 0); if (addr == -1) break; /* * The SDK needs to be able to easily map any memory that might * come to it e.g. in the form of an mbuf. Because on !n64 we * can't direct-map some addresses and we don't want to manage * temporary mappings within the SDK, don't feed memory that * can't be direct-mapped to the kernel. */ #if !defined(__mips_n64) if (!MIPS_DIRECT_MAPPABLE(addr + (1 << 20) - 1)) continue; #endif physmem += btoc(1 << 20); if (i > 0 && phys_avail[i - 1] == addr) { phys_avail[i - 1] += 1 << 20; continue; } phys_avail[i + 0] = addr; phys_avail[i + 1] = addr + (1 << 20); i += 2; } for (j = 0; j < i; j++) dump_avail[j] = phys_avail[j]; realmem = physmem; } void platform_start(__register_t a0, __register_t a1, __register_t a2 __unused, __register_t a3) { const struct octeon_feature_description *ofd; uint64_t platform_counter_freq; int rv; mips_postboot_fixup(); /* * Initialize boot parameters so that we can determine things like * which console we shoud use, etc. */ octeon_boot_params_init(a3); /* Initialize pcpu stuff */ mips_pcpu0_init(); mips_timer_early_init(cvmx_sysinfo_get()->cpu_clock_hz); /* Initialize console. */ cninit(); /* * Display information about the CPU. */ #if !defined(OCTEON_MODEL) printf("Using runtime CPU model checks.\n"); #else printf("Compiled for CPU model: " __XSTRING(OCTEON_MODEL) "\n"); #endif strcpy(cpu_model, octeon_model_get_string(cvmx_get_proc_id())); printf("CPU Model: %s\n", cpu_model); printf("CPU clock: %uMHz Core Mask: %#x\n", cvmx_sysinfo_get()->cpu_clock_hz / 1000000, cvmx_sysinfo_get()->core_mask); rv = octeon_model_version_check(cvmx_get_proc_id()); if (rv == -1) panic("%s: kernel not compatible with this processor.", __func__); /* * Display information about the board. 
*/ #if defined(OCTEON_BOARD_CAPK_0100ND) strcpy(cpu_board, "CAPK-0100ND"); if (cvmx_sysinfo_get()->board_type != CVMX_BOARD_TYPE_CN3010_EVB_HS5) { panic("Compiled for %s, but board type is %s.", cpu_board, cvmx_board_type_to_string(cvmx_sysinfo_get()->board_type)); } #else strcpy(cpu_board, cvmx_board_type_to_string(cvmx_sysinfo_get()->board_type)); #endif printf("Board: %s\n", cpu_board); printf("Board Type: %u Revision: %u/%u\n", cvmx_sysinfo_get()->board_type, cvmx_sysinfo_get()->board_rev_major, cvmx_sysinfo_get()->board_rev_minor); printf("Serial number: %s\n", cvmx_sysinfo_get()->board_serial_number); /* * Additional on-chip hardware/settings. * * XXX Display PCI host/target? What else? */ printf("MAC address base: %6D (%u configured)\n", cvmx_sysinfo_get()->mac_addr_base, ":", cvmx_sysinfo_get()->mac_addr_count); octeon_ciu_reset(); /* * XXX * We can certainly parse command line arguments or U-Boot environment * to determine whether to bootverbose / single user / ... I think * stass has patches to add support for loader things to U-Boot even. */ bootverbose = 1; /* * For some reason on the cn38xx simulator ebase register is set to * 0x80001000 at bootup time. Move it back to the default, but * when we move to having support for multiple executives, we need * to rethink this. */ mips_wr_ebase(0x80000000); octeon_memory_init(); init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif cpu_clock = cvmx_sysinfo_get()->cpu_clock_hz; platform_counter_freq = cpu_clock; octeon_timecounter.tc_frequency = cpu_clock; platform_timecounter = &octeon_timecounter; mips_timer_init_params(platform_counter_freq, 0); set_cputicker(octeon_get_ticks, cpu_clock, 0); #ifdef SMP /* * Clear any pending IPIs. */ cvmx_write_csr(CVMX_CIU_MBOX_CLRX(0), 0xffffffff); #endif printf("Octeon SDK: %s\n", OCTEON_SDK_VERSION_STRING); printf("Available Octeon features:"); for (ofd = octeon_feature_descriptions; ofd->ofd_string != NULL; ofd++) if (octeon_has_feature(ofd->ofd_feature)) printf(" %s", ofd->ofd_string); printf("\n"); } static uint64_t octeon_get_ticks(void) { uint64_t cvmcount; CVMX_MF_CYCLE(cvmcount); return (cvmcount); } static unsigned octeon_get_timecount(struct timecounter *tc) { return ((unsigned)octeon_get_ticks()); } static int sysctl_machdep_led_display(SYSCTL_HANDLER_ARGS) { size_t buflen; char buf[9]; int error; if (req->newptr == NULL) return (EINVAL); if (cvmx_sysinfo_get()->led_display_base_addr == 0) return (ENODEV); /* * Revision 1.x of the EBT3000 only supports 4 characters, but * other devices support 8. */ if (cvmx_sysinfo_get()->board_type == CVMX_BOARD_TYPE_EBT3000 && cvmx_sysinfo_get()->board_rev_major == 1) buflen = 4; else buflen = 8; if (req->newlen > buflen) return (E2BIG); error = SYSCTL_IN(req, buf, req->newlen); if (error != 0) return (error); buf[req->newlen] = '\0'; ebt3000_str_write(buf); return (0); } SYSCTL_PROC(_machdep, OID_AUTO, led_display, CTLTYPE_STRING | CTLFLAG_WR, NULL, 0, sysctl_machdep_led_display, "A", "String to display on LED display"); void cvmx_dvprintf(const char *fmt, va_list ap) { if (!bootverbose) return; vprintf(fmt, ap); } void cvmx_dprintf(const char *fmt, ...) { va_list ap; va_start(ap, fmt); cvmx_dvprintf(fmt, ap); va_end(ap); } /** * version of printf that works better in exception context. * * @param format * * XXX If this function weren't in cvmx-interrupt.c, we'd use the SDK version. 
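 * The version below formats into a fixed 256-byte buffer and then spins on
 * the UART's transmitter-empty bit (LSR.TEMT) for every character, writing
 * a CR before each LF, so output still works when the regular console path
 * or interrupts are unavailable.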
*/ void cvmx_safe_printf(const char *format, ...) { char buffer[256]; char *ptr = buffer; int count; va_list args; va_start(args, format); #ifndef __U_BOOT__ count = vsnprintf(buffer, sizeof(buffer), format, args); #else count = vsprintf(buffer, format, args); #endif va_end(args); while (count-- > 0) { cvmx_uart_lsr_t lsrval; /* Spin until there is room */ do { lsrval.u64 = cvmx_read_csr(CVMX_MIO_UARTX_LSR(0)); #if !defined(CONFIG_OCTEON_SIM_SPEED) if (lsrval.s.temt == 0) cvmx_wait(10000); /* Just to reduce the load on the system */ #endif } while (lsrval.s.temt == 0); if (*ptr == '\n') cvmx_write_csr(CVMX_MIO_UARTX_THR(0), '\r'); cvmx_write_csr(CVMX_MIO_UARTX_THR(0), *ptr++); } } /* impSTART: This stuff should move back into the Cavium SDK */ /* **************************************************************************************** * * APP/BOOT DESCRIPTOR STUFF * **************************************************************************************** */ /* Define the struct that is initialized by the bootloader used by the * startup code. * * Copyright (c) 2004, 2005, 2006 Cavium Networks. * * The authors hereby grant permission to use, copy, modify, distribute, * and license this software and its documentation for any purpose, provided * that existing copyright notices are retained in all copies and that this * notice is included verbatim in any distributions. No written agreement, * license, or royalty fee is required for any of the authorized uses. * Modifications to this software may be copyrighted by their authors * and need not follow the licensing terms described here, provided that * the new terms are clearly indicated on the first page of each file where * they apply. */ #define OCTEON_CURRENT_DESC_VERSION 6 #define OCTEON_ARGV_MAX_ARGS (64) #define OCTOEN_SERIAL_LEN 20 typedef struct { /* Start of block referenced by assembly code - do not change! */ uint32_t desc_version; uint32_t desc_size; uint64_t stack_top; uint64_t heap_base; uint64_t heap_end; uint64_t entry_point; /* Only used by bootloader */ uint64_t desc_vaddr; /* End of This block referenced by assembly code - do not change! */ uint32_t exception_base_addr; uint32_t stack_size; uint32_t heap_size; uint32_t argc; /* Argc count for application */ uint32_t argv[OCTEON_ARGV_MAX_ARGS]; uint32_t flags; uint32_t core_mask; uint32_t dram_size; /**< DRAM size in megabyes */ uint32_t phy_mem_desc_addr; /**< physical address of free memory descriptor block*/ uint32_t debugger_flags_base_addr; /**< used to pass flags from app to debugger */ uint32_t eclock_hz; /**< CPU clock speed, in hz */ uint32_t dclock_hz; /**< DRAM clock speed, in hz */ uint32_t spi_clock_hz; /**< SPI4 clock in hz */ uint16_t board_type; uint8_t board_rev_major; uint8_t board_rev_minor; uint16_t chip_type; uint8_t chip_rev_major; uint8_t chip_rev_minor; char board_serial_number[OCTOEN_SERIAL_LEN]; uint8_t mac_addr_base[6]; uint8_t mac_addr_count; uint64_t cvmx_desc_vaddr; } octeon_boot_descriptor_t; static cvmx_bootinfo_t * octeon_process_app_desc_ver_6(octeon_boot_descriptor_t *app_desc_ptr) { cvmx_bootinfo_t *octeon_bootinfo; /* XXX Why is 0x00000000ffffffffULL a bad value? 
*/ if (app_desc_ptr->cvmx_desc_vaddr == 0 || app_desc_ptr->cvmx_desc_vaddr == 0xfffffffful) { cvmx_safe_printf("Bad octeon_bootinfo %#jx\n", (uintmax_t)app_desc_ptr->cvmx_desc_vaddr); return (NULL); } octeon_bootinfo = cvmx_phys_to_ptr(app_desc_ptr->cvmx_desc_vaddr); if (octeon_bootinfo->major_version != 1) { cvmx_safe_printf("Incompatible CVMX descriptor from bootloader: %d.%d %p\n", (int) octeon_bootinfo->major_version, (int) octeon_bootinfo->minor_version, octeon_bootinfo); return (NULL); } cvmx_sysinfo_minimal_initialize(octeon_bootinfo->phy_mem_desc_addr, octeon_bootinfo->board_type, octeon_bootinfo->board_rev_major, octeon_bootinfo->board_rev_minor, octeon_bootinfo->eclock_hz); memcpy(cvmx_sysinfo_get()->mac_addr_base, octeon_bootinfo->mac_addr_base, 6); cvmx_sysinfo_get()->mac_addr_count = octeon_bootinfo->mac_addr_count; cvmx_sysinfo_get()->compact_flash_common_base_addr = octeon_bootinfo->compact_flash_common_base_addr; cvmx_sysinfo_get()->compact_flash_attribute_base_addr = octeon_bootinfo->compact_flash_attribute_base_addr; cvmx_sysinfo_get()->core_mask = octeon_bootinfo->core_mask; cvmx_sysinfo_get()->led_display_base_addr = octeon_bootinfo->led_display_base_addr; memcpy(cvmx_sysinfo_get()->board_serial_number, octeon_bootinfo->board_serial_number, sizeof cvmx_sysinfo_get()->board_serial_number); return (octeon_bootinfo); } static void octeon_boot_params_init(register_t ptr) { octeon_boot_descriptor_t *app_desc_ptr; cvmx_bootinfo_t *octeon_bootinfo; if (ptr == 0 || ptr >= MAX_APP_DESC_ADDR) { cvmx_safe_printf("app descriptor passed at invalid address %#jx\n", (uintmax_t)ptr); platform_reset(); } app_desc_ptr = (octeon_boot_descriptor_t *)(intptr_t)ptr; if (app_desc_ptr->desc_version < 6) { cvmx_safe_printf("Your boot code is too old to be supported.\n"); platform_reset(); } octeon_bootinfo = octeon_process_app_desc_ver_6(app_desc_ptr); if (octeon_bootinfo == NULL) { cvmx_safe_printf("Could not parse boot descriptor.\n"); platform_reset(); } if (cvmx_sysinfo_get()->led_display_base_addr != 0) { /* * Revision 1.x of the EBT3000 only supports 4 characters, but * other devices support 8. */ if (cvmx_sysinfo_get()->board_type == CVMX_BOARD_TYPE_EBT3000 && cvmx_sysinfo_get()->board_rev_major == 1) ebt3000_str_write("FBSD"); else ebt3000_str_write("FreeBSD!"); } if (cvmx_sysinfo_get()->phy_mem_desc_addr == (uint64_t)0) { cvmx_safe_printf("Your boot loader did not supply a memory descriptor.\n"); platform_reset(); } cvmx_bootmem_init(cvmx_sysinfo_get()->phy_mem_desc_addr); octeon_feature_init(); __cvmx_helper_cfg_init(); } /* impEND: This stuff should move back into the Cavium SDK */ diff --git a/sys/mips/gxemul/gxemul_machdep.c b/sys/mips/gxemul/gxemul_machdep.c index 8996919d3e6a..a068efc7b4ed 100644 --- a/sys/mips/gxemul/gxemul_machdep.c +++ b/sys/mips/gxemul/gxemul_machdep.c @@ -1,238 +1,237 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #ifdef SMP #include #include #endif #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* * Perform a board-level soft-reset. * * XXXRW: Does gxemul have a moral equivalent to board-level reset? */ void platform_reset(void) { panic("%s: not yet", __func__); } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { vm_offset_t kernend; uint64_t platform_counter_freq; int argc = a0; char **argv = (char **)a1; char **envp = (char **)a2; int i; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); mips_pcpu0_init(); /* * XXXRW: Support for the gxemul real-time clock required in order to * usefully determine our emulated timer frequency. Go with something * classic as the default in the mean time. */ platform_counter_freq = MIPS_DEFAULT_HZ; mips_timer_early_init(platform_counter_freq); cninit(); printf("entry: platform_start()\n"); bootverbose = 1; if (bootverbose) { printf("cmd line: "); for (i = 0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); if (envp != NULL) { printf("envp:\n"); for (i = 0; envp[i]; i += 2) printf("\t%s = %s\n", envp[i], envp[i+1]); } else { printf("no envp.\n"); } } realmem = btoc(GXEMUL_MP_DEV_READ(GXEMUL_MP_DEV_MEMORY)); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } #ifdef SMP void platform_ipi_send(int cpuid) { GXEMUL_MP_DEV_WRITE(GXEMUL_MP_DEV_IPI_ONE, (1 << 16) | cpuid); } void platform_ipi_clear(void) { GXEMUL_MP_DEV_WRITE(GXEMUL_MP_DEV_IPI_READ, 0); } int platform_ipi_intrnum(void) { return (GXEMUL_MP_DEV_IPI_INTERRUPT - 2); } struct cpu_group * platform_smp_topo(void) { return (smp_topo_none()); } void platform_init_ap(int cpuid) { int ipi_int_mask, clock_int_mask; /* * Unmask the clock and ipi interrupts. 
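 * On MIPS, hardware interrupt 5 conventionally carries the CP0
 * count/compare (clock) interrupt, and the IPI line is whatever
 * platform_ipi_intrnum() reports for the gxemul MP device, so the AP's
 * interrupt mask is set to accept exactly those two sources.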
*/ clock_int_mask = hard_int_mask(5); ipi_int_mask = hard_int_mask(platform_ipi_intrnum()); set_intr_mask(ipi_int_mask | clock_int_mask); } void platform_cpu_mask(cpuset_t *mask) { unsigned i, n; n = GXEMUL_MP_DEV_READ(GXEMUL_MP_DEV_NCPUS); CPU_ZERO(mask); for (i = 0; i < n; i++) CPU_SET(i, mask); } int platform_processor_id(void) { return (GXEMUL_MP_DEV_READ(GXEMUL_MP_DEV_WHOAMI)); } int platform_start_ap(int cpuid) { GXEMUL_MP_DEV_WRITE(GXEMUL_MP_DEV_STARTADDR, (intptr_t)mpentry); GXEMUL_MP_DEV_WRITE(GXEMUL_MP_DEV_START, cpuid); return (0); } #endif /* SMP */ diff --git a/sys/mips/idt/idt_machdep.c b/sys/mips/idt/idt_machdep.c index ac18748f5672..b48931987d29 100644 --- a/sys/mips/idt/idt_machdep.c +++ b/sys/mips/idt/idt_machdep.c @@ -1,179 +1,178 @@ /*- * Copyright (C) 2007 by Oleksandr Tymoshenko. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * $Id: $ * */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } void platform_reset(void) { volatile unsigned int * p = (void *)0xb8008000; /* * TODO: we should take care of TLB stuff here. 
Otherwise * board does not boots properly next time */ /* Write 0x8000_0001 to the Reset register */ *p = 0x80000001; __asm __volatile("li $25, 0xbfc00000"); __asm __volatile("j $25"); } void platform_start(__register_t a0, __register_t a1, __register_t a2 __unused, __register_t a3 __unused) { uint64_t platform_counter_freq; vm_offset_t kernend; int argc = a0; char **argv = (char **)a1; int i, mem; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); /* * Looking for mem=XXM argument */ mem = 0; /* Just something to start with */ for (i=0; i < argc; i++) { if (strncmp(argv[i], "mem=", 4) == 0) { mem = strtol(argv[i] + 4, NULL, 0); break; } } bootverbose = 1; if (mem > 0) realmem = btoc(mem << 20); else realmem = btoc(32 << 20); for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; /* * ns8250 uart code uses DELAY so ticker should be inititalized * before cninit. And tick_init_params refers to hz, so * init_param1 * should be called first. */ init_param1(); /* TODO: parse argc,argv */ platform_counter_freq = 330000000UL; mips_timer_init_params(platform_counter_freq, 1); cninit(); /* Panic here, after cninit */ if (mem == 0) panic("No mem=XX parameter in arguments"); printf("cmd line: "); for (i=0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } diff --git a/sys/mips/malta/malta_machdep.c b/sys/mips/malta/malta_machdep.c index 8efb98778b60..27cfa9f664cb 100644 --- a/sys/mips/malta/malta_machdep.c +++ b/sys/mips/malta/malta_machdep.c @@ -1,304 +1,303 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #ifdef TICK_USE_YAMON_FREQ #include #endif #ifdef TICK_USE_MALTA_RTC #include #include #include #endif #include extern int *edata; extern int *end; void lcd_init(void); void lcd_puts(char *); void malta_reset(void); /* * Offsets to MALTA LCD characters. */ static int malta_lcd_offs[] = { MALTA_ASCIIPOS0, MALTA_ASCIIPOS1, MALTA_ASCIIPOS2, MALTA_ASCIIPOS3, MALTA_ASCIIPOS4, MALTA_ASCIIPOS5, MALTA_ASCIIPOS6, MALTA_ASCIIPOS7 }; void platform_cpu_init() { /* Nothing special */ } /* * Put character to Malta LCD at given position. */ static void malta_lcd_putc(int pos, char c) { void *addr; char *ch; if (pos < 0 || pos > 7) return; addr = (void *)(MALTA_ASCII_BASE + malta_lcd_offs[pos]); ch = (char *)MIPS_PHYS_TO_KSEG0(addr); *ch = c; } /* * Print given string on LCD. */ static void malta_lcd_print(char *str) { int i; if (str == NULL) return; for (i = 0; *str != '\0'; i++, str++) malta_lcd_putc(i, *str); } void lcd_init(void) { malta_lcd_print("FreeBSD_"); } void lcd_puts(char *s) { malta_lcd_print(s); } #ifdef TICK_USE_MALTA_RTC static __inline uint8_t rtcin(uint8_t addr) { *((volatile uint8_t *) MIPS_PHYS_TO_KSEG1(MALTA_PCI0_ADDR(MALTA_RTCADR))) = addr; return (*((volatile uint8_t *) MIPS_PHYS_TO_KSEG1(MALTA_PCI0_ADDR(MALTA_RTCDAT)))); } static __inline void writertc(uint8_t addr, uint8_t val) { *((volatile uint8_t *) MIPS_PHYS_TO_KSEG1(MALTA_PCI0_ADDR(MALTA_RTCADR))) = addr; *((volatile uint8_t *) MIPS_PHYS_TO_KSEG1(MALTA_PCI0_ADDR(MALTA_RTCDAT))) = val; } #endif static void mips_init(void) { int i; for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* * Perform a board-level soft-reset. * Note that this is not emulated by gxemul. */ void platform_reset(void) { char *c; c = (char *)MIPS_PHYS_TO_KSEG0(MALTA_SOFTRES); *c = MALTA_GORESET; } static uint64_t malta_cpu_freq(void) { uint64_t platform_counter_freq = 0; #if defined(TICK_USE_YAMON_FREQ) /* * If we are running on a board which uses YAMON firmware, * then query CPU pipeline clock from the syscon object. * If unsuccessful, use hard-coded default. */ platform_counter_freq = yamon_getcpufreq(); #elif defined(TICK_USE_MALTA_RTC) /* * If we are running on a board with the MC146818 RTC, * use it to determine CPU pipeline clock frequency. */ u_int64_t counterval[2]; /* Set RTC to binary mode. */ writertc(RTC_STATUSB, (rtcin(RTC_STATUSB) | RTCSB_BCD)); /* Busy-wait for falling edge of RTC update. */ while (((rtcin(RTC_STATUSA) & RTCSA_TUP) == 0)) ; while (((rtcin(RTC_STATUSA)& RTCSA_TUP) != 0)) ; counterval[0] = mips_rd_count(); /* Busy-wait for falling edge of RTC update. 
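 * The RTC's update-in-progress bit cycles once per second, so sampling the
 * CP0 Count register at two consecutive falling edges brackets one second
 * of wall time; counterval[1] - counterval[0] is then the counter
 * frequency in Hz (a delta of roughly 100000000, for example, means the
 * counter ticks at about 100 MHz).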
*/ while (((rtcin(RTC_STATUSA) & RTCSA_TUP) == 0)) ; while (((rtcin(RTC_STATUSA)& RTCSA_TUP) != 0)) ; counterval[1] = mips_rd_count(); platform_counter_freq = counterval[1] - counterval[0]; #endif if (platform_counter_freq == 0) platform_counter_freq = MIPS_DEFAULT_HZ; return (platform_counter_freq); } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { vm_offset_t kernend; uint64_t platform_counter_freq; int argc = a0; char **argv = (char **)a1; char **envp = (char **)a2; unsigned int memsize = a3; int i; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); mips_pcpu0_init(); platform_counter_freq = malta_cpu_freq(); mips_timer_early_init(platform_counter_freq); cninit(); printf("entry: platform_start()\n"); bootverbose = 1; if (bootverbose) { printf("cmd line: "); for (i = 0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); printf("envp:\n"); for (i = 0; envp[i]; i += 2) printf("\t%s = %s\n", envp[i], envp[i+1]); printf("memsize = %08x\n", memsize); } realmem = btoc(memsize); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/rt305x/rt305x_machdep.c b/sys/mips/rt305x/rt305x_machdep.c index 33c131cb0666..07342bde9eda 100644 --- a/sys/mips/rt305x/rt305x_machdep.c +++ b/sys/mips/rt305x/rt305x_machdep.c @@ -1,192 +1,191 @@ /*- * Copyright (C) 2010-2011 by Aleksandr Rybalko. All rights reserved. * Copyright (C) 2007 by Oleksandr Tymoshenko. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; static char boot1_env[0x1000]; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; printf("entry: mips_init()\n"); bootverbose = 1; realmem = btoc(32 << 20); for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ dump_avail[0] = phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); dump_avail[1] = phys_avail[1] = ctob(realmem); physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { __asm __volatile("li $25, 0xbf000000"); __asm __volatile("j $25"); } void platform_start(__register_t a0 __unused, __register_t a1 __unused, __register_t a2 __unused, __register_t a3 __unused) { vm_offset_t kernend; uint64_t platform_counter_freq = PLATFORM_COUNTER_FREQ; int i; int argc = a0; char **argv = (char **)MIPS_PHYS_TO_KSEG0(a1); char **envp = (char **)MIPS_PHYS_TO_KSEG0(a2); /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); /* initialize console so that we have printf */ boothowto |= (RB_SERIAL | RB_MULTIPLE); /* Use multiple consoles */ boothowto |= (RB_VERBOSE); cninit(); init_static_kenv(boot1_env, sizeof(boot1_env)); printf("U-Boot args (from %d args):\n", argc - 1); if (argc == 1) printf("\tNone\n"); for (i = 1; i < argc; i++) { char *n = "argv ", *arg; if (i > 99) break; if (argv[i]) { arg = (char *)(intptr_t)MIPS_PHYS_TO_KSEG0(argv[i]); printf("\targv[%d] = %s\n", i, arg); sprintf(n, "argv%d", i); setenv(n, arg); } } printf("Environment:\n"); for (i = 0; envp[i] ; i++) { char *n, *arg; arg = (char *)(intptr_t)MIPS_PHYS_TO_KSEG0(envp[i]); printf("\t%s\n", arg); n = strsep(&arg, "="); if (arg == NULL) setenv(n, "1"); else setenv(n, arg); } mips_init(); mips_timer_init_params(platform_counter_freq, 2); } diff --git a/sys/mips/sentry5/s5_machdep.c b/sys/mips/sentry5/s5_machdep.c index cde081bc92d9..a10b48d2bf9b 100644 --- a/sys/mips/sentry5/s5_machdep.c +++ b/sys/mips/sentry5/s5_machdep.c @@ -1,224 +1,223 @@ /*- * Copyright (c) 2007 Bruce M. Simpson. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CFE #include #endif extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i, j; printf("entry: mips_init()\n"); #ifdef CFE /* * Query DRAM memory map from CFE. */ physmem = 0; for (i = 0; i < 10; i += 2) { int result; uint64_t addr, len, type; result = cfe_enummem(i, 0, &addr, &len, &type); if (result < 0) { phys_avail[i] = phys_avail[i + 1] = 0; break; } if (type != CFE_MI_AVAILABLE) continue; phys_avail[i] = addr; if (i == 0 && addr == 0) { /* * If this is the first physical memory segment probed * from CFE, omit the region at the start of physical * memory where the kernel has been loaded. */ phys_avail[i] += MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); } phys_avail[i + 1] = addr + len; physmem += len; } realmem = btoc(physmem); #endif for (j = 0; j < i; j++) dump_avail[j] = phys_avail[j]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { #if defined(CFE) cfe_exit(0, 0); #else *((volatile uint8_t *)MIPS_PHYS_TO_KSEG1(SENTRY5_EXTIFADR)) = 0x80; #endif } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { vm_offset_t kernend; uint64_t platform_counter_freq; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); #ifdef CFE /* * Initialize CFE firmware trampolines before * we initialize the low-level console. * * CFE passes the following values in registers: * a0: firmware handle * a2: firmware entry point * a3: entry point seal */ if (a3 == CFE_EPTSEAL) cfe_init(a0, a2); #endif cninit(); mips_init(); # if 0 /* * Probe the Broadcom Sentry5's on-chip PLL clock registers * and discover the CPU pipeline clock and bus clock * multipliers from this. * XXX: Wrong place. You have to ask the ChipCommon * or External Interface cores on the SiBa. 
*/ uint32_t busmult, cpumult, refclock, clkcfg1; #define S5_CLKCFG1_REFCLOCK_MASK 0x0000001F #define S5_CLKCFG1_BUSMULT_MASK 0x000003E0 #define S5_CLKCFG1_BUSMULT_SHIFT 5 #define S5_CLKCFG1_CPUMULT_MASK 0xFFFFFC00 #define S5_CLKCFG1_CPUMULT_SHIFT 10 counter_freq = 100000000; /* XXX */ clkcfg1 = s5_rd_clkcfg1(); printf("clkcfg1 = 0x%08x\n", clkcfg1); refclock = clkcfg1 & 0x1F; busmult = ((clkcfg1 & 0x000003E0) >> 5) + 1; cpumult = ((clkcfg1 & 0xFFFFFC00) >> 10) + 1; printf("refclock = %u\n", refclock); printf("busmult = %u\n", busmult); printf("cpumult = %u\n", cpumult); counter_freq = cpumult * refclock; # else platform_counter_freq = 200 * 1000 * 1000; /* Sentry5 is 200MHz */ # endif mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/sibyte/sb_machdep.c b/sys/mips/sibyte/sb_machdep.c index 452eea6fa631..5e7c2fded872 100644 --- a/sys/mips/sibyte/sb_machdep.c +++ b/sys/mips/sibyte/sb_machdep.c @@ -1,433 +1,432 @@ /*- * Copyright (c) 2007 Bruce M. Simpson. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kdb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #include #endif #ifdef CFE #include #endif #include "sb_scd.h" #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #endif #ifdef CFE_ENV extern void cfe_env_init(void); #endif extern int *edata; extern int *end; extern char MipsTLBMiss[], MipsTLBMissEnd[]; void platform_cpu_init() { /* Nothing special */ } static void sb_intr_init(int cpuid) { int intrnum, intsrc; /* * Disable all sources to the interrupt mapper and setup the mapping * between an interrupt source and the mips hard interrupt number. */ for (intsrc = 0; intsrc < NUM_INTSRC; ++intsrc) { intrnum = sb_route_intsrc(intsrc); sb_disable_intsrc(cpuid, intsrc); sb_write_intmap(cpuid, intsrc, intrnum); #ifdef SMP /* * Set up the mailbox interrupt mapping. 
* * The mailbox interrupt is "special" in that it is not shared * with any other interrupt source. */ if (intsrc == INTSRC_MAILBOX3) { intrnum = platform_ipi_intrnum(); sb_write_intmap(cpuid, INTSRC_MAILBOX3, intrnum); sb_enable_intsrc(cpuid, INTSRC_MAILBOX3); } #endif } } static void mips_init(void) { int i, j, cfe_mem_idx, tmp; uint64_t maxmem; #ifdef CFE_ENV cfe_env_init(); #endif TUNABLE_INT_FETCH("boothowto", &boothowto); if (boothowto & RB_VERBOSE) bootverbose++; #ifdef MAXMEM tmp = MAXMEM; #else tmp = 0; #endif TUNABLE_INT_FETCH("hw.physmem", &tmp); maxmem = (uint64_t)tmp * 1024; /* * XXX * If we used vm_paddr_t consistently in pmap, etc., we could * use 64-bit page numbers on !n64 systems, too, like i386 * does with PAE. */ #if !defined(__mips_n64) if (maxmem == 0 || maxmem > 0xffffffff) maxmem = 0xffffffff; #endif #ifdef CFE /* * Query DRAM memory map from CFE. */ physmem = 0; cfe_mem_idx = 0; for (i = 0; i < 10; i += 2) { int result; uint64_t addr, len, type; result = cfe_enummem(cfe_mem_idx++, 0, &addr, &len, &type); if (result < 0) { phys_avail[i] = phys_avail[i + 1] = 0; break; } KASSERT(type == CFE_MI_AVAILABLE, ("CFE DRAM region is not available?")); if (bootverbose) printf("cfe_enummem: 0x%016jx/%ju.\n", addr, len); if (maxmem != 0) { if (addr >= maxmem) { printf("Ignoring %ju bytes of memory at 0x%jx " "that is above maxmem %dMB\n", len, addr, (int)(maxmem / (1024 * 1024))); continue; } if (addr + len > maxmem) { printf("Ignoring %ju bytes of memory " "that is above maxmem %dMB\n", (addr + len) - maxmem, (int)(maxmem / (1024 * 1024))); len = maxmem - addr; } } phys_avail[i] = addr; if (i == 0 && addr == 0) { /* * If this is the first physical memory segment probed * from CFE, omit the region at the start of physical * memory where the kernel has been loaded. */ phys_avail[i] += MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); } phys_avail[i + 1] = addr + len; physmem += len; } realmem = btoc(physmem); #endif for (j = 0; j < i; j++) dump_avail[j] = phys_avail[j]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); /* * Sibyte has a L1 data cache coherent with DMA. This includes * on-chip network interfaces as well as PCI/HyperTransport bus * masters. */ cpuinfo.cache_coherent_dma = TRUE; /* * XXX * The kernel is running in 32-bit mode but the CFE is running in * 64-bit mode. So the SR_KX bit in the status register is turned * on by the CFE every time we call into it - for e.g. CFE_CONSOLE. * * This means that if get a TLB miss for any address above 0xc0000000 * and the SR_KX bit is set then we will end up in the XTLB exception * vector. * * For now work around this by copying the TLB exception handling * code to the XTLB exception vector. 
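 *
 * Concretely, the block that follows copies the 32-bit TLB refill
 * handler (MipsTLBMiss .. MipsTLBMissEnd) over the XTLB miss vector
 * and then synchronizes the instruction and data caches so the copied
 * handler is actually fetched on the next miss.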
*/ { bcopy(MipsTLBMiss, (void *)MIPS_XTLB_MISS_EXC_VEC, MipsTLBMissEnd - MipsTLBMiss); mips_icache_sync_all(); mips_dcache_wbinv_all(); } pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { /* * XXX SMP * XXX flush data caches */ sb_system_reset(); } static void kseg0_map_coherent(void) { uint32_t config; const int CFG_K0_COHERENT = 5; config = mips_rd_config(); config &= ~MIPS_CONFIG_K0_MASK; config |= CFG_K0_COHERENT; mips_wr_config(config); } #ifdef SMP void platform_ipi_send(int cpuid) { KASSERT(cpuid == 0 || cpuid == 1, ("platform_ipi_send: invalid cpuid %d", cpuid)); sb_set_mailbox(cpuid, 1ULL); } void platform_ipi_clear(void) { int cpuid; cpuid = PCPU_GET(cpuid); sb_clear_mailbox(cpuid, 1ULL); } int platform_ipi_intrnum(void) { return (4); } struct cpu_group * platform_smp_topo(void) { return (smp_topo_none()); } void platform_init_ap(int cpuid) { int ipi_int_mask, clock_int_mask; KASSERT(cpuid == 1, ("AP has an invalid cpu id %d", cpuid)); /* * Make sure that kseg0 is mapped cacheable-coherent */ kseg0_map_coherent(); sb_intr_init(cpuid); /* * Unmask the clock and ipi interrupts. */ clock_int_mask = hard_int_mask(5); ipi_int_mask = hard_int_mask(platform_ipi_intrnum()); set_intr_mask(ipi_int_mask | clock_int_mask); } int platform_start_ap(int cpuid) { #ifdef CFE int error; if ((error = cfe_cpu_start(cpuid, mpentry, 0, 0, 0))) { printf("cfe_cpu_start error: %d\n", error); return (-1); } else { return (0); } #else return (-1); #endif /* CFE */ } #endif /* SMP */ static u_int sb_get_timecount(struct timecounter *tc) { return ((u_int)sb_zbbus_cycle_count()); } static void sb_timecounter_init(void) { static struct timecounter sb_timecounter = { sb_get_timecount, NULL, ~0u, 0, "sibyte_zbbus_counter", 2000 }; /* * The ZBbus cycle counter runs at half the cpu frequency. */ sb_timecounter.tc_frequency = sb_cpu_speed() / 2; platform_timecounter = &sb_timecounter; } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { /* * Make sure that kseg0 is mapped cacheable-coherent */ kseg0_map_coherent(); /* clear the BSS and SBSS segments */ memset(&edata, 0, (vm_offset_t)&end - (vm_offset_t)&edata); mips_postboot_fixup(); sb_intr_init(0); sb_timecounter_init(); /* Initialize pcpu stuff */ mips_pcpu0_init(); #ifdef CFE /* * Initialize CFE firmware trampolines before * we initialize the low-level console. * * CFE passes the following values in registers: * a0: firmware handle * a2: firmware entry point * a3: entry point seal */ if (a3 == CFE_EPTSEAL) cfe_init(a0, a2); #endif cninit(); mips_init(); mips_timer_init_params(sb_cpu_speed(), 0); } diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index b17376039ebb..24350c44f956 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1,2544 +1,2543 @@ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is also stored by the * logical address mapping module, this module may throw away valid virtual * to physical mappings at almost any time. However, invalidations of * mappings must be done as requested. * * In order to cope with hardware architectures which make virtual to * physical map invalidates expensive, this module may delay invalidate * reduced protection operations until such time as they are actually * necessary. This module is given full information as to which processors * are currently using which maps, and to when physical maps must be made * correct. */ #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_if.h" #define MOEA_DEBUG #define TODO panic("%s: not implemented", __func__); #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_SR(vsid) ((vsid) & 0xf) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) struct ofw_map { vm_offset_t om_va; vm_size_t om_len; vm_offset_t om_pa; u_int om_mode; }; /* * Map of physical memory regions. */ static struct mem_region *regions; static struct mem_region *pregions; static u_int phys_avail_count; static int regions_sz, pregions_sz; static struct ofw_map *translations; /* * Lock for the pteg and pvo tables. */ struct mtx moea_table_mutex; struct mtx moea_vsid_mutex; /* tlbie instruction synchronization */ static struct mtx tlbie_mtx; /* * PTEG data. */ static struct pteg *moea_pteg_table; u_int moea_pteg_count; u_int moea_pteg_mask; /* * PVO data. */ struct pvo_head *moea_pvo_table; /* pvo entries by pteg index */ struct pvo_head moea_pvo_kunmanaged = LIST_HEAD_INITIALIZER(moea_pvo_kunmanaged); /* list of unmanaged pages */ static struct rwlock_padalign pvh_global_lock; uma_zone_t moea_upvo_zone; /* zone for pvo entries for unmanaged pages */ uma_zone_t moea_mpvo_zone; /* zone for pvo entries for managed pages */ #define BPVO_POOL_SIZE 32768 static struct pvo_entry *moea_bpvo_pool; static int moea_bpvo_pool_index = 0; #define VSID_NBPW (sizeof(u_int32_t) * 8) static u_int moea_vsid_bitmap[NPMAPS / VSID_NBPW]; static boolean_t moea_initialized = FALSE; /* * Statistics. 
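 *
 * The counters below are exported read-only through the machdep
 * sysctl tree (machdep.moea_pte_valid, machdep.moea_pte_overflow,
 * etc.).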
*/ u_int moea_pte_valid = 0; u_int moea_pte_overflow = 0; u_int moea_pte_replacements = 0; u_int moea_pvo_entries = 0; u_int moea_pvo_enter_calls = 0; u_int moea_pvo_remove_calls = 0; u_int moea_pte_spills = 0; SYSCTL_INT(_machdep, OID_AUTO, moea_pte_valid, CTLFLAG_RD, &moea_pte_valid, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pte_overflow, CTLFLAG_RD, &moea_pte_overflow, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pte_replacements, CTLFLAG_RD, &moea_pte_replacements, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_entries, CTLFLAG_RD, &moea_pvo_entries, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_enter_calls, CTLFLAG_RD, &moea_pvo_enter_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_remove_calls, CTLFLAG_RD, &moea_pvo_remove_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pte_spills, CTLFLAG_RD, &moea_pte_spills, 0, ""); /* * Allocate physical memory for use in moea_bootstrap. */ static vm_offset_t moea_bootstrap_alloc(vm_size_t, u_int); /* * PTE calls. */ static int moea_pte_insert(u_int, struct pte *); /* * PVO calls. */ static int moea_pvo_enter(pmap_t, uma_zone_t, struct pvo_head *, vm_offset_t, vm_offset_t, u_int, int); static void moea_pvo_remove(struct pvo_entry *, int); static struct pvo_entry *moea_pvo_find_va(pmap_t, vm_offset_t, int *); static struct pte *moea_pvo_to_pte(const struct pvo_entry *, int); /* * Utility routines. */ static void moea_enter_locked(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); static void moea_syncicache(vm_offset_t, vm_size_t); static boolean_t moea_query_bit(vm_page_t, int); static u_int moea_clear_bit(vm_page_t, int); static void moea_kremove(mmu_t, vm_offset_t); int moea_pte_spill(vm_offset_t); /* * Kernel MMU interface */ void moea_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); void moea_clear_modify(mmu_t, vm_page_t); void moea_clear_reference(mmu_t, vm_page_t); void moea_copy_page(mmu_t, vm_page_t, vm_page_t); void moea_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); void moea_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); void moea_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); vm_paddr_t moea_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea_init(mmu_t); boolean_t moea_is_modified(mmu_t, vm_page_t); boolean_t moea_is_prefaultable(mmu_t, pmap_t, vm_offset_t); boolean_t moea_is_referenced(mmu_t, vm_page_t); int moea_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); boolean_t moea_page_exists_quick(mmu_t, pmap_t, vm_page_t); int moea_page_wired_mappings(mmu_t, vm_page_t); void moea_pinit(mmu_t, pmap_t); void moea_pinit0(mmu_t, pmap_t); void moea_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); void moea_qenter(mmu_t, vm_offset_t, vm_page_t *, int); void moea_qremove(mmu_t, vm_offset_t, int); void moea_release(mmu_t, pmap_t); void moea_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea_remove_all(mmu_t, vm_page_t); void moea_remove_write(mmu_t, vm_page_t); void moea_zero_page(mmu_t, vm_page_t); void moea_zero_page_area(mmu_t, vm_page_t, int, int); void moea_zero_page_idle(mmu_t, vm_page_t); void moea_activate(mmu_t, struct thread *); void moea_deactivate(mmu_t, struct thread *); void moea_cpu_bootstrap(mmu_t, int); void moea_bootstrap(mmu_t, vm_offset_t, vm_offset_t); void *moea_mapdev(mmu_t, vm_paddr_t, vm_size_t); void *moea_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t); void 
moea_unmapdev(mmu_t, vm_offset_t, vm_size_t); vm_paddr_t moea_kextract(mmu_t, vm_offset_t); void moea_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t); void moea_kenter(mmu_t, vm_offset_t, vm_paddr_t); void moea_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma); boolean_t moea_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void moea_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); static mmu_method_t moea_methods[] = { MMUMETHOD(mmu_change_wiring, moea_change_wiring), MMUMETHOD(mmu_clear_modify, moea_clear_modify), MMUMETHOD(mmu_clear_reference, moea_clear_reference), MMUMETHOD(mmu_copy_page, moea_copy_page), MMUMETHOD(mmu_enter, moea_enter), MMUMETHOD(mmu_enter_object, moea_enter_object), MMUMETHOD(mmu_enter_quick, moea_enter_quick), MMUMETHOD(mmu_extract, moea_extract), MMUMETHOD(mmu_extract_and_hold, moea_extract_and_hold), MMUMETHOD(mmu_init, moea_init), MMUMETHOD(mmu_is_modified, moea_is_modified), MMUMETHOD(mmu_is_prefaultable, moea_is_prefaultable), MMUMETHOD(mmu_is_referenced, moea_is_referenced), MMUMETHOD(mmu_ts_referenced, moea_ts_referenced), MMUMETHOD(mmu_map, moea_map), MMUMETHOD(mmu_page_exists_quick,moea_page_exists_quick), MMUMETHOD(mmu_page_wired_mappings,moea_page_wired_mappings), MMUMETHOD(mmu_pinit, moea_pinit), MMUMETHOD(mmu_pinit0, moea_pinit0), MMUMETHOD(mmu_protect, moea_protect), MMUMETHOD(mmu_qenter, moea_qenter), MMUMETHOD(mmu_qremove, moea_qremove), MMUMETHOD(mmu_release, moea_release), MMUMETHOD(mmu_remove, moea_remove), MMUMETHOD(mmu_remove_all, moea_remove_all), MMUMETHOD(mmu_remove_write, moea_remove_write), MMUMETHOD(mmu_sync_icache, moea_sync_icache), MMUMETHOD(mmu_zero_page, moea_zero_page), MMUMETHOD(mmu_zero_page_area, moea_zero_page_area), MMUMETHOD(mmu_zero_page_idle, moea_zero_page_idle), MMUMETHOD(mmu_activate, moea_activate), MMUMETHOD(mmu_deactivate, moea_deactivate), MMUMETHOD(mmu_page_set_memattr, moea_page_set_memattr), /* Internal interfaces */ MMUMETHOD(mmu_bootstrap, moea_bootstrap), MMUMETHOD(mmu_cpu_bootstrap, moea_cpu_bootstrap), MMUMETHOD(mmu_mapdev_attr, moea_mapdev_attr), MMUMETHOD(mmu_mapdev, moea_mapdev), MMUMETHOD(mmu_unmapdev, moea_unmapdev), MMUMETHOD(mmu_kextract, moea_kextract), MMUMETHOD(mmu_kenter, moea_kenter), MMUMETHOD(mmu_kenter_attr, moea_kenter_attr), MMUMETHOD(mmu_dev_direct_mapped,moea_dev_direct_mapped), { 0, 0 } }; MMU_DEF(oea_mmu, MMU_TYPE_OEA, moea_methods, 0); static __inline uint32_t moea_calc_wimg(vm_offset_t pa, vm_memattr_t ma) { uint32_t pte_lo; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (PTE_I | PTE_G); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (PTE_I); case VM_MEMATTR_WRITE_THROUGH: return (PTE_W | PTE_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. 
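 *
 * In other words: physical addresses that fall inside a known RAM
 * region get PTE_M (cacheable, coherence required), while everything
 * else, typically device space, keeps PTE_I | PTE_G.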
*/ pte_lo = PTE_I | PTE_G; for (i = 0; i < pregions_sz; i++) { if ((pa >= pregions[i].mr_start) && (pa < (pregions[i].mr_start + pregions[i].mr_size))) { pte_lo = PTE_M; break; } } return pte_lo; } static void tlbie(vm_offset_t va) { mtx_lock_spin(&tlbie_mtx); __asm __volatile("ptesync"); __asm __volatile("tlbie %0" :: "r"(va)); __asm __volatile("eieio; tlbsync; ptesync"); mtx_unlock_spin(&tlbie_mtx); } static void tlbia(void) { vm_offset_t va; for (va = 0; va < 0x00040000; va += 0x00001000) { __asm __volatile("tlbie %0" :: "r"(va)); powerpc_sync(); } __asm __volatile("tlbsync"); powerpc_sync(); } static __inline int va_to_sr(u_int *sr, vm_offset_t va) { return (sr[(uintptr_t)va >> ADDR_SR_SHFT]); } static __inline u_int va_to_pteg(u_int sr, vm_offset_t addr) { u_int hash; hash = (sr & SR_VSID_MASK) ^ (((u_int)addr & ADDR_PIDX) >> ADDR_PIDX_SHFT); return (hash & moea_pteg_mask); } static __inline struct pvo_head * vm_page_to_pvoh(vm_page_t m) { return (&m->md.mdpg_pvoh); } static __inline void moea_attr_clear(vm_page_t m, int ptebit) { rw_assert(&pvh_global_lock, RA_WLOCKED); m->md.mdpg_attrs &= ~ptebit; } static __inline int moea_attr_fetch(vm_page_t m) { return (m->md.mdpg_attrs); } static __inline void moea_attr_save(vm_page_t m, int ptebit) { rw_assert(&pvh_global_lock, RA_WLOCKED); m->md.mdpg_attrs |= ptebit; } static __inline int moea_pte_compare(const struct pte *pt, const struct pte *pvo_pt) { if (pt->pte_hi == pvo_pt->pte_hi) return (1); return (0); } static __inline int moea_pte_match(struct pte *pt, u_int sr, vm_offset_t va, int which) { return (pt->pte_hi & ~PTE_VALID) == (((sr & SR_VSID_MASK) << PTE_VSID_SHFT) | ((va >> ADDR_API_SHFT) & PTE_API) | which); } static __inline void moea_pte_create(struct pte *pt, u_int sr, vm_offset_t va, u_int pte_lo) { mtx_assert(&moea_table_mutex, MA_OWNED); /* * Construct a PTE. Default to IMB initially. Valid bit only gets * set when the real pte is set in memory. * * Note: Don't set the valid bit for correct operation of tlb update. */ pt->pte_hi = ((sr & SR_VSID_MASK) << PTE_VSID_SHFT) | (((va & ADDR_PIDX) >> ADDR_API_SHFT) & PTE_API); pt->pte_lo = pte_lo; } static __inline void moea_pte_synch(struct pte *pt, struct pte *pvo_pt) { mtx_assert(&moea_table_mutex, MA_OWNED); pvo_pt->pte_lo |= pt->pte_lo & (PTE_REF | PTE_CHG); } static __inline void moea_pte_clear(struct pte *pt, vm_offset_t va, int ptebit) { mtx_assert(&moea_table_mutex, MA_OWNED); /* * As shown in Section 7.6.3.2.3 */ pt->pte_lo &= ~ptebit; tlbie(va); } static __inline void moea_pte_set(struct pte *pt, struct pte *pvo_pt) { mtx_assert(&moea_table_mutex, MA_OWNED); pvo_pt->pte_hi |= PTE_VALID; /* * Update the PTE as defined in section 7.6.3.1. * Note that the REF/CHG bits are from pvo_pt and thus should havce * been saved so this routine can restore them (if desired). */ pt->pte_lo = pvo_pt->pte_lo; powerpc_sync(); pt->pte_hi = pvo_pt->pte_hi; powerpc_sync(); moea_pte_valid++; } static __inline void moea_pte_unset(struct pte *pt, struct pte *pvo_pt, vm_offset_t va) { mtx_assert(&moea_table_mutex, MA_OWNED); pvo_pt->pte_hi &= ~PTE_VALID; /* * Force the reg & chg bits back into the PTEs. */ powerpc_sync(); /* * Invalidate the pte. */ pt->pte_hi &= ~PTE_VALID; tlbie(va); /* * Save the reg & chg bits. */ moea_pte_synch(pt, pvo_pt); moea_pte_valid--; } static __inline void moea_pte_change(struct pte *pt, struct pte *pvo_pt, vm_offset_t va) { /* * Invalidate the PTE */ moea_pte_unset(pt, pvo_pt, va); moea_pte_set(pt, pvo_pt); } /* * Quick sort callout for comparing memory regions. 
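 *
 * (om_cmp() is the comparator handed to qsort() in moea_bootstrap();
 * it orders the Open Firmware translation entries by physical address
 * before the non-identity entries are re-entered with moea_kenter().)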
*/ static int om_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; const struct ofw_map *mapb; mapa = a; mapb = b; if (mapa->om_pa < mapb->om_pa) return (-1); else if (mapa->om_pa > mapb->om_pa) return (1); else return (0); } void moea_cpu_bootstrap(mmu_t mmup, int ap) { u_int sdr; int i; if (ap) { powerpc_sync(); __asm __volatile("mtdbatu 0,%0" :: "r"(battable[0].batu)); __asm __volatile("mtdbatl 0,%0" :: "r"(battable[0].batl)); isync(); __asm __volatile("mtibatu 0,%0" :: "r"(battable[0].batu)); __asm __volatile("mtibatl 0,%0" :: "r"(battable[0].batl)); isync(); } #ifdef WII /* * Special case for the Wii: don't install the PCI BAT. */ if (strcmp(installed_platform(), "wii") != 0) { #endif __asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu)); __asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl)); #ifdef WII } #endif isync(); __asm __volatile("mtibatu 1,%0" :: "r"(0)); __asm __volatile("mtdbatu 2,%0" :: "r"(0)); __asm __volatile("mtibatu 2,%0" :: "r"(0)); __asm __volatile("mtdbatu 3,%0" :: "r"(0)); __asm __volatile("mtibatu 3,%0" :: "r"(0)); isync(); for (i = 0; i < 16; i++) mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); powerpc_sync(); sdr = (u_int)moea_pteg_table | (moea_pteg_mask >> 10); __asm __volatile("mtsdr1 %0" :: "r"(sdr)); isync(); tlbia(); } void moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { ihandle_t mmui; phandle_t chosen, mmu; int sz; int i, j; vm_size_t size, physsz, hwphyssz; vm_offset_t pa, va, off; void *dpcpu; register_t msr; /* * Set up BAT0 to map the lowest 256 MB area */ battable[0x0].batl = BATL(0x00000000, BAT_M, BAT_PP_RW); battable[0x0].batu = BATU(0x00000000, BAT_BL_256M, BAT_Vs); /* * Map PCI memory space. */ battable[0x8].batl = BATL(0x80000000, BAT_I|BAT_G, BAT_PP_RW); battable[0x8].batu = BATU(0x80000000, BAT_BL_256M, BAT_Vs); battable[0x9].batl = BATL(0x90000000, BAT_I|BAT_G, BAT_PP_RW); battable[0x9].batu = BATU(0x90000000, BAT_BL_256M, BAT_Vs); battable[0xa].batl = BATL(0xa0000000, BAT_I|BAT_G, BAT_PP_RW); battable[0xa].batu = BATU(0xa0000000, BAT_BL_256M, BAT_Vs); battable[0xb].batl = BATL(0xb0000000, BAT_I|BAT_G, BAT_PP_RW); battable[0xb].batu = BATU(0xb0000000, BAT_BL_256M, BAT_Vs); /* * Map obio devices. */ battable[0xf].batl = BATL(0xf0000000, BAT_I|BAT_G, BAT_PP_RW); battable[0xf].batu = BATU(0xf0000000, BAT_BL_256M, BAT_Vs); /* * Use an IBAT and a DBAT to map the bottom segment of memory * where we are. Turn off instruction relocation temporarily * to prevent faults while reprogramming the IBAT. */ msr = mfmsr(); mtmsr(msr & ~PSL_IR); __asm (".balign 32; \n" "mtibatu 0,%0; mtibatl 0,%1; isync; \n" "mtdbatu 0,%0; mtdbatl 0,%1; isync" :: "r"(battable[0].batu), "r"(battable[0].batl)); mtmsr(msr); #ifdef WII if (strcmp(installed_platform(), "wii") != 0) { #endif /* map pci space */ __asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu)); __asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl)); #ifdef WII } #endif isync(); /* set global direct map flag */ hw_direct_map = 1; mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea_bootstrap: physical memory"); for (i = 0; i < pregions_sz; i++) { vm_offset_t pa; vm_offset_t end; CTR3(KTR_PMAP, "physregion: %#x - %#x (%#x)", pregions[i].mr_start, pregions[i].mr_start + pregions[i].mr_size, pregions[i].mr_size); /* * Install entries into the BAT table to allow all * of physmem to be convered by on-demand BAT entries. 
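 * Each BAT pair maps one naturally aligned 256MB block (BAT_BL_256M),
 * which is why the loop rounds mr_start down to a 256MB boundary and
 * then steps through the region in SEGMENT_LENGTH increments, filling
 * one battable slot per block.  As a sketch, a region starting at
 * physical 0x10000000 ends up installing:
 *
 *	battable[1].batl = BATL(0x10000000, BAT_M, BAT_PP_RW);
 *	battable[1].batu = BATU(0x10000000, BAT_BL_256M, BAT_Vs);
 *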
* The loop will sometimes set the same battable element * twice, but that's fine since they won't be used for * a while yet. */ pa = pregions[i].mr_start & 0xf0000000; end = pregions[i].mr_start + pregions[i].mr_size; do { u_int n = pa >> ADDR_SR_SHFT; battable[n].batl = BATL(pa, BAT_M, BAT_PP_RW); battable[n].batu = BATU(pa, BAT_BL_256M, BAT_Vs); pa += SEGMENT_LENGTH; } while (pa < end); } if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea_bootstrap: phys_avail too small"); phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); for (i = 0, j = 0; i < regions_sz; i++, j += 2) { CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start, regions[i].mr_start + regions[i].mr_size, regions[i].mr_size); if (hwphyssz != 0 && (physsz + regions[i].mr_size) >= hwphyssz) { if (physsz < hwphyssz) { phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; } break; } phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; phys_avail_count++; physsz += regions[i].mr_size; } /* Check for overlap with the kernel and exception vectors */ for (j = 0; j < 2*phys_avail_count; j+=2) { if (phys_avail[j] < EXC_LAST) phys_avail[j] += EXC_LAST; if (kernelstart >= phys_avail[j] && kernelstart < phys_avail[j+1]) { if (kernelend < phys_avail[j+1]) { phys_avail[2*phys_avail_count] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; phys_avail[2*phys_avail_count + 1] = phys_avail[j+1]; phys_avail_count++; } phys_avail[j+1] = kernelstart & ~PAGE_MASK; } if (kernelend >= phys_avail[j] && kernelend < phys_avail[j+1]) { if (kernelstart > phys_avail[j]) { phys_avail[2*phys_avail_count] = phys_avail[j]; phys_avail[2*phys_avail_count + 1] = kernelstart & ~PAGE_MASK; phys_avail_count++; } phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; } } physmem = btoc(physsz); /* * Allocate PTEG table. */ #ifdef PTEGCOUNT moea_pteg_count = PTEGCOUNT; #else moea_pteg_count = 0x1000; while (moea_pteg_count < physmem) moea_pteg_count <<= 1; moea_pteg_count >>= 1; #endif /* PTEGCOUNT */ size = moea_pteg_count * sizeof(struct pteg); CTR2(KTR_PMAP, "moea_bootstrap: %d PTEGs, %d bytes", moea_pteg_count, size); moea_pteg_table = (struct pteg *)moea_bootstrap_alloc(size, size); CTR1(KTR_PMAP, "moea_bootstrap: PTEG table at %p", moea_pteg_table); bzero((void *)moea_pteg_table, moea_pteg_count * sizeof(struct pteg)); moea_pteg_mask = moea_pteg_count - 1; /* * Allocate pv/overflow lists. */ size = sizeof(struct pvo_head) * moea_pteg_count; moea_pvo_table = (struct pvo_head *)moea_bootstrap_alloc(size, PAGE_SIZE); CTR1(KTR_PMAP, "moea_bootstrap: PVO table at %p", moea_pvo_table); for (i = 0; i < moea_pteg_count; i++) LIST_INIT(&moea_pvo_table[i]); /* * Initialize the lock that synchronizes access to the pteg and pvo * tables. */ mtx_init(&moea_table_mutex, "pmap table", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&moea_vsid_mutex, "VSID table", NULL, MTX_DEF); mtx_init(&tlbie_mtx, "tlbie", NULL, MTX_SPIN); /* * Initialise the unmanaged pvo pool. */ moea_bpvo_pool = (struct pvo_entry *)moea_bootstrap_alloc( BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0); moea_bpvo_pool_index = 0; /* * Make sure kernel vsid is allocated as well as VSID 0. */ moea_vsid_bitmap[(KERNEL_VSIDBITS & (NPMAPS - 1)) / VSID_NBPW] |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); moea_vsid_bitmap[0] |= 1; /* * Initialize the kernel pmap (which is statically allocated). 
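 *
 * All 16 segment registers get fixed, well-known VSIDs
 * (EMPTY_SEGMENT + i), the pmap is marked active on every CPU and its
 * PVO tree starts out empty.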
*/ PMAP_LOCK_INIT(kernel_pmap); for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; CPU_FILL(&kernel_pmap->pm_active); RB_INIT(&kernel_pmap->pmap_pvo); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* * Set up the Open Firmware mappings */ chosen = OF_finddevice("/chosen"); if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1 && (mmu = OF_instance_to_package(mmui)) != -1 && (sz = OF_getproplen(mmu, "translations")) != -1) { translations = NULL; for (i = 0; phys_avail[i] != 0; i += 2) { if (phys_avail[i + 1] >= sz) { translations = (struct ofw_map *)phys_avail[i]; break; } } if (translations == NULL) panic("moea_bootstrap: no space to copy translations"); bzero(translations, sz); if (OF_getprop(mmu, "translations", translations, sz) == -1) panic("moea_bootstrap: can't get ofw translations"); CTR0(KTR_PMAP, "moea_bootstrap: translations"); sz /= sizeof(*translations); qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x", translations[i].om_pa, translations[i].om_va, translations[i].om_len); /* * If the mapping is 1:1, let the RAM and device * on-demand BAT tables take care of the translation. */ if (translations[i].om_va == translations[i].om_pa) continue; /* Enter the pages */ for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) moea_kenter(mmup, translations[i].om_va + off, translations[i].om_pa + off); } } /* * Calculate the last available physical address. */ for (i = 0; phys_avail[i + 2] != 0; i += 2) ; Maxmem = powerpc_btop(phys_avail[i + 1]); moea_cpu_bootstrap(mmup,0); pmap_bootstrapped++; /* * Set the start and end of kva. */ virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. */ pa = moea_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; CTR2(KTR_PMAP, "moea_bootstrap: kstack0 at %#x (%#x)", pa, va); thread0.td_kstack = va; thread0.td_kstack_pages = KSTACK_PAGES; for (i = 0; i < KSTACK_PAGES; i++) { moea_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the message buffer. */ pa = msgbuf_phys = moea_bootstrap_alloc(msgbufsize, PAGE_SIZE); msgbufp = (struct msgbuf *)virtual_avail; va = virtual_avail; virtual_avail += round_page(msgbufsize); while (va < virtual_avail) { moea_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the dynamic percpu area. */ pa = moea_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); dpcpu = (void *)virtual_avail; va = virtual_avail; virtual_avail += DPCPU_SIZE; while (va < virtual_avail) { moea_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } dpcpu_init(dpcpu, 0); } /* * Activate a user pmap. The pmap must be activated before it's address * space can be accessed in any way. */ void moea_activate(mmu_t mmu, struct thread *td) { pmap_t pm, pmr; /* * Load all the data we need up front to encourage the compiler to * not issue any loads while we have interrupts disabled below. 
*/ pm = &td->td_proc->p_vmspace->vm_pmap; pmr = pm->pmap_phys; CPU_SET(PCPU_GET(cpuid), &pm->pm_active); PCPU_SET(curpmap, pmr); } void moea_deactivate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); PCPU_SET(curpmap, NULL); } void moea_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) { struct pvo_entry *pvo; PMAP_LOCK(pm); pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); if (pvo != NULL) { if (wired) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) pm->pm_stats.wired_count++; pvo->pvo_vaddr |= PVO_WIRED; } else { if ((pvo->pvo_vaddr & PVO_WIRED) != 0) pm->pm_stats.wired_count--; pvo->pvo_vaddr &= ~PVO_WIRED; } } PMAP_UNLOCK(pm); } void moea_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) { vm_offset_t dst; vm_offset_t src; dst = VM_PAGE_TO_PHYS(mdst); src = VM_PAGE_TO_PHYS(msrc); bcopy((void *)src, (void *)dst, PAGE_SIZE); } /* * Zero a page of physical memory by temporarily mapping it into the tlb. */ void moea_zero_page(mmu_t mmu, vm_page_t m) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); void *va = (void *)pa; bzero(va, PAGE_SIZE); } void moea_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); void *va = (void *)(pa + off); bzero(va, size); } void moea_zero_page_idle(mmu_t mmu, vm_page_t m) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); void *va = (void *)pa; bzero(va, PAGE_SIZE); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. */ void moea_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); moea_enter_locked(pmap, va, m, prot, wired); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. * * The page queues and pmap must be locked. */ static void moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { struct pvo_head *pvo_head; uma_zone_t zone; vm_page_t pg; u_int pte_lo, pvo_flags; int error; if (!moea_initialized) { pvo_head = &moea_pvo_kunmanaged; zone = moea_upvo_zone; pvo_flags = 0; pg = NULL; } else { pvo_head = vm_page_to_pvoh(m); pg = m; zone = moea_mpvo_zone; pvo_flags = PVO_MANAGED; } if (pmap_bootstrapped) rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || VM_OBJECT_LOCKED(m->object), ("moea_enter_locked: page %p is not busy", m)); /* XXX change the pvo head for fake pages */ if ((m->oflags & VPO_UNMANAGED) != 0) { pvo_flags &= ~PVO_MANAGED; pvo_head = &moea_pvo_kunmanaged; zone = moea_upvo_zone; } pte_lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); if (prot & VM_PROT_WRITE) { pte_lo |= PTE_BW; if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); } else pte_lo |= PTE_BR; if (prot & VM_PROT_EXECUTE) pvo_flags |= PVO_EXECUTABLE; if (wired) pvo_flags |= PVO_WIRED; error = moea_pvo_enter(pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m), pte_lo, pvo_flags); /* * Flush the real page from the instruction cache. This has be done * for all user mappings to prevent information leakage via the * instruction cache. moea_pvo_enter() returns ENOENT for the first * mapping for a page. 
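 *
 * The check below therefore flushes only for brand-new (ENOENT)
 * mappings in user pmaps, and only when the mapping is cacheable,
 * i.e. when neither PTE_I nor PTE_G is set.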
*/ if (pmap != kernel_pmap && error == ENOENT && (pte_lo & (PTE_I | PTE_G)) == 0) moea_syncicache(VM_PAGE_TO_PHYS(m), PAGE_SIZE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void moea_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; psize = atop(end - start); m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea_enter_locked(pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); } void moea_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); moea_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); } vm_paddr_t moea_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; PMAP_LOCK(pm); pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); if (pvo == NULL) pa = 0; else pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); PMAP_UNLOCK(pm); return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. */ vm_page_t moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; vm_paddr_t pa; m = NULL; pa = 0; PMAP_LOCK(pmap); retry: pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } void moea_init(mmu_t mmu) { moea_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); moea_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); moea_initialized = TRUE; } boolean_t moea_is_referenced(mmu_t mmu, vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_is_referenced: page %p is not managed", m)); rw_wlock(&pvh_global_lock); rv = moea_query_bit(m, PTE_REF); rw_wunlock(&pvh_global_lock); return (rv); } boolean_t moea_is_modified(mmu_t mmu, vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_is_modified: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PTE_CHG set. 
*/ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = moea_query_bit(m, PTE_CHG); rw_wunlock(&pvh_global_lock); return (rv); } boolean_t moea_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; boolean_t rv; PMAP_LOCK(pmap); pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); rv = pvo == NULL || (pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0; PMAP_UNLOCK(pmap); return (rv); } void moea_clear_reference(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_clear_reference: page %p is not managed", m)); rw_wlock(&pvh_global_lock); moea_clear_bit(m, PTE_REF); rw_wunlock(&pvh_global_lock); } void moea_clear_modify(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_clear_modify: page %p is not managed", m)); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); KASSERT((m->oflags & VPO_BUSY) == 0, ("moea_clear_modify: page %p is busy", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_CHG * set. If the object containing the page is locked and the page is * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); moea_clear_bit(m, PTE_CHG); rw_wunlock(&pvh_global_lock); } /* * Clear the write and modified bits in each of the given page's mappings. */ void moea_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; struct pte *pt; pmap_t pmap; u_int lo; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_remove_write: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by * another thread while the object is locked. Thus, if PGA_WRITEABLE * is clear, no page table entries need updating. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); lo = moea_attr_fetch(m); powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if ((pvo->pvo_pte.pte.pte_lo & PTE_PP) != PTE_BR) { pt = moea_pvo_to_pte(pvo, -1); pvo->pvo_pte.pte.pte_lo &= ~PTE_PP; pvo->pvo_pte.pte.pte_lo |= PTE_BR; if (pt != NULL) { moea_pte_synch(pt, &pvo->pvo_pte.pte); lo |= pvo->pvo_pte.pte.pte_lo; pvo->pvo_pte.pte.pte_lo &= ~PTE_CHG; moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); mtx_unlock(&moea_table_mutex); } } PMAP_UNLOCK(pmap); } if ((lo & PTE_CHG) != 0) { moea_attr_clear(m, PTE_CHG); vm_page_dirty(m); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } /* * moea_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int moea_ts_referenced(mmu_t mmu, vm_page_t m) { int count; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_ts_referenced: page %p is not managed", m)); rw_wlock(&pvh_global_lock); count = moea_clear_bit(m, PTE_REF); rw_wunlock(&pvh_global_lock); return (count); } /* * Modify the WIMG settings of all mappings for a page. 
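 *
 * For managed pages every PVO is visited: the cached WIMG bits are
 * replaced with the ones derived from the new memory attribute, and
 * if the mapping currently occupies a hardware PTE slot that PTE is
 * re-entered as well (followed by an isync for kernel mappings).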
*/ void moea_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; struct pvo_head *pvo_head; struct pte *pt; pmap_t pmap; u_int lo; if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; } rw_wlock(&pvh_global_lock); pvo_head = vm_page_to_pvoh(m); lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), ma); LIST_FOREACH(pvo, pvo_head, pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); pt = moea_pvo_to_pte(pvo, -1); pvo->pvo_pte.pte.pte_lo &= ~PTE_WIMG; pvo->pvo_pte.pte.pte_lo |= lo; if (pt != NULL) { moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); if (pvo->pvo_pmap == kernel_pmap) isync(); } mtx_unlock(&moea_table_mutex); PMAP_UNLOCK(pmap); } m->md.mdpg_cache_attrs = ma; rw_wunlock(&pvh_global_lock); } /* * Map a wired page into kernel virtual address space. */ void moea_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { moea_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } void moea_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) { u_int pte_lo; int error; #if 0 if (va < VM_MIN_KERNEL_ADDRESS) panic("moea_kenter: attempt to enter non-kernel address %#x", va); #endif pte_lo = moea_calc_wimg(pa, ma); PMAP_LOCK(kernel_pmap); error = moea_pvo_enter(kernel_pmap, moea_upvo_zone, &moea_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED); if (error != 0 && error != ENOENT) panic("moea_kenter: failed to enter va %#x pa %#x: %d", va, pa, error); PMAP_UNLOCK(kernel_pmap); } /* * Extract the physical page address associated with the given kernel virtual * address. */ vm_paddr_t moea_kextract(mmu_t mmu, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; /* * Allow direct mappings on 32-bit OEA */ if (va < VM_MIN_KERNEL_ADDRESS) { return (va); } PMAP_LOCK(kernel_pmap); pvo = moea_pvo_find_va(kernel_pmap, va & ~ADDR_POFF, NULL); KASSERT(pvo != NULL, ("moea_kextract: no addr found")); pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); PMAP_UNLOCK(kernel_pmap); return (pa); } /* * Remove a wired page from kernel virtual address space. */ void moea_kremove(mmu_t mmu, vm_offset_t va) { moea_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); } /* * Map a range of physical addresses into kernel virtual address space. * * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' * unchanged. We cannot and therefore do not; *virt is updated with the * first usable address after the mapped region. */ vm_offset_t moea_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva, va; sva = *virt; va = sva; for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea_kenter(mmu, va, pa_start); *virt = va; return (sva); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. 
*/ boolean_t moea_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { int loops; struct pvo_entry *pvo; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; rw_wlock(&pvh_global_lock); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { if (pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * Return the number of managed mappings to the given physical page * that are wired. */ int moea_page_wired_mappings(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & PVO_WIRED) != 0) count++; rw_wunlock(&pvh_global_lock); return (count); } static u_int moea_vsidcontext; void moea_pinit(mmu_t mmu, pmap_t pmap) { int i, mask; u_int entropy; KASSERT((int)pmap < VM_MIN_KERNEL_ADDRESS, ("moea_pinit: virt pmap")); PMAP_LOCK_INIT(pmap); RB_INIT(&pmap->pmap_pvo); entropy = 0; __asm __volatile("mftb %0" : "=r"(entropy)); if ((pmap->pmap_phys = (pmap_t)moea_kextract(mmu, (vm_offset_t)pmap)) == NULL) { pmap->pmap_phys = pmap; } mtx_lock(&moea_vsid_mutex); /* * Allocate some segment registers for this pmap. */ for (i = 0; i < NPMAPS; i += VSID_NBPW) { u_int hash, n; /* * Create a new value by mutiplying by a prime and adding in * entropy from the timebase register. This is to make the * VSID more random so that the PT hash function collides * less often. (Note that the prime casues gcc to do shifts * instead of a multiply.) */ moea_vsidcontext = (moea_vsidcontext * 0x1105) + entropy; hash = moea_vsidcontext & (NPMAPS - 1); if (hash == 0) /* 0 is special, avoid it */ continue; n = hash >> 5; mask = 1 << (hash & (VSID_NBPW - 1)); hash = (moea_vsidcontext & 0xfffff); if (moea_vsid_bitmap[n] & mask) { /* collision? */ /* anything free in this bucket? */ if (moea_vsid_bitmap[n] == 0xffffffff) { entropy = (moea_vsidcontext >> 20); continue; } i = ffs(~moea_vsid_bitmap[n]) - 1; mask = 1 << i; hash &= 0xfffff & ~(VSID_NBPW - 1); hash |= i; } KASSERT(!(moea_vsid_bitmap[n] & mask), ("Allocating in-use VSID group %#x\n", hash)); moea_vsid_bitmap[n] |= mask; for (i = 0; i < 16; i++) pmap->pm_sr[i] = VSID_MAKE(i, hash); mtx_unlock(&moea_vsid_mutex); return; } mtx_unlock(&moea_vsid_mutex); panic("moea_pinit: out of segments"); } /* * Initialize the pmap associated with process 0. */ void moea_pinit0(mmu_t mmu, pmap_t pm) { moea_pinit(mmu, pm); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); } /* * Set the physical protection on the specified range of this map as requested. */ void moea_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct pvo_entry *pvo, *tpvo, key; struct pte *pt; KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, ("moea_protect: non current pmap")); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { moea_remove(mmu, pm, sva, eva); return; } rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); if ((prot & VM_PROT_EXECUTE) == 0) pvo->pvo_vaddr &= ~PVO_EXECUTABLE; /* * Grab the PTE pointer before we diddle with the cached PTE * copy. */ pt = moea_pvo_to_pte(pvo, -1); /* * Change the protection of the page. 
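 *
 * Note that the only transition performed here is a downgrade to read-only
 * (PTE_BR); execute permission is tracked separately via PVO_EXECUTABLE
 * above.  A hedged example of how this path is typically reached from the
 * MI layer (addresses and process pointer are placeholders):
 *
 *	// e.g. mprotect(addr, len, PROT_READ) eventually performs:
 *	pmap_protect(&p->p_vmspace->vm_pmap, sva, eva, VM_PROT_READ);
 *
 * which lands in moea_protect() and walks the PVOs covering [sva, eva).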
*/ pvo->pvo_pte.pte.pte_lo &= ~PTE_PP; pvo->pvo_pte.pte.pte_lo |= PTE_BR; /* * If the PVO is in the page table, update that pte as well. */ if (pt != NULL) { moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); mtx_unlock(&moea_table_mutex); } } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); } /* * Map a list of wired pages into kernel virtual address space. This is * intended for temporary mappings which do not need page modification or * references recorded. Existing mappings in the region are overwritten. */ void moea_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) { vm_offset_t va; va = sva; while (count-- > 0) { moea_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } } /* * Remove page mappings from kernel virtual address space. Intended for * temporary mappings entered by moea_qenter. */ void moea_qremove(mmu_t mmu, vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { moea_kremove(mmu, va); va += PAGE_SIZE; } } void moea_release(mmu_t mmu, pmap_t pmap) { int idx, mask; /* * Free segment register's VSID */ if (pmap->pm_sr[0] == 0) panic("moea_release"); mtx_lock(&moea_vsid_mutex); idx = VSID_TO_HASH(pmap->pm_sr[0]) & (NPMAPS-1); mask = 1 << (idx % VSID_NBPW); idx /= VSID_NBPW; moea_vsid_bitmap[idx] &= ~mask; mtx_unlock(&moea_vsid_mutex); PMAP_LOCK_DESTROY(pmap); } /* * Remove the given range of addresses from the specified map. */ void moea_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry *pvo, *tpvo, key; rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); moea_pvo_remove(pvo, -1); } PMAP_UNLOCK(pm); rw_wunlock(&pvh_global_lock); } /* * Remove physical page from all pmaps in which it resides. moea_pvo_remove() * will reflect changes in pte's back to the vm_page. */ void moea_remove_all(mmu_t mmu, vm_page_t m) { struct pvo_head *pvo_head; struct pvo_entry *pvo, *next_pvo; pmap_t pmap; rw_wlock(&pvh_global_lock); pvo_head = vm_page_to_pvoh(m); for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) { next_pvo = LIST_NEXT(pvo, pvo_vlink); pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); moea_pvo_remove(pvo, -1); PMAP_UNLOCK(pmap); } if ((m->aflags & PGA_WRITEABLE) && moea_query_bit(m, PTE_CHG)) { moea_attr_clear(m, PTE_CHG); vm_page_dirty(m); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } /* * Allocate a physical page of memory directly from the phys_avail map. * Can only be called from moea_bootstrap before avail start and end are * calculated. 
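 *
 * A worked example of the carving below (numbers are illustrative): with
 * phys_avail[] = { 0x00003000, 0x00800000, 0, ... }, a request for
 * size = 0x4000 and align = 0x1000 gives s = 0x00003000, e = 0x00007000.
 * Since s equals phys_avail[i], the region is taken from the front and the
 * entry simply shrinks to { 0x00007000, 0x00800000 }.  An allocation from
 * the middle of an entry instead splits it in two and bumps
 * phys_avail_count.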
*/ static vm_offset_t moea_bootstrap_alloc(vm_size_t size, u_int align) { vm_offset_t s, e; int i, j; size = round_page(size); for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (align != 0) s = (phys_avail[i] + align - 1) & ~(align - 1); else s = phys_avail[i]; e = s + size; if (s < phys_avail[i] || e > phys_avail[i + 1]) continue; if (s == phys_avail[i]) { phys_avail[i] += size; } else if (e == phys_avail[i + 1]) { phys_avail[i + 1] -= size; } else { for (j = phys_avail_count * 2; j > i; j -= 2) { phys_avail[j] = phys_avail[j - 2]; phys_avail[j + 1] = phys_avail[j - 1]; } phys_avail[i + 3] = phys_avail[i + 1]; phys_avail[i + 1] = s; phys_avail[i + 2] = e; phys_avail_count++; } return (s); } panic("moea_bootstrap_alloc: could not allocate memory"); } static void moea_syncicache(vm_offset_t pa, vm_size_t len) { __syncicache((void *)pa, len); } static int moea_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, vm_offset_t va, vm_offset_t pa, u_int pte_lo, int flags) { struct pvo_entry *pvo; u_int sr; int first; u_int ptegidx; int i; int bootstrap; moea_pvo_enter_calls++; first = 0; bootstrap = 0; /* * Compute the PTE Group index. */ va &= ~ADDR_POFF; sr = va_to_sr(pm->pm_sr, va); ptegidx = va_to_pteg(sr, va); /* * Remove any existing mapping for this page. Reuse the pvo entry if * there is a mapping. */ mtx_lock(&moea_table_mutex); LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { if ((pvo->pvo_pte.pte.pte_lo & PTE_RPGN) == pa && (pvo->pvo_pte.pte.pte_lo & PTE_PP) == (pte_lo & PTE_PP)) { mtx_unlock(&moea_table_mutex); return (0); } moea_pvo_remove(pvo, -1); break; } } /* * If we aren't overwriting a mapping, try to allocate. */ if (moea_initialized) { pvo = uma_zalloc(zone, M_NOWAIT); } else { if (moea_bpvo_pool_index >= BPVO_POOL_SIZE) { panic("moea_enter: bpvo pool exhausted, %d, %d, %d", moea_bpvo_pool_index, BPVO_POOL_SIZE, BPVO_POOL_SIZE * sizeof(struct pvo_entry)); } pvo = &moea_bpvo_pool[moea_bpvo_pool_index]; moea_bpvo_pool_index++; bootstrap = 1; } if (pvo == NULL) { mtx_unlock(&moea_table_mutex); return (ENOMEM); } moea_pvo_entries++; pvo->pvo_vaddr = va; pvo->pvo_pmap = pm; LIST_INSERT_HEAD(&moea_pvo_table[ptegidx], pvo, pvo_olink); pvo->pvo_vaddr &= ~ADDR_POFF; if (flags & VM_PROT_EXECUTE) pvo->pvo_vaddr |= PVO_EXECUTABLE; if (flags & PVO_WIRED) pvo->pvo_vaddr |= PVO_WIRED; if (pvo_head != &moea_pvo_kunmanaged) pvo->pvo_vaddr |= PVO_MANAGED; if (bootstrap) pvo->pvo_vaddr |= PVO_BOOTSTRAP; moea_pte_create(&pvo->pvo_pte.pte, sr, va, pa | pte_lo); /* * Add to pmap list */ RB_INSERT(pvo_tree, &pm->pmap_pvo, pvo); /* * Remember if the list was empty and therefore will be the first * item. */ if (LIST_FIRST(pvo_head) == NULL) first = 1; LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); if (pvo->pvo_pte.pte.pte_lo & PVO_WIRED) pm->pm_stats.wired_count++; pm->pm_stats.resident_count++; /* * We hope this succeeds but it isn't required. */ i = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte); if (i >= 0) { PVO_PTEGIDX_SET(pvo, i); } else { panic("moea_pvo_enter: overflow"); moea_pte_overflow++; } mtx_unlock(&moea_table_mutex); return (first ? ENOENT : 0); } static void moea_pvo_remove(struct pvo_entry *pvo, int pteidx) { struct pte *pt; /* * If there is an active pte entry, we need to deactivate it (and * save the ref & cfg bits). 
*/ pt = moea_pvo_to_pte(pvo, pteidx); if (pt != NULL) { moea_pte_unset(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); mtx_unlock(&moea_table_mutex); PVO_PTEGIDX_CLR(pvo); } else { moea_pte_overflow--; } /* * Update our statistics. */ pvo->pvo_pmap->pm_stats.resident_count--; if (pvo->pvo_pte.pte.pte_lo & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count--; /* * Save the REF/CHG bits into their cache if the page is managed. */ if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED) { struct vm_page *pg; pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); if (pg != NULL) { moea_attr_save(pg, pvo->pvo_pte.pte.pte_lo & (PTE_REF | PTE_CHG)); } } /* * Remove this PVO from the PV and pmap lists. */ LIST_REMOVE(pvo, pvo_vlink); RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Remove this from the overflow list and return it to the pool * if we aren't going to reuse it. */ LIST_REMOVE(pvo, pvo_olink); if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) uma_zfree(pvo->pvo_vaddr & PVO_MANAGED ? moea_mpvo_zone : moea_upvo_zone, pvo); moea_pvo_entries--; moea_pvo_remove_calls++; } static __inline int moea_pvo_pte_index(const struct pvo_entry *pvo, int ptegidx) { int pteidx; /* * We can find the actual pte entry without searching by grabbing * the PTEG index from 3 unused bits in pte_lo[11:9] and by * noticing the HID bit. */ pteidx = ptegidx * 8 + PVO_PTEGIDX_GET(pvo); if (pvo->pvo_pte.pte.pte_hi & PTE_HID) pteidx ^= moea_pteg_mask * 8; return (pteidx); } static struct pvo_entry * moea_pvo_find_va(pmap_t pm, vm_offset_t va, int *pteidx_p) { struct pvo_entry *pvo; int ptegidx; u_int sr; va &= ~ADDR_POFF; sr = va_to_sr(pm->pm_sr, va); ptegidx = va_to_pteg(sr, va); mtx_lock(&moea_table_mutex); LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { if (pteidx_p) *pteidx_p = moea_pvo_pte_index(pvo, ptegidx); break; } } mtx_unlock(&moea_table_mutex); return (pvo); } static struct pte * moea_pvo_to_pte(const struct pvo_entry *pvo, int pteidx) { struct pte *pt; /* * If we haven't been supplied the ptegidx, calculate it. */ if (pteidx == -1) { int ptegidx; u_int sr; sr = va_to_sr(pvo->pvo_pmap->pm_sr, pvo->pvo_vaddr); ptegidx = va_to_pteg(sr, pvo->pvo_vaddr); pteidx = moea_pvo_pte_index(pvo, ptegidx); } pt = &moea_pteg_table[pteidx >> 3].pt[pteidx & 7]; mtx_lock(&moea_table_mutex); if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) { panic("moea_pvo_to_pte: pvo %p has valid pte in pvo but no " "valid pte index", pvo); } if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) { panic("moea_pvo_to_pte: pvo %p has valid pte index in pvo " "pvo but no valid pte", pvo); } if ((pt->pte_hi ^ (pvo->pvo_pte.pte.pte_hi & ~PTE_VALID)) == PTE_VALID) { if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0) { panic("moea_pvo_to_pte: pvo %p has valid pte in " "moea_pteg_table %p but invalid in pvo", pvo, pt); } if (((pt->pte_lo ^ pvo->pvo_pte.pte.pte_lo) & ~(PTE_CHG|PTE_REF)) != 0) { panic("moea_pvo_to_pte: pvo %p pte does not match " "pte %p in moea_pteg_table", pvo, pt); } mtx_assert(&moea_table_mutex, MA_OWNED); return (pt); } if (pvo->pvo_pte.pte.pte_hi & PTE_VALID) { panic("moea_pvo_to_pte: pvo %p has invalid pte %p in " "moea_pteg_table but valid in pvo", pvo, pt); } mtx_unlock(&moea_table_mutex); return (NULL); } /* * XXX: THIS STUFF SHOULD BE IN pte.c? 
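 *
 * moea_pte_spill() below reloads an evicted mapping straight from its PVO
 * when the hardware hash lookup misses.  A hedged sketch of how a low-level
 * fault handler could use it (the caller shown is hypothetical, not the
 * actual trap code):
 *
 *	// On a DSI/ISI caused purely by hash-table pressure:
 *	if (moea_pte_spill(fault_addr))
 *		return;		// PTE reinstalled, just retry the access
 *	// Otherwise fall through to the full vm_fault() path.
 *
 * The return value is 1 when a source PVO was found and its PTE was written
 * into the group (possibly evicting a victim), and 0 otherwise.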
*/ int moea_pte_spill(vm_offset_t addr) { struct pvo_entry *source_pvo, *victim_pvo; struct pvo_entry *pvo; int ptegidx, i, j; u_int sr; struct pteg *pteg; struct pte *pt; moea_pte_spills++; sr = mfsrin(addr); ptegidx = va_to_pteg(sr, addr); /* * Have to substitute some entry. Use the primary hash for this. * Use low bits of timebase as random generator. */ pteg = &moea_pteg_table[ptegidx]; mtx_lock(&moea_table_mutex); __asm __volatile("mftb %0" : "=r"(i)); i &= 7; pt = &pteg->pt[i]; source_pvo = NULL; victim_pvo = NULL; LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { /* * We need to find a pvo entry for this address. */ if (source_pvo == NULL && moea_pte_match(&pvo->pvo_pte.pte, sr, addr, pvo->pvo_pte.pte.pte_hi & PTE_HID)) { /* * Now found an entry to be spilled into the pteg. * The PTE is now valid, so we know it's active. */ j = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte); if (j >= 0) { PVO_PTEGIDX_SET(pvo, j); moea_pte_overflow--; mtx_unlock(&moea_table_mutex); return (1); } source_pvo = pvo; if (victim_pvo != NULL) break; } /* * We also need the pvo entry of the victim we are replacing * so save the R & C bits of the PTE. */ if ((pt->pte_hi & PTE_HID) == 0 && victim_pvo == NULL && moea_pte_compare(pt, &pvo->pvo_pte.pte)) { victim_pvo = pvo; if (source_pvo != NULL) break; } } if (source_pvo == NULL) { mtx_unlock(&moea_table_mutex); return (0); } if (victim_pvo == NULL) { if ((pt->pte_hi & PTE_HID) == 0) panic("moea_pte_spill: victim p-pte (%p) has no pvo" "entry", pt); /* * If this is a secondary PTE, we need to search it's primary * pvo bucket for the matching PVO. */ LIST_FOREACH(pvo, &moea_pvo_table[ptegidx ^ moea_pteg_mask], pvo_olink) { /* * We also need the pvo entry of the victim we are * replacing so save the R & C bits of the PTE. */ if (moea_pte_compare(pt, &pvo->pvo_pte.pte)) { victim_pvo = pvo; break; } } if (victim_pvo == NULL) panic("moea_pte_spill: victim s-pte (%p) has no pvo" "entry", pt); } /* * We are invalidating the TLB entry for the EA we are replacing even * though it's valid. If we don't, we lose any ref/chg bit changes * contained in the TLB entry. */ source_pvo->pvo_pte.pte.pte_hi &= ~PTE_HID; moea_pte_unset(pt, &victim_pvo->pvo_pte.pte, victim_pvo->pvo_vaddr); moea_pte_set(pt, &source_pvo->pvo_pte.pte); PVO_PTEGIDX_CLR(victim_pvo); PVO_PTEGIDX_SET(source_pvo, i); moea_pte_replacements++; mtx_unlock(&moea_table_mutex); return (1); } static int moea_pte_insert(u_int ptegidx, struct pte *pvo_pt) { struct pte *pt; int i; mtx_assert(&moea_table_mutex, MA_OWNED); /* * First try primary hash. */ for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) { if ((pt->pte_hi & PTE_VALID) == 0) { pvo_pt->pte_hi &= ~PTE_HID; moea_pte_set(pt, pvo_pt); return (i); } } /* * Now try secondary hash. */ ptegidx ^= moea_pteg_mask; for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) { if ((pt->pte_hi & PTE_VALID) == 0) { pvo_pt->pte_hi |= PTE_HID; moea_pte_set(pt, pvo_pt); return (i); } } panic("moea_pte_insert: overflow"); return (-1); } static boolean_t moea_query_bit(vm_page_t m, int ptebit) { struct pvo_entry *pvo; struct pte *pt; rw_assert(&pvh_global_lock, RA_WLOCKED); if (moea_attr_fetch(m) & ptebit) return (TRUE); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { /* * See if we saved the bit off. If so, cache it and return * success. */ if (pvo->pvo_pte.pte.pte_lo & ptebit) { moea_attr_save(m, ptebit); return (TRUE); } } /* * No luck, now go through the hard part of looking at the PTEs * themselves. 
Sync so that any pending REF/CHG bits are flushed to * the PTEs. */ powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { /* * See if this pvo has a valid PTE. if so, fetch the * REF/CHG bits from the valid PTE. If the appropriate * ptebit is set, cache it and return success. */ pt = moea_pvo_to_pte(pvo, -1); if (pt != NULL) { moea_pte_synch(pt, &pvo->pvo_pte.pte); mtx_unlock(&moea_table_mutex); if (pvo->pvo_pte.pte.pte_lo & ptebit) { moea_attr_save(m, ptebit); return (TRUE); } } } return (FALSE); } static u_int moea_clear_bit(vm_page_t m, int ptebit) { u_int count; struct pvo_entry *pvo; struct pte *pt; rw_assert(&pvh_global_lock, RA_WLOCKED); /* * Clear the cached value. */ moea_attr_clear(m, ptebit); /* * Sync so that any pending REF/CHG bits are flushed to the PTEs (so * we can reset the right ones). note that since the pvo entries and * list heads are accessed via BAT0 and are never placed in the page * table, we don't have to worry about further accesses setting the * REF/CHG bits. */ powerpc_sync(); /* * For each pvo entry, clear the pvo's ptebit. If this pvo has a * valid pte clear the ptebit from the valid pte. */ count = 0; LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pt = moea_pvo_to_pte(pvo, -1); if (pt != NULL) { moea_pte_synch(pt, &pvo->pvo_pte.pte); if (pvo->pvo_pte.pte.pte_lo & ptebit) { count++; moea_pte_clear(pt, PVO_VADDR(pvo), ptebit); } mtx_unlock(&moea_table_mutex); } pvo->pvo_pte.pte.pte_lo &= ~ptebit; } return (count); } /* * Return true if the physical range is encompassed by the battable[idx] */ static int moea_bat_mapped(int idx, vm_offset_t pa, vm_size_t size) { u_int prot; u_int32_t start; u_int32_t end; u_int32_t bat_ble; /* * Return immediately if not a valid mapping */ if (!(battable[idx].batu & BAT_Vs)) return (EINVAL); /* * The BAT entry must be cache-inhibited, guarded, and r/w * so it can function as an i/o page */ prot = battable[idx].batl & (BAT_I|BAT_G|BAT_PP_RW); if (prot != (BAT_I|BAT_G|BAT_PP_RW)) return (EPERM); /* * The address should be within the BAT range. Assume that the * start address in the BAT has the correct alignment (thus * not requiring masking) */ start = battable[idx].batl & BAT_PBS; bat_ble = (battable[idx].batu & ~(BAT_EBS)) | 0x03; end = start | (bat_ble << 15) | 0x7fff; if ((pa < start) || ((pa + size) > end)) return (ERANGE); return (0); } boolean_t moea_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { int i; /* * This currently does not work for entries that * overlap 256M BAT segments. */ for(i = 0; i < 16; i++) if (moea_bat_mapped(i, pa, size) == 0) return (0); return (EFAULT); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * moea_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return (moea_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); } void * moea_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma) { vm_offset_t va, tmpva, ppa, offset; int i; ppa = trunc_page(pa); offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); /* * If the physical address lies within a valid BAT table entry, * return the 1:1 mapping. This currently doesn't work * for regions that overlap 256M BAT segments. 
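 *
 * For illustration (values assumed, not taken from a real board): a
 * battable[] slot covering 256MB of I/O at 0x80000000 with BAT_I|BAT_G|
 * BAT_PP_RW has its BL field all ones, so moea_bat_mapped() computes
 * bat_ble = 0x1fff and end = 0x80000000 | (0x1fff << 15) | 0x7fff
 * = 0x8fffffff.  A request such as moea_mapdev(mmu, 0x80400000, 0x1000)
 * then falls inside that block, moea_bat_mapped() returns 0, and the
 * physical address is handed back directly instead of consuming KVA.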
*/ for (i = 0; i < 16; i++) { if (moea_bat_mapped(i, pa, size) == 0) return ((void *) pa); } va = kmem_alloc_nofault(kernel_map, size); if (!va) panic("moea_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { moea_kenter_attr(mmu, tmpva, ppa, ma); tlbie(tmpva); size -= PAGE_SIZE; tmpva += PAGE_SIZE; ppa += PAGE_SIZE; } return ((void *)(va + offset)); } void moea_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; /* * If this is outside kernel virtual space, then it's a * battable entry and doesn't require unmapping */ if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= virtual_end)) { base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); kmem_free(kernel_map, base, size); } } static void moea_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; vm_offset_t lim; vm_paddr_t pa; vm_size_t len; PMAP_LOCK(pm); while (sz > 0) { lim = round_page(va); len = MIN(lim - va, sz); pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); if (pvo != NULL) { pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); moea_syncicache(pa, len); } va += len; sz -= len; } PMAP_UNLOCK(pm); } diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 00dab9bd883b..0d77b576cffd 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1,2526 +1,2525 @@ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is also stored by the * logical address mapping module, this module may throw away valid virtual * to physical mappings at almost any time. However, invalidations of * mappings must be done as requested. * * In order to cope with hardware architectures which make virtual to * physical map invalidates expensive, this module may delay invalidate * reduced protection operations until such time as they are actually * necessary. 
This module is given full information as to which processors * are currently using which maps, and to when physical maps must be made * correct. */ #include "opt_compat.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" void moea64_release_vsid(uint64_t vsid); uintptr_t moea64_get_unique_vsid(void); #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) #define VSID_HASH_MASK 0x0000007fffffffffULL /* * Locking semantics: * -- Read lock: if no modifications are being made to either the PVO lists * or page table or if any modifications being made result in internal * changes (e.g. wiring, protection) such that the existence of the PVOs * is unchanged and they remain associated with the same pmap (in which * case the changes should be protected by the pmap lock) * -- Write lock: required if PTEs/PVOs are being inserted or removed. */ #define LOCK_TABLE_RD() rw_rlock(&moea64_table_lock) #define UNLOCK_TABLE_RD() rw_runlock(&moea64_table_lock) #define LOCK_TABLE_WR() rw_wlock(&moea64_table_lock) #define UNLOCK_TABLE_WR() rw_wunlock(&moea64_table_lock) struct ofw_map { cell_t om_va; cell_t om_len; cell_t om_pa_hi; cell_t om_pa_lo; cell_t om_mode; }; /* * Map of physical memory regions. */ static struct mem_region *regions; static struct mem_region *pregions; static u_int phys_avail_count; static int regions_sz, pregions_sz; extern void bs_remap_earlyboot(void); /* * Lock for the pteg and pvo tables. */ struct rwlock moea64_table_lock; struct mtx moea64_slb_mutex; /* * PTEG data. */ u_int moea64_pteg_count; u_int moea64_pteg_mask; /* * PVO data. */ struct pvo_head *moea64_pvo_table; /* pvo entries by pteg index */ uma_zone_t moea64_upvo_zone; /* zone for pvo entries for unmanaged pages */ uma_zone_t moea64_mpvo_zone; /* zone for pvo entries for managed pages */ #define BPVO_POOL_SIZE 327680 static struct pvo_entry *moea64_bpvo_pool; static int moea64_bpvo_pool_index = 0; #define VSID_NBPW (sizeof(u_int32_t) * 8) #ifdef __powerpc64__ #define NVSIDS (NPMAPS * 16) #define VSID_HASHMASK 0xffffffffUL #else #define NVSIDS NPMAPS #define VSID_HASHMASK 0xfffffUL #endif static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; static boolean_t moea64_initialized = FALSE; /* * Statistics. 
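 *
 * These counters are exported read-only under the machdep sysctl tree by
 * the SYSCTL_INT() lines that follow.  A minimal userland sketch for
 * watching PVO churn (a hypothetical monitoring snippet, not part of the
 * kernel; the OID name comes from the SYSCTL_INT declarations below):
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		u_int pvos;
 *		size_t len = sizeof(pvos);
 *
 *		// machdep.moea64_pvo_entries counts live PVO entries.
 *		if (sysctlbyname("machdep.moea64_pvo_entries", &pvos, &len,
 *		    NULL, 0) == 0)
 *			printf("active PVOs: %u\n", pvos);
 *		return (0);
 *	}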
*/ u_int moea64_pte_valid = 0; u_int moea64_pte_overflow = 0; u_int moea64_pvo_entries = 0; u_int moea64_pvo_enter_calls = 0; u_int moea64_pvo_remove_calls = 0; SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, &moea64_pte_valid, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, &moea64_pte_overflow, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, &moea64_pvo_entries, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, &moea64_pvo_enter_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, &moea64_pvo_remove_calls, 0, ""); vm_offset_t moea64_scratchpage_va[2]; struct pvo_entry *moea64_scratchpage_pvo[2]; uintptr_t moea64_scratchpage_pte[2]; struct mtx moea64_scratchpage_mtx; uint64_t moea64_large_page_mask = 0; int moea64_large_page_size = 0; int moea64_large_page_shift = 0; /* * PVO calls. */ static int moea64_pvo_enter(mmu_t, pmap_t, uma_zone_t, struct pvo_head *, vm_offset_t, vm_offset_t, uint64_t, int); static void moea64_pvo_remove(mmu_t, struct pvo_entry *); static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); /* * Utility routines. */ static boolean_t moea64_query_bit(mmu_t, vm_page_t, u_int64_t); static u_int moea64_clear_bit(mmu_t, vm_page_t, u_int64_t); static void moea64_kremove(mmu_t, vm_offset_t); static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz); /* * Kernel MMU interface */ void moea64_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); void moea64_clear_modify(mmu_t, vm_page_t); void moea64_clear_reference(mmu_t, vm_page_t); void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); void moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea64_init(mmu_t); boolean_t moea64_is_modified(mmu_t, vm_page_t); boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); boolean_t moea64_is_referenced(mmu_t, vm_page_t); int moea64_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); int moea64_page_wired_mappings(mmu_t, vm_page_t); void moea64_pinit(mmu_t, pmap_t); void moea64_pinit0(mmu_t, pmap_t); void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); void moea64_qremove(mmu_t, vm_offset_t, int); void moea64_release(mmu_t, pmap_t); void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_remove_pages(mmu_t, pmap_t); void moea64_remove_all(mmu_t, vm_page_t); void moea64_remove_write(mmu_t, vm_page_t); void moea64_zero_page(mmu_t, vm_page_t); void moea64_zero_page_area(mmu_t, vm_page_t, int, int); void moea64_zero_page_idle(mmu_t, vm_page_t); void moea64_activate(mmu_t, struct thread *); void moea64_deactivate(mmu_t, struct thread *); void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t); void *moea64_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t); void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); vm_paddr_t moea64_kextract(mmu_t, vm_offset_t); void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); void moea64_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t ma); void 
moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t); boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_change_wiring, moea64_change_wiring), MMUMETHOD(mmu_clear_modify, moea64_clear_modify), MMUMETHOD(mmu_clear_reference, moea64_clear_reference), MMUMETHOD(mmu_copy_page, moea64_copy_page), MMUMETHOD(mmu_enter, moea64_enter), MMUMETHOD(mmu_enter_object, moea64_enter_object), MMUMETHOD(mmu_enter_quick, moea64_enter_quick), MMUMETHOD(mmu_extract, moea64_extract), MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), MMUMETHOD(mmu_init, moea64_init), MMUMETHOD(mmu_is_modified, moea64_is_modified), MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), MMUMETHOD(mmu_is_referenced, moea64_is_referenced), MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), MMUMETHOD(mmu_map, moea64_map), MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), MMUMETHOD(mmu_pinit, moea64_pinit), MMUMETHOD(mmu_pinit0, moea64_pinit0), MMUMETHOD(mmu_protect, moea64_protect), MMUMETHOD(mmu_qenter, moea64_qenter), MMUMETHOD(mmu_qremove, moea64_qremove), MMUMETHOD(mmu_release, moea64_release), MMUMETHOD(mmu_remove, moea64_remove), MMUMETHOD(mmu_remove_pages, moea64_remove_pages), MMUMETHOD(mmu_remove_all, moea64_remove_all), MMUMETHOD(mmu_remove_write, moea64_remove_write), MMUMETHOD(mmu_sync_icache, moea64_sync_icache), MMUMETHOD(mmu_zero_page, moea64_zero_page), MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), MMUMETHOD(mmu_zero_page_idle, moea64_zero_page_idle), MMUMETHOD(mmu_activate, moea64_activate), MMUMETHOD(mmu_deactivate, moea64_deactivate), MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), /* Internal interfaces */ MMUMETHOD(mmu_mapdev, moea64_mapdev), MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), MMUMETHOD(mmu_unmapdev, moea64_unmapdev), MMUMETHOD(mmu_kextract, moea64_kextract), MMUMETHOD(mmu_kenter, moea64_kenter), MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), { 0, 0 } }; MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); static __inline u_int va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) { uint64_t hash; int shift; shift = large ? moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> shift); return (hash & moea64_pteg_mask); } static __inline struct pvo_head * vm_page_to_pvoh(vm_page_t m) { return (&m->md.mdpg_pvoh); } static __inline void moea64_pte_create(struct lpte *pt, uint64_t vsid, vm_offset_t va, uint64_t pte_lo, int flags) { /* * Construct a PTE. Default to IMB initially. Valid bit only gets * set when the real pte is set in memory. * * Note: Don't set the valid bit for correct operation of tlb update. */ pt->pte_hi = (vsid << LPTE_VSID_SHIFT) | (((uint64_t)(va & ADDR_PIDX) >> ADDR_API_SHFT64) & LPTE_API); if (flags & PVO_LARGE) pt->pte_hi |= LPTE_BIG; pt->pte_lo = pte_lo; } static __inline uint64_t moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma) { uint64_t pte_lo; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (LPTE_I | LPTE_G); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (LPTE_I); case VM_MEMATTR_WRITE_THROUGH: return (LPTE_W | LPTE_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. 
*/ pte_lo = LPTE_I | LPTE_G; for (i = 0; i < pregions_sz; i++) { if ((pa >= pregions[i].mr_start) && (pa < (pregions[i].mr_start + pregions[i].mr_size))) { pte_lo &= ~(LPTE_I | LPTE_G); pte_lo |= LPTE_M; break; } } return pte_lo; } /* * Quick sort callout for comparing memory regions. */ static int om_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; const struct ofw_map *mapb; mapa = a; mapb = b; if (mapa->om_pa_hi < mapb->om_pa_hi) return (-1); else if (mapa->om_pa_hi > mapb->om_pa_hi) return (1); else if (mapa->om_pa_lo < mapb->om_pa_lo) return (-1); else if (mapa->om_pa_lo > mapb->om_pa_lo) return (1); else return (0); } static void moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) { struct ofw_map translations[sz/sizeof(struct ofw_map)]; register_t msr; vm_offset_t off; vm_paddr_t pa_base; int i; bzero(translations, sz); if (OF_getprop(mmu, "translations", translations, sz) == -1) panic("moea64_bootstrap: can't get ofw translations"); CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); sz /= sizeof(*translations); qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x", (uint32_t)(translations[i].om_pa_lo), translations[i].om_va, translations[i].om_len); if (translations[i].om_pa_lo % PAGE_SIZE) panic("OFW translation not page-aligned!"); pa_base = translations[i].om_pa_lo; #ifdef __powerpc64__ pa_base += (vm_offset_t)translations[i].om_pa_hi << 32; #else if (translations[i].om_pa_hi) panic("OFW translations above 32-bit boundary!"); #endif /* Now enter the pages for this mapping */ DISABLE_TRANS(msr); for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { if (moea64_pvo_find_va(kernel_pmap, translations[i].om_va + off) != NULL) continue; moea64_kenter(mmup, translations[i].om_va + off, pa_base + off); } ENABLE_TRANS(msr); } } #ifdef __powerpc64__ static void moea64_probe_large_page(void) { uint16_t pvr = mfpvr() >> 16; switch (pvr) { case IBM970: case IBM970FX: case IBM970MP: powerpc_sync(); isync(); mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); powerpc_sync(); isync(); /* FALLTHROUGH */ case IBMCELLBE: moea64_large_page_size = 0x1000000; /* 16 MB */ moea64_large_page_shift = 24; break; default: moea64_large_page_size = 0; } moea64_large_page_mask = moea64_large_page_size - 1; } static void moea64_bootstrap_slb_prefault(vm_offset_t va, int large) { struct slb *cache; struct slb entry; uint64_t esid, slbe; uint64_t i; cache = PCPU_GET(slb); esid = va >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; for (i = 0; i < 64; i++) { if (cache[i].slbe == (slbe | i)) return; } entry.slbe = slbe; entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; if (large) entry.slbv |= SLBV_L; slb_insert_kernel(entry.slbe, entry.slbv); } #endif static void moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { register_t msr; vm_paddr_t pa; vm_offset_t size, off; uint64_t pte_lo; int i; if (moea64_large_page_size == 0) hw_direct_map = 0; DISABLE_TRANS(msr); if (hw_direct_map) { LOCK_TABLE_WR(); PMAP_LOCK(kernel_pmap); for (i = 0; i < pregions_sz; i++) { for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + pregions[i].mr_size; pa += moea64_large_page_size) { pte_lo = LPTE_M; /* * Set memory access as guarded if prefetch within * the page could exit the available physmem area. 
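 *
 * A worked example with assumed numbers: for a 120MB region starting at 0
 * and 16MB (0x1000000) large pages, the loop maps pages at 0, 0x1000000,
 * ..., 0x7000000.  The last page ends at 0x7ffffff while the region ends
 * at 0x7800000, so pa + moea64_large_page_size runs past
 * mr_start + mr_size and that page gets LPTE_G.  Guarding it keeps
 * speculative loads issued through the mapping from touching addresses
 * beyond the end of RAM.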
*/ if (pa & moea64_large_page_mask) { pa &= moea64_large_page_mask; pte_lo |= LPTE_G; } if (pa + moea64_large_page_size > pregions[i].mr_start + pregions[i].mr_size) pte_lo |= LPTE_G; moea64_pvo_enter(mmup, kernel_pmap, moea64_upvo_zone, NULL, pa, pa, pte_lo, PVO_WIRED | PVO_LARGE); } } PMAP_UNLOCK(kernel_pmap); UNLOCK_TABLE_WR(); } else { size = sizeof(struct pvo_head) * moea64_pteg_count; off = (vm_offset_t)(moea64_pvo_table); for (pa = off; pa < off + size; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); size = BPVO_POOL_SIZE*sizeof(struct pvo_entry); off = (vm_offset_t)(moea64_bpvo_pool); for (pa = off; pa < off + size; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); /* * Map certain important things, like ourselves. * * NOTE: We do not map the exception vector space. That code is * used only in real mode, and leaving it unmapped allows us to * catch NULL pointer deferences, instead of making NULL a valid * address. */ for (pa = kernelstart & ~PAGE_MASK; pa < kernelend; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); } ENABLE_TRANS(msr); } void moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i, j; vm_size_t physsz, hwphyssz; #ifndef __powerpc64__ /* We don't have a direct map since there is no BAT */ hw_direct_map = 0; /* Make sure battable is zero, since we have no BAT */ for (i = 0; i < 16; i++) { battable[i].batu = 0; battable[i].batl = 0; } #else moea64_probe_large_page(); /* Use a direct map if we have large page support */ if (moea64_large_page_size > 0) hw_direct_map = 1; else hw_direct_map = 0; #endif /* Get physical memory regions from firmware */ mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea64_bootstrap: phys_avail too small"); phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); for (i = 0, j = 0; i < regions_sz; i++, j += 2) { CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start, regions[i].mr_start + regions[i].mr_size, regions[i].mr_size); if (hwphyssz != 0 && (physsz + regions[i].mr_size) >= hwphyssz) { if (physsz < hwphyssz) { phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; } break; } phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; phys_avail_count++; physsz += regions[i].mr_size; } /* Check for overlap with the kernel and exception vectors */ for (j = 0; j < 2*phys_avail_count; j+=2) { if (phys_avail[j] < EXC_LAST) phys_avail[j] += EXC_LAST; if (kernelstart >= phys_avail[j] && kernelstart < phys_avail[j+1]) { if (kernelend < phys_avail[j+1]) { phys_avail[2*phys_avail_count] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; phys_avail[2*phys_avail_count + 1] = phys_avail[j+1]; phys_avail_count++; } phys_avail[j+1] = kernelstart & ~PAGE_MASK; } if (kernelend >= phys_avail[j] && kernelend < phys_avail[j+1]) { if (kernelstart > phys_avail[j]) { phys_avail[2*phys_avail_count] = phys_avail[j]; phys_avail[2*phys_avail_count + 1] = kernelstart & ~PAGE_MASK; phys_avail_count++; } phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; } } physmem = btoc(physsz); #ifdef PTEGCOUNT moea64_pteg_count = PTEGCOUNT; #else moea64_pteg_count = 0x1000; while (moea64_pteg_count < physmem) moea64_pteg_count <<= 1; moea64_pteg_count >>= 1; #endif /* PTEGCOUNT */ } void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t 
kernelend) { vm_size_t size; register_t msr; int i; /* * Set PTEG mask */ moea64_pteg_mask = moea64_pteg_count - 1; /* * Allocate pv/overflow lists. */ size = sizeof(struct pvo_head) * moea64_pteg_count; moea64_pvo_table = (struct pvo_head *)moea64_bootstrap_alloc(size, PAGE_SIZE); CTR1(KTR_PMAP, "moea64_bootstrap: PVO table at %p", moea64_pvo_table); DISABLE_TRANS(msr); for (i = 0; i < moea64_pteg_count; i++) LIST_INIT(&moea64_pvo_table[i]); ENABLE_TRANS(msr); /* * Initialize the lock that synchronizes access to the pteg and pvo * tables. */ rw_init_flags(&moea64_table_lock, "pmap tables", RW_RECURSE); mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); /* * Initialise the unmanaged pvo pool. */ moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0); moea64_bpvo_pool_index = 0; /* * Make sure kernel vsid is allocated as well as VSID 0. */ #ifndef __powerpc64__ moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); moea64_vsid_bitmap[0] |= 1; #endif /* * Initialize the kernel pmap (which is statically allocated). */ #ifdef __powerpc64__ for (i = 0; i < 64; i++) { pcpup->pc_slb[i].slbv = 0; pcpup->pc_slb[i].slbe = 0; } #else for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; #endif kernel_pmap->pmap_phys = kernel_pmap; CPU_FILL(&kernel_pmap->pm_active); RB_INIT(&kernel_pmap->pmap_pvo); PMAP_LOCK_INIT(kernel_pmap); /* * Now map in all the other buffers we allocated earlier */ moea64_setup_direct_map(mmup, kernelstart, kernelend); } void moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { ihandle_t mmui; phandle_t chosen; phandle_t mmu; size_t sz; int i; vm_offset_t pa, va; void *dpcpu; /* * Set up the Open Firmware pmap and add its mappings if not in real * mode. */ chosen = OF_finddevice("/chosen"); if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1) { mmu = OF_instance_to_package(mmui); if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1) sz = 0; if (sz > 6144 /* tmpstksz - 2 KB headroom */) panic("moea64_bootstrap: too many ofw translations"); if (sz > 0) moea64_add_ofw_mappings(mmup, mmu, sz); } /* * Calculate the last available physical address. */ for (i = 0; phys_avail[i + 2] != 0; i += 2) ; Maxmem = powerpc_btop(phys_avail[i + 1]); /* * Initialize MMU and remap early physical mappings */ MMU_CPU_BOOTSTRAP(mmup,0); mtmsr(mfmsr() | PSL_DR | PSL_IR); pmap_bootstrapped++; bs_remap_earlyboot(); /* * Set the start and end of kva. */ virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; /* * Map the entire KVA range into the SLB. We must not fault there. */ #ifdef __powerpc64__ for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) moea64_bootstrap_slb_prefault(va, 0); #endif /* * Figure out how far we can extend virtual_end into segment 16 * without running into existing mappings. Segment 16 is guaranteed * to contain neither RAM nor devices (at least on Apple hardware), * but will generally contain some OFW mappings we should not * step on. */ #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ PMAP_LOCK(kernel_pmap); while (virtual_end < VM_MAX_KERNEL_ADDRESS && moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) virtual_end += PAGE_SIZE; PMAP_UNLOCK(kernel_pmap); #endif /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. 
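 *
 * Layout sketch with illustrative values (KSTACK_GUARD_PAGES = 1,
 * KSTACK_PAGES = 4, 4KB pages; the real constants come from the kernel
 * configuration): if virtual_avail is V on entry, the guard page
 * [V, V + 0x1000) is left unmapped, td_kstack becomes V + 0x1000, the four
 * stack pages are kentered at V + 0x1000 .. V + 0x4fff, and virtual_avail
 * advances to V + 0x5000.  A stack overflow therefore faults on the
 * unmapped guard page instead of silently corrupting whatever sits below.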
*/ pa = moea64_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); thread0.td_kstack = va; thread0.td_kstack_pages = KSTACK_PAGES; for (i = 0; i < KSTACK_PAGES; i++) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the message buffer. */ pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); msgbufp = (struct msgbuf *)virtual_avail; va = virtual_avail; virtual_avail += round_page(msgbufsize); while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the dynamic percpu area. */ pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); dpcpu = (void *)virtual_avail; va = virtual_avail; virtual_avail += DPCPU_SIZE; while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } dpcpu_init(dpcpu, 0); /* * Allocate some things for page zeroing. We put this directly * in the page table, marked with LPTE_LOCKED, to avoid any * of the PVO book-keeping or other parts of the VM system * from even knowing that this hack exists. */ if (!hw_direct_map) { mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, MTX_DEF); for (i = 0; i < 2; i++) { moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; virtual_end -= PAGE_SIZE; moea64_kenter(mmup, moea64_scratchpage_va[i], 0); moea64_scratchpage_pvo[i] = moea64_pvo_find_va( kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); LOCK_TABLE_RD(); moea64_scratchpage_pte[i] = MOEA64_PVO_TO_PTE( mmup, moea64_scratchpage_pvo[i]); moea64_scratchpage_pvo[i]->pvo_pte.lpte.pte_hi |= LPTE_LOCKED; MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[i], &moea64_scratchpage_pvo[i]->pvo_pte.lpte, moea64_scratchpage_pvo[i]->pvo_vpn); UNLOCK_TABLE_RD(); } } } /* * Activate a user pmap. The pmap must be activated before its address * space can be accessed in any way. */ void moea64_activate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_SET(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(userslb, pm->pm_slb); #else PCPU_SET(curpmap, pm->pmap_phys); #endif } void moea64_deactivate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(userslb, NULL); #else PCPU_SET(curpmap, NULL); #endif } void moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) { struct pvo_entry *pvo; uintptr_t pt; uint64_t vsid; int i, ptegidx; LOCK_TABLE_WR(); PMAP_LOCK(pm); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL) { pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (wired) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) pm->pm_stats.wired_count++; pvo->pvo_vaddr |= PVO_WIRED; pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED; } else { if ((pvo->pvo_vaddr & PVO_WIRED) != 0) pm->pm_stats.wired_count--; pvo->pvo_vaddr &= ~PVO_WIRED; pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; } if (pt != -1) { /* Update wiring flag in page table. */ MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); } else if (wired) { /* * If we are wiring the page, and it wasn't in the * page table before, add it. 
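 *
 * The group index used below comes from va_to_pteg():
 *
 *	ptegidx = ((vsid & VSID_HASH_MASK) ^
 *	    ((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)) & moea64_pteg_mask;
 *
 * (for 4KB mappings; large pages shift by moea64_large_page_shift instead).
 * MOEA64_PTE_INSERT() then tries to place the PTE in that group, with the
 * same primary-then-secondary probing the 32-bit moea_pte_insert() in
 * mmu_oea.c shows.  If the insert fails, the mapping simply stays cached in
 * the PVO: only a successful slot (i >= 0) is recorded with
 * PVO_PTEGIDX_SET() below.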
*/ vsid = PVO_VSID(pvo); ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) { PVO_PTEGIDX_CLR(pvo); PVO_PTEGIDX_SET(pvo, i); } } } UNLOCK_TABLE_WR(); PMAP_UNLOCK(pm); } /* * This goes through and sets the physical address of our * special scratch PTE to the PA we want to zero or copy. Because * of locking issues (this can get called in pvo_enter() by * the UMA allocator), we can't use most other utility functions here */ static __inline void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_offset_t pa) { KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo &= ~(LPTE_WIMG | LPTE_RPGN); moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo |= moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[which], &moea64_scratchpage_pvo[which]->pvo_pte.lpte, moea64_scratchpage_pvo[which]->pvo_vpn); isync(); } void moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) { vm_offset_t dst; vm_offset_t src; dst = VM_PAGE_TO_PHYS(mdst); src = VM_PAGE_TO_PHYS(msrc); if (hw_direct_map) { bcopy((void *)src, (void *)dst, PAGE_SIZE); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, src); moea64_set_scratchpage_pa(mmu, 1, dst); bcopy((void *)moea64_scratchpage_va[0], (void *)moea64_scratchpage_va[1], PAGE_SIZE); mtx_unlock(&moea64_scratchpage_mtx); } } void moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); if (size + off > PAGE_SIZE) panic("moea64_zero_page: size + off > PAGE_SIZE"); if (hw_direct_map) { bzero((caddr_t)pa + off, size); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); bzero((caddr_t)moea64_scratchpage_va[0] + off, size); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Zero a page of physical memory by temporarily mapping it */ void moea64_zero_page(mmu_t mmu, vm_page_t m) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); vm_offset_t va, off; if (!hw_direct_map) { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); va = moea64_scratchpage_va[0]; } else { va = pa; } for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); if (!hw_direct_map) mtx_unlock(&moea64_scratchpage_mtx); } void moea64_zero_page_idle(mmu_t mmu, vm_page_t m) { moea64_zero_page(mmu, m); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. 
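 *
 * A hedged usage sketch using the signature in this file (va, m and the
 * process pointer are placeholders supplied by the caller):
 *
 *	// Resident text page for a user process, not wired:
 *	moea64_enter(mmu, &p->p_vmspace->vm_pmap, va, m,
 *	    VM_PROT_READ | VM_PROT_EXECUTE, FALSE);
 *
 *	// Kernel wiring a page so the mapping is never dropped:
 *	moea64_enter(mmu, kernel_pmap, va, m,
 *	    VM_PROT_READ | VM_PROT_WRITE, TRUE);
 *
 * The read/execute case also triggers the icache flush at the bottom of the
 * function the first time such a mapping is created for the page.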
*/ void moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { struct pvo_head *pvo_head; uma_zone_t zone; vm_page_t pg; uint64_t pte_lo; u_int pvo_flags; int error; if (!moea64_initialized) { pvo_head = NULL; pg = NULL; zone = moea64_upvo_zone; pvo_flags = 0; } else { pvo_head = vm_page_to_pvoh(m); pg = m; zone = moea64_mpvo_zone; pvo_flags = PVO_MANAGED; } KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || VM_OBJECT_LOCKED(m->object), ("moea64_enter: page %p is not busy", m)); /* XXX change the pvo head for fake pages */ if ((m->oflags & VPO_UNMANAGED) != 0) { pvo_flags &= ~PVO_MANAGED; pvo_head = NULL; zone = moea64_upvo_zone; } pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); if (prot & VM_PROT_WRITE) { pte_lo |= LPTE_BW; if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); } else pte_lo |= LPTE_BR; if ((prot & VM_PROT_EXECUTE) == 0) pte_lo |= LPTE_NOEXEC; if (wired) pvo_flags |= PVO_WIRED; LOCK_TABLE_WR(); PMAP_LOCK(pmap); error = moea64_pvo_enter(mmu, pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m), pte_lo, pvo_flags); PMAP_UNLOCK(pmap); UNLOCK_TABLE_WR(); /* * Flush the page from the instruction cache if this page is * mapped executable and cacheable. */ if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) && (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { vm_page_aflag_set(m, PGA_EXECUTABLE); moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); } } static void moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz) { /* * This is much trickier than on older systems because * we can't sync the icache on physical addresses directly * without a direct map. Instead we check a couple of cases * where the memory is already mapped in and, failing that, * use the same trick we use for page zeroing to create * a temporary mapping for this physical address. */ if (!pmap_bootstrapped) { /* * If PMAP is not bootstrapped, we are likely to be * in real mode. */ __syncicache((void *)pa, sz); } else if (pmap == kernel_pmap) { __syncicache((void *)va, sz); } else if (hw_direct_map) { __syncicache((void *)pa, sz); } else { /* Use the scratch page to set up a temp mapping */ mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); __syncicache((void *)(moea64_scratchpage_va[1] + (va & ADDR_POFF)), sz); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. 
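 *
 * A small worked example (illustrative numbers): with start = 0x20000000,
 * m_start->pindex = 8 and 4KB pages, a resident page whose pindex is 10 has
 * diff = 2 and is entered at 0x20000000 + ptoa(2) = 0x20002000, while pages
 * of the object that are not resident are simply skipped rather than
 * faulted in.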
*/ void moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; psize = atop(end - start); m = m_start; while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea64_enter(mmu, pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } } void moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); } vm_paddr_t moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; PMAP_LOCK(pm); pvo = moea64_pvo_find_va(pm, va); if (pvo == NULL) pa = 0; else pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(pm); return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. */ vm_page_t moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; vm_paddr_t pa; m = NULL; pa = 0; PMAP_LOCK(pmap); retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } static mmu_t installed_mmu; static void * moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { /* * This entire routine is a horrible hack to avoid bothering kmem * for new KVA addresses. Because this can get called from inside * kmem allocation routines, calling kmem for a new address here * can lead to multiply locking non-recursive mutexes. 
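 *
 * Instead, the allocator below returns the page's physical address as the
 * "virtual" address and wires a 1:1 mapping for it, so no KVA has to be
 * reserved at all.  Hooked up in moea64_init() via uma_zone_set_allocf(), a
 * page-sized zone allocation then behaves roughly like this hedged sketch:
 *
 *	// pvo = uma_zalloc(moea64_upvo_zone, M_NOWAIT) ends up calling
 *	// moea64_uma_page_alloc(), which vm_page_alloc()s a wired page,
 *	// maps it at va == VM_PAGE_TO_PHYS(page), and returns that va.
 *
 * Note that the PVO zones are created UMA_ZONE_VM | UMA_ZONE_NOFREE in
 * moea64_init(), so these pages are never returned and no matching free
 * routine is needed.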
*/ vm_offset_t va; vm_page_t m; int pflags, needed_lock; *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; for (;;) { m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); if (m == NULL) { if (wait & M_NOWAIT) return (NULL); VM_WAIT; } else break; } va = VM_PAGE_TO_PHYS(m); LOCK_TABLE_WR(); if (needed_lock) PMAP_LOCK(kernel_pmap); moea64_pvo_enter(installed_mmu, kernel_pmap, moea64_upvo_zone, NULL, va, VM_PAGE_TO_PHYS(m), LPTE_M, PVO_WIRED | PVO_BOOTSTRAP); if (needed_lock) PMAP_UNLOCK(kernel_pmap); UNLOCK_TABLE_WR(); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) bzero((void *)va, PAGE_SIZE); return (void *)va; } extern int elf32_nxstack; void moea64_init(mmu_t mmu) { CTR0(KTR_PMAP, "moea64_init"); moea64_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); moea64_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); if (!hw_direct_map) { installed_mmu = mmu; uma_zone_set_allocf(moea64_upvo_zone,moea64_uma_page_alloc); uma_zone_set_allocf(moea64_mpvo_zone,moea64_uma_page_alloc); } #ifdef COMPAT_FREEBSD32 elf32_nxstack = 1; #endif moea64_initialized = TRUE; } boolean_t moea64_is_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_referenced: page %p is not managed", m)); return (moea64_query_bit(mmu, m, PTE_REF)); } boolean_t moea64_is_modified(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_modified: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have LPTE_CHG set. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (moea64_query_bit(mmu, m, LPTE_CHG)); } boolean_t moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; boolean_t rv; PMAP_LOCK(pmap); pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); rv = pvo == NULL || (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0; PMAP_UNLOCK(pmap); return (rv); } void moea64_clear_reference(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_reference: page %p is not managed", m)); moea64_clear_bit(mmu, m, LPTE_REF); } void moea64_clear_modify(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_modify: page %p is not managed", m)); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); KASSERT((m->oflags & VPO_BUSY) == 0, ("moea64_clear_modify: page %p is busy", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG * set. If the object containing the page is locked and the page is * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; moea64_clear_bit(mmu, m, LPTE_CHG); } /* * Clear the write and modified bits in each of the given page's mappings. */ void moea64_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; uintptr_t pt; pmap_t pmap; uint64_t lo = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_remove_write: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by * another thread while the object is locked. Thus, if PGA_WRITEABLE * is clear, no page table entries need updating. 
*/ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return; powerpc_sync(); LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) { pt = MOEA64_PVO_TO_PTE(mmu, pvo); pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP; pvo->pvo_pte.lpte.pte_lo |= LPTE_BR; if (pt != -1) { MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); lo |= pvo->pvo_pte.lpte.pte_lo; pvo->pvo_pte.lpte.pte_lo &= ~LPTE_CHG; MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); if (pvo->pvo_pmap == kernel_pmap) isync(); } } if ((lo & LPTE_CHG) != 0) vm_page_dirty(m); PMAP_UNLOCK(pmap); } UNLOCK_TABLE_RD(); vm_page_aflag_clear(m, PGA_WRITEABLE); } /* * moea64_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int moea64_ts_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_ts_referenced: page %p is not managed", m)); return (moea64_clear_bit(mmu, m, LPTE_REF)); } /* * Modify the WIMG settings of all mappings for a page. */ void moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; struct pvo_head *pvo_head; uintptr_t pt; pmap_t pmap; uint64_t lo; if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; } pvo_head = vm_page_to_pvoh(m); lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); LOCK_TABLE_RD(); LIST_FOREACH(pvo, pvo_head, pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); pt = MOEA64_PVO_TO_PTE(mmu, pvo); pvo->pvo_pte.lpte.pte_lo &= ~LPTE_WIMG; pvo->pvo_pte.lpte.pte_lo |= lo; if (pt != -1) { MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } UNLOCK_TABLE_RD(); m->md.mdpg_cache_attrs = ma; } /* * Map a wired page into kernel virtual address space. */ void moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) { uint64_t pte_lo; int error; pte_lo = moea64_calc_wimg(pa, ma); LOCK_TABLE_WR(); PMAP_LOCK(kernel_pmap); error = moea64_pvo_enter(mmu, kernel_pmap, moea64_upvo_zone, NULL, va, pa, pte_lo, PVO_WIRED); PMAP_UNLOCK(kernel_pmap); UNLOCK_TABLE_WR(); if (error != 0 && error != ENOENT) panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, pa, error); } void moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } /* * Extract the physical page address associated with the given kernel virtual * address. */ vm_paddr_t moea64_kextract(mmu_t mmu, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; /* * Shortcut the direct-mapped case when applicable. We never put * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS. */ if (va < VM_MIN_KERNEL_ADDRESS) return (va); PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, va); KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, va)); pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(kernel_pmap); return (pa); } /* * Remove a wired page from kernel virtual address space. 
*/ void moea64_kremove(mmu_t mmu, vm_offset_t va) { moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); } /* * Map a range of physical addresses into kernel virtual address space. * * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' * unchanged. We cannot and therefore do not; *virt is updated with the * first usable address after the mapped region. */ vm_offset_t moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva, va; sva = *virt; va = sva; for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea64_kenter(mmu, va, pa_start); *virt = va; return (sva); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { int loops; struct pvo_entry *pvo; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { if (pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } UNLOCK_TABLE_RD(); return (rv); } /* * Return the number of managed mappings to the given physical page * that are wired. */ int moea64_page_wired_mappings(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & PVO_WIRED) != 0) count++; UNLOCK_TABLE_RD(); return (count); } static uintptr_t moea64_vsidcontext; uintptr_t moea64_get_unique_vsid(void) { u_int entropy; register_t hash; uint32_t mask; int i; entropy = 0; __asm __volatile("mftb %0" : "=r"(entropy)); mtx_lock(&moea64_slb_mutex); for (i = 0; i < NVSIDS; i += VSID_NBPW) { u_int n; /* * Create a new value by mutiplying by a prime and adding in * entropy from the timebase register. This is to make the * VSID more random so that the PT hash function collides * less often. (Note that the prime casues gcc to do shifts * instead of a multiply.) */ moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy; hash = moea64_vsidcontext & (NVSIDS - 1); if (hash == 0) /* 0 is special, avoid it */ continue; n = hash >> 5; mask = 1 << (hash & (VSID_NBPW - 1)); hash = (moea64_vsidcontext & VSID_HASHMASK); if (moea64_vsid_bitmap[n] & mask) { /* collision? */ /* anything free in this bucket? 
*/ if (moea64_vsid_bitmap[n] == 0xffffffff) { entropy = (moea64_vsidcontext >> 20); continue; } i = ffs(~moea64_vsid_bitmap[n]) - 1; mask = 1 << i; hash &= VSID_HASHMASK & ~(VSID_NBPW - 1); hash |= i; } KASSERT(!(moea64_vsid_bitmap[n] & mask), ("Allocating in-use VSID %#zx\n", hash)); moea64_vsid_bitmap[n] |= mask; mtx_unlock(&moea64_slb_mutex); return (hash); } mtx_unlock(&moea64_slb_mutex); panic("%s: out of segments",__func__); } #ifdef __powerpc64__ void moea64_pinit(mmu_t mmu, pmap_t pmap) { PMAP_LOCK_INIT(pmap); RB_INIT(&pmap->pmap_pvo); pmap->pm_slb_tree_root = slb_alloc_tree(); pmap->pm_slb = slb_alloc_user_cache(); pmap->pm_slb_len = 0; } #else void moea64_pinit(mmu_t mmu, pmap_t pmap) { int i; uint32_t hash; PMAP_LOCK_INIT(pmap); RB_INIT(&pmap->pmap_pvo); if (pmap_bootstrapped) pmap->pmap_phys = (pmap_t)moea64_kextract(mmu, (vm_offset_t)pmap); else pmap->pmap_phys = pmap; /* * Allocate some segment registers for this pmap. */ hash = moea64_get_unique_vsid(); for (i = 0; i < 16; i++) pmap->pm_sr[i] = VSID_MAKE(i, hash); KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0")); } #endif /* * Initialize the pmap associated with process 0. */ void moea64_pinit0(mmu_t mmu, pmap_t pm) { moea64_pinit(mmu, pm); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); } /* * Set the physical protection on the specified range of this map as requested. */ static void moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) { uintptr_t pt; struct vm_page *pg; uint64_t oldlo; PMAP_LOCK_ASSERT(pm, MA_OWNED); /* * Grab the PTE pointer before we diddle with the cached PTE * copy. */ pt = MOEA64_PVO_TO_PTE(mmu, pvo); /* * Change the protection of the page. */ oldlo = pvo->pvo_pte.lpte.pte_lo; pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP; pvo->pvo_pte.lpte.pte_lo &= ~LPTE_NOEXEC; if ((prot & VM_PROT_EXECUTE) == 0) pvo->pvo_pte.lpte.pte_lo |= LPTE_NOEXEC; if (prot & VM_PROT_WRITE) pvo->pvo_pte.lpte.pte_lo |= LPTE_BW; else pvo->pvo_pte.lpte.pte_lo |= LPTE_BR; pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); /* * If the PVO is in the page table, update that pte as well. */ if (pt != -1) MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) && (pvo->pvo_pte.lpte.pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { if ((pg->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(pg, PGA_EXECUTABLE); moea64_syncicache(mmu, pm, PVO_VADDR(pvo), pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, PAGE_SIZE); } /* * Update vm about the REF/CHG bits if the page is managed and we have * removed write access. 
 */
	if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED &&
	    (oldlo & LPTE_PP) != LPTE_BR && !(prot & VM_PROT_WRITE)) {
		if (pg != NULL) {
			if (pvo->pvo_pte.lpte.pte_lo & LPTE_CHG)
				vm_page_dirty(pg);
			if (pvo->pvo_pte.lpte.pte_lo & LPTE_REF)
				vm_page_aflag_set(pg, PGA_REFERENCED);
		}
	}
}

void
moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
    vm_prot_t prot)
{
	struct	pvo_entry *pvo, *tpvo, key;

	CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
	    sva, eva, prot);

	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
	    ("moea64_protect: non current pmap"));

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		moea64_remove(mmu, pm, sva, eva);
		return;
	}

	LOCK_TABLE_RD();
	PMAP_LOCK(pm);
	key.pvo_vaddr = sva;
	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
	    pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
		tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
		moea64_pvo_protect(mmu, pm, pvo, prot);
	}
	UNLOCK_TABLE_RD();
	PMAP_UNLOCK(pm);
}

/*
 * Map a list of wired pages into kernel virtual address space.  This is
 * intended for temporary mappings which do not need page modification or
 * references recorded.  Existing mappings in the region are overwritten.
 */
void
moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count)
{
	while (count-- > 0) {
		moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m));
		va += PAGE_SIZE;
		m++;
	}
}

/*
 * Remove page mappings from kernel virtual address space.  Intended for
 * temporary mappings entered by moea64_qenter.
 */
void
moea64_qremove(mmu_t mmu, vm_offset_t va, int count)
{
	while (count-- > 0) {
		moea64_kremove(mmu, va);
		va += PAGE_SIZE;
	}
}

void
moea64_release_vsid(uint64_t vsid)
{
	int idx, mask;

	mtx_lock(&moea64_slb_mutex);
	idx = vsid & (NVSIDS-1);
	mask = 1 << (idx % VSID_NBPW);
	idx /= VSID_NBPW;
	KASSERT(moea64_vsid_bitmap[idx] & mask,
	    ("Freeing unallocated VSID %#jx", vsid));
	moea64_vsid_bitmap[idx] &= ~mask;
	mtx_unlock(&moea64_slb_mutex);
}

void
moea64_release(mmu_t mmu, pmap_t pmap)
{

	/*
	 * Free segment registers' VSIDs
	 */
#ifdef __powerpc64__
	slb_free_tree(pmap);
	slb_free_user_cache(pmap->pm_slb);
#else
	KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));

	moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
#endif

	PMAP_LOCK_DESTROY(pmap);
}

/*
 * Remove all pages mapped by the specified pmap
 */
void
moea64_remove_pages(mmu_t mmu, pmap_t pm)
{
	struct	pvo_entry *pvo, *tpvo;

	LOCK_TABLE_WR();
	PMAP_LOCK(pm);
	RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) {
		if (!(pvo->pvo_vaddr & PVO_WIRED))
			moea64_pvo_remove(mmu, pvo);
	}
	UNLOCK_TABLE_WR();
	PMAP_UNLOCK(pm);
}

/*
 * Remove the given range of addresses from the specified map.
 */
void
moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
{
	struct	pvo_entry *pvo, *tpvo, key;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pm->pm_stats.resident_count == 0)
		return;

	LOCK_TABLE_WR();
	PMAP_LOCK(pm);
	key.pvo_vaddr = sva;
	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
	    pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
		tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
		moea64_pvo_remove(mmu, pvo);
	}
	UNLOCK_TABLE_WR();
	PMAP_UNLOCK(pm);
}

/*
 * Remove physical page from all pmaps in which it resides. moea64_pvo_remove()
 * will reflect changes in pte's back to the vm_page.
*/ void moea64_remove_all(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo, *next_pvo; pmap_t pmap; LOCK_TABLE_WR(); LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); moea64_pvo_remove(mmu, pvo); PMAP_UNLOCK(pmap); } UNLOCK_TABLE_WR(); if ((m->aflags & PGA_WRITEABLE) && moea64_is_modified(mmu, m)) vm_page_dirty(m); vm_page_aflag_clear(m, PGA_WRITEABLE); vm_page_aflag_clear(m, PGA_EXECUTABLE); } /* * Allocate a physical page of memory directly from the phys_avail map. * Can only be called from moea64_bootstrap before avail start and end are * calculated. */ vm_offset_t moea64_bootstrap_alloc(vm_size_t size, u_int align) { vm_offset_t s, e; int i, j; size = round_page(size); for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (align != 0) s = (phys_avail[i] + align - 1) & ~(align - 1); else s = phys_avail[i]; e = s + size; if (s < phys_avail[i] || e > phys_avail[i + 1]) continue; if (s + size > platform_real_maxaddr()) continue; if (s == phys_avail[i]) { phys_avail[i] += size; } else if (e == phys_avail[i + 1]) { phys_avail[i + 1] -= size; } else { for (j = phys_avail_count * 2; j > i; j -= 2) { phys_avail[j] = phys_avail[j - 2]; phys_avail[j + 1] = phys_avail[j - 1]; } phys_avail[i + 3] = phys_avail[i + 1]; phys_avail[i + 1] = s; phys_avail[i + 2] = e; phys_avail_count++; } return (s); } panic("moea64_bootstrap_alloc: could not allocate memory"); } static int moea64_pvo_enter(mmu_t mmu, pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, vm_offset_t va, vm_offset_t pa, uint64_t pte_lo, int flags) { struct pvo_entry *pvo; uint64_t vsid; int first; u_int ptegidx; int i; int bootstrap; /* * One nasty thing that can happen here is that the UMA calls to * allocate new PVOs need to map more memory, which calls pvo_enter(), * which calls UMA... * * We break the loop by detecting recursion and allocating out of * the bootstrap pool. */ first = 0; bootstrap = (flags & PVO_BOOTSTRAP); if (!moea64_initialized) bootstrap = 1; PMAP_LOCK_ASSERT(pm, MA_OWNED); rw_assert(&moea64_table_lock, RA_WLOCKED); /* * Compute the PTE Group index. */ va &= ~ADDR_POFF; vsid = va_to_vsid(pm, va); ptegidx = va_to_pteg(vsid, va, flags & PVO_LARGE); /* * Remove any existing mapping for this page. Reuse the pvo entry if * there is a mapping. */ moea64_pvo_enter_calls++; LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) { if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { if ((pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) == pa && (pvo->pvo_pte.lpte.pte_lo & (LPTE_NOEXEC | LPTE_PP)) == (pte_lo & (LPTE_NOEXEC | LPTE_PP))) { if (!(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID)) { /* Re-insert if spilled */ i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) PVO_PTEGIDX_SET(pvo, i); moea64_pte_overflow--; } return (0); } moea64_pvo_remove(mmu, pvo); break; } } /* * If we aren't overwriting a mapping, try to allocate. 
*/ if (bootstrap) { if (moea64_bpvo_pool_index >= BPVO_POOL_SIZE) { panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", moea64_bpvo_pool_index, BPVO_POOL_SIZE, BPVO_POOL_SIZE * sizeof(struct pvo_entry)); } pvo = &moea64_bpvo_pool[moea64_bpvo_pool_index]; moea64_bpvo_pool_index++; bootstrap = 1; } else { pvo = uma_zalloc(zone, M_NOWAIT); } if (pvo == NULL) return (ENOMEM); moea64_pvo_entries++; pvo->pvo_vaddr = va; pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) | (vsid << 16); pvo->pvo_pmap = pm; LIST_INSERT_HEAD(&moea64_pvo_table[ptegidx], pvo, pvo_olink); pvo->pvo_vaddr &= ~ADDR_POFF; if (flags & PVO_WIRED) pvo->pvo_vaddr |= PVO_WIRED; if (pvo_head != NULL) pvo->pvo_vaddr |= PVO_MANAGED; if (bootstrap) pvo->pvo_vaddr |= PVO_BOOTSTRAP; if (flags & PVO_LARGE) pvo->pvo_vaddr |= PVO_LARGE; moea64_pte_create(&pvo->pvo_pte.lpte, vsid, va, (uint64_t)(pa) | pte_lo, flags); /* * Add to pmap list */ RB_INSERT(pvo_tree, &pm->pmap_pvo, pvo); /* * Remember if the list was empty and therefore will be the first * item. */ if (pvo_head != NULL) { if (LIST_FIRST(pvo_head) == NULL) first = 1; LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); } if (pvo->pvo_vaddr & PVO_WIRED) { pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED; pm->pm_stats.wired_count++; } pm->pm_stats.resident_count++; /* * We hope this succeeds but it isn't required. */ i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) { PVO_PTEGIDX_SET(pvo, i); } else { panic("moea64_pvo_enter: overflow"); moea64_pte_overflow++; } if (pm == kernel_pmap) isync(); #ifdef __powerpc64__ /* * Make sure all our bootstrap mappings are in the SLB as soon * as virtual memory is switched on. */ if (!pmap_bootstrapped) moea64_bootstrap_slb_prefault(va, flags & PVO_LARGE); #endif return (first ? ENOENT : 0); } static void moea64_pvo_remove(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; uintptr_t pt; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); rw_assert(&moea64_table_lock, RA_WLOCKED); /* * If there is an active pte entry, we need to deactivate it (and * save the ref & cfg bits). */ pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (pt != -1) { MOEA64_PTE_UNSET(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); PVO_PTEGIDX_CLR(pvo); } else { moea64_pte_overflow--; } /* * Update our statistics. */ pvo->pvo_pmap->pm_stats.resident_count--; if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count--; /* * Remove this PVO from the pmap list. */ RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Remove this from the overflow list and return it to the pool * if we aren't going to reuse it. */ LIST_REMOVE(pvo, pvo_olink); /* * Update vm about the REF/CHG bits if the page is managed. */ pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED && pg != NULL) { LIST_REMOVE(pvo, pvo_vlink); if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) { if (pvo->pvo_pte.lpte.pte_lo & LPTE_CHG) vm_page_dirty(pg); if (pvo->pvo_pte.lpte.pte_lo & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); if (LIST_EMPTY(vm_page_to_pvoh(pg))) vm_page_aflag_clear(pg, PGA_WRITEABLE); } if (LIST_EMPTY(vm_page_to_pvoh(pg))) vm_page_aflag_clear(pg, PGA_EXECUTABLE); } moea64_pvo_entries--; moea64_pvo_remove_calls++; if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) uma_zfree((pvo->pvo_vaddr & PVO_MANAGED) ? 
moea64_mpvo_zone : moea64_upvo_zone, pvo); } static struct pvo_entry * moea64_pvo_find_va(pmap_t pm, vm_offset_t va) { struct pvo_entry key; key.pvo_vaddr = va & ~ADDR_POFF; return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key)); } static boolean_t moea64_query_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { struct pvo_entry *pvo; uintptr_t pt; LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { /* * See if we saved the bit off. If so, return success. */ if (pvo->pvo_pte.lpte.pte_lo & ptebit) { UNLOCK_TABLE_RD(); return (TRUE); } } /* * No luck, now go through the hard part of looking at the PTEs * themselves. Sync so that any pending REF/CHG bits are flushed to * the PTEs. */ powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { /* * See if this pvo has a valid PTE. if so, fetch the * REF/CHG bits from the valid PTE. If the appropriate * ptebit is set, return success. */ PMAP_LOCK(pvo->pvo_pmap); pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (pt != -1) { MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); if (pvo->pvo_pte.lpte.pte_lo & ptebit) { PMAP_UNLOCK(pvo->pvo_pmap); UNLOCK_TABLE_RD(); return (TRUE); } } PMAP_UNLOCK(pvo->pvo_pmap); } UNLOCK_TABLE_RD(); return (FALSE); } static u_int moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { u_int count; struct pvo_entry *pvo; uintptr_t pt; /* * Sync so that any pending REF/CHG bits are flushed to the PTEs (so * we can reset the right ones). note that since the pvo entries and * list heads are accessed via BAT0 and are never placed in the page * table, we don't have to worry about further accesses setting the * REF/CHG bits. */ powerpc_sync(); /* * For each pvo entry, clear the pvo's ptebit. If this pvo has a * valid pte clear the ptebit from the valid pte. */ count = 0; LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { PMAP_LOCK(pvo->pvo_pmap); pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (pt != -1) { MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); if (pvo->pvo_pte.lpte.pte_lo & ptebit) { count++; MOEA64_PTE_CLEAR(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn, ptebit); } } pvo->pvo_pte.lpte.pte_lo &= ~ptebit; PMAP_UNLOCK(pvo->pvo_pmap); } UNLOCK_TABLE_RD(); return (count); } boolean_t moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { struct pvo_entry *pvo, key; vm_offset_t ppa; int error = 0; PMAP_LOCK(kernel_pmap); key.pvo_vaddr = ppa = pa & ~ADDR_POFF; for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key); ppa < pa + size; ppa += PAGE_SIZE, pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) { if (pvo == NULL || (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) != ppa) { error = EFAULT; break; } } PMAP_UNLOCK(kernel_pmap); return (error); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
*/ void * moea64_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma) { vm_offset_t va, tmpva, ppa, offset; ppa = trunc_page(pa); offset = pa & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); va = kmem_alloc_nofault(kernel_map, size); if (!va) panic("moea64_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { moea64_kenter_attr(mmu, tmpva, ppa, ma); size -= PAGE_SIZE; tmpva += PAGE_SIZE; ppa += PAGE_SIZE; } return ((void *)(va + offset)); } void * moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT); } void moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); kmem_free(kernel_map, base, size); } void moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; vm_offset_t lim; vm_paddr_t pa; vm_size_t len; PMAP_LOCK(pm); while (sz > 0) { lim = round_page(va); len = MIN(lim - va, sz); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_pte.lpte.pte_lo & LPTE_I)) { pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va & ADDR_POFF); moea64_syncicache(mmu, pm, va, pa, len); } va += len; sz -= len; } PMAP_UNLOCK(pm); } diff --git a/sys/powerpc/aim/moea64_native.c b/sys/powerpc/aim/moea64_native.c index 3da2f5ad3f9e..de0d4ba250fe 100644 --- a/sys/powerpc/aim/moea64_native.c +++ b/sys/powerpc/aim/moea64_native.c @@ -1,634 +1,633 @@ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Native 64-bit page table operations for running without a hypervisor. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" #define PTESYNC() __asm __volatile("ptesync"); #define TLBSYNC() __asm __volatile("tlbsync; ptesync"); #define SYNC() __asm __volatile("sync"); #define EIEIO() __asm __volatile("eieio"); #define VSID_HASH_MASK 0x0000007fffffffffULL static __inline void TLBIE(uint64_t vpn) { #ifndef __powerpc64__ register_t vpn_hi, vpn_lo; register_t msr; register_t scratch, intr; #endif static volatile u_int tlbie_lock = 0; vpn <<= ADDR_PIDX_SHFT; vpn &= ~(0xffffULL << 48); /* Hobo spinlock: we need stronger guarantees than mutexes provide */ while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); isync(); /* Flush instruction queue once lock acquired */ #ifdef __powerpc64__ __asm __volatile("tlbie %0" :: "r"(vpn) : "memory"); __asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); #else vpn_hi = (uint32_t)(vpn >> 32); vpn_lo = (uint32_t)vpn; intr = intr_disable(); __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%5,1,0; \ mtmsrd %1; isync; \ \ sld %1,%2,%4; \ or %1,%1,%3; \ tlbie %1; \ \ mtmsrd %0; isync; \ eieio; \ tlbsync; \ ptesync;" : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) : "memory"); intr_restore(intr); #endif /* No barriers or special ops -- taken care of by ptesync above */ tlbie_lock = 0; } #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) /* * PTEG data. */ static struct lpteg *moea64_pteg_table; /* * PTE calls. */ static int moea64_pte_insert_native(mmu_t, u_int, struct lpte *); static uintptr_t moea64_pvo_to_pte_native(mmu_t, const struct pvo_entry *); static void moea64_pte_synch_native(mmu_t, uintptr_t pt, struct lpte *pvo_pt); static void moea64_pte_clear_native(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn, uint64_t ptebit); static void moea64_pte_change_native(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn); static void moea64_pte_unset_native(mmu_t mmu, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn); /* * Utility routines. */ static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void moea64_cpu_bootstrap_native(mmu_t, int ap); static void tlbia(void); static mmu_method_t moea64_native_methods[] = { /* Internal interfaces */ MMUMETHOD(mmu_bootstrap, moea64_bootstrap_native), MMUMETHOD(mmu_cpu_bootstrap, moea64_cpu_bootstrap_native), MMUMETHOD(moea64_pte_synch, moea64_pte_synch_native), MMUMETHOD(moea64_pte_clear, moea64_pte_clear_native), MMUMETHOD(moea64_pte_unset, moea64_pte_unset_native), MMUMETHOD(moea64_pte_change, moea64_pte_change_native), MMUMETHOD(moea64_pte_insert, moea64_pte_insert_native), MMUMETHOD(moea64_pvo_to_pte, moea64_pvo_to_pte_native), { 0, 0 } }; MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, 0, oea64_mmu); static __inline u_int va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) { uint64_t hash; int shift; shift = large ? 
moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> shift); return (hash & moea64_pteg_mask); } static void moea64_pte_synch_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt) { struct lpte *pt = (struct lpte *)pt_cookie; pvo_pt->pte_lo |= pt->pte_lo & (LPTE_REF | LPTE_CHG); } static void moea64_pte_clear_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt, uint64_t vpn, uint64_t ptebit) { struct lpte *pt = (struct lpte *)pt_cookie; /* * As shown in Section 7.6.3.2.3 */ pt->pte_lo &= ~ptebit; critical_enter(); TLBIE(vpn); critical_exit(); } static void moea64_pte_set_native(struct lpte *pt, struct lpte *pvo_pt) { pvo_pt->pte_hi |= LPTE_VALID; /* * Update the PTE as defined in section 7.6.3.1. * Note that the REF/CHG bits are from pvo_pt and thus should have * been saved so this routine can restore them (if desired). */ pt->pte_lo = pvo_pt->pte_lo; EIEIO(); pt->pte_hi = pvo_pt->pte_hi; PTESYNC(); /* Keep statistics for unlocked pages */ if (!(pvo_pt->pte_hi & LPTE_LOCKED)) moea64_pte_valid++; } static void moea64_pte_unset_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt, uint64_t vpn) { struct lpte *pt = (struct lpte *)pt_cookie; /* * Invalidate the pte. */ isync(); critical_enter(); pvo_pt->pte_hi &= ~LPTE_VALID; pt->pte_hi &= ~LPTE_VALID; PTESYNC(); TLBIE(vpn); critical_exit(); /* * Save the reg & chg bits. */ moea64_pte_synch_native(mmu, pt_cookie, pvo_pt); /* Keep statistics for unlocked pages */ if (!(pvo_pt->pte_hi & LPTE_LOCKED)) moea64_pte_valid--; } static void moea64_pte_change_native(mmu_t mmu, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn) { /* * Invalidate the PTE */ moea64_pte_unset_native(mmu, pt, pvo_pt, vpn); moea64_pte_set_native((struct lpte *)pt, pvo_pt); } static void moea64_cpu_bootstrap_native(mmu_t mmup, int ap) { int i = 0; #ifdef __powerpc64__ struct slb *slb = PCPU_GET(slb); register_t seg0; #endif /* * Initialize segment registers and MMU */ mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); /* * Install kernel SLB entries */ #ifdef __powerpc64__ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } #else for (i = 0; i < 16; i++) mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); #endif /* * Install page table */ __asm __volatile ("ptesync; mtsdr1 %0; isync" :: "r"((uintptr_t)moea64_pteg_table | (uintptr_t)(flsl(moea64_pteg_mask >> 11)))); tlbia(); } static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { vm_size_t size; vm_offset_t off; vm_paddr_t pa; register_t msr; moea64_early_bootstrap(mmup, kernelstart, kernelend); /* * Allocate PTEG table. */ size = moea64_pteg_count * sizeof(struct lpteg); CTR2(KTR_PMAP, "moea64_bootstrap: %d PTEGs, %d bytes", moea64_pteg_count, size); /* * We now need to allocate memory. This memory, to be allocated, * has to reside in a page table. The page table we are about to * allocate. We don't have BAT. So drop to data real mode for a minute * as a measure of last resort. We do this a couple times. 
*/ moea64_pteg_table = (struct lpteg *)moea64_bootstrap_alloc(size, size); DISABLE_TRANS(msr); bzero((void *)moea64_pteg_table, moea64_pteg_count * sizeof(struct lpteg)); ENABLE_TRANS(msr); CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table); moea64_mid_bootstrap(mmup, kernelstart, kernelend); /* * Add a mapping for the page table itself if there is no direct map. */ if (!hw_direct_map) { size = moea64_pteg_count * sizeof(struct lpteg); off = (vm_offset_t)(moea64_pteg_table); DISABLE_TRANS(msr); for (pa = off; pa < off + size; pa += PAGE_SIZE) pmap_kenter(pa, pa); ENABLE_TRANS(msr); } /* Bring up virtual memory */ moea64_late_bootstrap(mmup, kernelstart, kernelend); } static void tlbia(void) { vm_offset_t i; #ifndef __powerpc64__ register_t msr, scratch; #endif TLBSYNC(); for (i = 0; i < 0xFF000; i += 0x00001000) { #ifdef __powerpc64__ __asm __volatile("tlbiel %0" :: "r"(i)); #else __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%3,1,0; \ mtmsrd %1; \ isync; \ \ tlbiel %2; \ \ mtmsrd %0; \ isync;" : "=r"(msr), "=r"(scratch) : "r"(i), "r"(1)); #endif } EIEIO(); TLBSYNC(); } static uintptr_t moea64_pvo_to_pte_native(mmu_t mmu, const struct pvo_entry *pvo) { struct lpte *pt; int pteidx, ptegidx; uint64_t vsid; /* If the PTEG index is not set, then there is no page table entry */ if (!PVO_PTEGIDX_ISSET(pvo)) return (-1); /* * Calculate the ptegidx */ vsid = PVO_VSID(pvo); ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); /* * We can find the actual pte entry without searching by grabbing * the PTEG index from 3 unused bits in pvo_vaddr and by * noticing the HID bit. */ if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID) ptegidx ^= moea64_pteg_mask; pteidx = (ptegidx << 3) | PVO_PTEGIDX_GET(pvo); if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) { panic("moea64_pvo_to_pte: pvo %p has valid pte in pvo but no " "valid pte index", pvo); } if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) { panic("moea64_pvo_to_pte: pvo %p has valid pte index in pvo " "pvo but no valid pte", pvo); } pt = &moea64_pteg_table[pteidx >> 3].pt[pteidx & 7]; if ((pt->pte_hi ^ (pvo->pvo_pte.lpte.pte_hi & ~LPTE_VALID)) == LPTE_VALID) { if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0) { panic("moea64_pvo_to_pte: pvo %p has valid pte in " "moea64_pteg_table %p but invalid in pvo", pvo, pt); } if (((pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo) & ~(LPTE_M|LPTE_CHG|LPTE_REF)) != 0) { panic("moea64_pvo_to_pte: pvo %p pte does not match " "pte %p in moea64_pteg_table difference is %#x", pvo, pt, (uint32_t)(pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo)); } return ((uintptr_t)pt); } if (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) { panic("moea64_pvo_to_pte: pvo %p has invalid pte %p in " "moea64_pteg_table but valid in pvo", pvo, pt); } return (-1); } static __inline int moea64_pte_spillable_ident(u_int ptegidx) { struct lpte *pt; int i, j, k; /* Start at a random slot */ i = mftb() % 8; k = -1; for (j = 0; j < 8; j++) { pt = &moea64_pteg_table[ptegidx].pt[(i + j) % 8]; if (pt->pte_hi & (LPTE_LOCKED | LPTE_WIRED)) continue; /* This is a candidate, so remember it */ k = (i + j) % 8; /* Try to get a page that has not been used lately */ if (!(pt->pte_lo & LPTE_REF)) return (k); } return (k); } static int moea64_pte_insert_native(mmu_t mmu, u_int ptegidx, struct lpte *pvo_pt) { struct lpte *pt; struct pvo_entry *pvo; u_int pteg_bktidx; int i; /* * First try primary hash. 
*/ pteg_bktidx = ptegidx; for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) { if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) { pvo_pt->pte_hi &= ~LPTE_HID; moea64_pte_set_native(pt, pvo_pt); return (i); } } /* * Now try secondary hash. */ pteg_bktidx ^= moea64_pteg_mask; for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) { if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) { pvo_pt->pte_hi |= LPTE_HID; moea64_pte_set_native(pt, pvo_pt); return (i); } } /* * Out of luck. Find a PTE to sacrifice. */ pteg_bktidx = ptegidx; i = moea64_pte_spillable_ident(pteg_bktidx); if (i < 0) { pteg_bktidx ^= moea64_pteg_mask; i = moea64_pte_spillable_ident(pteg_bktidx); } if (i < 0) { /* No freeable slots in either PTEG? We're hosed. */ panic("moea64_pte_insert: overflow"); return (-1); } if (pteg_bktidx == ptegidx) pvo_pt->pte_hi &= ~LPTE_HID; else pvo_pt->pte_hi |= LPTE_HID; /* * Synchronize the sacrifice PTE with its PVO, then mark both * invalid. The PVO will be reused when/if the VM system comes * here after a fault. */ pt = &moea64_pteg_table[pteg_bktidx].pt[i]; if (pt->pte_hi & LPTE_HID) pteg_bktidx ^= moea64_pteg_mask; /* PTEs indexed by primary */ LIST_FOREACH(pvo, &moea64_pvo_table[pteg_bktidx], pvo_olink) { if (pvo->pvo_pte.lpte.pte_hi == pt->pte_hi) { KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID, ("Invalid PVO for valid PTE!")); moea64_pte_unset_native(mmu, (uintptr_t)pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); PVO_PTEGIDX_CLR(pvo); moea64_pte_overflow++; break; } } KASSERT(pvo->pvo_pte.lpte.pte_hi == pt->pte_hi, ("Unable to find PVO for spilled PTE")); /* * Set the new PTE. */ moea64_pte_set_native(pt, pvo_pt); return (i); } diff --git a/sys/powerpc/ps3/mmu_ps3.c b/sys/powerpc/ps3/mmu_ps3.c index 5a7fe93af3bc..dfca9a7d7725 100644 --- a/sys/powerpc/ps3/mmu_ps3.c +++ b/sys/powerpc/ps3/mmu_ps3.c @@ -1,311 +1,310 @@ /*- * Copyright (C) 2010 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include "mmu_if.h" #include "moea64_if.h" #include "ps3-hvcall.h" #define VSID_HASH_MASK 0x0000007fffffffffUL #define PTESYNC() __asm __volatile("ptesync") extern int ps3fb_remap(void); static uint64_t mps3_vas_id; /* * Kernel MMU interface */ static void mps3_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void mps3_cpu_bootstrap(mmu_t mmup, int ap); static void mps3_pte_synch(mmu_t, uintptr_t pt, struct lpte *pvo_pt); static void mps3_pte_clear(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn, uint64_t ptebit); static void mps3_pte_unset(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn); static void mps3_pte_change(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn); static int mps3_pte_insert(mmu_t, u_int ptegidx, struct lpte *pvo_pt); static uintptr_t mps3_pvo_to_pte(mmu_t, const struct pvo_entry *pvo); static mmu_method_t mps3_methods[] = { MMUMETHOD(mmu_bootstrap, mps3_bootstrap), MMUMETHOD(mmu_cpu_bootstrap, mps3_cpu_bootstrap), MMUMETHOD(moea64_pte_synch, mps3_pte_synch), MMUMETHOD(moea64_pte_clear, mps3_pte_clear), MMUMETHOD(moea64_pte_unset, mps3_pte_unset), MMUMETHOD(moea64_pte_change, mps3_pte_change), MMUMETHOD(moea64_pte_insert, mps3_pte_insert), MMUMETHOD(moea64_pvo_to_pte, mps3_pvo_to_pte), { 0, 0 } }; MMU_DEF_INHERIT(ps3_mmu, "mmu_ps3", mps3_methods, 0, oea64_mmu); static void mps3_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { uint64_t final_pteg_count; moea64_early_bootstrap(mmup, kernelstart, kernelend); lv1_construct_virtual_address_space( 20 /* log_2(moea64_pteg_count) */, 2 /* n page sizes */, (24UL << 56) | (16UL << 48) /* page sizes 16 MB + 64 KB */, &mps3_vas_id, &final_pteg_count ); moea64_pteg_count = final_pteg_count / sizeof(struct lpteg); moea64_mid_bootstrap(mmup, kernelstart, kernelend); moea64_late_bootstrap(mmup, kernelstart, kernelend); } static void mps3_cpu_bootstrap(mmu_t mmup, int ap) { struct slb *slb = PCPU_GET(slb); register_t seg0; int i; mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); /* * Destroy the loader's address space if we are coming up for * the first time, and redo the FB mapping so we can continue * having a console. 
*/ if (!ap) lv1_destruct_virtual_address_space(0); lv1_select_virtual_address_space(mps3_vas_id); if (!ap) ps3fb_remap(); /* * Install kernel SLB entries */ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } } static void mps3_pte_synch(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt) { uint64_t halfbucket[4], rcbits; PTESYNC(); lv1_read_htab_entries(mps3_vas_id, slot & ~0x3UL, &halfbucket[0], &halfbucket[1], &halfbucket[2], &halfbucket[3], &rcbits); /* * rcbits contains the low 12 bits of each PTEs 2nd part, * spaced at 16-bit intervals */ KASSERT((halfbucket[slot & 0x3] & LPTE_AVPN_MASK) == (pvo_pt->pte_hi & LPTE_AVPN_MASK), ("PTE upper word %#lx != %#lx\n", halfbucket[slot & 0x3], pvo_pt->pte_hi)); pvo_pt->pte_lo |= (rcbits >> ((3 - (slot & 0x3))*16)) & (LPTE_CHG | LPTE_REF); } static void mps3_pte_clear(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn, u_int64_t ptebit) { lv1_write_htab_entry(mps3_vas_id, slot, pvo_pt->pte_hi, pvo_pt->pte_lo & ~ptebit); } static void mps3_pte_unset(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn) { mps3_pte_synch(mmu, slot, pvo_pt); pvo_pt->pte_hi &= ~LPTE_VALID; lv1_write_htab_entry(mps3_vas_id, slot, 0, 0); moea64_pte_valid--; } static void mps3_pte_change(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn) { mps3_pte_synch(mmu, slot, pvo_pt); lv1_write_htab_entry(mps3_vas_id, slot, pvo_pt->pte_hi, pvo_pt->pte_lo); } static int mps3_pte_insert(mmu_t mmu, u_int ptegidx, struct lpte *pvo_pt) { int result; struct lpte evicted; struct pvo_entry *pvo; uint64_t index; pvo_pt->pte_hi |= LPTE_VALID; pvo_pt->pte_hi &= ~LPTE_HID; evicted.pte_hi = 0; PTESYNC(); result = lv1_insert_htab_entry(mps3_vas_id, ptegidx << 3, pvo_pt->pte_hi, pvo_pt->pte_lo, LPTE_LOCKED | LPTE_WIRED, 0, &index, &evicted.pte_hi, &evicted.pte_lo); if (result != 0) { /* No freeable slots in either PTEG? We're hosed. */ panic("mps3_pte_insert: overflow (%d)", result); return (-1); } /* * See where we ended up. */ if (index >> 3 != ptegidx) pvo_pt->pte_hi |= LPTE_HID; moea64_pte_valid++; if (!evicted.pte_hi) return (index & 0x7); /* * Synchronize the sacrifice PTE with its PVO, then mark both * invalid. The PVO will be reused when/if the VM system comes * here after a fault. */ ptegidx = index >> 3; /* Where the sacrifice PTE was found */ if (evicted.pte_hi & LPTE_HID) ptegidx ^= moea64_pteg_mask; /* PTEs indexed by primary */ KASSERT((evicted.pte_hi & (LPTE_WIRED | LPTE_LOCKED)) == 0, ("Evicted a wired PTE")); result = 0; LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) { if (!PVO_PTEGIDX_ISSET(pvo)) continue; if (pvo->pvo_pte.lpte.pte_hi == (evicted.pte_hi | LPTE_VALID)) { KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID, ("Invalid PVO for valid PTE!")); pvo->pvo_pte.lpte.pte_hi &= ~LPTE_VALID; pvo->pvo_pte.lpte.pte_lo |= evicted.pte_lo & (LPTE_REF | LPTE_CHG); PVO_PTEGIDX_CLR(pvo); moea64_pte_valid--; moea64_pte_overflow++; result = 1; break; } } KASSERT(result == 1, ("PVO for sacrifice PTE not found")); return (index & 0x7); } static __inline u_int va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) { uint64_t hash; int shift; shift = large ? 
moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> shift); return (hash & moea64_pteg_mask); } uintptr_t mps3_pvo_to_pte(mmu_t mmu, const struct pvo_entry *pvo) { uint64_t vsid; u_int ptegidx; /* If the PTEG index is not set, then there is no page table entry */ if (!PVO_PTEGIDX_ISSET(pvo)) return (-1); vsid = PVO_VSID(pvo); ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); /* * We can find the actual pte entry without searching by grabbing * the PTEG index from 3 unused bits in pvo_vaddr and by * noticing the HID bit. */ if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID) ptegidx ^= moea64_pteg_mask; return ((ptegidx << 3) | PVO_PTEGIDX_GET(pvo)); } diff --git a/sys/sparc64/sparc64/tsb.c b/sys/sparc64/sparc64/tsb.c index 403043c64036..c38d50e2b6b7 100644 --- a/sys/sparc64/sparc64/tsb.c +++ b/sys/sparc64/sparc64/tsb.c @@ -1,229 +1,228 @@ /*- * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from BSDI: pmap.c,v 1.28.2.15 2000/04/27 03:10:31 cp Exp */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_pmap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include CTASSERT((1 << TTE_SHIFT) == sizeof(struct tte)); CTASSERT(TSB_BUCKET_MASK < (1 << 12)); PMAP_STATS_VAR(tsb_nrepl); PMAP_STATS_VAR(tsb_nlookup_k); PMAP_STATS_VAR(tsb_nlookup_u); PMAP_STATS_VAR(tsb_nenter_k); PMAP_STATS_VAR(tsb_nenter_k_oc); PMAP_STATS_VAR(tsb_nenter_u); PMAP_STATS_VAR(tsb_nenter_u_oc); PMAP_STATS_VAR(tsb_nforeach); struct tte *tsb_kernel; vm_size_t tsb_kernel_mask; vm_size_t tsb_kernel_size; vm_paddr_t tsb_kernel_phys; u_int tsb_kernel_ldd_phys; struct tte * tsb_tte_lookup(pmap_t pm, vm_offset_t va) { struct tte *bucket; struct tte *tp; u_long sz; u_int i; if (pm == kernel_pmap) { PMAP_STATS_INC(tsb_nlookup_k); tp = tsb_kvtotte(va); if (tte_match(tp, va)) return (tp); } else { PMAP_LOCK_ASSERT(pm, MA_OWNED); PMAP_STATS_INC(tsb_nlookup_u); for (sz = TS_MIN; sz <= TS_MAX; sz++) { bucket = tsb_vtobucket(pm, sz, va); for (i = 0; i < TSB_BUCKET_SIZE; i++) { tp = &bucket[i]; if (tte_match(tp, va)) return (tp); } } } return (NULL); } struct tte * tsb_tte_enter(pmap_t pm, vm_page_t m, vm_offset_t va, u_long sz, u_long data) { struct tte *bucket; struct tte *rtp; struct tte *tp; vm_offset_t ova; int b0; int i; if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) { CTR5(KTR_SPARE2, "tsb_tte_enter: off colour va=%#lx pa=%#lx o=%p ot=%d pi=%#lx", va, VM_PAGE_TO_PHYS(m), m->object, m->object ? m->object->type : -1, m->pindex); if (pm == kernel_pmap) PMAP_STATS_INC(tsb_nenter_k_oc); else PMAP_STATS_INC(tsb_nenter_u_oc); } rw_assert(&tte_list_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pm, MA_OWNED); if (pm == kernel_pmap) { PMAP_STATS_INC(tsb_nenter_k); tp = tsb_kvtotte(va); KASSERT((tp->tte_data & TD_V) == 0, ("tsb_tte_enter: replacing valid kernel mapping")); goto enter; } PMAP_STATS_INC(tsb_nenter_u); bucket = tsb_vtobucket(pm, sz, va); tp = NULL; rtp = NULL; b0 = rd(tick) & (TSB_BUCKET_SIZE - 1); i = b0; do { if ((bucket[i].tte_data & TD_V) == 0) { tp = &bucket[i]; break; } if (tp == NULL) { if ((bucket[i].tte_data & TD_REF) == 0) tp = &bucket[i]; else if (rtp == NULL) rtp = &bucket[i]; } } while ((i = (i + 1) & (TSB_BUCKET_SIZE - 1)) != b0); if (tp == NULL) tp = rtp; if ((tp->tte_data & TD_V) != 0) { PMAP_STATS_INC(tsb_nrepl); ova = TTE_GET_VA(tp); pmap_remove_tte(pm, NULL, tp, ova); tlb_page_demap(pm, ova); } enter: if ((m->flags & PG_FICTITIOUS) == 0) { data |= TD_CP; if ((m->oflags & VPO_UNMANAGED) == 0) { pm->pm_stats.resident_count++; data |= TD_PV; } if (pmap_cache_enter(m, va) != 0) data |= TD_CV; TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link); } else data |= TD_FAKE | TD_E; tp->tte_vpn = TV_VPN(va, sz); tp->tte_data = data; return (tp); } /* * Traverse the tsb of a pmap, calling the callback function for any tte entry * that has a virtual address between start and end. If this function returns 0, * tsb_foreach() terminates. * This is used by pmap_remove(), pmap_protect(), and pmap_copy() in the case * that the number of pages in the range given to them reaches the * dimensions of the tsb size as an optimization. 
*/ void tsb_foreach(pmap_t pm1, pmap_t pm2, vm_offset_t start, vm_offset_t end, tsb_callback_t *callback) { vm_offset_t va; struct tte *tp; struct tte *tsbp; uintptr_t i; uintptr_t n; PMAP_STATS_INC(tsb_nforeach); if (pm1 == kernel_pmap) { tsbp = tsb_kernel; n = tsb_kernel_size / sizeof(struct tte); } else { tsbp = pm1->pm_tsb; n = TSB_SIZE; } for (i = 0; i < n; i++) { tp = &tsbp[i]; if ((tp->tte_data & TD_V) != 0) { va = TTE_GET_VA(tp); if (va >= start && va < end) { if (!callback(pm1, pm2, tp, va)) break; } } } }
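
The PTEG indexing shared by the native and PS3 back ends above (va_to_pteg(), and the ptegidx ^= moea64_pteg_mask secondary-hash step in moea64_pte_insert_native() and moea64_pvo_to_pte_native()) is easier to see outside the kernel. The sketch below restates only that arithmetic; the page-index field, shift, and PTEG mask are assumed example values rather than the kernel's, and the code is illustrative, not part of the change.

/*
 * Illustrative sketch (not part of the diff): how a (VSID, VA) pair hashes
 * to a primary PTEG index, and how the secondary PTEG is the primary index
 * XORed with the table mask.  EX_* constants are assumed example values.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_VSID_HASH_MASK	0x0000007fffffffffULL	/* as in the diff */
#define EX_ADDR_PIDX		0x000000000ffff000ULL	/* page-index bits (assumed) */
#define EX_PAGE_SHIFT		12			/* 4 KB pages (assumed) */
#define EX_PTEG_MASK		0xfffff			/* 2^20 PTEGs - 1 (assumed) */

static uint64_t
ex_va_to_pteg(uint64_t vsid, uint64_t va)
{
	uint64_t hash;

	/* Fold the VSID with the virtual page index, as va_to_pteg() does. */
	hash = (vsid & EX_VSID_HASH_MASK) ^ ((va & EX_ADDR_PIDX) >> EX_PAGE_SHIFT);
	return (hash & EX_PTEG_MASK);
}

int
main(void)
{
	uint64_t vsid = 0x123456789ULL, va = 0xdeadb000ULL;
	uint64_t primary, secondary;

	primary = ex_va_to_pteg(vsid, va);
	/* The secondary bucket is the primary index complemented by the mask. */
	secondary = primary ^ EX_PTEG_MASK;
	printf("primary PTEG %#jx, secondary PTEG %#jx\n",
	    (uintmax_t)primary, (uintmax_t)secondary);
	return (0);
}

Because the secondary index is the primary XORed with the full table mask, the two buckets never coincide, which is why the insert path can simply toggle LPTE_HID and retry the other bucket when the first one is full.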
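
moea64_get_unique_vsid() and moea64_release_vsid() above use matching word/bit arithmetic on moea64_vsid_bitmap: the low five bits of the hash pick a bit and the remaining bits pick a 32-bit word. The user-space restatement below covers only that bookkeeping, with NVSIDS assumed small for the example; it is a sketch of the index math, not the kernel allocator (no locking, no entropy mixing, no rehash loop).

/*
 * Illustrative sketch (not part of the diff): the word/bit bookkeeping used
 * by the VSID bitmap.  EX_NVSIDS and EX_VSID_NBPW are assumed values.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_NVSIDS	(1 << 16)	/* assumed number of VSIDs */
#define EX_VSID_NBPW	32		/* bits per bitmap word */

static uint32_t ex_vsid_bitmap[EX_NVSIDS / EX_VSID_NBPW];

/* Mark the VSID selected by 'hash' as allocated; return 0 on collision. */
static int
ex_vsid_alloc(uint32_t hash)
{
	uint32_t n, mask;

	hash &= EX_NVSIDS - 1;
	n = hash >> 5;				/* which 32-bit word */
	mask = 1u << (hash & (EX_VSID_NBPW - 1));	/* which bit within it */
	if (ex_vsid_bitmap[n] & mask)
		return (0);			/* already in use */
	ex_vsid_bitmap[n] |= mask;
	return (1);
}

/* Inverse operation, mirroring moea64_release_vsid(). */
static void
ex_vsid_release(uint32_t vsid)
{
	uint32_t idx, mask;

	idx = vsid & (EX_NVSIDS - 1);
	mask = 1u << (idx % EX_VSID_NBPW);
	idx /= EX_VSID_NBPW;
	ex_vsid_bitmap[idx] &= ~mask;
}

int
main(void)
{
	uint32_t hash = 0x1234;

	printf("alloc %#x: %d (bitmap word %u)\n", hash, ex_vsid_alloc(hash), hash >> 5);
	ex_vsid_release(hash);
	printf("alloc again after release: %d\n", ex_vsid_alloc(hash));
	return (0);
}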
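
moea64_bootstrap_alloc() above satisfies a request in one of three ways: trim the front of a phys_avail region, trim the back, or split the region in two and shuffle the rest of the array upward. The toy version below carves a range out of an invented region table to make the split case concrete; it is a sketch only and omits the real-memory limit check and the panic path.

/*
 * Illustrative sketch (not part of the diff): carving a [s, e) range out of
 * a phys_avail[]-style array of {start, end} pairs.  Region values invented.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_REGIONS	8
static uint64_t ex_avail[2 * EX_REGIONS + 2] = {
	0x00100000, 0x00800000,		/* one free region, 1 MB .. 8 MB */
	0, 0				/* terminator */
};
static int ex_avail_count = 1;

static uint64_t
ex_carve(uint64_t size, uint64_t align)
{
	uint64_t s, e;
	int i, j;

	for (i = 0; ex_avail[i + 1] != 0; i += 2) {
		s = align ? (ex_avail[i] + align - 1) & ~(align - 1) : ex_avail[i];
		e = s + size;
		if (s < ex_avail[i] || e > ex_avail[i + 1])
			continue;
		if (s == ex_avail[i])			/* trim the front */
			ex_avail[i] += size;
		else if (e == ex_avail[i + 1])		/* trim the back */
			ex_avail[i + 1] -= size;
		else {					/* split: shift array up */
			for (j = ex_avail_count * 2; j > i; j -= 2) {
				ex_avail[j] = ex_avail[j - 2];
				ex_avail[j + 1] = ex_avail[j - 1];
			}
			ex_avail[i + 3] = ex_avail[i + 1];
			ex_avail[i + 1] = s;
			ex_avail[i + 2] = e;
			ex_avail_count++;
		}
		return (s);
	}
	return (0);	/* the kernel panics here instead */
}

int
main(void)
{
	/* An aligned allocation in the middle splits the region in two. */
	uint64_t s = ex_carve(0x10000, 0x200000);

	printf("carved %#jx; regions now [%#jx,%#jx) and [%#jx,%#jx)\n",
	    (uintmax_t)s, (uintmax_t)ex_avail[0], (uintmax_t)ex_avail[1],
	    (uintmax_t)ex_avail[2], (uintmax_t)ex_avail[3]);
	return (0);
}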
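
tsb_tte_enter() above picks a bucket slot with a three-level preference: an invalid entry if any, otherwise a valid entry whose TD_REF bit is clear, otherwise the first referenced entry seen, with the scan starting at a pseudo-random offset so evictions spread across the bucket. The sketch below restates just that selection walk over a plain array of flag words; the bucket size and flag values are assumed for illustration and do not come from the sparc64 headers.

/*
 * Illustrative sketch (not part of the diff): the slot-selection policy of
 * tsb_tte_enter() over a plain array of flag words.  Bucket size and flag
 * bits are assumed example values.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_BUCKET_SIZE	4		/* assumed bucket size */
#define EX_TD_V		0x1		/* stand-in for TD_V (valid) */
#define EX_TD_REF	0x2		/* stand-in for TD_REF (referenced) */

static int
ex_pick_slot(const uint64_t bucket[EX_BUCKET_SIZE], int start)
{
	int i, slot, fallback;

	slot = -1;
	fallback = -1;
	i = start & (EX_BUCKET_SIZE - 1);
	do {
		if ((bucket[i] & EX_TD_V) == 0)
			return (i);		/* free slot: use it at once */
		if (slot == -1) {
			if ((bucket[i] & EX_TD_REF) == 0)
				slot = i;	/* valid but not recently used */
			else if (fallback == -1)
				fallback = i;	/* remember something evictable */
		}
		i = (i + 1) & (EX_BUCKET_SIZE - 1);
	} while (i != (start & (EX_BUCKET_SIZE - 1)));

	return (slot != -1 ? slot : fallback);
}

int
main(void)
{
	/* All slots valid; slot 2 has not been referenced recently. */
	uint64_t bucket[EX_BUCKET_SIZE] = {
		EX_TD_V | EX_TD_REF, EX_TD_V | EX_TD_REF,
		EX_TD_V, EX_TD_V | EX_TD_REF
	};

	printf("chosen slot: %d\n", ex_pick_slot(bucket, 1));
	return (0);
}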