Index: head/sys/arm/arm/nexus.c
===================================================================
--- head/sys/arm/arm/nexus.c (revision 298054)
+++ head/sys/arm/arm/nexus.c (revision 298055)
@@ -1,417 +1,417 @@
/*-
 * Copyright 1998 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission. M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose. It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * This code implements a `root nexus' for Arm Architecture
 * machines. The function of the root nexus is to serve as an
 * attachment point for both processors and buses, and to manage
 * resources which are common to all of them. In particular,
 * this code implements the core resource managers for interrupt
 * requests, DMA requests (which rightfully should be a part of the
 * ISA code but it's easier to do it here for now), I/O port addresses,
 * and I/O memory address space.
*/ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include "ofw_bus_if.h" #endif static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); struct nexus_device { struct resource_list nx_resources; }; #define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) static struct rman mem_rman; static int nexus_probe(device_t); static int nexus_attach(device_t); static int nexus_print_child(device_t, device_t); static device_t nexus_add_child(device_t, u_int, const char *, int); static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, rman_res_t, rman_res_t, rman_res_t, u_int); static int nexus_activate_resource(device_t, device_t, int, int, struct resource *); static bus_space_tag_t nexus_get_bus_tag(device_t, device_t); #ifdef ARM_INTRNG #ifdef SMP static int nexus_bind_intr(device_t, device_t, struct resource *, int); #endif #endif static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol); #ifdef ARM_INTRNG static int nexus_describe_intr(device_t dev, device_t child, struct resource *irq, void *cookie, const char *descr); #endif static int nexus_deactivate_resource(device_t, device_t, int, int, struct resource *); static int nexus_release_resource(device_t, device_t, int, int, struct resource *); static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep); static int nexus_teardown_intr(device_t, device_t, struct resource *, void *); #ifdef FDT static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr); #endif static device_method_t nexus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_probe), DEVMETHOD(device_attach, nexus_attach), /* Bus interface */ DEVMETHOD(bus_print_child, nexus_print_child), DEVMETHOD(bus_add_child, nexus_add_child), DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), DEVMETHOD(bus_activate_resource, nexus_activate_resource), DEVMETHOD(bus_config_intr, nexus_config_intr), DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource), DEVMETHOD(bus_release_resource, nexus_release_resource), DEVMETHOD(bus_setup_intr, nexus_setup_intr), DEVMETHOD(bus_teardown_intr, nexus_teardown_intr), DEVMETHOD(bus_get_bus_tag, nexus_get_bus_tag), #ifdef ARM_INTRNG DEVMETHOD(bus_describe_intr, nexus_describe_intr), #ifdef SMP DEVMETHOD(bus_bind_intr, nexus_bind_intr), #endif #endif #ifdef FDT DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr), #endif { 0, 0 } }; static devclass_t nexus_devclass; static driver_t nexus_driver = { "nexus", nexus_methods, 1 /* no softc */ }; EARLY_DRIVER_MODULE(nexus, root, nexus_driver, nexus_devclass, 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_EARLY); static int nexus_probe(device_t dev) { device_quiet(dev); /* suppress attach message for neatness */ return (BUS_PROBE_DEFAULT); } static int nexus_attach(device_t dev) { mem_rman.rm_start = 0; mem_rman.rm_end = BUS_SPACE_MAXADDR; mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) || rman_manage_region(&mem_rman, 0, BUS_SPACE_MAXADDR)) panic("nexus_probe mem_rman"); /* * First, deal with the children we know about already */ bus_generic_probe(dev); bus_generic_attach(dev); return (0); } static int nexus_print_child(device_t bus, device_t child) { int retval = 0; retval 
+= bus_print_child_header(bus, child); retval += printf("\n"); return (retval); } static device_t nexus_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct nexus_device *ndev; ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); if (!ndev) return (0); resource_list_init(&ndev->nx_resources); child = device_add_child_ordered(bus, order, name, unit); /* should we free this in nexus_child_detached? */ device_set_ivars(child, ndev); return (child); } /* * Allocate a resource on behalf of child. NB: child is usually going to be a * child of one of our descendants, not a direct child of nexus0. * (Exceptions include footbridge.) */ static struct resource * nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *rv; struct rman *rm; int needactivate = flags & RF_ACTIVE; flags &= ~RF_ACTIVE; switch (type) { case SYS_RES_MEMORY: case SYS_RES_IOPORT: rm = &mem_rman; break; default: return (NULL); } rv = rman_reserve_resource(rm, start, end, count, flags, child); - if (rv == 0) + if (rv == NULL) return (NULL); rman_set_rid(rv, *rid); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { rman_release_resource(rv); return (0); } } return (rv); } static int nexus_release_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { int error; if (rman_get_flags(res) & RF_ACTIVE) { error = bus_deactivate_resource(child, type, rid, res); if (error) return (error); } return (rman_release_resource(res)); } static bus_space_tag_t nexus_get_bus_tag(device_t bus __unused, device_t child __unused) { #ifdef FDT return(fdtbus_bs_tag); #else return((void *)1); #endif } static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) { int ret = ENODEV; #ifdef ARM_INTRNG device_printf(dev, "bus_config_intr is obsolete and not supported!\n"); ret = EOPNOTSUPP; #else if (arm_config_irq) ret = (*arm_config_irq)(irq, trig, pol); #endif return (ret); } static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) { #ifndef ARM_INTRNG int irq; #endif if ((rman_get_flags(res) & RF_SHAREABLE) == 0) flags |= INTR_EXCL; #ifdef ARM_INTRNG return(intr_setup_irq(child, res, filt, intr, arg, flags, cookiep)); #else for (irq = rman_get_start(res); irq <= rman_get_end(res); irq++) { arm_setup_irqhandler(device_get_nameunit(child), filt, intr, arg, irq, flags, cookiep); arm_unmask_irq(irq); } return (0); #endif } static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { #ifdef ARM_INTRNG return (intr_teardown_irq(child, r, ih)); #else return (arm_remove_irqhandler(rman_get_start(r), ih)); #endif } #ifdef ARM_INTRNG static int nexus_describe_intr(device_t dev, device_t child, struct resource *irq, void *cookie, const char *descr) { return (intr_describe_irq(child, irq, cookie, descr)); } #ifdef SMP static int nexus_bind_intr(device_t dev, device_t child, struct resource *irq, int cpu) { return (intr_bind_irq(child, irq, cpu)); } #endif #endif static int nexus_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { int err; bus_addr_t paddr; bus_size_t psize; bus_space_handle_t vaddr; if ((err = rman_activate_resource(r)) != 0) return (err); /* * If this is a memory resource, map it into the kernel. 
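 * SYS_RES_IOPORT is handled the same way: ARM has no separate I/O port
 * space, so both resource types come out of the single mem_rman and are
 * mapped into kernel VA below.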
*/ if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { paddr = (bus_addr_t)rman_get_start(r); psize = (bus_size_t)rman_get_size(r); #ifdef FDT err = bus_space_map(fdtbus_bs_tag, paddr, psize, 0, &vaddr); if (err != 0) { rman_deactivate_resource(r); return (err); } rman_set_bustag(r, fdtbus_bs_tag); #else vaddr = (bus_space_handle_t)pmap_mapdev((vm_offset_t)paddr, (vm_size_t)psize); if (vaddr == 0) { rman_deactivate_resource(r); return (ENOMEM); } rman_set_bustag(r, (void *)1); #endif rman_set_virtual(r, (void *)vaddr); rman_set_bushandle(r, vaddr); } return (0); } static int nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { bus_size_t psize; bus_space_handle_t vaddr; psize = (bus_size_t)rman_get_size(r); vaddr = rman_get_bushandle(r); if (vaddr != 0) { #ifdef FDT bus_space_unmap(fdtbus_bs_tag, vaddr, psize); #else pmap_unmapdev((vm_offset_t)vaddr, (vm_size_t)psize); #endif rman_set_virtual(r, NULL); rman_set_bushandle(r, 0); } return (rman_deactivate_resource(r)); } #ifdef FDT static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr) { return (intr_fdt_map_irq(iparent, intr, icells)); } #endif Index: head/sys/arm/arm/pmap-v4.c =================================================================== --- head/sys/arm/arm/pmap-v4.c (revision 298054) +++ head/sys/arm/arm/pmap-v4.c (revision 298055) @@ -1,4874 +1,4874 @@ /* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */ /*- * Copyright 2004 Olivier Houchard. * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2002-2003 Wasabi Systems, Inc. * Copyright (c) 2001 Richard Earnshaw * Copyright (c) 2001-2002 Christopher Gilbert * All rights reserved. * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * * RiscBSD kernel project * * pmap.c * * Machine dependant vm stuff * * Created : 20/09/94 */ /* * Special compilation symbols * PMAP_DEBUG - Build in pmap_debug_level code * * Note that pmap_mapdev() and pmap_unmapdev() are implemented in arm/devmap.c */ /* Include header files */ #include "opt_vm.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PMAP_DEBUG #define PDEBUG(_lev_,_stat_) \ if (pmap_debug_level >= (_lev_)) \ ((_stat_)) #define dprintf printf int pmap_debug_level = 0; #define PMAP_INLINE #else /* PMAP_DEBUG */ #define PDEBUG(_lev_,_stat_) /* Nothing */ #define dprintf(x, arg...) #define PMAP_INLINE __inline #endif /* PMAP_DEBUG */ extern struct pv_addr systempage; extern int last_fault_code; /* * Internal function prototypes */ static void pmap_free_pv_entry (pv_entry_t); static pv_entry_t pmap_get_pv_entry(void); static int pmap_enter_locked(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int); static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va); static void pmap_fix_cache(struct vm_page *, pmap_t, vm_offset_t); static void pmap_alloc_l1(pmap_t); static void pmap_free_l1(pmap_t); static int pmap_clearbit(struct vm_page *, u_int); static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t); static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t); static void pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int); static vm_offset_t kernel_pt_lookup(vm_paddr_t); static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1"); vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t pmap_curmaxkvaddr; vm_paddr_t kernel_l1pa; vm_offset_t kernel_vm_end = 0; vm_offset_t vm_max_kernel_address; struct pmap kernel_pmap_store; static pt_entry_t *csrc_pte, *cdst_pte; static vm_offset_t csrcp, cdstp, qmap_addr; static struct mtx cmtx, qmap_mtx; static void pmap_init_l1(struct l1_ttable *, pd_entry_t *); /* * These routines are called when the CPU type is identified to set up * the PTE prototypes, cache modes, etc. * * The variables are always here, just in case LKMs need to reference * them (though, they shouldn't). 
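 * They are filled in by pmap_pte_init_generic()/pmap_pte_init_xscale()
 * below, once the CPU class is known.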
*/ pt_entry_t pte_l1_s_cache_mode; pt_entry_t pte_l1_s_cache_mode_pt; pt_entry_t pte_l1_s_cache_mask; pt_entry_t pte_l2_l_cache_mode; pt_entry_t pte_l2_l_cache_mode_pt; pt_entry_t pte_l2_l_cache_mask; pt_entry_t pte_l2_s_cache_mode; pt_entry_t pte_l2_s_cache_mode_pt; pt_entry_t pte_l2_s_cache_mask; pt_entry_t pte_l2_s_prot_u; pt_entry_t pte_l2_s_prot_w; pt_entry_t pte_l2_s_prot_mask; pt_entry_t pte_l1_s_proto; pt_entry_t pte_l1_c_proto; pt_entry_t pte_l2_s_proto; void (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t); void (*pmap_copy_page_offs_func)(vm_paddr_t a_phys, vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, int cnt); void (*pmap_zero_page_func)(vm_paddr_t, int, int); -struct msgbuf *msgbufp = 0; +struct msgbuf *msgbufp = NULL; /* * Crashdump maps. */ static caddr_t crashdumpmap; extern void bcopy_page(vm_offset_t, vm_offset_t); extern void bzero_page(vm_offset_t); extern vm_offset_t alloc_firstaddr; char *_tmppt; /* * Metadata for L1 translation tables. */ struct l1_ttable { /* Entry on the L1 Table list */ SLIST_ENTRY(l1_ttable) l1_link; /* Entry on the L1 Least Recently Used list */ TAILQ_ENTRY(l1_ttable) l1_lru; /* Track how many domains are allocated from this L1 */ volatile u_int l1_domain_use_count; /* * A free-list of domain numbers for this L1. * We avoid using ffs() and a bitmap to track domains since ffs() * is slow on ARM. */ u_int8_t l1_domain_first; u_int8_t l1_domain_free[PMAP_DOMAINS]; /* Physical address of this L1 page table */ vm_paddr_t l1_physaddr; /* KVA of this L1 page table */ pd_entry_t *l1_kva; }; /* * Convert a virtual address into its L1 table index. That is, the * index used to locate the L2 descriptor table pointer in an L1 table. * This is basically used to index l1->l1_kva[]. * * Each L2 descriptor table represents 1MB of VA space. */ #define L1_IDX(va) (((vm_offset_t)(va)) >> L1_S_SHIFT) /* * L1 Page Tables are tracked using a Least Recently Used list. * - New L1s are allocated from the HEAD. * - Freed L1s are added to the TAIl. * - Recently accessed L1s (where an 'access' is some change to one of * the userland pmaps which owns this L1) are moved to the TAIL. */ static TAILQ_HEAD(, l1_ttable) l1_lru_list; /* * A list of all L1 tables */ static SLIST_HEAD(, l1_ttable) l1_list; static struct mtx l1_lru_lock; /* * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots. * * This is normally 16MB worth L2 page descriptors for any given pmap. * Reference counts are maintained for L2 descriptors so they can be * freed when empty. */ struct l2_dtable { /* The number of L2 page descriptors allocated to this l2_dtable */ u_int l2_occupancy; /* List of L2 page descriptors */ struct l2_bucket { pt_entry_t *l2b_kva; /* KVA of L2 Descriptor Table */ vm_paddr_t l2b_phys; /* Physical address of same */ u_short l2b_l1idx; /* This L2 table's L1 index */ u_short l2b_occupancy; /* How many active descriptors */ } l2_bucket[L2_BUCKET_SIZE]; }; /* pmap_kenter_internal flags */ #define KENTER_CACHE 0x1 #define KENTER_USER 0x2 /* * Given an L1 table index, calculate the corresponding l2_dtable index * and bucket index within the l2_dtable. */ #define L2_IDX(l1idx) (((l1idx) >> L2_BUCKET_LOG2) & \ (L2_SIZE - 1)) #define L2_BUCKET(l1idx) ((l1idx) & (L2_BUCKET_SIZE - 1)) /* * Given a virtual address, this macro returns the * virtual address required to drop into the next L2 bucket. */ #define L2_NEXT_BUCKET(va) (((va) & L1_S_FRAME) + L1_S_SIZE) /* * We try to map the page tables write-through, if possible. 
However, not * all CPUs have a write-through cache mode, so on those we have to sync * the cache when we frob page tables. * * We try to evaluate this at compile time, if possible. However, it's * not always possible to do that, hence this run-time var. */ int pmap_needs_pte_sync; /* * Macro to determine if a mapping might be resident in the * instruction cache and/or TLB */ #define PV_BEEN_EXECD(f) (((f) & (PVF_REF | PVF_EXEC)) == (PVF_REF | PVF_EXEC)) /* * Macro to determine if a mapping might be resident in the * data cache and/or TLB */ #define PV_BEEN_REFD(f) (((f) & PVF_REF) != 0) #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #define pmap_is_current(pm) ((pm) == kernel_pmap || \ curproc->p_vmspace->vm_map.pmap == (pm)) static uma_zone_t pvzone = NULL; uma_zone_t l2zone; static uma_zone_t l2table_zone; static vm_offset_t pmap_kernel_l2dtable_kva; static vm_offset_t pmap_kernel_l2ptp_kva; static vm_paddr_t pmap_kernel_l2ptp_phys; static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; static struct rwlock pvh_global_lock; void pmap_copy_page_offs_generic(vm_paddr_t a_phys, vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, int cnt); #if ARM_MMU_XSCALE == 1 void pmap_copy_page_offs_xscale(vm_paddr_t a_phys, vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, int cnt); #endif /* * This list exists for the benefit of pmap_map_chunk(). It keeps track * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can * find them as necessary. * * Note that the data on this list MUST remain valid after initarm() returns, * as pmap_bootstrap() uses it to contruct L2 table metadata. */ SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list); static void pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt) { int i; l1->l1_kva = l1pt; l1->l1_domain_use_count = 0; l1->l1_domain_first = 0; for (i = 0; i < PMAP_DOMAINS; i++) l1->l1_domain_free[i] = i + 1; /* * Copy the kernel's L1 entries to each new L1. */ if (l1pt != kernel_pmap->pm_l1->l1_kva) memcpy(l1pt, kernel_pmap->pm_l1->l1_kva, L1_TABLE_SIZE); if ((l1->l1_physaddr = pmap_extract(kernel_pmap, (vm_offset_t)l1pt)) == 0) panic("pmap_init_l1: can't get PA of L1 at %p", l1pt); SLIST_INSERT_HEAD(&l1_list, l1, l1_link); TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); } static vm_offset_t kernel_pt_lookup(vm_paddr_t pa) { struct pv_addr *pv; SLIST_FOREACH(pv, &kernel_pt_list, pv_list) { if (pv->pv_pa == pa) return (pv->pv_va); } return (0); } #if ARM_MMU_GENERIC != 0 void pmap_pte_init_generic(void) { pte_l1_s_cache_mode = L1_S_B|L1_S_C; pte_l1_s_cache_mask = L1_S_CACHE_MASK_generic; pte_l2_l_cache_mode = L2_B|L2_C; pte_l2_l_cache_mask = L2_L_CACHE_MASK_generic; pte_l2_s_cache_mode = L2_B|L2_C; pte_l2_s_cache_mask = L2_S_CACHE_MASK_generic; /* * If we have a write-through cache, set B and C. If * we have a write-back cache, then we assume setting * only C will make those pages write-through. 
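 * (Here B and C are the Bufferable and Cacheable bits of the ARM page
 * table entries, i.e. the L1_S_B/L1_S_C and L2_B/L2_C flags used below.)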
*/ if (cpufuncs.cf_dcache_wb_range == (void *) cpufunc_nullop) { pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C; pte_l2_l_cache_mode_pt = L2_B|L2_C; pte_l2_s_cache_mode_pt = L2_B|L2_C; } else { pte_l1_s_cache_mode_pt = L1_S_C; pte_l2_l_cache_mode_pt = L2_C; pte_l2_s_cache_mode_pt = L2_C; } pte_l2_s_prot_u = L2_S_PROT_U_generic; pte_l2_s_prot_w = L2_S_PROT_W_generic; pte_l2_s_prot_mask = L2_S_PROT_MASK_generic; pte_l1_s_proto = L1_S_PROTO_generic; pte_l1_c_proto = L1_C_PROTO_generic; pte_l2_s_proto = L2_S_PROTO_generic; pmap_copy_page_func = pmap_copy_page_generic; pmap_copy_page_offs_func = pmap_copy_page_offs_generic; pmap_zero_page_func = pmap_zero_page_generic; } #endif /* ARM_MMU_GENERIC != 0 */ #if ARM_MMU_XSCALE == 1 #if (ARM_NMMUS > 1) || defined (CPU_XSCALE_CORE3) static u_int xscale_use_minidata; #endif void pmap_pte_init_xscale(void) { uint32_t auxctl; int write_through = 0; pte_l1_s_cache_mode = L1_S_B|L1_S_C|L1_S_XSCALE_P; pte_l1_s_cache_mask = L1_S_CACHE_MASK_xscale; pte_l2_l_cache_mode = L2_B|L2_C; pte_l2_l_cache_mask = L2_L_CACHE_MASK_xscale; pte_l2_s_cache_mode = L2_B|L2_C; pte_l2_s_cache_mask = L2_S_CACHE_MASK_xscale; pte_l1_s_cache_mode_pt = L1_S_C; pte_l2_l_cache_mode_pt = L2_C; pte_l2_s_cache_mode_pt = L2_C; #ifdef XSCALE_CACHE_READ_WRITE_ALLOCATE /* * The XScale core has an enhanced mode where writes that * miss the cache cause a cache line to be allocated. This * is significantly faster than the traditional, write-through * behavior of this case. */ pte_l1_s_cache_mode |= L1_S_XSCALE_TEX(TEX_XSCALE_X); pte_l2_l_cache_mode |= L2_XSCALE_L_TEX(TEX_XSCALE_X); pte_l2_s_cache_mode |= L2_XSCALE_T_TEX(TEX_XSCALE_X); #endif /* XSCALE_CACHE_READ_WRITE_ALLOCATE */ #ifdef XSCALE_CACHE_WRITE_THROUGH /* * Some versions of the XScale core have various bugs in * their cache units, the work-around for which is to run * the cache in write-through mode. Unfortunately, this * has a major (negative) impact on performance. So, we * go ahead and run fast-and-loose, in the hopes that we * don't line up the planets in a way that will trip the * bugs. * * However, we give you the option to be slow-but-correct. */ write_through = 1; #elif defined(XSCALE_CACHE_WRITE_BACK) /* force write back cache mode */ write_through = 0; #elif defined(CPU_XSCALE_PXA2X0) /* * Intel PXA2[15]0 processors are known to have a bug in * write-back cache on revision 4 and earlier (stepping * A[01] and B[012]). Fixed for C0 and later. 
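 * The code below reads the CPU ID register and forces write-through when
 * the revision field is below 5, i.e. on the affected A0/A1 and B0-B2
 * steppings.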
*/ { uint32_t id, type; id = cpu_ident(); type = id & ~(CPU_ID_XSCALE_COREREV_MASK|CPU_ID_REVISION_MASK); if (type == CPU_ID_PXA250 || type == CPU_ID_PXA210) { if ((id & CPU_ID_REVISION_MASK) < 5) { /* write through for stepping A0-1 and B0-2 */ write_through = 1; } } } #endif /* XSCALE_CACHE_WRITE_THROUGH */ if (write_through) { pte_l1_s_cache_mode = L1_S_C; pte_l2_l_cache_mode = L2_C; pte_l2_s_cache_mode = L2_C; } #if (ARM_NMMUS > 1) xscale_use_minidata = 1; #endif pte_l2_s_prot_u = L2_S_PROT_U_xscale; pte_l2_s_prot_w = L2_S_PROT_W_xscale; pte_l2_s_prot_mask = L2_S_PROT_MASK_xscale; pte_l1_s_proto = L1_S_PROTO_xscale; pte_l1_c_proto = L1_C_PROTO_xscale; pte_l2_s_proto = L2_S_PROTO_xscale; #ifdef CPU_XSCALE_CORE3 pmap_copy_page_func = pmap_copy_page_generic; pmap_copy_page_offs_func = pmap_copy_page_offs_generic; pmap_zero_page_func = pmap_zero_page_generic; xscale_use_minidata = 0; /* Make sure it is L2-cachable */ pte_l1_s_cache_mode |= L1_S_XSCALE_TEX(TEX_XSCALE_T); pte_l1_s_cache_mode_pt = pte_l1_s_cache_mode &~ L1_S_XSCALE_P; pte_l2_l_cache_mode |= L2_XSCALE_L_TEX(TEX_XSCALE_T) ; pte_l2_l_cache_mode_pt = pte_l1_s_cache_mode; pte_l2_s_cache_mode |= L2_XSCALE_T_TEX(TEX_XSCALE_T); pte_l2_s_cache_mode_pt = pte_l2_s_cache_mode; #else pmap_copy_page_func = pmap_copy_page_xscale; pmap_copy_page_offs_func = pmap_copy_page_offs_xscale; pmap_zero_page_func = pmap_zero_page_xscale; #endif /* * Disable ECC protection of page table access, for now. */ __asm __volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (auxctl)); auxctl &= ~XSCALE_AUXCTL_P; __asm __volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (auxctl)); } /* * xscale_setup_minidata: * * Set up the mini-data cache clean area. We require the * caller to allocate the right amount of physically and * virtually contiguous space. */ extern vm_offset_t xscale_minidata_clean_addr; extern vm_size_t xscale_minidata_clean_size; /* already initialized */ void xscale_setup_minidata(vm_offset_t l1pt, vm_offset_t va, vm_paddr_t pa) { pd_entry_t *pde = (pd_entry_t *) l1pt; pt_entry_t *pte; vm_size_t size; uint32_t auxctl; xscale_minidata_clean_addr = va; /* Round it to page size. */ size = (xscale_minidata_clean_size + L2_S_OFFSET) & L2_S_FRAME; for (; size != 0; va += L2_S_SIZE, pa += L2_S_SIZE, size -= L2_S_SIZE) { pte = (pt_entry_t *) kernel_pt_lookup( pde[L1_IDX(va)] & L1_C_ADDR_MASK); if (pte == NULL) panic("xscale_setup_minidata: can't find L2 table for " "VA 0x%08x", (u_int32_t) va); pte[l2pte_index(va)] = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); } /* * Configure the mini-data cache for write-back with * read/write-allocate. * * NOTE: In order to reconfigure the mini-data cache, we must * make sure it contains no valid data! In order to do that, * we must issue a global data cache invalidate command! * * WE ASSUME WE ARE RUNNING UN-CACHED WHEN THIS ROUTINE IS CALLED! * THIS IS VERY IMPORTANT! */ /* Invalidate data and mini-data. */ __asm __volatile("mcr p15, 0, %0, c7, c6, 0" : : "r" (0)); __asm __volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (auxctl)); auxctl = (auxctl & ~XSCALE_AUXCTL_MD_MASK) | XSCALE_AUXCTL_MD_WB_RWA; __asm __volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (auxctl)); } #endif /* * Allocate an L1 translation table for the specified pmap. * This is called at pmap creation time. 
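 * Each L1 can be shared by up to PMAP_DOMAINS pmaps: the L1 at the head
 * of the LRU list donates one of its free domain numbers (kept as a small
 * free list in l1_domain_free[]), and the pmap records that domain + 1.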
*/ static void pmap_alloc_l1(pmap_t pm) { struct l1_ttable *l1; u_int8_t domain; /* * Remove the L1 at the head of the LRU list */ mtx_lock(&l1_lru_lock); l1 = TAILQ_FIRST(&l1_lru_list); TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); /* * Pick the first available domain number, and update * the link to the next number. */ domain = l1->l1_domain_first; l1->l1_domain_first = l1->l1_domain_free[domain]; /* * If there are still free domain numbers in this L1, * put it back on the TAIL of the LRU list. */ if (++l1->l1_domain_use_count < PMAP_DOMAINS) TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); mtx_unlock(&l1_lru_lock); /* * Fix up the relevant bits in the pmap structure */ pm->pm_l1 = l1; pm->pm_domain = domain + 1; } /* * Free an L1 translation table. * This is called at pmap destruction time. */ static void pmap_free_l1(pmap_t pm) { struct l1_ttable *l1 = pm->pm_l1; mtx_lock(&l1_lru_lock); /* * If this L1 is currently on the LRU list, remove it. */ if (l1->l1_domain_use_count < PMAP_DOMAINS) TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); /* * Free up the domain number which was allocated to the pmap */ l1->l1_domain_free[pm->pm_domain - 1] = l1->l1_domain_first; l1->l1_domain_first = pm->pm_domain - 1; l1->l1_domain_use_count--; /* * The L1 now must have at least 1 free domain, so add * it back to the LRU list. If the use count is zero, * put it at the head of the list, otherwise it goes * to the tail. */ if (l1->l1_domain_use_count == 0) { TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru); } else TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); mtx_unlock(&l1_lru_lock); } /* * Returns a pointer to the L2 bucket associated with the specified pmap * and VA, or NULL if no L2 bucket exists for the address. */ static PMAP_INLINE struct l2_bucket * pmap_get_l2_bucket(pmap_t pm, vm_offset_t va) { struct l2_dtable *l2; struct l2_bucket *l2b; u_short l1idx; l1idx = L1_IDX(va); if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL || (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL) return (NULL); return (l2b); } /* * Returns a pointer to the L2 bucket associated with the specified pmap * and VA. * * If no L2 bucket exists, perform the necessary allocations to put an L2 * bucket/page table in place. * * Note that if a new L2 bucket/page was allocated, the caller *must* * increment the bucket occupancy counter appropriately *before* * releasing the pmap's lock to ensure no other thread or cpu deallocates * the bucket/page in the meantime. */ static struct l2_bucket * pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va) { struct l2_dtable *l2; struct l2_bucket *l2b; u_short l1idx; l1idx = L1_IDX(va); PMAP_ASSERT_LOCKED(pm); rw_assert(&pvh_global_lock, RA_WLOCKED); if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) { /* * No mapping at this address, as there is * no entry in the L1 table. * Need to allocate a new l2_dtable. */ PMAP_UNLOCK(pm); rw_wunlock(&pvh_global_lock); if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); return (NULL); } rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); if (pm->pm_l2[L2_IDX(l1idx)] != NULL) { /* * Someone already allocated the l2_dtable while * we were doing the same. */ uma_zfree(l2table_zone, l2); l2 = pm->pm_l2[L2_IDX(l1idx)]; } else { bzero(l2, sizeof(*l2)); /* * Link it into the parent pmap */ pm->pm_l2[L2_IDX(l1idx)] = l2; } } l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; /* * Fetch pointer to the L2 page table associated with the address. */ if (l2b->l2b_kva == NULL) { pt_entry_t *ptep; /* * No L2 page table has been allocated. 
Chances are, this * is because we just allocated the l2_dtable, above. */ l2->l2_occupancy++; PMAP_UNLOCK(pm); rw_wunlock(&pvh_global_lock); ptep = uma_zalloc(l2zone, M_NOWAIT); rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); - if (l2b->l2b_kva != 0) { + if (l2b->l2b_kva != NULL) { /* We lost the race. */ l2->l2_occupancy--; uma_zfree(l2zone, ptep); return (l2b); } l2b->l2b_phys = vtophys(ptep); if (ptep == NULL) { /* * Oops, no more L2 page tables available at this * time. We may need to deallocate the l2_dtable * if we allocated a new one above. */ l2->l2_occupancy--; if (l2->l2_occupancy == 0) { pm->pm_l2[L2_IDX(l1idx)] = NULL; uma_zfree(l2table_zone, l2); } return (NULL); } l2b->l2b_kva = ptep; l2b->l2b_l1idx = l1idx; } return (l2b); } static PMAP_INLINE void #ifndef PMAP_INCLUDE_PTE_SYNC pmap_free_l2_ptp(pt_entry_t *l2) #else pmap_free_l2_ptp(boolean_t need_sync, pt_entry_t *l2) #endif { #ifdef PMAP_INCLUDE_PTE_SYNC /* * Note: With a write-back cache, we may need to sync this * L2 table before re-using it. * This is because it may have belonged to a non-current * pmap, in which case the cache syncs would have been * skipped when the pages were being unmapped. If the * L2 table were then to be immediately re-allocated to * the *current* pmap, it may well contain stale mappings * which have not yet been cleared by a cache write-back * and so would still be visible to the mmu. */ if (need_sync) PTE_SYNC_RANGE(l2, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); #endif uma_zfree(l2zone, l2); } /* * One or more mappings in the specified L2 descriptor table have just been * invalidated. * * Garbage collect the metadata and descriptor table itself if necessary. * * The pmap lock must be acquired when this is called (not necessary * for the kernel pmap). */ static void pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l2b, u_int count) { struct l2_dtable *l2; pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep; u_short l1idx; /* * Update the bucket's reference count according to how many * PTEs the caller has just invalidated. */ l2b->l2b_occupancy -= count; /* * Note: * * Level 2 page tables allocated to the kernel pmap are never freed * as that would require checking all Level 1 page tables and * removing any references to the Level 2 page table. See also the * comment elsewhere about never freeing bootstrap L2 descriptors. * * We make do with just invalidating the mapping in the L2 table. * * This isn't really a big deal in practice and, in fact, leads * to a performance win over time as we don't need to continually * alloc/free. */ if (l2b->l2b_occupancy > 0 || pm == kernel_pmap) return; /* * There are no more valid mappings in this level 2 page table. * Go ahead and NULL-out the pointer in the bucket, then * free the page table. */ l1idx = l2b->l2b_l1idx; ptep = l2b->l2b_kva; l2b->l2b_kva = NULL; pl1pd = &pm->pm_l1->l1_kva[l1idx]; /* * If the L1 slot matches the pmap's domain * number, then invalidate it. */ l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK); if (l1pd == (L1_C_DOM(pm->pm_domain) | L1_TYPE_C)) { *pl1pd = 0; PTE_SYNC(pl1pd); } /* * Release the L2 descriptor table back to the pool cache. */ #ifndef PMAP_INCLUDE_PTE_SYNC pmap_free_l2_ptp(ptep); #else pmap_free_l2_ptp(!pmap_is_current(pm), ptep); #endif /* * Update the reference count in the associated l2_dtable */ l2 = pm->pm_l2[L2_IDX(l1idx)]; if (--l2->l2_occupancy > 0) return; /* * There are no more valid mappings in any of the Level 1 * slots managed by this l2_dtable. Go ahead and NULL-out * the pointer in the parent pmap and free the l2_dtable. 
*/ pm->pm_l2[L2_IDX(l1idx)] = NULL; uma_zfree(l2table_zone, l2); } /* * Pool cache constructors for L2 descriptor tables, metadata and pmap * structures. */ static int pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags) { #ifndef PMAP_INCLUDE_PTE_SYNC struct l2_bucket *l2b; pt_entry_t *ptep, pte; vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK; /* * The mappings for these page tables were initially made using * pmap_kenter() by the pool subsystem. Therefore, the cache- * mode will not be right for page table mappings. To avoid * polluting the pmap_kenter() code with a special case for * page tables, we simply fix up the cache-mode here if it's not * correct. */ l2b = pmap_get_l2_bucket(kernel_pmap, va); ptep = &l2b->l2b_kva[l2pte_index(va)]; pte = *ptep; if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { /* * Page tables must have the cache-mode set to * Write-Thru. */ *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; PTE_SYNC(ptep); cpu_tlb_flushD_SE(va); cpu_cpwait(); } #endif memset(mem, 0, L2_TABLE_SIZE_REAL); PTE_SYNC_RANGE(mem, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); return (0); } /* * A bunch of routines to conditionally flush the caches/TLB depending * on whether the specified pmap actually needs to be flushed at any * given time. */ static PMAP_INLINE void pmap_tlb_flushID_SE(pmap_t pm, vm_offset_t va) { if (pmap_is_current(pm)) cpu_tlb_flushID_SE(va); } static PMAP_INLINE void pmap_tlb_flushD_SE(pmap_t pm, vm_offset_t va) { if (pmap_is_current(pm)) cpu_tlb_flushD_SE(va); } static PMAP_INLINE void pmap_tlb_flushID(pmap_t pm) { if (pmap_is_current(pm)) cpu_tlb_flushID(); } static PMAP_INLINE void pmap_tlb_flushD(pmap_t pm) { if (pmap_is_current(pm)) cpu_tlb_flushD(); } static int pmap_has_valid_mapping(pmap_t pm, vm_offset_t va) { pd_entry_t *pde; pt_entry_t *ptep; if (pmap_get_pde_pte(pm, va, &pde, &ptep) && ptep && ((*ptep & L2_TYPE_MASK) != L2_TYPE_INV)) return (1); return (0); } static PMAP_INLINE void pmap_idcache_wbinv_range(pmap_t pm, vm_offset_t va, vm_size_t len) { vm_size_t rest; CTR4(KTR_PMAP, "pmap_dcache_wbinv_range: pmap %p is_kernel %d va 0x%08x" " len 0x%x ", pm, pm == kernel_pmap, va, len); if (pmap_is_current(pm) || pm == kernel_pmap) { rest = MIN(PAGE_SIZE - (va & PAGE_MASK), len); while (len > 0) { if (pmap_has_valid_mapping(pm, va)) { cpu_idcache_wbinv_range(va, rest); cpu_l2cache_wbinv_range(va, rest); } len -= rest; va += rest; rest = MIN(PAGE_SIZE, len); } } } static PMAP_INLINE void pmap_dcache_wb_range(pmap_t pm, vm_offset_t va, vm_size_t len, boolean_t do_inv, boolean_t rd_only) { vm_size_t rest; CTR4(KTR_PMAP, "pmap_dcache_wb_range: pmap %p is_kernel %d va 0x%08x " "len 0x%x ", pm, pm == kernel_pmap, va, len); CTR2(KTR_PMAP, " do_inv %d rd_only %d", do_inv, rd_only); if (pmap_is_current(pm)) { rest = MIN(PAGE_SIZE - (va & PAGE_MASK), len); while (len > 0) { if (pmap_has_valid_mapping(pm, va)) { if (do_inv && rd_only) { cpu_dcache_inv_range(va, rest); cpu_l2cache_inv_range(va, rest); } else if (do_inv) { cpu_dcache_wbinv_range(va, rest); cpu_l2cache_wbinv_range(va, rest); } else if (!rd_only) { cpu_dcache_wb_range(va, rest); cpu_l2cache_wb_range(va, rest); } } len -= rest; va += rest; rest = MIN(PAGE_SIZE, len); } } } static PMAP_INLINE void pmap_idcache_wbinv_all(pmap_t pm) { if (pmap_is_current(pm)) { cpu_idcache_wbinv_all(); cpu_l2cache_wbinv_all(); } } #ifdef notyet static PMAP_INLINE void pmap_dcache_wbinv_all(pmap_t pm) { if (pmap_is_current(pm)) { cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); } } #endif /* * 
PTE_SYNC_CURRENT: * * Make sure the pte is written out to RAM. * We need to do this for one of two cases: * - We're dealing with the kernel pmap * - There is no pmap active in the cache/tlb. * - The specified pmap is 'active' in the cache/tlb. */ #ifdef PMAP_INCLUDE_PTE_SYNC #define PTE_SYNC_CURRENT(pm, ptep) \ do { \ if (PMAP_NEEDS_PTE_SYNC && \ pmap_is_current(pm)) \ PTE_SYNC(ptep); \ } while (/*CONSTCOND*/0) #else #define PTE_SYNC_CURRENT(pm, ptep) /* nothing */ #endif /* * cacheable == -1 means we must make the entry uncacheable, 1 means * cacheable; */ static __inline void pmap_set_cache_entry(pv_entry_t pv, pmap_t pm, vm_offset_t va, int cacheable) { struct l2_bucket *l2b; pt_entry_t *ptep, pte; l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va); ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; if (cacheable == 1) { pte = (*ptep & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode; if (l2pte_valid(pte)) { if (PV_BEEN_EXECD(pv->pv_flags)) { pmap_tlb_flushID_SE(pv->pv_pmap, pv->pv_va); } else if (PV_BEEN_REFD(pv->pv_flags)) { pmap_tlb_flushD_SE(pv->pv_pmap, pv->pv_va); } } } else { pte = *ptep &~ L2_S_CACHE_MASK; if ((va != pv->pv_va || pm != pv->pv_pmap) && l2pte_valid(pte)) { if (PV_BEEN_EXECD(pv->pv_flags)) { pmap_idcache_wbinv_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE); pmap_tlb_flushID_SE(pv->pv_pmap, pv->pv_va); } else if (PV_BEEN_REFD(pv->pv_flags)) { pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, TRUE, (pv->pv_flags & PVF_WRITE) == 0); pmap_tlb_flushD_SE(pv->pv_pmap, pv->pv_va); } } } *ptep = pte; PTE_SYNC_CURRENT(pv->pv_pmap, ptep); } static void pmap_fix_cache(struct vm_page *pg, pmap_t pm, vm_offset_t va) { int pmwc = 0; int writable = 0, kwritable = 0, uwritable = 0; int entries = 0, kentries = 0, uentries = 0; struct pv_entry *pv; rw_assert(&pvh_global_lock, RA_WLOCKED); /* the cache gets written back/invalidated on context switch. * therefore, if a user page shares an entry in the same page or * with the kernel map and at least one is writable, then the * cache entry must be set write-through. */ TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) { /* generate a count of the pv_entry uses */ if (pv->pv_flags & PVF_WRITE) { if (pv->pv_pmap == kernel_pmap) kwritable++; else if (pv->pv_pmap == pm) uwritable++; writable++; } if (pv->pv_pmap == kernel_pmap) kentries++; else { if (pv->pv_pmap == pm) uentries++; entries++; } } /* * check if the user duplicate mapping has * been removed. 
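 * pmwc is set below when this user pmap has duplicate mappings of the
 * page with at least one of them writable (or more than one writable
 * mapping); the loop that follows then marks such mappings uncacheable.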
*/ if ((pm != kernel_pmap) && (((uentries > 1) && uwritable) || (uwritable > 1))) pmwc = 1; TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) { /* check for user uncachable conditions - order is important */ if (pm != kernel_pmap && (pv->pv_pmap == pm || pv->pv_pmap == kernel_pmap)) { if ((uentries > 1 && uwritable) || uwritable > 1) { /* user duplicate mapping */ if (pv->pv_pmap != kernel_pmap) pv->pv_flags |= PVF_MWC; if (!(pv->pv_flags & PVF_NC)) { pv->pv_flags |= PVF_NC; pmap_set_cache_entry(pv, pm, va, -1); } continue; } else /* no longer a duplicate user */ pv->pv_flags &= ~PVF_MWC; } /* * check for kernel uncachable conditions * kernel writable or kernel readable with writable user entry */ if ((kwritable && (entries || kentries > 1)) || (kwritable > 1) || ((kwritable != writable) && kentries && (pv->pv_pmap == kernel_pmap || (pv->pv_flags & PVF_WRITE) || (pv->pv_flags & PVF_MWC)))) { if (!(pv->pv_flags & PVF_NC)) { pv->pv_flags |= PVF_NC; pmap_set_cache_entry(pv, pm, va, -1); } continue; } /* kernel and user are cachable */ if ((pm == kernel_pmap) && !(pv->pv_flags & PVF_MWC) && (pv->pv_flags & PVF_NC)) { pv->pv_flags &= ~PVF_NC; if (pg->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) pmap_set_cache_entry(pv, pm, va, 1); continue; } /* user is no longer sharable and writable */ if (pm != kernel_pmap && (pv->pv_pmap == pm || pv->pv_pmap == kernel_pmap) && !pmwc && (pv->pv_flags & PVF_NC)) { pv->pv_flags &= ~(PVF_NC | PVF_MWC); if (pg->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) pmap_set_cache_entry(pv, pm, va, 1); } } if ((kwritable == 0) && (writable == 0)) { pg->md.pvh_attrs &= ~PVF_MOD; vm_page_aflag_clear(pg, PGA_WRITEABLE); return; } } /* * Modify pte bits for all ptes corresponding to the given physical address. * We use `maskbits' rather than `clearbits' because we're always passing * constants and the latter would require an extra inversion at run-time. */ static int pmap_clearbit(struct vm_page *pg, u_int maskbits) { struct l2_bucket *l2b; struct pv_entry *pv; pt_entry_t *ptep, npte, opte; pmap_t pm; vm_offset_t va; u_int oflags; int count = 0; rw_wlock(&pvh_global_lock); if (maskbits & PVF_WRITE) maskbits |= PVF_MOD; /* * Clear saved attributes (modify, reference) */ pg->md.pvh_attrs &= ~(maskbits & (PVF_MOD | PVF_REF)); if (TAILQ_EMPTY(&pg->md.pv_list)) { rw_wunlock(&pvh_global_lock); return (0); } /* * Loop over all current mappings setting/clearing as appropos */ TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) { va = pv->pv_va; pm = pv->pv_pmap; oflags = pv->pv_flags; if (!(oflags & maskbits)) { if ((maskbits & PVF_WRITE) && (pv->pv_flags & PVF_NC)) { if (pg->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) { PMAP_LOCK(pm); l2b = pmap_get_l2_bucket(pm, va); ptep = &l2b->l2b_kva[l2pte_index(va)]; *ptep |= pte_l2_s_cache_mode; PTE_SYNC(ptep); PMAP_UNLOCK(pm); } pv->pv_flags &= ~(PVF_NC | PVF_MWC); } continue; } pv->pv_flags &= ~maskbits; PMAP_LOCK(pm); l2b = pmap_get_l2_bucket(pm, va); ptep = &l2b->l2b_kva[l2pte_index(va)]; npte = opte = *ptep; if (maskbits & (PVF_WRITE|PVF_MOD)) { if ((pv->pv_flags & PVF_NC)) { /* * Entry is not cacheable: * * Don't turn caching on again if this is a * modified emulation. This would be * inconsitent with the settings created by * pmap_fix_cache(). Otherwise, it's safe * to re-enable cacheing. * * There's no need to call pmap_fix_cache() * here: all pages are losing their write * permission. 
*/ if (maskbits & PVF_WRITE) { if (pg->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) npte |= pte_l2_s_cache_mode; pv->pv_flags &= ~(PVF_NC | PVF_MWC); } } else if (opte & L2_S_PROT_W) { vm_page_dirty(pg); /* * Entry is writable/cacheable: check if pmap * is current if it is flush it, otherwise it * won't be in the cache */ if (PV_BEEN_EXECD(oflags)) pmap_idcache_wbinv_range(pm, pv->pv_va, PAGE_SIZE); else if (PV_BEEN_REFD(oflags)) pmap_dcache_wb_range(pm, pv->pv_va, PAGE_SIZE, (maskbits & PVF_REF) ? TRUE : FALSE, FALSE); } /* make the pte read only */ npte &= ~L2_S_PROT_W; } if (maskbits & PVF_REF) { if ((pv->pv_flags & PVF_NC) == 0 && (maskbits & (PVF_WRITE|PVF_MOD)) == 0) { /* * Check npte here; we may have already * done the wbinv above, and the validity * of the PTE is the same for opte and * npte. */ if (npte & L2_S_PROT_W) { if (PV_BEEN_EXECD(oflags)) pmap_idcache_wbinv_range(pm, pv->pv_va, PAGE_SIZE); else if (PV_BEEN_REFD(oflags)) pmap_dcache_wb_range(pm, pv->pv_va, PAGE_SIZE, TRUE, FALSE); } else if ((npte & L2_TYPE_MASK) != L2_TYPE_INV) { /* XXXJRT need idcache_inv_range */ if (PV_BEEN_EXECD(oflags)) pmap_idcache_wbinv_range(pm, pv->pv_va, PAGE_SIZE); else if (PV_BEEN_REFD(oflags)) pmap_dcache_wb_range(pm, pv->pv_va, PAGE_SIZE, TRUE, TRUE); } } /* * Make the PTE invalid so that we will take a * page fault the next time the mapping is * referenced. */ npte &= ~L2_TYPE_MASK; npte |= L2_TYPE_INV; } if (npte != opte) { count++; *ptep = npte; PTE_SYNC(ptep); /* Flush the TLB entry if a current pmap. */ if (PV_BEEN_EXECD(oflags)) pmap_tlb_flushID_SE(pm, pv->pv_va); else if (PV_BEEN_REFD(oflags)) pmap_tlb_flushD_SE(pm, pv->pv_va); } PMAP_UNLOCK(pm); } if (maskbits & PVF_WRITE) vm_page_aflag_clear(pg, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); return (count); } /* * main pv_entry manipulation functions: * pmap_enter_pv: enter a mapping onto a vm_page list * pmap_remove_pv: remove a mappiing from a vm_page list * * NOTE: pmap_enter_pv expects to lock the pvh itself * pmap_remove_pv expects the caller to lock the pvh before calling */ /* * pmap_enter_pv: enter a mapping onto a vm_page's PV list * * => caller should hold the proper lock on pvh_global_lock * => caller should have pmap locked * => we will (someday) gain the lock on the vm_page's PV list * => caller should adjust ptp's wire_count before calling * => caller should not adjust pmap's wire_count */ static void pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, pmap_t pm, vm_offset_t va, u_int flags) { rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_ASSERT_LOCKED(pm); if (pg->md.pv_kva != 0) { pve->pv_pmap = kernel_pmap; pve->pv_va = pg->md.pv_kva; pve->pv_flags = PVF_WRITE | PVF_UNMAN; if (pm != kernel_pmap) PMAP_LOCK(kernel_pmap); TAILQ_INSERT_HEAD(&pg->md.pv_list, pve, pv_list); TAILQ_INSERT_HEAD(&kernel_pmap->pm_pvlist, pve, pv_plist); if (pm != kernel_pmap) PMAP_UNLOCK(kernel_pmap); pg->md.pv_kva = 0; if ((pve = pmap_get_pv_entry()) == NULL) panic("pmap_kenter_pv: no pv entries"); } pve->pv_pmap = pm; pve->pv_va = va; pve->pv_flags = flags; TAILQ_INSERT_HEAD(&pg->md.pv_list, pve, pv_list); TAILQ_INSERT_HEAD(&pm->pm_pvlist, pve, pv_plist); pg->md.pvh_attrs |= flags & (PVF_REF | PVF_MOD); if (pve->pv_flags & PVF_WIRED) ++pm->pm_stats.wired_count; vm_page_aflag_set(pg, PGA_REFERENCED); } /* * * pmap_find_pv: Find a pv entry * * => caller should hold lock on vm_page */ static PMAP_INLINE struct pv_entry * pmap_find_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va) { struct pv_entry *pv; rw_assert(&pvh_global_lock, RA_WLOCKED); 
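/* Scan the page's PV list for the entry matching this pmap and VA. */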
TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) if (pm == pv->pv_pmap && va == pv->pv_va) break; return (pv); } /* * vector_page_setprot: * * Manipulate the protection of the vector page. */ void vector_page_setprot(int prot) { struct l2_bucket *l2b; pt_entry_t *ptep; l2b = pmap_get_l2_bucket(kernel_pmap, vector_page); ptep = &l2b->l2b_kva[l2pte_index(vector_page)]; *ptep = (*ptep & ~L1_S_PROT_MASK) | L2_S_PROT(PTE_KERNEL, prot); PTE_SYNC(ptep); cpu_tlb_flushD_SE(vector_page); cpu_cpwait(); } /* * pmap_remove_pv: try to remove a mapping from a pv_list * * => caller should hold proper lock on pmap_main_lock * => pmap should be locked * => caller should hold lock on vm_page [so that attrs can be adjusted] * => caller should adjust ptp's wire_count and free PTP if needed * => caller should NOT adjust pmap's wire_count * => we return the removed pve */ static void pmap_nuke_pv(struct vm_page *pg, pmap_t pm, struct pv_entry *pve) { struct pv_entry *pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_ASSERT_LOCKED(pm); TAILQ_REMOVE(&pg->md.pv_list, pve, pv_list); TAILQ_REMOVE(&pm->pm_pvlist, pve, pv_plist); if (pve->pv_flags & PVF_WIRED) --pm->pm_stats.wired_count; if (pg->md.pvh_attrs & PVF_MOD) vm_page_dirty(pg); if (TAILQ_FIRST(&pg->md.pv_list) == NULL) pg->md.pvh_attrs &= ~PVF_REF; else vm_page_aflag_set(pg, PGA_REFERENCED); if ((pve->pv_flags & PVF_NC) && ((pm == kernel_pmap) || (pve->pv_flags & PVF_WRITE) || !(pve->pv_flags & PVF_MWC))) pmap_fix_cache(pg, pm, 0); else if (pve->pv_flags & PVF_WRITE) { TAILQ_FOREACH(pve, &pg->md.pv_list, pv_list) if (pve->pv_flags & PVF_WRITE) break; if (!pve) { pg->md.pvh_attrs &= ~PVF_MOD; vm_page_aflag_clear(pg, PGA_WRITEABLE); } } pv = TAILQ_FIRST(&pg->md.pv_list); if (pv != NULL && (pv->pv_flags & PVF_UNMAN) && TAILQ_NEXT(pv, pv_list) == NULL) { pm = kernel_pmap; pg->md.pv_kva = pv->pv_va; /* a recursive pmap_nuke_pv */ TAILQ_REMOVE(&pg->md.pv_list, pv, pv_list); TAILQ_REMOVE(&pm->pm_pvlist, pv, pv_plist); if (pv->pv_flags & PVF_WIRED) --pm->pm_stats.wired_count; pg->md.pvh_attrs &= ~PVF_REF; pg->md.pvh_attrs &= ~PVF_MOD; vm_page_aflag_clear(pg, PGA_WRITEABLE); pmap_free_pv_entry(pv); } } static struct pv_entry * pmap_remove_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va) { struct pv_entry *pve; rw_assert(&pvh_global_lock, RA_WLOCKED); pve = TAILQ_FIRST(&pg->md.pv_list); while (pve) { if (pve->pv_pmap == pm && pve->pv_va == va) { /* match? */ pmap_nuke_pv(pg, pm, pve); break; } pve = TAILQ_NEXT(pve, pv_list); } if (pve == NULL && pg->md.pv_kva == va) pg->md.pv_kva = 0; return(pve); /* return removed pve */ } /* * * pmap_modify_pv: Update pv flags * * => caller should hold lock on vm_page [so that attrs can be adjusted] * => caller should NOT adjust pmap's wire_count * => we return the old flags * * Modify a physical-virtual mapping in the pv table */ static u_int pmap_modify_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va, u_int clr_mask, u_int set_mask) { struct pv_entry *npv; u_int flags, oflags; PMAP_ASSERT_LOCKED(pm); rw_assert(&pvh_global_lock, RA_WLOCKED); if ((npv = pmap_find_pv(pg, pm, va)) == NULL) return (0); /* * There is at least one VA mapping this page. 
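 * When the caller is touching the REF/MOD bits, any REF/MOD bits present
 * in set_mask are mirrored into the page's pvh_attrs before the
 * per-mapping flags are updated.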
*/ if (clr_mask & (PVF_REF | PVF_MOD)) pg->md.pvh_attrs |= set_mask & (PVF_REF | PVF_MOD); oflags = npv->pv_flags; npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask; if ((flags ^ oflags) & PVF_WIRED) { if (flags & PVF_WIRED) ++pm->pm_stats.wired_count; else --pm->pm_stats.wired_count; } if ((flags ^ oflags) & PVF_WRITE) pmap_fix_cache(pg, pm, 0); return (oflags); } /* Function to set the debug level of the pmap code */ #ifdef PMAP_DEBUG void pmap_debug(int level) { pmap_debug_level = level; dprintf("pmap_debug: level=%d\n", pmap_debug_level); } #endif /* PMAP_DEBUG */ void pmap_pinit0(struct pmap *pmap) { PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap)); bcopy(kernel_pmap, pmap, sizeof(*pmap)); bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx)); PMAP_LOCK_INIT(pmap); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_DEFAULT; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { int shpgperproc = PMAP_SHPGPERPROC; l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); /* * Initialize the PV entry allocator. */ pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; uma_zone_reserve_kva(pvzone, pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * Now it is safe to enable pv_table recording. */ PDEBUG(1, printf("pmap_init: done!\n")); } int pmap_fault_fixup(pmap_t pm, vm_offset_t va, vm_prot_t ftype, int user) { struct l2_dtable *l2; struct l2_bucket *l2b; pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep, pte; vm_paddr_t pa; u_int l1idx; int rv = 0; l1idx = L1_IDX(va); rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); /* * If there is no l2_dtable for this address, then the process * has no business accessing it. * * Note: This will catch userland processes trying to access * kernel addresses. */ l2 = pm->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL) goto out; /* * Likewise if there is no L2 descriptor table */ l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; if (l2b->l2b_kva == NULL) goto out; /* * Check the PTE itself. */ ptep = &l2b->l2b_kva[l2pte_index(va)]; pte = *ptep; if (pte == 0) goto out; /* * Catch a userland access to the vector page mapped at 0x0 */ if (user && (pte & L2_S_PROT_U) == 0) goto out; if (va == vector_page) goto out; pa = l2pte_pa(pte); if ((ftype & VM_PROT_WRITE) && (pte & L2_S_PROT_W) == 0) { /* * This looks like a good candidate for "page modified" * emulation... */ struct pv_entry *pv; struct vm_page *pg; /* Extract the physical address of the page */ if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) { goto out; } /* Get the current flags for this page. */ pv = pmap_find_pv(pg, pm, va); if (pv == NULL) { goto out; } /* * Do the flags say this page is writable? If not then it * is a genuine write fault. If yes then the write fault is * our fault as we did not reflect the write access in the * PTE. 
Now we know a write has occurred we can correct this * and also set the modified bit */ if ((pv->pv_flags & PVF_WRITE) == 0) { goto out; } pg->md.pvh_attrs |= PVF_REF | PVF_MOD; vm_page_dirty(pg); pv->pv_flags |= PVF_REF | PVF_MOD; /* * Re-enable write permissions for the page. No need to call * pmap_fix_cache(), since this is just a * modified-emulation fault, and the PVF_WRITE bit isn't * changing. We've already set the cacheable bits based on * the assumption that we can write to this page. */ *ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO | L2_S_PROT_W; PTE_SYNC(ptep); rv = 1; } else if ((pte & L2_TYPE_MASK) == L2_TYPE_INV) { /* * This looks like a good candidate for "page referenced" * emulation. */ struct pv_entry *pv; struct vm_page *pg; /* Extract the physical address of the page */ if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) goto out; /* Get the current flags for this page. */ pv = pmap_find_pv(pg, pm, va); if (pv == NULL) goto out; pg->md.pvh_attrs |= PVF_REF; pv->pv_flags |= PVF_REF; *ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO; PTE_SYNC(ptep); rv = 1; } /* * We know there is a valid mapping here, so simply * fix up the L1 if necessary. */ pl1pd = &pm->pm_l1->l1_kva[l1idx]; l1pd = l2b->l2b_phys | L1_C_DOM(pm->pm_domain) | L1_C_PROTO; if (*pl1pd != l1pd) { *pl1pd = l1pd; PTE_SYNC(pl1pd); rv = 1; } #ifdef DEBUG /* * If 'rv == 0' at this point, it generally indicates that there is a * stale TLB entry for the faulting address. This happens when two or * more processes are sharing an L1. Since we don't flush the TLB on * a context switch between such processes, we can take domain faults * for mappings which exist at the same VA in both processes. EVEN IF * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for * example. * * This is extremely likely to happen if pmap_enter() updated the L1 * entry for a recently entered mapping. In this case, the TLB is * flushed for the new mapping, but there may still be TLB entries for * other mappings belonging to other processes in the 1MB range * covered by the L1 entry. * * Since 'rv == 0', we know that the L1 already contains the correct * value, so the fault must be due to a stale TLB entry. * * Since we always need to flush the TLB anyway in the case where we * fixed up the L1, or frobbed the L2 PTE, we effectively deal with * stale TLB entries dynamically. * * However, the above condition can ONLY happen if the current L1 is * being shared. If it happens when the L1 is unshared, it indicates * that other parts of the pmap are not doing their job WRT managing * the TLB. */ if (rv == 0 && pm->pm_l1->l1_domain_use_count == 1) { printf("fixup: pm %p, va 0x%lx, ftype %d - nothing to do!\n", pm, (u_long)va, ftype); printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n", l2, l2b, ptep, pl1pd); printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n", pte, l1pd, last_fault_code); #ifdef DDB Debugger(); #endif } #endif cpu_tlb_flushID_SE(va); cpu_cpwait(); rv = 1; out: rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); return (rv); } void pmap_postinit(void) { struct l2_bucket *l2b; struct l1_ttable *l1; pd_entry_t *pl1pt; pt_entry_t *ptep, pte; vm_offset_t va, eva; u_int loop, needed; needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 
1 : 0); needed -= 1; l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK); for (loop = 0; loop < needed; loop++, l1++) { /* Allocate a L1 page table */ va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0, 0xffffffff, L1_TABLE_SIZE, 0); if (va == 0) panic("Cannot allocate L1 KVM"); eva = va + L1_TABLE_SIZE; pl1pt = (pd_entry_t *)va; while (va < eva) { l2b = pmap_get_l2_bucket(kernel_pmap, va); ptep = &l2b->l2b_kva[l2pte_index(va)]; pte = *ptep; pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; *ptep = pte; PTE_SYNC(ptep); cpu_tlb_flushD_SE(va); va += PAGE_SIZE; } pmap_init_l1(l1, pl1pt); } #ifdef DEBUG printf("pmap_postinit: Allocated %d static L1 descriptor tables\n", needed); #endif } /* * This is used to stuff certain critical values into the PCB where they * can be accessed quickly from cpu_switch() et al. */ void pmap_set_pcb_pagedir(pmap_t pm, struct pcb *pcb) { struct l2_bucket *l2b; pcb->pcb_pagedir = pm->pm_l1->l1_physaddr; pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | (DOMAIN_CLIENT << (pm->pm_domain * 2)); if (vector_page < KERNBASE) { pcb->pcb_pl1vec = &pm->pm_l1->l1_kva[L1_IDX(vector_page)]; l2b = pmap_get_l2_bucket(pm, vector_page); pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO | L1_C_DOM(pm->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL); } else pcb->pcb_pl1vec = NULL; } void pmap_activate(struct thread *td) { pmap_t pm; struct pcb *pcb; pm = vmspace_pmap(td->td_proc->p_vmspace); pcb = td->td_pcb; critical_enter(); pmap_set_pcb_pagedir(pm, pcb); if (td == curthread) { u_int cur_dacr, cur_ttb; __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb)); __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr)); cur_ttb &= ~(L1_TABLE_SIZE - 1); if (cur_ttb == (u_int)pcb->pcb_pagedir && cur_dacr == pcb->pcb_dacr) { /* * No need to switch address spaces. */ critical_exit(); return; } /* * We MUST, I repeat, MUST fix up the L1 entry corresponding * to 'vector_page' in the incoming L1 table before switching * to it otherwise subsequent interrupts/exceptions (including * domain faults!) will jump into hyperspace. */ if (pcb->pcb_pl1vec) { *pcb->pcb_pl1vec = pcb->pcb_l1vec; /* * Don't need to PTE_SYNC() at this point since * cpu_setttb() is about to flush both the cache * and the TLB. 
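The referenced/modified emulation that pmap_fault_fixup() implements above needs no hardware help to model: a mapping starts out invalid (so the first reference traps) or read-only (so the first write traps), and the fault handler records REF/MOD in software before upgrading the entry. A toy model of that state machine, with invented bit layouts rather than the real PTE encodings:

#include <stdio.h>

#define PTE_VALID 0x1           /* illustrative software "PTE" bits */
#define PTE_WRITE 0x2
#define ATTR_REF  0x1           /* per-page software attributes */
#define ATTR_MOD  0x2

struct page    { unsigned attrs; };
struct mapping { struct page *pg; unsigned pte; int writable; };

/* Emulate the fixup: returns 1 if the fault was ref/mod emulation. */
static int
fault_fixup(struct mapping *m, int is_write)
{
    if (is_write && m->writable && !(m->pte & PTE_WRITE)) {
        m->pg->attrs |= ATTR_REF | ATTR_MOD;   /* page is now dirty */
        m->pte |= PTE_VALID | PTE_WRITE;
        return (1);
    }
    if (!(m->pte & PTE_VALID)) {
        m->pg->attrs |= ATTR_REF;              /* page is now referenced */
        m->pte |= PTE_VALID;
        return (1);
    }
    return (0);                                /* a genuine fault */
}

int
main(void)
{
    struct page pg = { 0 };
    struct mapping m = { &pg, 0, 1 };          /* starts invalid, but writable */

    fault_fixup(&m, 0);                        /* first read sets REF */
    fault_fixup(&m, 1);                        /* first write sets MOD */
    printf("attrs 0x%x, pte 0x%x\n", pg.attrs, m.pte);
    return (0);
}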
*/ } cpu_domains(pcb->pcb_dacr); cpu_setttb(pcb->pcb_pagedir); } critical_exit(); } static int pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va) { pd_entry_t *pdep, pde; pt_entry_t *ptep, pte; vm_offset_t pa; int rv = 0; /* * Make sure the descriptor itself has the correct cache mode */ pdep = &kl1[L1_IDX(va)]; pde = *pdep; if (l1pte_section_p(pde)) { if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) { *pdep = (pde & ~L1_S_CACHE_MASK) | pte_l1_s_cache_mode_pt; PTE_SYNC(pdep); cpu_dcache_wbinv_range((vm_offset_t)pdep, sizeof(*pdep)); cpu_l2cache_wbinv_range((vm_offset_t)pdep, sizeof(*pdep)); rv = 1; } } else { pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); ptep = (pt_entry_t *)kernel_pt_lookup(pa); if (ptep == NULL) panic("pmap_bootstrap: No L2 for L2 @ va %p\n", ptep); ptep = &ptep[l2pte_index(va)]; pte = *ptep; if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; PTE_SYNC(ptep); cpu_dcache_wbinv_range((vm_offset_t)ptep, sizeof(*ptep)); cpu_l2cache_wbinv_range((vm_offset_t)ptep, sizeof(*ptep)); rv = 1; } } return (rv); } static void pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap, pt_entry_t **ptep) { vm_offset_t va = *availp; struct l2_bucket *l2b; if (ptep) { l2b = pmap_get_l2_bucket(kernel_pmap, va); if (l2b == NULL) panic("pmap_alloc_specials: no l2b for 0x%x", va); *ptep = &l2b->l2b_kva[l2pte_index(va)]; } *vap = va; *availp = va + (PAGE_SIZE * pages); } /* * Bootstrap the system enough to run with virtual memory. * * On the arm this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ #define PMAP_STATIC_L2_SIZE 16 void pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt) { static struct l1_ttable static_l1; static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE]; struct l1_ttable *l1 = &static_l1; struct l2_dtable *l2; struct l2_bucket *l2b; pd_entry_t pde; pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va; pt_entry_t *ptep; pt_entry_t *qmap_pte; vm_paddr_t pa; vm_offset_t va; vm_size_t size; int l1idx, l2idx, l2next = 0; PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n", firstaddr, vm_max_kernel_address)); virtual_avail = firstaddr; kernel_pmap->pm_l1 = l1; kernel_l1pa = l1pt->pv_pa; /* * Scan the L1 translation table created by initarm() and create * the required metadata for all valid mappings found in it. */ for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) { pde = kernel_l1pt[l1idx]; /* * We're only interested in Coarse mappings. * pmap_extract() can deal with section mappings without * recourse to checking L2 metadata. */ if ((pde & L1_TYPE_MASK) != L1_TYPE_C) continue; /* * Lookup the KVA of this L2 descriptor table */ pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); ptep = (pt_entry_t *)kernel_pt_lookup(pa); if (ptep == NULL) { panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx", (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa); } /* * Fetch the associated L2 metadata structure. * Allocate a new one if necessary. */ if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { if (l2next == PMAP_STATIC_L2_SIZE) panic("pmap_bootstrap: out of static L2s"); kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 = &static_l2[l2next++]; } /* * One more L1 slot tracked... 
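The pmap_bootstrap() scan that continues just below walks the 4096-entry L1 table and, for each coarse entry, counts how many of the 256 small-page PTEs in its L2 table are populated. The index arithmetic is plain bit slicing of the virtual address; the constants here are the usual ARM short-descriptor sizes, and the "invalid means zero" test is a simplification of the L2_TYPE_INV check:

#include <stdio.h>
#include <stdint.h>

#define L1_SHIFT   20           /* one L1 slot covers 1 MiB */
#define L2_SHIFT   12           /* one small page is 4 KiB */
#define L2_ENTRIES 256          /* PTEs per coarse L2 table */

static unsigned l1_index(uint32_t va) { return (va >> L1_SHIFT); }
static unsigned l2_index(uint32_t va) { return ((va >> L2_SHIFT) & (L2_ENTRIES - 1)); }

/* Count valid entries in one L2 table, like the bootstrap occupancy scan. */
static unsigned
l2_occupancy(const uint32_t *l2tab)
{
    unsigned i, n = 0;

    for (i = 0; i < L2_ENTRIES; i++)
        if (l2tab[i] != 0)
            n++;
    return (n);
}

int
main(void)
{
    uint32_t l2tab[L2_ENTRIES] = { 0 };
    uint32_t va = 0xc0123456;

    l2tab[l2_index(va)] = 0x2;                 /* pretend one PTE is populated */
    printf("va 0x%08x -> L1 %u, L2 %u, occupancy %u\n",
        (unsigned)va, l1_index(va), l2_index(va), l2_occupancy(l2tab));
    return (0);
}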
*/ l2->l2_occupancy++; /* * Fill in the details of the L2 descriptor in the * appropriate bucket. */ l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; l2b->l2b_kva = ptep; l2b->l2b_phys = pa; l2b->l2b_l1idx = l1idx; /* * Establish an initial occupancy count for this descriptor */ for (l2idx = 0; l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); l2idx++) { if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) { l2b->l2b_occupancy++; } } /* * Make sure the descriptor itself has the correct cache mode. * If not, fix it, but whine about the problem. Port-meisters * should consider this a clue to fix up their initarm() * function. :) */ if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) { printf("pmap_bootstrap: WARNING! wrong cache mode for " "L2 pte @ %p\n", ptep); } } /* * Ensure the primary (kernel) L1 has the correct cache mode for * a page table. Bitch if it is not correctly set. */ for (va = (vm_offset_t)kernel_l1pt; va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) { if (pmap_set_pt_cache_mode(kernel_l1pt, va)) printf("pmap_bootstrap: WARNING! wrong cache mode for " "primary L1 @ 0x%x\n", va); } cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); cpu_tlb_flushID(); cpu_cpwait(); PMAP_LOCK_INIT(kernel_pmap); CPU_FILL(&kernel_pmap->pm_active); kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; TAILQ_INIT(&kernel_pmap->pm_pvlist); /* * Initialize the global pv list lock. */ rw_init_flags(&pvh_global_lock, "pmap pv global", RW_RECURSE); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte); pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte); pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte); pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte); pmap_alloc_specials(&virtual_avail, 1, &qmap_addr, &qmap_pte); pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)qmap_pte); size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) / L1_S_SIZE; pmap_alloc_specials(&virtual_avail, round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE, &pmap_kernel_l2ptp_kva, NULL); size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE; pmap_alloc_specials(&virtual_avail, round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE, &pmap_kernel_l2dtable_kva, NULL); pmap_alloc_specials(&virtual_avail, 1, (vm_offset_t*)&_tmppt, NULL); pmap_alloc_specials(&virtual_avail, MAXDUMPPGS, (vm_offset_t *)&crashdumpmap, NULL); SLIST_INIT(&l1_list); TAILQ_INIT(&l1_lru_list); mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF); pmap_init_l1(l1, kernel_l1pt); cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); virtual_avail = round_page(virtual_avail); virtual_end = vm_max_kernel_address; kernel_vm_end = pmap_curmaxkvaddr; mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF); mtx_init(&qmap_mtx, "quick mapping mtx", NULL, MTX_DEF); pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap_t pmap) { struct pcb *pcb; pmap_idcache_wbinv_all(pmap); cpu_l2cache_wbinv_all(); pmap_tlb_flushID(pmap); cpu_cpwait(); if (vector_page < KERNBASE) { struct pcb *curpcb = PCPU_GET(curpcb); pcb = thread0.td_pcb; if (pmap_is_current(pmap)) { /* * Frob the L1 entry corresponding to the vector * page so that it contains the kernel pmap's domain * number. This will ensure pmap_remove() does not * pull the current vector page out from under us. */ critical_enter(); *pcb->pcb_pl1vec = pcb->pcb_l1vec; cpu_domains(pcb->pcb_dacr); cpu_setttb(pcb->pcb_pagedir); critical_exit(); } pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE); /* * Make sure cpu_switch(), et al, DTRT. This is safe to do * since this process has no remaining mappings of its own. */ curpcb->pcb_pl1vec = pcb->pcb_pl1vec; curpcb->pcb_l1vec = pcb->pcb_l1vec; curpcb->pcb_dacr = pcb->pcb_dacr; curpcb->pcb_pagedir = pcb->pcb_pagedir; } pmap_free_l1(pmap); dprintf("pmap_release()\n"); } /* * Helper function for pmap_grow_l2_bucket() */ static __inline int pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap) { struct l2_bucket *l2b; pt_entry_t *ptep; vm_paddr_t pa; struct vm_page *pg; pg = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (pg == NULL) return (1); pa = VM_PAGE_TO_PHYS(pg); if (pap) *pap = pa; l2b = pmap_get_l2_bucket(kernel_pmap, va); ptep = &l2b->l2b_kva[l2pte_index(va)]; *ptep = L2_S_PROTO | pa | cache_mode | L2_S_PROT(PTE_KERNEL, VM_PROT_READ | VM_PROT_WRITE); PTE_SYNC(ptep); return (0); } /* * This is the same as pmap_alloc_l2_bucket(), except that it is only * used by pmap_growkernel(). */ static __inline struct l2_bucket * pmap_grow_l2_bucket(pmap_t pm, vm_offset_t va) { struct l2_dtable *l2; struct l2_bucket *l2b; struct l1_ttable *l1; pd_entry_t *pl1pd; u_short l1idx; vm_offset_t nva; l1idx = L1_IDX(va); if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) { /* * No mapping at this address, as there is * no entry in the L1 table. * Need to allocate a new l2_dtable. */ nva = pmap_kernel_l2dtable_kva; if ((nva & PAGE_MASK) == 0) { /* * Need to allocate a backing page */ if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) return (NULL); } l2 = (struct l2_dtable *)nva; nva += sizeof(struct l2_dtable); if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva & PAGE_MASK)) { /* * The new l2_dtable straddles a page boundary. * Map in another page to cover it. */ if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) return (NULL); } pmap_kernel_l2dtable_kva = nva; /* * Link it into the parent pmap */ pm->pm_l2[L2_IDX(l1idx)] = l2; memset(l2, 0, sizeof(*l2)); } l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; /* * Fetch pointer to the L2 page table associated with the address. */ if (l2b->l2b_kva == NULL) { pt_entry_t *ptep; /* * No L2 page table has been allocated. Chances are, this * is because we just allocated the l2_dtable, above. 
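pmap_grow_l2_bucket(), above and continuing below, hands out l2_dtable structures from a dedicated KVA region with a bump pointer, mapping a fresh backing page whenever the cursor starts a new page or an object straddles a page boundary. The same pattern in userland, with grow_map() as a stand-in for pmap_grow_map() and an object size chosen only for the demonstration:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE 4096u
#define PAGE_MASK (PAGE_SIZE - 1)

static unsigned pages_mapped;

/* Stand-in for pmap_grow_map(): back the page containing 'va'. */
static void grow_map(uintptr_t va) { (void)va; pages_mapped++; }

/*
 * Bump-allocate 'size' bytes from *cursor, mapping backing pages on
 * demand, the way the kernel grows its l2_dtable region.
 */
static uintptr_t
bump_alloc(uintptr_t *cursor, size_t size)
{
    uintptr_t obj = *cursor;
    uintptr_t next = obj + size;

    if ((obj & PAGE_MASK) == 0)                     /* first use of a new page */
        grow_map(obj);
    if ((next & PAGE_MASK) < (obj & PAGE_MASK))     /* straddled a boundary */
        grow_map(next & ~(uintptr_t)PAGE_MASK);
    *cursor = next;
    return (obj);
}

int
main(void)
{
    uintptr_t cursor = 0x10000000;
    int i;

    for (i = 0; i < 100; i++)
        bump_alloc(&cursor, 172);                   /* arbitrary object size */
    printf("allocated 100 objects, mapped %u backing pages\n", pages_mapped);
    return (0);
}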
*/ nva = pmap_kernel_l2ptp_kva; ptep = (pt_entry_t *)nva; if ((nva & PAGE_MASK) == 0) { /* * Need to allocate a backing page */ if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt, &pmap_kernel_l2ptp_phys)) return (NULL); PTE_SYNC_RANGE(ptep, PAGE_SIZE / sizeof(pt_entry_t)); } memset(ptep, 0, L2_TABLE_SIZE_REAL); l2->l2_occupancy++; l2b->l2b_kva = ptep; l2b->l2b_l1idx = l1idx; l2b->l2b_phys = pmap_kernel_l2ptp_phys; pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL; pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL; } /* Distribute new L1 entry to all other L1s */ SLIST_FOREACH(l1, &l1_list, l1_link) { pl1pd = &l1->l1_kva[L1_IDX(va)]; *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO; PTE_SYNC(pl1pd); } return (l2b); } /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { pmap_t kpm = kernel_pmap; if (addr <= pmap_curmaxkvaddr) return; /* we are OK */ /* * whoops! we need to add kernel PTPs */ /* Map 1MB at a time */ for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE) pmap_grow_l2_bucket(kpm, pmap_curmaxkvaddr); /* * flush out the cache, expensive but growkernel will happen so * rarely */ cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); cpu_tlb_flushD(); cpu_cpwait(); kernel_vm_end = pmap_curmaxkvaddr; } /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap_t pmap) { struct pv_entry *pv, *npv; struct l2_bucket *l2b = NULL; vm_page_t m; pt_entry_t *pt; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); cpu_idcache_wbinv_all(); cpu_l2cache_wbinv_all(); for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_flags & PVF_WIRED || pv->pv_flags & PVF_UNMAN) { /* Cannot remove wired or unmanaged pages now. */ npv = TAILQ_NEXT(pv, pv_plist); continue; } pmap->pm_stats.resident_count--; l2b = pmap_get_l2_bucket(pmap, pv->pv_va); KASSERT(l2b != NULL, ("No L2 bucket in pmap_remove_pages")); pt = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; m = PHYS_TO_VM_PAGE(*pt & L2_S_FRAME); KASSERT((vm_offset_t)m >= KERNBASE, ("Trying to access non-existent page va %x pte %x", pv->pv_va, *pt)); *pt = 0; PTE_SYNC(pt); npv = TAILQ_NEXT(pv, pv_plist); pmap_nuke_pv(m, pmap, pv); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); pmap_free_pv_entry(pv); pmap_free_l2_bucket(pmap, l2b, 1); } rw_wunlock(&pvh_global_lock); cpu_tlb_flushID(); cpu_cpwait(); PMAP_UNLOCK(pmap); } /*************************************************** * Low level mapping routines..... ***************************************************/ #ifdef ARM_HAVE_SUPERSECTIONS /* Map a super section into the KVA. 
*/ void pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags) { pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) | (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL, VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL); struct l1_ttable *l1; vm_offset_t va0, va_end; KASSERT(((va | pa) & L1_SUP_OFFSET) == 0, ("Not a valid super section mapping")); if (flags & SECTION_CACHE) pd |= pte_l1_s_cache_mode; else if (flags & SECTION_PT) pd |= pte_l1_s_cache_mode_pt; va0 = va & L1_SUP_FRAME; va_end = va + L1_SUP_SIZE; SLIST_FOREACH(l1, &l1_list, l1_link) { va = va0; for (; va < va_end; va += L1_S_SIZE) { l1->l1_kva[L1_IDX(va)] = pd; PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); } } } #endif /* Map a section into the KVA. */ void pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags) { pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL, VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL); struct l1_ttable *l1; KASSERT(((va | pa) & L1_S_OFFSET) == 0, ("Not a valid section mapping")); if (flags & SECTION_CACHE) pd |= pte_l1_s_cache_mode; else if (flags & SECTION_PT) pd |= pte_l1_s_cache_mode_pt; SLIST_FOREACH(l1, &l1_list, l1_link) { l1->l1_kva[L1_IDX(va)] = pd; PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); } } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); return ((void *)crashdumpmap); } /* * add a wired page to the kva * note that in order for the mapping to take effect -- you * should do a invltlb after doing the pmap_kenter... */ static PMAP_INLINE void pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags) { struct l2_bucket *l2b; pt_entry_t *pte; pt_entry_t opte; struct pv_entry *pve; vm_page_t m; PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n", (uint32_t) va, (uint32_t) pa)); l2b = pmap_get_l2_bucket(kernel_pmap, va); if (l2b == NULL) l2b = pmap_grow_l2_bucket(kernel_pmap, va); KASSERT(l2b != NULL, ("No L2 Bucket")); pte = &l2b->l2b_kva[l2pte_index(va)]; opte = *pte; PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n", (uint32_t) pte, opte, *pte)); if (l2pte_valid(opte)) { pmap_kremove(va); } else { if (opte == 0) l2b->l2b_occupancy++; } *pte = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, VM_PROT_READ | VM_PROT_WRITE); if (flags & KENTER_CACHE) *pte |= pte_l2_s_cache_mode; if (flags & KENTER_USER) *pte |= L2_S_PROT_U; PTE_SYNC(pte); /* * A kernel mapping may not be the page's only mapping, so create a PV * entry to ensure proper caching. * * The existence test for the pvzone is used to delay the recording of * kernel mappings until the VM system is fully initialized. * * This expects the physical memory to have a vm_page_array entry. 
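pmap_kenter_supersection() above is the one place this pmap can express a physical address above 4GB: bits 35:32 of the PA are folded into bits 23:20 of the 16MB supersection descriptor, while an ordinary 1MB section only carries PA bits 31:20. A bit-packing sketch of that fold; the PROTO and flag values are placeholders rather than the real encodings:

#include <stdio.h>
#include <stdint.h>

#define SUPSEC_SIZE (16u << 20)         /* 16 MiB supersection */
#define L1_PROTO    0x2u                /* placeholder "section" type bits */
#define L1_SUPSEC   (1u << 18)          /* supersection flag */

/* Build an L1 descriptor for a 16 MiB supersection at 40-bit PA 'pa'. */
static uint32_t
supersection_desc(uint64_t pa)
{
    uint32_t desc;

    desc  = L1_PROTO | L1_SUPSEC;
    desc |= (uint32_t)(pa & ~(uint64_t)(SUPSEC_SIZE - 1));  /* PA[31:24] */
    desc |= (uint32_t)((pa >> 32) & 0xf) << 20;             /* PA[35:32] */
    return (desc);
}

int
main(void)
{
    uint64_t pa = 0x1F2000000ULL;       /* a 16 MiB-aligned PA above 4 GB */

    printf("descriptor for pa 0x%llx: 0x%08x\n",
        (unsigned long long)pa, (unsigned)supersection_desc(pa));
    return (0);
}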
*/ if (pvzone != NULL && (m = vm_phys_paddr_to_vm_page(pa)) != NULL) { rw_wlock(&pvh_global_lock); if (!TAILQ_EMPTY(&m->md.pv_list) || m->md.pv_kva != 0) { if ((pve = pmap_get_pv_entry()) == NULL) panic("pmap_kenter_internal: no pv entries"); PMAP_LOCK(kernel_pmap); pmap_enter_pv(m, pve, kernel_pmap, va, PVF_WRITE | PVF_UNMAN); pmap_fix_cache(m, kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); } else { m->md.pv_kva = va; } rw_wunlock(&pvh_global_lock); } } void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pmap_kenter_internal(va, pa, KENTER_CACHE); } void pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa) { pmap_kenter_internal(va, pa, 0); } void pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa) { vm_offset_t sva; KASSERT((size & PAGE_MASK) == 0, ("%s: device mapping not page-sized", __func__)); sva = va; while (size != 0) { pmap_kenter_internal(va, pa, 0); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } } void pmap_kremove_device(vm_offset_t va, vm_size_t size) { vm_offset_t sva; KASSERT((size & PAGE_MASK) == 0, ("%s: device mapping not page-sized", __func__)); sva = va; while (size != 0) { pmap_kremove(va); va += PAGE_SIZE; size -= PAGE_SIZE; } } void pmap_kenter_user(vm_offset_t va, vm_paddr_t pa) { pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER); /* * Call pmap_fault_fixup now, to make sure we'll have no exception * at the first use of the new address, or bad things will happen, * as we use one of these addresses in the exception handlers. */ pmap_fault_fixup(kernel_pmap, va, VM_PROT_READ|VM_PROT_WRITE, 1); } vm_paddr_t pmap_kextract(vm_offset_t va) { return (pmap_extract_locked(kernel_pmap, va)); } /* * remove a page from the kernel pagetables */ void pmap_kremove(vm_offset_t va) { struct l2_bucket *l2b; pt_entry_t *pte, opte; struct pv_entry *pve; vm_page_t m; vm_offset_t pa; l2b = pmap_get_l2_bucket(kernel_pmap, va); if (!l2b) return; KASSERT(l2b != NULL, ("No L2 Bucket")); pte = &l2b->l2b_kva[l2pte_index(va)]; opte = *pte; if (l2pte_valid(opte)) { /* pa = vtophs(va) taken from pmap_extract() */ if ((opte & L2_TYPE_MASK) == L2_TYPE_L) pa = (opte & L2_L_FRAME) | (va & L2_L_OFFSET); else pa = (opte & L2_S_FRAME) | (va & L2_S_OFFSET); /* note: should never have to remove an allocation * before the pvzone is initialized. */ rw_wlock(&pvh_global_lock); PMAP_LOCK(kernel_pmap); if (pvzone != NULL && (m = vm_phys_paddr_to_vm_page(pa)) && (pve = pmap_remove_pv(m, kernel_pmap, va))) pmap_free_pv_entry(pve); PMAP_UNLOCK(kernel_pmap); rw_wunlock(&pvh_global_lock); va = va & ~PAGE_MASK; cpu_dcache_wbinv_range(va, PAGE_SIZE); cpu_l2cache_wbinv_range(va, PAGE_SIZE); cpu_tlb_flushD_SE(va); cpu_cpwait(); *pte = 0; } } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. 
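The pmap_map() contract spelled out above (and implemented just below) is: take a suggested KVA in *virt, map the physical range one page at a time, return the first address used, and advance *virt past the mapping. A direct model, with kenter() standing in for pmap_kenter():

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

static void
kenter(uintptr_t va, uint64_t pa)       /* stand-in for pmap_kenter() */
{
    printf("map va 0x%lx -> pa 0x%llx\n",
        (unsigned long)va, (unsigned long long)pa);
}

/* Map [start, end) at *virt, advance *virt, return the first VA used. */
static uintptr_t
map_range(uintptr_t *virt, uint64_t start, uint64_t end)
{
    uintptr_t sva = *virt, va = sva;

    while (start < end) {
        kenter(va, start);
        va += PAGE_SIZE;
        start += PAGE_SIZE;
    }
    *virt = va;
    return (sva);
}

int
main(void)
{
    uintptr_t virt = 0xc1000000;
    uintptr_t va = map_range(&virt, 0x80000000ULL, 0x80003000ULL);

    printf("mapped at 0x%lx, next free KVA 0x%lx\n",
        (unsigned long)va, (unsigned long)virt);
    return (0);
}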
*/ vm_offset_t pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) { vm_offset_t sva = *virt; vm_offset_t va = sva; PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, " "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end, prot)); while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } *virt = va; return (sva); } static void pmap_wb_page(vm_page_t m) { struct pv_entry *pv; TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, FALSE, (pv->pv_flags & PVF_WRITE) == 0); } static void pmap_inv_page(vm_page_t m) { struct pv_entry *pv; TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, TRUE, TRUE); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. */ void pmap_qenter(vm_offset_t va, vm_page_t *m, int count) { int i; for (i = 0; i < count; i++) { pmap_wb_page(m[i]); pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]), KENTER_CACHE); va += PAGE_SIZE; } } /* * this routine jerks page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(vm_offset_t va, int count) { vm_paddr_t pa; int i; for (i = 0; i < count; i++) { pa = vtophys(va); if (pa) { pmap_inv_page(PHYS_TO_VM_PAGE(pa)); pmap_kremove(va); } va += PAGE_SIZE; } } /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is elgible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte; if (!pmap_get_pde_pte(pmap, addr, &pde, &pte)) return (FALSE); KASSERT(pte != NULL, ("Valid mapping but no pte ?")); if (*pte == 0) return (TRUE); return (FALSE); } /* * Fetch pointers to the PDE/PTE for the given pmap/VA pair. * Returns TRUE if the mapping exists, else FALSE. * * NOTE: This function is only used by a couple of arm-specific modules. * It is not safe to take any pmap locks here, since we could be right * in the middle of debugging the pmap anyway... * * It is possible for this routine to return FALSE even though a valid * mapping does exist. This is because we don't lock, so the metadata * state may be inconsistent. * * NOTE: We can return a NULL *ptp in the case where the L1 pde is * a "section" mapping. 
*/ boolean_t pmap_get_pde_pte(pmap_t pm, vm_offset_t va, pd_entry_t **pdp, pt_entry_t **ptp) { struct l2_dtable *l2; pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep; u_short l1idx; if (pm->pm_l1 == NULL) return (FALSE); l1idx = L1_IDX(va); *pdp = pl1pd = &pm->pm_l1->l1_kva[l1idx]; l1pd = *pl1pd; if (l1pte_section_p(l1pd)) { *ptp = NULL; return (TRUE); } if (pm->pm_l2 == NULL) return (FALSE); l2 = pm->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { return (FALSE); } *ptp = &ptep[l2pte_index(va)]; return (TRUE); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pt_entry_t *ptep; struct l2_bucket *l2b; boolean_t flush = FALSE; pmap_t curpm; int flags = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); if (TAILQ_EMPTY(&m->md.pv_list)) return; rw_wlock(&pvh_global_lock); /* * XXX This call shouldn't exist. Iterating over the PV list twice, * once in pmap_clearbit() and again below, is both unnecessary and * inefficient. The below code should itself write back the cache * entry before it destroys the mapping. */ pmap_clearbit(m, PVF_WRITE); curpm = vmspace_pmap(curproc->p_vmspace); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { if (flush == FALSE && (pv->pv_pmap == curpm || pv->pv_pmap == kernel_pmap)) flush = TRUE; PMAP_LOCK(pv->pv_pmap); /* * Cached contents were written-back in pmap_clearbit(), * but we still have to invalidate the cache entry to make * sure stale data are not retrieved when another page will be * mapped under this virtual address. */ if (pmap_is_current(pv->pv_pmap)) { cpu_dcache_inv_range(pv->pv_va, PAGE_SIZE); if (pmap_has_valid_mapping(pv->pv_pmap, pv->pv_va)) cpu_l2cache_inv_range(pv->pv_va, PAGE_SIZE); } if (pv->pv_flags & PVF_UNMAN) { /* remove the pv entry, but do not remove the mapping * and remember this is a kernel mapped page */ m->md.pv_kva = pv->pv_va; } else { /* remove the mapping and pv entry */ l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va); KASSERT(l2b != NULL, ("No l2 bucket")); ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; *ptep = 0; PTE_SYNC_CURRENT(pv->pv_pmap, ptep); pmap_free_l2_bucket(pv->pv_pmap, l2b, 1); pv->pv_pmap->pm_stats.resident_count--; flags |= pv->pv_flags; } pmap_nuke_pv(m, pv->pv_pmap, pv); PMAP_UNLOCK(pv->pv_pmap); pmap_free_pv_entry(pv); } if (flush) { if (PV_BEEN_EXECD(flags)) pmap_tlb_flushID(curpm); else pmap_tlb_flushD(curpm); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct l2_bucket *l2b; pt_entry_t *ptep, pte; vm_offset_t next_bucket; u_int flags; int flush; CTR4(KTR_PMAP, "pmap_protect: pmap %p sva 0x%08x eva 0x%08x prot %x", pm, sva, eva, prot); if ((prot & VM_PROT_READ) == 0) { pmap_remove(pm, sva, eva); return; } if (prot & VM_PROT_WRITE) { /* * If this is a read->write transition, just ignore it and let * vm_fault() take care of it later. */ return; } rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); /* * OK, at this point, we know we're doing write-protect operation. * If the pmap is active, write-back the range. 
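The write-protect loop that follows in pmap_protect() has a shape used throughout this file: walk the range one L2 bucket (1MB of VA) at a time, clamp the bucket boundary to eva, and only then touch individual PTEs. A skeleton of that double loop over a flat array standing in for the page tables, with an invented write-permission bit:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE   4096u
#define BUCKET_SIZE (1u << 20)          /* one L2 bucket covers 1 MiB of VA */
#define NEXT_BUCKET(va) (((va) + BUCKET_SIZE) & ~(uint32_t)(BUCKET_SIZE - 1))
#define PTE_W  0x2                      /* illustrative write-permission bit */
#define NPAGES 1024                     /* toy "page table": 4 MiB of VA */

static uint32_t pte[NPAGES];

/* Clear the write bit on every valid PTE in [sva, eva), bucket by bucket. */
static unsigned
write_protect(uint32_t sva, uint32_t eva)
{
    uint32_t next_bucket;
    unsigned changed = 0;

    while (sva < eva) {
        next_bucket = NEXT_BUCKET(sva);
        if (next_bucket > eva)
            next_bucket = eva;          /* clamp to the request */
        for (; sva < next_bucket; sva += PAGE_SIZE) {
            uint32_t *p = &pte[sva / PAGE_SIZE];

            if (*p != 0 && (*p & PTE_W)) {
                *p &= ~PTE_W;           /* revoke write permission */
                changed++;
            }
        }
    }
    return (changed);
}

int
main(void)
{
    unsigned i;

    for (i = 0; i < NPAGES; i++)
        pte[i] = 0x1 | PTE_W;           /* all pages valid and writable */
    printf("write-protected %u pages\n",
        write_protect(3 * PAGE_SIZE, BUCKET_SIZE + 5 * PAGE_SIZE));
    return (0);
}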
*/ pmap_dcache_wb_range(pm, sva, eva - sva, FALSE, FALSE); flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1; flags = 0; while (sva < eva) { next_bucket = L2_NEXT_BUCKET(sva); if (next_bucket > eva) next_bucket = eva; l2b = pmap_get_l2_bucket(pm, sva); if (l2b == NULL) { sva = next_bucket; continue; } ptep = &l2b->l2b_kva[l2pte_index(sva)]; while (sva < next_bucket) { if ((pte = *ptep) != 0 && (pte & L2_S_PROT_W) != 0) { struct vm_page *pg; u_int f; pg = PHYS_TO_VM_PAGE(l2pte_pa(pte)); pte &= ~L2_S_PROT_W; *ptep = pte; PTE_SYNC(ptep); if (!(pg->oflags & VPO_UNMANAGED)) { f = pmap_modify_pv(pg, pm, sva, PVF_WRITE, 0); if (f & PVF_WRITE) vm_page_dirty(pg); } else f = 0; if (flush >= 0) { flush++; flags |= f; } else if (PV_BEEN_EXECD(f)) pmap_tlb_flushID_SE(pm, sva); else if (PV_BEEN_REFD(f)) pmap_tlb_flushD_SE(pm, sva); } sva += PAGE_SIZE; ptep++; } } if (flush) { if (PV_BEEN_EXECD(flags)) pmap_tlb_flushID(pm); else if (PV_BEEN_REFD(flags)) pmap_tlb_flushD(pm); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { int rv; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); rv = pmap_enter_locked(pmap, va, m, prot, flags); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (rv); } /* * The pvh global and pmap locks must be held. */ static int pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags) { struct l2_bucket *l2b = NULL; struct vm_page *opg; struct pv_entry *pve = NULL; pt_entry_t *ptep, npte, opte; u_int nflags; u_int oflags; vm_paddr_t pa; PMAP_ASSERT_LOCKED(pmap); rw_assert(&pvh_global_lock, RA_WLOCKED); if (va == vector_page) { pa = systempage.pv_pa; m = NULL; } else { if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); } nflags = 0; if (prot & VM_PROT_WRITE) nflags |= PVF_WRITE; if (prot & VM_PROT_EXECUTE) nflags |= PVF_EXEC; if ((flags & PMAP_ENTER_WIRED) != 0) nflags |= PVF_WIRED; PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, prot = %x, " "flags = %x\n", (uint32_t) pmap, va, (uint32_t) m, prot, flags)); if (pmap == kernel_pmap) { l2b = pmap_get_l2_bucket(pmap, va); if (l2b == NULL) l2b = pmap_grow_l2_bucket(pmap, va); } else { do_l2b_alloc: l2b = pmap_alloc_l2_bucket(pmap, va); if (l2b == NULL) { if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); VM_WAIT; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); goto do_l2b_alloc; } return (KERN_RESOURCE_SHORTAGE); } } ptep = &l2b->l2b_kva[l2pte_index(va)]; opte = *ptep; npte = pa; oflags = 0; if (opte) { /* * There is already a mapping at this address. * If the physical address is different, lookup the * vm_page. */ if (l2pte_pa(opte) != pa) opg = PHYS_TO_VM_PAGE(l2pte_pa(opte)); else opg = m; } else opg = NULL; if ((prot & (VM_PROT_ALL)) || (!m || m->md.pvh_attrs & PVF_REF)) { /* * - The access type indicates that we don't need * to do referenced emulation. * OR * - The physical page has already been referenced * so no need to re-do referenced emulation here. 
*/ npte |= L2_S_PROTO; nflags |= PVF_REF; if (m && ((prot & VM_PROT_WRITE) != 0 || (m->md.pvh_attrs & PVF_MOD))) { /* * This is a writable mapping, and the * page's mod state indicates it has * already been modified. Make it * writable from the outset. */ nflags |= PVF_MOD; if (!(m->md.pvh_attrs & PVF_MOD)) vm_page_dirty(m); } if (m && opte) vm_page_aflag_set(m, PGA_REFERENCED); } else { /* * Need to do page referenced emulation. */ npte |= L2_TYPE_INV; } if (prot & VM_PROT_WRITE) { npte |= L2_S_PROT_W; if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); } if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) npte |= pte_l2_s_cache_mode; if (m && m == opg) { /* * We're changing the attrs of an existing mapping. */ oflags = pmap_modify_pv(m, pmap, va, PVF_WRITE | PVF_EXEC | PVF_WIRED | PVF_MOD | PVF_REF, nflags); /* * We may need to flush the cache if we're * doing rw-ro... */ if (pmap_is_current(pmap) && (oflags & PVF_NC) == 0 && (opte & L2_S_PROT_W) != 0 && (prot & VM_PROT_WRITE) == 0 && (opte & L2_TYPE_MASK) != L2_TYPE_INV) { cpu_dcache_wb_range(va, PAGE_SIZE); cpu_l2cache_wb_range(va, PAGE_SIZE); } } else { /* * New mapping, or changing the backing page * of an existing mapping. */ if (opg) { /* * Replacing an existing mapping with a new one. * It is part of our managed memory so we * must remove it from the PV list */ if ((pve = pmap_remove_pv(opg, pmap, va))) { /* note for patch: the oflags/invalidation was moved * because PG_FICTITIOUS pages could free the pve */ oflags = pve->pv_flags; /* * If the old mapping was valid (ref/mod * emulation creates 'invalid' mappings * initially) then make sure to frob * the cache. */ if ((oflags & PVF_NC) == 0 && l2pte_valid(opte)) { if (PV_BEEN_EXECD(oflags)) { pmap_idcache_wbinv_range(pmap, va, PAGE_SIZE); } else if (PV_BEEN_REFD(oflags)) { pmap_dcache_wb_range(pmap, va, PAGE_SIZE, TRUE, (oflags & PVF_WRITE) == 0); } } /* free/allocate a pv_entry for UNMANAGED pages if * this physical page is not/is already mapped. */ if (m && (m->oflags & VPO_UNMANAGED) && !m->md.pv_kva && TAILQ_EMPTY(&m->md.pv_list)) { pmap_free_pv_entry(pve); pve = NULL; } } else if (m && (!(m->oflags & VPO_UNMANAGED) || m->md.pv_kva || !TAILQ_EMPTY(&m->md.pv_list))) pve = pmap_get_pv_entry(); } else if (m && (!(m->oflags & VPO_UNMANAGED) || m->md.pv_kva || !TAILQ_EMPTY(&m->md.pv_list))) pve = pmap_get_pv_entry(); if (m) { if ((m->oflags & VPO_UNMANAGED)) { if (!TAILQ_EMPTY(&m->md.pv_list) || m->md.pv_kva) { KASSERT(pve != NULL, ("No pv")); nflags |= PVF_UNMAN; pmap_enter_pv(m, pve, pmap, va, nflags); } else m->md.pv_kva = va; } else { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); KASSERT(pve != NULL, ("No pv")); pmap_enter_pv(m, pve, pmap, va, nflags); } } } /* * Make sure userland mappings get the right permissions */ if (pmap != kernel_pmap && va != vector_page) { npte |= L2_S_PROT_U; } /* * Keep the stats up to date */ if (opte == 0) { l2b->l2b_occupancy++; pmap->pm_stats.resident_count++; } /* * If this is just a wiring change, the two PTEs will be * identical, so there's no need to update the page table. */ if (npte != opte) { boolean_t is_cached = pmap_is_current(pmap); *ptep = npte; if (is_cached) { /* * We only need to frob the cache/tlb if this pmap * is current */ PTE_SYNC(ptep); if (L1_IDX(va) != L1_IDX(vector_page) && l2pte_valid(npte)) { /* * This mapping is likely to be accessed as * soon as we return to userland. 
Fix up the * L1 entry to avoid taking another * page/domain fault. */ pd_entry_t *pl1pd, l1pd; pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; if (*pl1pd != l1pd) { *pl1pd = l1pd; PTE_SYNC(pl1pd); } } } if (PV_BEEN_EXECD(oflags)) pmap_tlb_flushID_SE(pmap, va); else if (PV_BEEN_REFD(oflags)) pmap_tlb_flushD_SE(pmap, va); if (m) pmap_fix_cache(m, pmap, va); } return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { pmap_enter_locked(pmap, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); pmap_enter_locked(pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * XXX Wired mappings of unmanaged pages cannot be counted by this pmap * implementation. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct l2_bucket *l2b; pt_entry_t *ptep, pte; pv_entry_t pv; vm_offset_t next_bucket; vm_page_t m; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (sva < eva) { next_bucket = L2_NEXT_BUCKET(sva); if (next_bucket > eva) next_bucket = eva; l2b = pmap_get_l2_bucket(pmap, sva); if (l2b == NULL) { sva = next_bucket; continue; } for (ptep = &l2b->l2b_kva[l2pte_index(sva)]; sva < next_bucket; sva += PAGE_SIZE, ptep++) { if ((pte = *ptep) == 0 || (m = PHYS_TO_VM_PAGE(l2pte_pa(pte))) == NULL || (m->oflags & VPO_UNMANAGED) != 0) continue; pv = pmap_find_pv(m, pmap, sva); if ((pv->pv_flags & PVF_WIRED) == 0) panic("pmap_unwire: pv %p isn't wired", pv); pv->pv_flags &= ~PVF_WIRED; pmap->pm_stats.wired_count--; } } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
*/ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa; PMAP_LOCK(pmap); pa = pmap_extract_locked(pmap, va); PMAP_UNLOCK(pmap); return (pa); } static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va) { struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; vm_paddr_t pa; u_int l1idx; if (pmap != kernel_pmap) PMAP_ASSERT_LOCKED(pmap); l1idx = L1_IDX(va); l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* * These should only happen for the kernel pmap. */ KASSERT(pmap == kernel_pmap, ("unexpected section")); /* XXX: what to do about the bits > 32 ? */ if (l1pd & L1_S_SUPERSEC) pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); } else { /* * Note that we can't rely on the validity of the L1 * descriptor as an indication that a mapping exists. * We have to look it up in the L2 dtable. */ l2 = pmap->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) return (0); pte = ptep[l2pte_index(va)]; if (pte == 0) return (0); if ((pte & L2_TYPE_MASK) == L2_TYPE_L) pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); else pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); } return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. * */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; vm_paddr_t pa, paddr; vm_page_t m = NULL; u_int l1idx; l1idx = L1_IDX(va); paddr = 0; PMAP_LOCK(pmap); retry: l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* * These should only happen for kernel_pmap */ KASSERT(pmap == kernel_pmap, ("huh")); /* XXX: what to do about the bits > 32 ? */ if (l1pd & L1_S_SUPERSEC) pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) goto retry; if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } else { /* * Note that we can't rely on the validity of the L1 * descriptor as an indication that a mapping exists. * We have to look it up in the L2 dtable. 
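pmap_extract_locked() above distinguishes three mapping sizes: a 1MB section decoded straight from the L1 entry, and 64KB large or 4KB small pages found through the L2 table. In every case the physical address is "frame bits from the descriptor, offset bits from the VA"; only the split point moves. A compact model of that decode, with simplified type encodings rather than the real descriptor bits:

#include <stdio.h>
#include <stdint.h>

/* Simplified descriptor: low 2 bits encode the mapping size. */
enum { T_INVALID = 0, T_SECTION = 1, T_LARGE = 2, T_SMALL = 3 };

static uint32_t
frame_mask(unsigned type)
{
    switch (type) {
    case T_SECTION: return (~0u << 20);     /* 1 MiB frame */
    case T_LARGE:   return (~0u << 16);     /* 64 KiB frame */
    default:        return (~0u << 12);     /* 4 KiB frame */
    }
}

/* pa = (frame bits of the descriptor) | (offset bits of the VA) */
static uint32_t
extract(uint32_t desc, uint32_t va)
{
    uint32_t mask = frame_mask(desc & 0x3);

    return ((desc & mask) | (va & ~mask));
}

int
main(void)
{
    uint32_t va = 0xc0123456;

    printf("section: 0x%08x\n", (unsigned)extract(0x80000000 | T_SECTION, va));
    printf("large:   0x%08x\n", (unsigned)extract(0x80010000 | T_LARGE, va));
    printf("small:   0x%08x\n", (unsigned)extract(0x80012000 | T_SMALL, va));
    return (0);
}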
*/ l2 = pmap->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { PMAP_UNLOCK(pmap); return (NULL); } ptep = &ptep[l2pte_index(va)]; pte = *ptep; if (pte == 0) { PMAP_UNLOCK(pmap); return (NULL); } if (pte & L2_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { if ((pte & L2_TYPE_MASK) == L2_TYPE_L) pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); else pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } PMAP_UNLOCK(pmap); PA_UNLOCK_COND(paddr); return (m); } vm_paddr_t pmap_dump_kextract(vm_offset_t va, pt2_entry_t *pte2p) { struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; vm_paddr_t pa; u_int l1idx; l1idx = L1_IDX(va); l1pd = kernel_pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { if (l1pd & L1_S_SUPERSEC) pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); pte = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, VM_PROT_READ | VM_PROT_WRITE); } else { l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { pte = 0; pa = 0; goto out; } pte = ptep[l2pte_index(va)]; if (pte == 0) { pa = 0; goto out; } if ((pte & L2_TYPE_MASK) == L2_TYPE_L) pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); else pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); } out: if (pte2p != NULL) *pte2p = pte; return (pa); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap)); pmap_alloc_l1(pmap); bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_stats.resident_count = 1; if (vector_page < KERNBASE) { pmap_enter(pmap, vector_page, PHYS_TO_VM_PAGE(systempage.pv_pa), VM_PROT_READ, PMAP_ENTER_WIRED | VM_PROT_READ, 0); } return (1); } /*************************************************** * page management routines. ***************************************************/ static void pmap_free_pv_entry(pv_entry_t pv) { pv_entry_count--; uma_zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ static pv_entry_t pmap_get_pv_entry(void) { pv_entry_t ret_value; pv_entry_count++; if (pv_entry_count > pv_entry_high_water) pagedaemon_wakeup(); ret_value = uma_zalloc(pvzone, M_NOWAIT); return ret_value; } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ #define PMAP_REMOVE_CLEAN_LIST_SIZE 3 void pmap_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct l2_bucket *l2b; vm_offset_t next_bucket; pt_entry_t *ptep; u_int total; u_int mappings, is_exec, is_refd; int flushall = 0; /* * we lock in the pmap => pv_head direction */ rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); total = 0; while (sva < eva) { /* * Do one L2 bucket's worth at a time. 
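The removal loop that continues below bounds its per-page cache maintenance with PMAP_REMOVE_CLEAN_LIST_SIZE: the first few pages get individual writeback/invalidate and TLB operations, and anything larger falls back to one whole-cache clean plus a full TLB flush at the end. Counters in place of the cache operations show the effect of the threshold:

#include <stdio.h>

#define CLEAN_LIST_SIZE 3       /* same idea as PMAP_REMOVE_CLEAN_LIST_SIZE */

struct costs { unsigned per_page_ops, full_flushes; };

/* Simulate removing 'npages' mappings from a current pmap. */
static struct costs
remove_cost(unsigned npages)
{
    struct costs c = { 0, 0 };
    unsigned total = 0, flushall = 0, i;

    for (i = 0; i < npages; i++) {
        if (total < CLEAN_LIST_SIZE) {
            total++;
            c.per_page_ops++;           /* range wbinv plus one TLB shot */
        } else if (total == CLEAN_LIST_SIZE) {
            c.full_flushes++;           /* one whole-cache writeback */
            flushall = 1;
            total++;
        }
    }
    if (flushall)
        c.full_flushes++;               /* final full TLB flush */
    return (c);
}

int
main(void)
{
    struct costs small = remove_cost(2), big = remove_cost(1000);

    printf("2 pages:    %u per-page ops, %u full flushes\n",
        small.per_page_ops, small.full_flushes);
    printf("1000 pages: %u per-page ops, %u full flushes\n",
        big.per_page_ops, big.full_flushes);
    return (0);
}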
*/ next_bucket = L2_NEXT_BUCKET(sva); if (next_bucket > eva) next_bucket = eva; l2b = pmap_get_l2_bucket(pm, sva); if (l2b == NULL) { sva = next_bucket; continue; } ptep = &l2b->l2b_kva[l2pte_index(sva)]; mappings = 0; while (sva < next_bucket) { struct vm_page *pg; pt_entry_t pte; vm_paddr_t pa; pte = *ptep; if (pte == 0) { /* * Nothing here, move along */ sva += PAGE_SIZE; ptep++; continue; } pm->pm_stats.resident_count--; pa = l2pte_pa(pte); is_exec = 0; is_refd = 1; /* * Update flags. In a number of circumstances, * we could cluster a lot of these and do a * number of sequential pages in one go. */ if ((pg = PHYS_TO_VM_PAGE(pa)) != NULL) { struct pv_entry *pve; pve = pmap_remove_pv(pg, pm, sva); if (pve) { is_exec = PV_BEEN_EXECD(pve->pv_flags); is_refd = PV_BEEN_REFD(pve->pv_flags); pmap_free_pv_entry(pve); } } if (l2pte_valid(pte) && pmap_is_current(pm)) { if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) { total++; if (is_exec) { cpu_idcache_wbinv_range(sva, PAGE_SIZE); cpu_l2cache_wbinv_range(sva, PAGE_SIZE); cpu_tlb_flushID_SE(sva); } else if (is_refd) { cpu_dcache_wbinv_range(sva, PAGE_SIZE); cpu_l2cache_wbinv_range(sva, PAGE_SIZE); cpu_tlb_flushD_SE(sva); } } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE) { /* flushall will also only get set for * for a current pmap */ cpu_idcache_wbinv_all(); cpu_l2cache_wbinv_all(); flushall = 1; total++; } } *ptep = 0; PTE_SYNC(ptep); sva += PAGE_SIZE; ptep++; mappings++; } pmap_free_l2_bucket(pm, l2b, mappings); } rw_wunlock(&pvh_global_lock); if (flushall) cpu_tlb_flushID(); PMAP_UNLOCK(pm); } /* * pmap_zero_page() * * Zero a given physical page by mapping it at a page hook point. * In doing the zero page op, the page we zero is mapped cachable, as with * StrongARM accesses to non-cached pages are non-burst making writing * _any_ bulk data very slow. */ #if ARM_MMU_GENERIC != 0 || defined(CPU_XSCALE_CORE3) void pmap_zero_page_generic(vm_paddr_t phys, int off, int size) { if (_arm_bzero && size >= _min_bzero_size && _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0) return; mtx_lock(&cmtx); /* * Hook in the page, zero it, invalidate the TLB as needed. * * Note the temporary zero-page mapping must be a non-cached page in * order to work without corruption when write-allocate is enabled. */ *cdst_pte = L2_S_PROTO | phys | L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE); PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); if (off || size != PAGE_SIZE) bzero((void *)(cdstp + off), size); else bzero_page(cdstp); mtx_unlock(&cmtx); } #endif /* ARM_MMU_GENERIC != 0 */ #if ARM_MMU_XSCALE == 1 void pmap_zero_page_xscale(vm_paddr_t phys, int off, int size) { if (_arm_bzero && size >= _min_bzero_size && _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0) return; mtx_lock(&cmtx); /* * Hook in the page, zero it, and purge the cache for that * zeroed page. Invalidate the TLB as needed. */ *cdst_pte = L2_S_PROTO | phys | L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); /* mini-data */ PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); if (off || size != PAGE_SIZE) bzero((void *)(cdstp + off), size); else bzero_page(cdstp); mtx_unlock(&cmtx); xscale_cache_clean_minidata(); } /* * Change the PTEs for the specified kernel mappings such that they * will use the mini data cache instead of the main data cache. 
*/ void pmap_use_minicache(vm_offset_t va, vm_size_t size) { struct l2_bucket *l2b; pt_entry_t *ptep, *sptep, pte; vm_offset_t next_bucket, eva; #if (ARM_NMMUS > 1) || defined(CPU_XSCALE_CORE3) if (xscale_use_minidata == 0) return; #endif eva = va + size; while (va < eva) { next_bucket = L2_NEXT_BUCKET(va); if (next_bucket > eva) next_bucket = eva; l2b = pmap_get_l2_bucket(kernel_pmap, va); sptep = ptep = &l2b->l2b_kva[l2pte_index(va)]; while (va < next_bucket) { pte = *ptep; if (!l2pte_minidata(pte)) { cpu_dcache_wbinv_range(va, PAGE_SIZE); cpu_tlb_flushD_SE(va); *ptep = pte & ~L2_B; } ptep++; va += PAGE_SIZE; } PTE_SYNC_RANGE(sptep, (u_int)(ptep - sptep)); } cpu_cpwait(); } #endif /* ARM_MMU_XSCALE == 1 */ /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { pmap_zero_page_func(VM_PAGE_TO_PHYS(m), 0, PAGE_SIZE); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { pmap_zero_page_func(VM_PAGE_TO_PHYS(m), off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { pmap_zero_page(m); } #if 0 /* * pmap_clean_page() * * This is a local function used to work out the best strategy to clean * a single page referenced by its entry in the PV table. It should be used by * pmap_copy_page, pmap_zero page and maybe some others later on. * * Its policy is effectively: * o If there are no mappings, we don't bother doing anything with the cache. * o If there is one mapping, we clean just that page. * o If there are multiple mappings, we clean the entire cache. * * So that some functions can be further optimised, it returns 0 if it didn't * clean the entire cache, or 1 if it did. * * XXX One bug in this routine is that if the pv_entry has a single page * mapped at 0x00000000 a whole cache clean will be performed rather than * just the 1 page. Since this should not occur in everyday use and if it does * it will just result in not the most efficient clean for the page. * * We don't yet use this function but may want to. */ static int pmap_clean_page(struct pv_entry *pv, boolean_t is_src) { pmap_t pm, pm_to_clean = NULL; struct pv_entry *npv; u_int cache_needs_cleaning = 0; u_int flags = 0; vm_offset_t page_to_clean = 0; if (pv == NULL) { /* nothing mapped in so nothing to flush */ return (0); } /* * Since we flush the cache each time we change to a different * user vmspace, we only need to flush the page if it is in the * current pmap. */ if (curthread) pm = vmspace_pmap(curproc->p_vmspace); else pm = kernel_pmap; for (npv = pv; npv; npv = TAILQ_NEXT(npv, pv_list)) { if (npv->pv_pmap == kernel_pmap || npv->pv_pmap == pm) { flags |= npv->pv_flags; /* * The page is mapped non-cacheable in * this map. No need to flush the cache. 
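The (currently compiled-out) pmap_clean_page() above encodes a simple policy: an uncached mapping means nothing to clean, a single relevant cacheable mapping is cleaned by itself, and a second one tips the balance toward cleaning the whole cache. The decision logic, reduced to a classifier over stand-in pv flags:

#include <stdio.h>

#define PVF_NC    0x1           /* illustrative flag bits */
#define PVF_WRITE 0x2

struct pv { unsigned flags; int relevant; };    /* relevant: current or kernel pmap */

/* Return 0 = clean nothing, 1 = clean one page, 2 = clean whole cache. */
static int
clean_policy(const struct pv *pvs, unsigned n, int is_src)
{
    unsigned i, needs_cleaning = 0;

    for (i = 0; i < n; i++) {
        if (!pvs[i].relevant)
            continue;
        if (pvs[i].flags & PVF_NC)      /* mapped uncached: nothing to clean */
            return (0);
        if (is_src && (pvs[i].flags & PVF_WRITE) == 0)
            continue;                   /* read-only mapping cannot have dirtied it */
        if (needs_cleaning++)
            return (2);                 /* second hit: clean everything */
    }
    return (needs_cleaning ? 1 : 0);
}

int
main(void)
{
    struct pv one[] = { { PVF_WRITE, 1 } };
    struct pv two[] = { { PVF_WRITE, 1 }, { PVF_WRITE, 1 } };

    printf("one mapping -> %d, two mappings -> %d\n",
        clean_policy(one, 1, 0), clean_policy(two, 2, 0));
    return (0);
}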
*/ if (npv->pv_flags & PVF_NC) { #ifdef DIAGNOSTIC if (cache_needs_cleaning) panic("pmap_clean_page: " "cache inconsistency"); #endif break; } else if (is_src && (npv->pv_flags & PVF_WRITE) == 0) continue; if (cache_needs_cleaning) { page_to_clean = 0; break; } else { page_to_clean = npv->pv_va; pm_to_clean = npv->pv_pmap; } cache_needs_cleaning = 1; } } if (page_to_clean) { if (PV_BEEN_EXECD(flags)) pmap_idcache_wbinv_range(pm_to_clean, page_to_clean, PAGE_SIZE); else pmap_dcache_wb_range(pm_to_clean, page_to_clean, PAGE_SIZE, !is_src, (flags & PVF_WRITE) == 0); } else if (cache_needs_cleaning) { if (PV_BEEN_EXECD(flags)) pmap_idcache_wbinv_all(pm); else pmap_dcache_wbinv_all(pm); return (1); } return (0); } #endif /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ /* * pmap_copy_page() * * Copy one physical page into another, by mapping the pages into * hook points. The same comment regarding cachability as in * pmap_zero_page also applies here. */ #if ARM_MMU_GENERIC != 0 || defined (CPU_XSCALE_CORE3) void pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst) { #if 0 struct vm_page *src_pg = PHYS_TO_VM_PAGE(src); #endif /* * Clean the source page. Hold the source page's lock for * the duration of the copy so that no other mappings can * be created while we have a potentially aliased mapping. */ #if 0 /* * XXX: Not needed while we call cpu_dcache_wbinv_all() in * pmap_copy_page(). */ (void) pmap_clean_page(TAILQ_FIRST(&src_pg->md.pv_list), TRUE); #endif /* * Map the pages into the page hook points, copy them, and purge * the cache for the appropriate page. Invalidate the TLB * as required. */ mtx_lock(&cmtx); *csrc_pte = L2_S_PROTO | src | L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | pte_l2_s_cache_mode; PTE_SYNC(csrc_pte); *cdst_pte = L2_S_PROTO | dst | L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode; PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(csrcp); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); bcopy_page(csrcp, cdstp); mtx_unlock(&cmtx); cpu_dcache_inv_range(csrcp, PAGE_SIZE); cpu_dcache_wbinv_range(cdstp, PAGE_SIZE); cpu_l2cache_inv_range(csrcp, PAGE_SIZE); cpu_l2cache_wbinv_range(cdstp, PAGE_SIZE); } void pmap_copy_page_offs_generic(vm_paddr_t a_phys, vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, int cnt) { mtx_lock(&cmtx); *csrc_pte = L2_S_PROTO | a_phys | L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | pte_l2_s_cache_mode; PTE_SYNC(csrc_pte); *cdst_pte = L2_S_PROTO | b_phys | L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode; PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(csrcp); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); bcopy((char *)csrcp + a_offs, (char *)cdstp + b_offs, cnt); mtx_unlock(&cmtx); cpu_dcache_inv_range(csrcp + a_offs, cnt); cpu_dcache_wbinv_range(cdstp + b_offs, cnt); cpu_l2cache_inv_range(csrcp + a_offs, cnt); cpu_l2cache_wbinv_range(cdstp + b_offs, cnt); } #endif /* ARM_MMU_GENERIC != 0 */ #if ARM_MMU_XSCALE == 1 void pmap_copy_page_xscale(vm_paddr_t src, vm_paddr_t dst) { #if 0 /* XXX: Only needed for pmap_clean_page(), which is commented out. */ struct vm_page *src_pg = PHYS_TO_VM_PAGE(src); #endif /* * Clean the source page. Hold the source page's lock for * the duration of the copy so that no other mappings can * be created while we have a potentially aliased mapping. */ #if 0 /* * XXX: Not needed while we call cpu_dcache_wbinv_all() in * pmap_copy_page(). 
*/ (void) pmap_clean_page(TAILQ_FIRST(&src_pg->md.pv_list), TRUE); #endif /* * Map the pages into the page hook points, copy them, and purge * the cache for the appropriate page. Invalidate the TLB * as required. */ mtx_lock(&cmtx); *csrc_pte = L2_S_PROTO | src | L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); /* mini-data */ PTE_SYNC(csrc_pte); *cdst_pte = L2_S_PROTO | dst | L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); /* mini-data */ PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(csrcp); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); bcopy_page(csrcp, cdstp); mtx_unlock(&cmtx); xscale_cache_clean_minidata(); } void pmap_copy_page_offs_xscale(vm_paddr_t a_phys, vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, int cnt) { mtx_lock(&cmtx); *csrc_pte = L2_S_PROTO | a_phys | L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); PTE_SYNC(csrc_pte); *cdst_pte = L2_S_PROTO | b_phys | L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(csrcp); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); bcopy((char *)csrcp + a_offs, (char *)cdstp + b_offs, cnt); mtx_unlock(&cmtx); xscale_cache_clean_minidata(); } #endif /* ARM_MMU_XSCALE == 1 */ void pmap_copy_page(vm_page_t src, vm_page_t dst) { cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size && _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst), (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0) return; pmap_copy_page_func(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); } /* * We have code to do unmapped I/O. However, it isn't quite right and * causes un-page-aligned I/O to devices to fail (most notably newfs * or fsck). We give up a little performance to not allow unmapped I/O * to gain stability. */ int unmapped_buf_allowed = 0; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { vm_page_t a_pg, b_pg; vm_offset_t a_pg_offset, b_pg_offset; int cnt; cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); while (xfersize > 0) { a_pg = ma[a_offset >> PAGE_SHIFT]; a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); b_pg = mb[b_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); pmap_copy_page_offs_func(VM_PAGE_TO_PHYS(a_pg), a_pg_offset, VM_PAGE_TO_PHYS(b_pg), b_pg_offset, cnt); xfersize -= cnt; a_offset += cnt; b_offset += cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { /* * Don't bother with a PCPU pageframe, since we don't support * SMP for anything pre-armv7. Use pmap_kenter() to ensure * caching is handled correctly for multiple mappings of the * same physical page. */ mtx_assert(&qmap_mtx, MA_NOTOWNED); mtx_lock(&qmap_mtx); pmap_kenter(qmap_addr, VM_PAGE_TO_PHYS(m)); return (qmap_addr); } void pmap_quick_remove_page(vm_offset_t addr) { KASSERT(addr == qmap_addr, ("pmap_quick_remove_page: invalid address")); mtx_assert(&qmap_mtx, MA_OWNED); pmap_kremove(addr); mtx_unlock(&qmap_mtx); } /* * this routine returns true if a physical page resides * in the given pmap. 
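pmap_copy_pages() above never assumes the source or destination offset is page aligned: each pass clips the chunk to whatever remains of the current source page and of the current destination page, whichever is smaller. The chunking arithmetic on its own:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)
#define PAGE_MASK  (PAGE_SIZE - 1)

static unsigned
min_u(unsigned a, unsigned b) { return (a < b ? a : b); }

/* Print the per-chunk copy plan for a possibly unaligned transfer. */
static void
copy_plan(unsigned a_offset, unsigned b_offset, unsigned xfersize)
{
    while (xfersize > 0) {
        unsigned a_pg = a_offset >> PAGE_SHIFT;
        unsigned b_pg = b_offset >> PAGE_SHIFT;
        unsigned cnt = min_u(xfersize, PAGE_SIZE - (a_offset & PAGE_MASK));

        cnt = min_u(cnt, PAGE_SIZE - (b_offset & PAGE_MASK));
        printf("copy %u bytes: src page %u +0x%x -> dst page %u +0x%x\n",
            cnt, a_pg, a_offset & PAGE_MASK, b_pg, b_offset & PAGE_MASK);
        a_offset += cnt;
        b_offset += cnt;
        xfersize -= cnt;
    }
}

int
main(void)
{
    copy_plan(0x0f00, 0x0080, 0x2000);  /* both sides unaligned */
    return (0);
}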
*/ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pv->pv_pmap == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { pv_entry_t pv; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) if ((pv->pv_flags & PVF_WIRED) != 0) count++; rw_wunlock(&pvh_global_lock); return (count); } /* * This function is advisory. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * pmap_ts_referenced: * * Return the count of reference bits for a page, clearing all of them. */ int pmap_ts_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); return (pmap_clearbit(m, PVF_REF)); } boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); if (m->md.pvh_attrs & PVF_MOD) return (TRUE); return(FALSE); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no mappings can be modified. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; if (m->md.pvh_attrs & PVF_MOD) pmap_clearbit(m, PVF_MOD); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return ((m->md.pvh_attrs & PVF_REF) != 0); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. 
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) pmap_clearbit(m, PVF_WRITE); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { struct l2_bucket *l2b; pt_entry_t *ptep, pte; vm_paddr_t pa; vm_page_t m; int val; boolean_t managed; PMAP_LOCK(pmap); retry: l2b = pmap_get_l2_bucket(pmap, addr); if (l2b == NULL) { val = 0; goto out; } ptep = &l2b->l2b_kva[l2pte_index(addr)]; pte = *ptep; if (!l2pte_valid(pte)) { val = 0; goto out; } val = MINCORE_INCORE; if (pte & L2_S_PROT_W) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; managed = false; pa = l2pte_pa(pte); m = PHYS_TO_VM_PAGE(pa); if (m != NULL && !(m->oflags & VPO_UNMANAGED)) managed = true; if (managed) { /* * The ARM pmap tries to maintain a per-mapping * reference bit. The trouble is that it's kept in * the PV entry, not the PTE, so it's costly to access * here. You would need to acquire the pvh global * lock, call pmap_find_pv(), and introduce a custom * version of vm_page_pa_tryrelock() that releases and * reacquires the pvh global lock. In the end, I * doubt it's worthwhile. This may falsely report * the given address as referenced. */ if ((m->md.pvh_attrs & PVF_REF) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else out: PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } #define BOOTSTRAP_DEBUG /* * pmap_map_section: * * Create a single section mapping. */ void pmap_map_section(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, int cache) { pd_entry_t *pde = (pd_entry_t *) l1pt; pd_entry_t fl; KASSERT(((va | pa) & L1_S_OFFSET) == 0, ("ouin2")); switch (cache) { case PTE_NOCACHE: default: fl = 0; break; case PTE_CACHE: fl = pte_l1_s_cache_mode; break; case PTE_PAGETABLE: fl = pte_l1_s_cache_mode_pt; break; } pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL, prot) | fl | L1_S_DOM(PMAP_DOMAIN_KERNEL); PTE_SYNC(&pde[va >> L1_S_SHIFT]); } /* * pmap_link_l2pt: * * Link the L2 page table specified by l2pv.pv_pa into the L1 * page table at the slot for "va". */ void pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv) { pd_entry_t *pde = (pd_entry_t *) l1pt, proto; u_int slot = va >> L1_S_SHIFT; proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO; #ifdef VERBOSE_INIT_ARM printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va); #endif pde[slot + 0] = proto | (l2pv->pv_pa + 0x000); PTE_SYNC(&pde[slot]); SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list); } /* * pmap_map_entry * * Create a single page mapping. 
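 * For example, a bootstrap-time call such as
 *	pmap_map_entry(l1pt, va, pa, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE)
 * enters one cacheable 4KB kernel mapping; the call panics if no L2 table
 * has yet been linked into the L1 slot covering "va". (The arguments above
 * are only an illustration.)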
*/ void pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, int cache) { pd_entry_t *pde = (pd_entry_t *) l1pt; pt_entry_t fl; pt_entry_t *pte; KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin")); switch (cache) { case PTE_NOCACHE: default: fl = 0; break; case PTE_CACHE: fl = pte_l2_s_cache_mode; break; case PTE_PAGETABLE: fl = pte_l2_s_cache_mode_pt; break; } if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) panic("pmap_map_entry: no L2 table for VA 0x%08x", va); pte = (pt_entry_t *) kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK); if (pte == NULL) panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va); pte[l2pte_index(va)] = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | fl; PTE_SYNC(&pte[l2pte_index(va)]); } /* * pmap_map_chunk: * * Map a chunk of memory using the most efficient mappings * possible (section. large page, small page) into the * provided L1 and L2 tables at the specified virtual address. */ vm_size_t pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, vm_size_t size, int prot, int cache) { pd_entry_t *pde = (pd_entry_t *) l1pt; pt_entry_t *pte, f1, f2s, f2l; vm_size_t resid; int i; resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); if (l1pt == 0) panic("pmap_map_chunk: no L1 table provided"); #ifdef VERBOSE_INIT_ARM printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x " "prot=0x%x cache=%d\n", pa, va, size, resid, prot, cache); #endif switch (cache) { case PTE_NOCACHE: default: f1 = 0; f2l = 0; f2s = 0; break; case PTE_CACHE: f1 = pte_l1_s_cache_mode; f2l = pte_l2_l_cache_mode; f2s = pte_l2_s_cache_mode; break; case PTE_PAGETABLE: f1 = pte_l1_s_cache_mode_pt; f2l = pte_l2_l_cache_mode_pt; f2s = pte_l2_s_cache_mode_pt; break; } size = resid; while (resid > 0) { /* See if we can use a section mapping. */ if (L1_S_MAPPABLE_P(va, pa, resid)) { #ifdef VERBOSE_INIT_ARM printf("S"); #endif pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL, prot) | f1 | L1_S_DOM(PMAP_DOMAIN_KERNEL); PTE_SYNC(&pde[va >> L1_S_SHIFT]); va += L1_S_SIZE; pa += L1_S_SIZE; resid -= L1_S_SIZE; continue; } /* * Ok, we're going to use an L2 table. Make sure * one is actually in the corresponding L1 slot * for the current VA. */ if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) panic("pmap_map_chunk: no L2 table for VA 0x%08x", va); pte = (pt_entry_t *) kernel_pt_lookup( pde[L1_IDX(va)] & L1_C_ADDR_MASK); if (pte == NULL) panic("pmap_map_chunk: can't find L2 table for VA" "0x%08x", va); /* See if we can use a L2 large page mapping. */ if (L2_L_MAPPABLE_P(va, pa, resid)) { #ifdef VERBOSE_INIT_ARM printf("L"); #endif for (i = 0; i < 16; i++) { pte[l2pte_index(va) + i] = L2_L_PROTO | pa | L2_L_PROT(PTE_KERNEL, prot) | f2l; PTE_SYNC(&pte[l2pte_index(va) + i]); } va += L2_L_SIZE; pa += L2_L_SIZE; resid -= L2_L_SIZE; continue; } /* Use a small page mapping. */ #ifdef VERBOSE_INIT_ARM printf("P"); #endif pte[l2pte_index(va)] = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | f2s; PTE_SYNC(&pte[l2pte_index(va)]); va += PAGE_SIZE; pa += PAGE_SIZE; resid -= PAGE_SIZE; } #ifdef VERBOSE_INIT_ARM printf("\n"); #endif return (size); } void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { /* * Remember the memattr in a field that gets used to set the appropriate * bits in the PTEs as mappings are established. */ m->md.pv_memattr = ma; /* * It appears that this function can only be called before any mappings * for the page are established on ARM. 
If this ever changes, this code * will need to walk the pv_list and make each of the existing mappings * uncacheable, being careful to sync caches and PTEs (and maybe * invalidate TLB?) for any current mapping it modifies. */ if (m->md.pv_kva != 0 || TAILQ_FIRST(&m->md.pv_list) != NULL) panic("Can't change memattr on page with existing mappings"); } Index: head/sys/arm/arm/pmap-v6.c =================================================================== --- head/sys/arm/arm/pmap-v6.c (revision 298054) +++ head/sys/arm/arm/pmap-v6.c (revision 298055) @@ -1,6653 +1,6653 @@ /*- * Copyright (c) 1991 Regents of the University of California. * Copyright (c) 1994 John S. Dyson * Copyright (c) 1994 David Greenman * Copyright (c) 2005-2010 Alan L. Cox * Copyright (c) 2014-2016 Svatopluk Kraus * Copyright (c) 2014-2016 Michal Meloun * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_vm.h" #include "opt_pmap.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #else #include #endif #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #ifndef DIAGNOSTIC #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif #ifdef PMAP_DEBUG static void pmap_zero_page_check(vm_page_t m); void pmap_debug(int level); int pmap_pid_dump(int pid); #define PDEBUG(_lev_,_stat_) \ if (pmap_debug_level >= (_lev_)) \ ((_stat_)) #define dprintf printf int pmap_debug_level = 1; #else /* PMAP_DEBUG */ #define PDEBUG(_lev_,_stat_) /* Nothing */ #define dprintf(x, arg...) #endif /* PMAP_DEBUG */ /* * Level 2 page tables map definion ('max' is excluded). */ #define PT2V_MIN_ADDRESS ((vm_offset_t)PT2MAP) #define PT2V_MAX_ADDRESS ((vm_offset_t)PT2MAP + PT2MAP_SIZE) #define UPT2V_MIN_ADDRESS ((vm_offset_t)PT2MAP) #define UPT2V_MAX_ADDRESS \ ((vm_offset_t)(PT2MAP + (KERNBASE >> PT2MAP_SHIFT))) /* * Promotion to a 1MB (PTE1) page mapping requires that the corresponding * 4KB (PTE2) page mappings have identical settings for the following fields: */ #define PTE2_PROMOTE (PTE2_V | PTE2_A | PTE2_NM | PTE2_S | PTE2_NG | \ PTE2_NX | PTE2_RO | PTE2_U | PTE2_W | \ PTE2_ATTR_MASK) #define PTE1_PROMOTE (PTE1_V | PTE1_A | PTE1_NM | PTE1_S | PTE1_NG | \ PTE1_NX | PTE1_RO | PTE1_U | PTE1_W | \ PTE1_ATTR_MASK) #define ATTR_TO_L1(l2_attr) ((((l2_attr) & L2_TEX0) ? L1_S_TEX0 : 0) | \ (((l2_attr) & L2_C) ? L1_S_C : 0) | \ (((l2_attr) & L2_B) ? L1_S_B : 0) | \ (((l2_attr) & PTE2_A) ? PTE1_A : 0) | \ (((l2_attr) & PTE2_NM) ? PTE1_NM : 0) | \ (((l2_attr) & PTE2_S) ? PTE1_S : 0) | \ (((l2_attr) & PTE2_NG) ? PTE1_NG : 0) | \ (((l2_attr) & PTE2_NX) ? PTE1_NX : 0) | \ (((l2_attr) & PTE2_RO) ? 
PTE1_RO : 0) | \ (((l2_attr) & PTE2_U) ? PTE1_U : 0) | \ (((l2_attr) & PTE2_W) ? PTE1_W : 0)) #define ATTR_TO_L2(l1_attr) ((((l1_attr) & L1_S_TEX0) ? L2_TEX0 : 0) | \ (((l1_attr) & L1_S_C) ? L2_C : 0) | \ (((l1_attr) & L1_S_B) ? L2_B : 0) | \ (((l1_attr) & PTE1_A) ? PTE2_A : 0) | \ (((l1_attr) & PTE1_NM) ? PTE2_NM : 0) | \ (((l1_attr) & PTE1_S) ? PTE2_S : 0) | \ (((l1_attr) & PTE1_NG) ? PTE2_NG : 0) | \ (((l1_attr) & PTE1_NX) ? PTE2_NX : 0) | \ (((l1_attr) & PTE1_RO) ? PTE2_RO : 0) | \ (((l1_attr) & PTE1_U) ? PTE2_U : 0) | \ (((l1_attr) & PTE1_W) ? PTE2_W : 0)) /* * PTE2 descriptors creation macros. */ #define PTE2_ATTR_DEFAULT vm_memattr_to_pte2(VM_MEMATTR_DEFAULT) #define PTE2_ATTR_PT vm_memattr_to_pte2(pt_memattr) #define PTE2_KPT(pa) PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_PT) #define PTE2_KPT_NG(pa) PTE2_KERN_NG(pa, PTE2_AP_KRW, PTE2_ATTR_PT) #define PTE2_KRW(pa) PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT) #define PTE2_KRO(pa) PTE2_KERN(pa, PTE2_AP_KR, PTE2_ATTR_DEFAULT) #define PV_STATS #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif /* * The boot_pt1 is used temporary in very early boot stage as L1 page table. * We can init many things with no memory allocation thanks to its static * allocation and this brings two main advantages: * (1) other cores can be started very simply, * (2) various boot loaders can be supported as its arguments can be processed * in virtual address space and can be moved to safe location before * first allocation happened. * Only disadvantage is that boot_pt1 is used only in very early boot stage. * However, the table is uninitialized and so lays in bss. Therefore kernel * image size is not influenced. * * QQQ: In the future, maybe, boot_pt1 can be used for soft reset and * CPU suspend/resume game. */ extern pt1_entry_t boot_pt1[]; vm_paddr_t base_pt1; pt1_entry_t *kern_pt1; pt2_entry_t *kern_pt2tab; pt2_entry_t *PT2MAP; static uint32_t ttb_flags; static vm_memattr_t pt_memattr; ttb_entry_t pmap_kern_ttb; struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static vm_offset_t kernel_vm_end_new; vm_offset_t kernel_vm_end = KERNBASE + NKPT2PG * NPT2_IN_PG * PTE1_SIZE; vm_offset_t vm_max_kernel_address; vm_paddr_t kernel_l1pa; static struct rwlock __aligned(CACHE_LINE_SIZE) pvh_global_lock; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static struct md_page *pv_table; /* XXX: Is it used only the list in md_page? 
*/
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
int pv_maxchunks;			/* How many chunks we have KVA for */
vm_offset_t pv_vafree;			/* freelist stored in the PTE */

vm_paddr_t first_managed_pa;
#define	pa_to_pvh(pa)	(&pv_table[pte1_index(pa - first_managed_pa)])

/*
 *  All those kernel PT submaps that BSD is so fond of
 */
struct sysmaps {
	struct	mtx lock;
	pt2_entry_t *CMAP1;
	pt2_entry_t *CMAP2;
	pt2_entry_t *CMAP3;
	caddr_t	CADDR1;
	caddr_t	CADDR2;
	caddr_t	CADDR3;
};
static struct sysmaps sysmaps_pcpu[MAXCPU];
static pt2_entry_t *CMAP3;
static caddr_t CADDR3;
caddr_t _tmppt = 0;

-struct msgbuf *msgbufp = 0; /* XXX move it to machdep.c */
+struct msgbuf *msgbufp = NULL; /* XXX move it to machdep.c */

/*
 *  Crashdump maps.
 */
static caddr_t crashdumpmap;

-static pt2_entry_t *PMAP1 = 0, *PMAP2;
-static pt2_entry_t *PADDR1 = 0, *PADDR2;
+static pt2_entry_t *PMAP1 = NULL, *PMAP2;
+static pt2_entry_t *PADDR1 = NULL, *PADDR2;
#ifdef DDB
static pt2_entry_t *PMAP3;
static pt2_entry_t *PADDR3;
static int PMAP3cpu __unused; /* for SMP only */
#endif
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
    &PMAP1changedcpu, 0,
    "Number of times pmap_pte2_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
    &PMAP1changed, 0,
    "Number of times pmap_pte2_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
    &PMAP1unchanged, 0,
    "Number of times pmap_pte2_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

static __inline void pt2_wirecount_init(vm_page_t m);
static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p,
    vm_offset_t va);
void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size);

/*
 *  Function to set the debug level of the pmap code.
 */
#ifdef PMAP_DEBUG
void
pmap_debug(int level)
{

	pmap_debug_level = level;
	dprintf("pmap_debug: level=%d\n", pmap_debug_level);
}
#endif /* PMAP_DEBUG */

/*
 *  This table must correspond with the memory attribute configuration in vm.h.
 *  The first entry is used for normal system mapping.
 *
 *  Device memory is always marked as shared.
 *  Normal memory is shared only in the SMP case.
 *  The not-outer-shareable (NOS) bits are not used yet.
 *  Class 6 cannot be used on ARM11.
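 *
 *  Each entry is packed by the TEX() macro below: memory type in bits 1:0,
 *  inner cache policy in bits 3:2, outer cache policy in bits 5:4 and the
 *  not-outer-shareable bit in bit 6.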
*/ #define TEXDEF_TYPE_SHIFT 0 #define TEXDEF_TYPE_MASK 0x3 #define TEXDEF_INNER_SHIFT 2 #define TEXDEF_INNER_MASK 0x3 #define TEXDEF_OUTER_SHIFT 4 #define TEXDEF_OUTER_MASK 0x3 #define TEXDEF_NOS_SHIFT 6 #define TEXDEF_NOS_MASK 0x1 #define TEX(t, i, o, s) \ ((t) << TEXDEF_TYPE_SHIFT) | \ ((i) << TEXDEF_INNER_SHIFT) | \ ((o) << TEXDEF_OUTER_SHIFT | \ ((s) << TEXDEF_NOS_SHIFT)) static uint32_t tex_class[8] = { /* type inner cache outer cache */ TEX(PRRR_MEM, NMRR_WB_WA, NMRR_WB_WA, 0), /* 0 - ATTR_WB_WA */ TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 1 - ATTR_NOCACHE */ TEX(PRRR_DEV, NMRR_NC, NMRR_NC, 0), /* 2 - ATTR_DEVICE */ TEX(PRRR_SO, NMRR_NC, NMRR_NC, 0), /* 3 - ATTR_SO */ TEX(PRRR_MEM, NMRR_WT, NMRR_WT, 0), /* 4 - ATTR_WT */ TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 5 - NOT USED YET */ TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 6 - NOT USED YET */ TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 7 - NOT USED YET */ }; #undef TEX static uint32_t pte2_attr_tab[8] = { PTE2_ATTR_WB_WA, /* 0 - VM_MEMATTR_WB_WA */ PTE2_ATTR_NOCACHE, /* 1 - VM_MEMATTR_NOCACHE */ PTE2_ATTR_DEVICE, /* 2 - VM_MEMATTR_DEVICE */ PTE2_ATTR_SO, /* 3 - VM_MEMATTR_SO */ PTE2_ATTR_WT, /* 4 - VM_MEMATTR_WRITE_THROUGH */ 0, /* 5 - NOT USED YET */ 0, /* 6 - NOT USED YET */ 0 /* 7 - NOT USED YET */ }; CTASSERT(VM_MEMATTR_WB_WA == 0); CTASSERT(VM_MEMATTR_NOCACHE == 1); CTASSERT(VM_MEMATTR_DEVICE == 2); CTASSERT(VM_MEMATTR_SO == 3); CTASSERT(VM_MEMATTR_WRITE_THROUGH == 4); static inline uint32_t vm_memattr_to_pte2(vm_memattr_t ma) { KASSERT((u_int)ma < 5, ("%s: bad vm_memattr_t %d", __func__, ma)); return (pte2_attr_tab[(u_int)ma]); } static inline uint32_t vm_page_pte2_attr(vm_page_t m) { return (vm_memattr_to_pte2(m->md.pat_mode)); } /* * Convert TEX definition entry to TTB flags. */ static uint32_t encode_ttb_flags(int idx) { uint32_t inner, outer, nos, reg; inner = (tex_class[idx] >> TEXDEF_INNER_SHIFT) & TEXDEF_INNER_MASK; outer = (tex_class[idx] >> TEXDEF_OUTER_SHIFT) & TEXDEF_OUTER_MASK; nos = (tex_class[idx] >> TEXDEF_NOS_SHIFT) & TEXDEF_NOS_MASK; reg = nos << 5; reg |= outer << 3; if (cpuinfo.coherent_walk) reg |= (inner & 0x1) << 6; reg |= (inner & 0x2) >> 1; #ifdef SMP reg |= 1 << 1; #endif return reg; } /* * Set TEX remapping registers in current CPU. */ void pmap_set_tex(void) { uint32_t prrr, nmrr; uint32_t type, inner, outer, nos; int i; #ifdef PMAP_PTE_NOCACHE /* XXX fixme */ if (cpuinfo.coherent_walk) { pt_memattr = VM_MEMATTR_WB_WA; ttb_flags = encode_ttb_flags(0); } else { pt_memattr = VM_MEMATTR_NOCACHE; ttb_flags = encode_ttb_flags(1); } #else pt_memattr = VM_MEMATTR_WB_WA; ttb_flags = encode_ttb_flags(0); #endif prrr = 0; nmrr = 0; /* Build remapping register from TEX classes. */ for (i = 0; i < 8; i++) { type = (tex_class[i] >> TEXDEF_TYPE_SHIFT) & TEXDEF_TYPE_MASK; inner = (tex_class[i] >> TEXDEF_INNER_SHIFT) & TEXDEF_INNER_MASK; outer = (tex_class[i] >> TEXDEF_OUTER_SHIFT) & TEXDEF_OUTER_MASK; nos = (tex_class[i] >> TEXDEF_NOS_SHIFT) & TEXDEF_NOS_MASK; prrr |= type << (i * 2); prrr |= nos << (i + 24); nmrr |= inner << (i * 2); nmrr |= outer << (i * 2 + 16); } /* Add shareable bits for device memory. */ prrr |= PRRR_DS0 | PRRR_DS1; /* Add shareable bits for normal memory in SMP case. */ #ifdef SMP prrr |= PRRR_NS1; #endif cp15_prrr_set(prrr); cp15_nmrr_set(nmrr); /* Caches are disabled, so full TLB flush should be enough. */ tlb_flush_all_local(); } /* * KERNBASE must be multiple of NPT2_IN_PG * PTE1_SIZE. In other words, * KERNBASE is mapped by first L2 page table in L2 page table page. 
It
 *  meets the same constraint due to PT2MAP being placed just under KERNBASE.
 */
CTASSERT((KERNBASE & (NPT2_IN_PG * PTE1_SIZE - 1)) == 0);
CTASSERT((KERNBASE - VM_MAXUSER_ADDRESS) >= PT2MAP_SIZE);

/*
 *  In crazy dreams, PAGE_SIZE could be a multiple of PTE2_SIZE in general.
 *  For now, anyhow, the following check must be fulfilled.
 */
CTASSERT(PAGE_SIZE == PTE2_SIZE);

/*
 *  We don't want to mess up MI code with all MMU and PMAP definitions,
 *  so some things, which depend on other ones, are defined independently.
 *  Now, it is time to check that we don't screw up something.
 */
CTASSERT(PDRSHIFT == PTE1_SHIFT);

/*
 *  Check L1 and L2 page table entry definitions for consistency.
 */
CTASSERT(NB_IN_PT1 == (sizeof(pt1_entry_t) * NPTE1_IN_PT1));
CTASSERT(NB_IN_PT2 == (sizeof(pt2_entry_t) * NPTE2_IN_PT2));

/*
 *  Check L2 page tables page consistency.
 */
CTASSERT(PAGE_SIZE == (NPT2_IN_PG * NB_IN_PT2));
CTASSERT((1 << PT2PG_SHIFT) == NPT2_IN_PG);

/*
 *  Check PT2TAB consistency.
 *  PT2TAB_ENTRIES is defined as a division of NPTE1_IN_PT1 by NPT2_IN_PG.
 *  This should be done without remainder.
 */
CTASSERT(NPTE1_IN_PT1 == (PT2TAB_ENTRIES * NPT2_IN_PG));

/*
 *  A PT2MAP magic.
 *
 *  All level 2 page tables (PT2s) are mapped continuously and accordingly
 *  into PT2MAP address space. As PT2 size is less than PAGE_SIZE, this can
 *  be done only if PAGE_SIZE is a multiple of PT2 size. All PT2s in one page
 *  must be used together, but not necessarily at once. The first PT2 in a
 *  page must map things at a correctly aligned address and the others must
 *  follow in the right order.
 */
#define NB_IN_PT2TAB	(PT2TAB_ENTRIES * sizeof(pt2_entry_t))
#define NPT2_IN_PT2TAB	(NB_IN_PT2TAB / NB_IN_PT2)
#define NPG_IN_PT2TAB	(NB_IN_PT2TAB / PAGE_SIZE)

/*
 *  Check PT2TAB consistency.
 *  NPT2_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by NB_IN_PT2.
 *  NPG_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by PAGE_SIZE.
 *  Both should be done without remainder.
 */
CTASSERT(NB_IN_PT2TAB == (NPT2_IN_PT2TAB * NB_IN_PT2));
CTASSERT(NB_IN_PT2TAB == (NPG_IN_PT2TAB * PAGE_SIZE));

/*
 *  The implementation was made general, however, with the assumption
 *  below in mind. In case of another value of NPG_IN_PT2TAB,
 *  the code should be rechecked once more.
 */
CTASSERT(NPG_IN_PT2TAB == 1);

/*
 *  Get offset of PT2 in a page
 *  associated with given PT1 index.
 */
static __inline u_int
page_pt2off(u_int pt1_idx)
{

	return ((pt1_idx & PT2PG_MASK) * NB_IN_PT2);
}

/*
 *  Get physical address of PT2
 *  associated with given PT2s page and PT1 index.
 */
static __inline vm_paddr_t
page_pt2pa(vm_paddr_t pgpa, u_int pt1_idx)
{

	return (pgpa + page_pt2off(pt1_idx));
}

/*
 *  Get first entry of PT2
 *  associated with given PT2s page and PT1 index.
 */
static __inline pt2_entry_t *
page_pt2(vm_offset_t pgva, u_int pt1_idx)
{

	return ((pt2_entry_t *)(pgva + page_pt2off(pt1_idx)));
}

/*
 *  Get virtual address of PT2s page (mapped in PT2MAP)
 *  which holds the PT2 which holds the entry that maps the given
 *  virtual address.
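 *  For example, with 4KB pages and 1KB PT2s (so NPT2_IN_PG == 4 and one
 *  PT2s page covers 4MB of KVA), the given address is rounded down to a
 *  4MB boundary and the matching PT2s page inside PT2MAP is returned.
 *  (The concrete sizes are only illustrative.)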
*/ static __inline vm_offset_t pt2map_pt2pg(vm_offset_t va) { va &= ~(NPT2_IN_PG * PTE1_SIZE - 1); return ((vm_offset_t)pt2map_entry(va)); } /***************************************************************************** * * THREE pmap initialization milestones exist: * * locore.S * -> fundamental init (including MMU) in ASM * * initarm() * -> fundamental init continues in C * -> first available physical address is known * * pmap_bootstrap_prepare() -> FIRST PMAP MILESTONE (first epoch begins) * -> basic (safe) interface for physical address allocation is made * -> basic (safe) interface for virtual mapping is made * -> limited not SMP coherent work is possible * * -> more fundamental init continues in C * -> locks and some more things are available * -> all fundamental allocations and mappings are done * * pmap_bootstrap() -> SECOND PMAP MILESTONE (second epoch begins) * -> phys_avail[] and virtual_avail is set * -> control is passed to vm subsystem * -> physical and virtual address allocation are off limit * -> low level mapping functions, some SMP coherent, * are available, which cannot be used before vm subsystem * is being inited * * mi_startup() * -> vm subsystem is being inited * * pmap_init() -> THIRD PMAP MILESTONE (third epoch begins) * -> pmap is fully inited * *****************************************************************************/ /***************************************************************************** * * PMAP first stage initialization and utility functions * for pre-bootstrap epoch. * * After pmap_bootstrap_prepare() is called, the following functions * can be used: * * (1) strictly only for this stage functions for physical page allocations, * virtual space allocations, and mappings: * * vm_paddr_t pmap_preboot_get_pages(u_int num); * void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num); * vm_offset_t pmap_preboot_reserve_pages(u_int num); * vm_offset_t pmap_preboot_get_vpages(u_int num); * void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size, * vm_prot_t prot, vm_memattr_t attr); * * (2) for all stages: * * vm_paddr_t pmap_kextract(vm_offset_t va); * * NOTE: This is not SMP coherent stage. * *****************************************************************************/ #define KERNEL_P2V(pa) \ ((vm_offset_t)((pa) - arm_physmem_kernaddr + KERNVIRTADDR)) #define KERNEL_V2P(va) \ ((vm_paddr_t)((va) - KERNVIRTADDR + arm_physmem_kernaddr)) static vm_paddr_t last_paddr; /* * Pre-bootstrap epoch page allocator. */ vm_paddr_t pmap_preboot_get_pages(u_int num) { vm_paddr_t ret; ret = last_paddr; last_paddr += num * PAGE_SIZE; return (ret); } /* * The fundamental initalization of PMAP stuff. * * Some things already happened in locore.S and some things could happen * before pmap_bootstrap_prepare() is called, so let's recall what is done: * 1. Caches are disabled. * 2. We are running on virtual addresses already with 'boot_pt1' * as L1 page table. * 3. So far, all virtual addresses can be converted to physical ones and * vice versa by the following macros: * KERNEL_P2V(pa) .... physical to virtual ones, * KERNEL_V2P(va) .... virtual to physical ones. * * What is done herein: * 1. The 'boot_pt1' is replaced by real kernel L1 page table 'kern_pt1'. * 2. PT2MAP magic is brought to live. * 3. Basic preboot functions for page allocations and mappings can be used. * 4. Everything is prepared for L1 cache enabling. * * Variations: * 1. To use second TTB register, so kernel and users page tables will be * separated. 
This way process forking - pmap_pinit() - could be faster, * it saves physical pages and KVA per a process, and it's simple change. * However, it will lead, due to hardware matter, to the following: * (a) 2G space for kernel and 2G space for users. * (b) 1G space for kernel in low addresses and 3G for users above it. * A question is: Is the case (b) really an option? Note that case (b) * does save neither physical memory and KVA. */ void pmap_bootstrap_prepare(vm_paddr_t last) { vm_paddr_t pt2pg_pa, pt2tab_pa, pa, size; vm_offset_t pt2pg_va; pt1_entry_t *pte1p; pt2_entry_t *pte2p; u_int i; uint32_t actlr_mask, actlr_set, l1_attr; /* * Now, we are going to make real kernel mapping. Note that we are * already running on some mapping made in locore.S and we expect * that it's large enough to ensure nofault access to physical memory * allocated herein before switch. * * As kernel image and everything needed before are and will be mapped * by section mappings, we align last physical address to PTE1_SIZE. */ last_paddr = pte1_roundup(last); /* * Allocate and zero page(s) for kernel L1 page table. * * Note that it's first allocation on space which was PTE1_SIZE * aligned and as such base_pt1 is aligned to NB_IN_PT1 too. */ base_pt1 = pmap_preboot_get_pages(NPG_IN_PT1); kern_pt1 = (pt1_entry_t *)KERNEL_P2V(base_pt1); bzero((void*)kern_pt1, NB_IN_PT1); pte1_sync_range(kern_pt1, NB_IN_PT1); /* Allocate and zero page(s) for kernel PT2TAB. */ pt2tab_pa = pmap_preboot_get_pages(NPG_IN_PT2TAB); kern_pt2tab = (pt2_entry_t *)KERNEL_P2V(pt2tab_pa); bzero(kern_pt2tab, NB_IN_PT2TAB); pte2_sync_range(kern_pt2tab, NB_IN_PT2TAB); /* Allocate and zero page(s) for kernel L2 page tables. */ pt2pg_pa = pmap_preboot_get_pages(NKPT2PG); pt2pg_va = KERNEL_P2V(pt2pg_pa); size = NKPT2PG * PAGE_SIZE; bzero((void*)pt2pg_va, size); pte2_sync_range((pt2_entry_t *)pt2pg_va, size); /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated pages for kernel L2 page tables so that vm_page * structures representing these pages will be created. The vm_page * structures are required for promotion of the corresponding kernel * virtual addresses to section mappings. */ vm_phys_add_seg(pt2tab_pa, pmap_preboot_get_pages(0)); /* * Insert allocated L2 page table pages to PT2TAB and make * link to all PT2s in L1 page table. See how kernel_vm_end * is initialized. * * We play simple and safe. So every KVA will have underlaying * L2 page table, even kernel image mapped by sections. */ pte2p = kern_pt2tab_entry(KERNBASE); for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += PTE2_SIZE) pt2tab_store(pte2p++, PTE2_KPT(pa)); pte1p = kern_pte1(KERNBASE); for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += NB_IN_PT2) pte1_store(pte1p++, PTE1_LINK(pa)); /* Make section mappings for kernel. */ l1_attr = ATTR_TO_L1(PTE2_ATTR_DEFAULT); pte1p = kern_pte1(KERNBASE); for (pa = KERNEL_V2P(KERNBASE); pa < last; pa += PTE1_SIZE) pte1_store(pte1p++, PTE1_KERN(pa, PTE1_AP_KRW, l1_attr)); /* * Get free and aligned space for PT2MAP and make L1 page table links * to L2 page tables held in PT2TAB. * * Note that pages holding PT2s are stored in PT2TAB as pt2_entry_t * descriptors and PT2TAB page(s) itself is(are) used as PT2s. Thus * each entry in PT2TAB maps all PT2s in a page. This implies that * virtual address of PT2MAP must be aligned to NPT2_IN_PG * PTE1_SIZE. 
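 * With the usual short-descriptor sizes (16KB PT1, 1KB PT2s, 4KB pages)
 * this means PT2MAP spans 4MB of KVA and must sit on a 4MB boundary;
 * the code below places it directly below KERNBASE.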
*/ PT2MAP = (pt2_entry_t *)(KERNBASE - PT2MAP_SIZE); pte1p = kern_pte1((vm_offset_t)PT2MAP); for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) { pte1_store(pte1p++, PTE1_LINK(pa)); } /* * Store PT2TAB in PT2TAB itself, i.e. self reference mapping. * Each pmap will hold own PT2TAB, so the mapping should be not global. */ pte2p = kern_pt2tab_entry((vm_offset_t)PT2MAP); for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) { pt2tab_store(pte2p++, PTE2_KPT_NG(pa)); } /* * Choose correct L2 page table and make mappings for allocations * made herein which replaces temporary locore.S mappings after a while. * Note that PT2MAP cannot be used until we switch to kern_pt1. * * Note, that these allocations started aligned on 1M section and * kernel PT1 was allocated first. Making of mappings must follow * order of physical allocations as we've used KERNEL_P2V() macro * for virtual addresses resolution. */ pte2p = kern_pt2tab_entry((vm_offset_t)kern_pt1); pt2pg_va = KERNEL_P2V(pte2_pa(pte2_load(pte2p))); pte2p = page_pt2(pt2pg_va, pte1_index((vm_offset_t)kern_pt1)); /* Make mapping for kernel L1 page table. */ for (pa = base_pt1, i = 0; i < NPG_IN_PT1; i++, pa += PTE2_SIZE) pte2_store(pte2p++, PTE2_KPT(pa)); /* Make mapping for kernel PT2TAB. */ for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) pte2_store(pte2p++, PTE2_KPT(pa)); /* Finally, switch from 'boot_pt1' to 'kern_pt1'. */ pmap_kern_ttb = base_pt1 | ttb_flags; cpuinfo_get_actlr_modifier(&actlr_mask, &actlr_set); reinit_mmu(pmap_kern_ttb, actlr_mask, actlr_set); /* * Initialize the first available KVA. As kernel image is mapped by * sections, we are leaving some gap behind. */ virtual_avail = (vm_offset_t)kern_pt2tab + NPG_IN_PT2TAB * PAGE_SIZE; } /* * Setup L2 page table page for given KVA. * Used in pre-bootstrap epoch. * * Note that we have allocated NKPT2PG pages for L2 page tables in advance * and used them for mapping KVA starting from KERNBASE. However, this is not * enough. Vectors and devices need L2 page tables too. Note that they are * even above VM_MAX_KERNEL_ADDRESS. */ static __inline vm_paddr_t pmap_preboot_pt2pg_setup(vm_offset_t va) { pt2_entry_t *pte2p, pte2; vm_paddr_t pt2pg_pa; /* Get associated entry in PT2TAB. */ pte2p = kern_pt2tab_entry(va); /* Just return, if PT2s page exists already. */ pte2 = pt2tab_load(pte2p); if (pte2_is_valid(pte2)) return (pte2_pa(pte2)); KASSERT(va >= VM_MAX_KERNEL_ADDRESS, ("%s: NKPT2PG too small", __func__)); /* * Allocate page for PT2s and insert it to PT2TAB. * In other words, map it into PT2MAP space. */ pt2pg_pa = pmap_preboot_get_pages(1); pt2tab_store(pte2p, PTE2_KPT(pt2pg_pa)); /* Zero all PT2s in allocated page. */ bzero((void*)pt2map_pt2pg(va), PAGE_SIZE); pte2_sync_range((pt2_entry_t *)pt2map_pt2pg(va), PAGE_SIZE); return (pt2pg_pa); } /* * Setup L2 page table for given KVA. * Used in pre-bootstrap epoch. */ static void pmap_preboot_pt2_setup(vm_offset_t va) { pt1_entry_t *pte1p; vm_paddr_t pt2pg_pa, pt2_pa; /* Setup PT2's page. */ pt2pg_pa = pmap_preboot_pt2pg_setup(va); pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(va)); /* Insert PT2 to PT1. */ pte1p = kern_pte1(va); pte1_store(pte1p, PTE1_LINK(pt2_pa)); } /* * Get L2 page entry associated with given KVA. * Used in pre-bootstrap epoch. */ static __inline pt2_entry_t* pmap_preboot_vtopte2(vm_offset_t va) { pt1_entry_t *pte1p; /* Setup PT2 if needed. */ pte1p = kern_pte1(va); if (!pte1_is_valid(pte1_load(pte1p))) /* XXX - sections ?! 
*/ pmap_preboot_pt2_setup(va); return (pt2map_entry(va)); } /* * Pre-bootstrap epoch page(s) mapping(s). */ void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num) { u_int i; pt2_entry_t *pte2p; /* Map all the pages. */ for (i = 0; i < num; i++) { pte2p = pmap_preboot_vtopte2(va); pte2_store(pte2p, PTE2_KRW(pa)); va += PAGE_SIZE; pa += PAGE_SIZE; } } /* * Pre-bootstrap epoch virtual space alocator. */ vm_offset_t pmap_preboot_reserve_pages(u_int num) { u_int i; vm_offset_t start, va; pt2_entry_t *pte2p; /* Allocate virtual space. */ start = va = virtual_avail; virtual_avail += num * PAGE_SIZE; /* Zero the mapping. */ for (i = 0; i < num; i++) { pte2p = pmap_preboot_vtopte2(va); pte2_store(pte2p, 0); va += PAGE_SIZE; } return (start); } /* * Pre-bootstrap epoch page(s) allocation and mapping(s). */ vm_offset_t pmap_preboot_get_vpages(u_int num) { vm_paddr_t pa; vm_offset_t va; /* Allocate physical page(s). */ pa = pmap_preboot_get_pages(num); /* Allocate virtual space. */ va = virtual_avail; virtual_avail += num * PAGE_SIZE; /* Map and zero all. */ pmap_preboot_map_pages(pa, va, num); bzero((void *)va, num * PAGE_SIZE); return (va); } /* * Pre-bootstrap epoch page mapping(s) with attributes. */ void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size, vm_prot_t prot, vm_memattr_t attr) { u_int num; u_int l1_attr, l1_prot, l2_prot, l2_attr; pt1_entry_t *pte1p; pt2_entry_t *pte2p; l2_prot = prot & VM_PROT_WRITE ? PTE2_AP_KRW : PTE2_AP_KR; l2_prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX; l2_attr = vm_memattr_to_pte2(attr); l1_prot = ATTR_TO_L1(l2_prot); l1_attr = ATTR_TO_L1(l2_attr); /* Map all the pages. */ num = round_page(size); while (num > 0) { if ((((va | pa) & PTE1_OFFSET) == 0) && (num >= PTE1_SIZE)) { pte1p = kern_pte1(va); pte1_store(pte1p, PTE1_KERN(pa, l1_prot, l1_attr)); va += PTE1_SIZE; pa += PTE1_SIZE; num -= PTE1_SIZE; } else { pte2p = pmap_preboot_vtopte2(va); pte2_store(pte2p, PTE2_KERN(pa, l2_prot, l2_attr)); va += PAGE_SIZE; pa += PAGE_SIZE; num -= PAGE_SIZE; } } } /* * Extract from the kernel page table the physical address * that is mapped by the given virtual address "va". */ vm_paddr_t pmap_kextract(vm_offset_t va) { vm_paddr_t pa; pt1_entry_t pte1; pt2_entry_t pte2; pte1 = pte1_load(kern_pte1(va)); if (pte1_is_section(pte1)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); } else if (pte1_is_link(pte1)) { /* * We should beware of concurrent promotion that changes * pte1 at this point. However, it's not a problem as PT2 * page is preserved by promotion in PT2TAB. So even if * it happens, using of PT2MAP is still safe. * * QQQ: However, concurrent removing is a problem which * ends in abort on PT2MAP space. Locking must be used * to deal with this. */ pte2 = pte2_load(pt2map_entry(va)); pa = pte2_pa(pte2) | (va & PTE2_OFFSET); } else { panic("%s: va %#x pte1 %#x", __func__, va, pte1); } return (pa); } /* * Extract from the kernel page table the physical address * that is mapped by the given virtual address "va". Also * return L2 page table entry which maps the address. * * This is only intended to be used for panic dumps. 
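 * For a section mapping there is no real PTE2, so one is synthesized
 * below from the PTE1 attributes before being handed back to the caller.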
*/ vm_paddr_t pmap_dump_kextract(vm_offset_t va, pt2_entry_t *pte2p) { vm_paddr_t pa; pt1_entry_t pte1; pt2_entry_t pte2; pte1 = pte1_load(kern_pte1(va)); if (pte1_is_section(pte1)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); pte2 = pa | ATTR_TO_L2(pte1) | PTE2_V; } else if (pte1_is_link(pte1)) { pte2 = pte2_load(pt2map_entry(va)); pa = pte2_pa(pte2); } else { pte2 = 0; pa = 0; } if (pte2p != NULL) *pte2p = pte2; return (pa); } /***************************************************************************** * * PMAP second stage initialization and utility functions * for bootstrap epoch. * * After pmap_bootstrap() is called, the following functions for * mappings can be used: * * void pmap_kenter(vm_offset_t va, vm_paddr_t pa); * void pmap_kremove(vm_offset_t va); * vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, * int prot); * * NOTE: This is not SMP coherent stage. And physical page allocation is not * allowed during this stage. * *****************************************************************************/ /* * Initialize kernel PMAP locks and lists, kernel_pmap itself, and * reserve various virtual spaces for temporary mappings. */ void pmap_bootstrap(vm_offset_t firstaddr) { pt2_entry_t *unused __unused; struct sysmaps *sysmaps; u_int i; /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_l1pa = (vm_paddr_t)kern_pt1; /* for libkvm */ kernel_pmap->pm_pt1 = kern_pt1; kernel_pmap->pm_pt2tab = kern_pt2tab; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); LIST_INIT(&allpmaps); /* * Request a spin mutex so that changes to allpmaps cannot be * preempted by smp_rendezvous_cpus(). */ mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) do { \ v = (c)pmap_preboot_reserve_pages(n); \ p = pt2map_entry((vm_offset_t)v); \ } while (0) /* * Local CMAP1/CMAP2 are used for zeroing and copying pages. * Local CMAP3 is used for data cache cleaning. * Global CMAP3 is used for the idle process page zeroing. */ for (i = 0; i < MAXCPU; i++) { sysmaps = &sysmaps_pcpu[i]; mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1); SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1); SYSMAP(caddr_t, sysmaps->CMAP3, sysmaps->CADDR3, 1); } SYSMAP(caddr_t, CMAP3, CADDR3, 1); /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS); /* * _tmppt is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, unused, _tmppt, 1); /* * PADDR1 and PADDR2 are used by pmap_pte2_quick() and pmap_pte2(), * respectively. PADDR3 is used by pmap_pte2_ddb(). */ SYSMAP(pt2_entry_t *, PMAP1, PADDR1, 1); SYSMAP(pt2_entry_t *, PMAP2, PADDR2, 1); #ifdef DDB SYSMAP(pt2_entry_t *, PMAP3, PADDR3, 1); #endif mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); /* * Note that in very short time in initarm(), we are going to * initialize phys_avail[] array and no futher page allocation * can happen after that until vm subsystem will be initialized. 
*/ kernel_vm_end_new = kernel_vm_end; virtual_end = vm_max_kernel_address; } static void pmap_init_qpages(void) { struct pcpu *pc; int i; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("%s: unable to allocate KVA", __func__); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_qpages, NULL); /* * The function can already be use in second initialization stage. * As such, the function DOES NOT call pmap_growkernel() where PT2 * allocation can happen. So if used, be sure that PT2 for given * virtual address is allocated already! * * Add a wired page to the kva. * Note: not SMP coherent. */ static __inline void pmap_kenter_prot_attr(vm_offset_t va, vm_paddr_t pa, uint32_t prot, uint32_t attr) { pt1_entry_t *pte1p; pt2_entry_t *pte2p; pte1p = kern_pte1(va); if (!pte1_is_valid(pte1_load(pte1p))) { /* XXX - sections ?! */ /* * This is a very low level function, so PT2 and particularly * PT2PG associated with given virtual address must be already * allocated. It's a pain mainly during pmap initialization * stage. However, called after pmap initialization with * virtual address not under kernel_vm_end will lead to * the same misery. */ if (!pte2_is_valid(pte2_load(kern_pt2tab_entry(va)))) panic("%s: kernel PT2 not allocated!", __func__); } pte2p = pt2map_entry(va); pte2_store(pte2p, PTE2_KERN(pa, prot, attr)); } PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt2_entry_t *pte2p; pte2p = pt2map_entry(va); pte2_clear(pte2p); } /* * Share new kernel PT2PG with all pmaps. * The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pt2tab(vm_offset_t va, pt2_entry_t npte2) { pmap_t pmap; pt2_entry_t *pte2p; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { pte2p = pmap_pt2tab_entry(pmap, va); pt2tab_store(pte2p, npte2); } mtx_unlock_spin(&allpmaps_lock); } /* * Share new kernel PTE1 with all pmaps. * The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pte1(vm_offset_t va, pt1_entry_t npte1) { pmap_t pmap; pt1_entry_t *pte1p; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { pte1p = pmap_pte1(pmap, va); pte1_store(pte1p, npte1); } mtx_unlock_spin(&allpmaps_lock); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. * * NOTE: Read the comments above pmap_kenter_prot_attr() as * the function is used herein! */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; vm_paddr_t pte1_offset; pt1_entry_t npte1; uint32_t l1prot, l2prot; uint32_t l1attr, l2attr; PDEBUG(1, printf("%s: virt = %#x, start = %#x, end = %#x (size = %#x)," " prot = %d\n", __func__, *virt, start, end, end - start, prot)); l2prot = (prot & VM_PROT_WRITE) ? PTE2_AP_KRW : PTE2_AP_KR; l2prot |= (prot & VM_PROT_EXECUTE) ? 
PTE2_X : PTE2_NX; l1prot = ATTR_TO_L1(l2prot); l2attr = PTE2_ATTR_DEFAULT; l1attr = ATTR_TO_L1(l2attr); va = *virt; /* * Does the physical address range's size and alignment permit at * least one section mapping to be created? */ pte1_offset = start & PTE1_OFFSET; if ((end - start) - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) >= PTE1_SIZE) { /* * Increase the starting virtual address so that its alignment * does not preclude the use of section mappings. */ if ((va & PTE1_OFFSET) < pte1_offset) va = pte1_trunc(va) + pte1_offset; else if ((va & PTE1_OFFSET) > pte1_offset) va = pte1_roundup(va) + pte1_offset; } sva = va; while (start < end) { if ((start & PTE1_OFFSET) == 0 && end - start >= PTE1_SIZE) { KASSERT((va & PTE1_OFFSET) == 0, ("%s: misaligned va %#x", __func__, va)); npte1 = PTE1_KERN(start, l1prot, l1attr); pmap_kenter_pte1(va, npte1); va += PTE1_SIZE; start += PTE1_SIZE; } else { pmap_kenter_prot_attr(va, start, l2prot, l2attr); va += PAGE_SIZE; start += PAGE_SIZE; } } tlb_flush_range(sva, va - sva); *virt = va; return (sva); } /* * Make a temporary mapping for a physical address. * This is only intended to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; /* QQQ: 'i' should be less or equal to MAXDUMPPGS. */ va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); tlb_flush_local(va); return ((void *)crashdumpmap); } /************************************* * * TLB & cache maintenance routines. * *************************************/ /* * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_tlb_flush(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) tlb_flush(va); } PMAP_INLINE void pmap_tlb_flush_range(pmap_t pmap, vm_offset_t sva, vm_size_t size) { if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) tlb_flush_range(sva, size); } /* * Abuse the pte2 nodes for unmapped kva to thread a kva freelist through. * Requirements: * - Must deal with pages in order to ensure that none of the PTE2_* bits * are ever set, PTE2_V in particular. * - Assumes we can write to pte2s without pte2_store() atomic ops. * - Assumes nothing will ever test these addresses for 0 to indicate * no mapping instead of correctly checking PTE2_V. * - Assumes a vm_offset_t will fit in a pte2 (true for arm). * Because PTE2_V is never set, there can be no mappings to invalidate. */ static vm_offset_t pmap_pte2list_alloc(vm_offset_t *head) { pt2_entry_t *pte2p; vm_offset_t va; va = *head; if (va == 0) panic("pmap_ptelist_alloc: exhausted ptelist KVA"); pte2p = pt2map_entry(va); *head = *pte2p; if (*head & PTE2_V) panic("%s: va with PTE2_V set!", __func__); *pte2p = 0; return (va); } static void pmap_pte2list_free(vm_offset_t *head, vm_offset_t va) { pt2_entry_t *pte2p; if (va & PTE2_V) panic("%s: freeing va with PTE2_V set!", __func__); pte2p = pt2map_entry(va); *pte2p = *head; /* virtual! PTE2_V is 0 though */ *head = va; } static void pmap_pte2list_init(vm_offset_t *head, void *base, int npages) { int i; vm_offset_t va; *head = 0; for (i = npages - 1; i >= 0; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_pte2list_free(head, va); } } /***************************************************************************** * * PMAP third and final stage initialization. * * After pmap_init() is called, PMAP subsystem is fully initialized. 
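 * Among other things, pmap_init() below sizes the PV entry space, allocates
 * the per-section pv_table used for 1MB promotions, and leaves superpage
 * support enabled unless the vm.pmap.sp_enabled tunable turns it off.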
* *****************************************************************************/ SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); static u_long nkpt2pg = NKPT2PG; SYSCTL_ULONG(_vm_pmap, OID_AUTO, nkpt2pg, CTLFLAG_RD, &nkpt2pg, 0, "Pre-allocated pages for kernel PT2s"); static int sp_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &sp_enabled, 0, "Are large page mappings enabled?"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, pte1, CTLFLAG_RD, 0, "1MB page mapping counters"); static u_long pmap_pte1_demotions; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pte1_demotions, 0, "1MB page demotions"); static u_long pmap_pte1_mappings; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pte1_mappings, 0, "1MB page mappings"); static u_long pmap_pte1_p_failures; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pte1_p_failures, 0, "1MB page promotion failures"); static u_long pmap_pte1_promotions; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pte1_promotions, 0, "1MB page promotions"); static __inline ttb_entry_t pmap_ttb_get(pmap_t pmap) { return (vtophys(pmap->pm_pt1) | ttb_flags); } /* * Initialize a vm_page's machine-dependent fields. * * Variations: * 1. Pages for L2 page tables are always not managed. So, pv_list and * pt2_wirecount can share same physical space. However, proper * initialization on a page alloc for page tables and reinitialization * on the page free must be ensured. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); pt2_wirecount_init(m); m->md.pat_mode = VM_MEMATTR_DEFAULT; } /* * Virtualization for faster way how to zero whole page. */ static __inline void pagezero(void *page) { bzero(page, PAGE_SIZE); } /* * Zero L2 page table page. * Use same KVA as in pmap_zero_page(). */ static __inline vm_paddr_t pmap_pt2pg_zero(vm_page_t m) { vm_paddr_t pa; struct sysmaps *sysmaps; pa = VM_PAGE_TO_PHYS(m); /* * XXX: For now, we map whole page even if it's already zero, * to sync it even if the sync is only DSB. */ sched_pin(); sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (pte2_load(sysmaps->CMAP2) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(pa, PTE2_AP_KRW, vm_page_pte2_attr(m))); /* Even VM_ALLOC_ZERO request is only advisory. */ if ((m->flags & PG_ZERO) == 0) pagezero(sysmaps->CADDR2); pte2_sync_range((pt2_entry_t *)sysmaps->CADDR2, PAGE_SIZE); pte2_clear(sysmaps->CMAP2); tlb_flush((vm_offset_t)sysmaps->CADDR2); sched_unpin(); mtx_unlock(&sysmaps->lock); return (pa); } /* * Init just allocated page as L2 page table(s) holder * and return its physical address. */ static __inline vm_paddr_t pmap_pt2pg_init(pmap_t pmap, vm_offset_t va, vm_page_t m) { vm_paddr_t pa; pt2_entry_t *pte2p; /* Check page attributes. */ if (m->md.pat_mode != pt_memattr) pmap_page_set_memattr(m, pt_memattr); /* Zero page and init wire counts. */ pa = pmap_pt2pg_zero(m); pt2_wirecount_init(m); /* * Map page to PT2MAP address space for given pmap. * Note that PT2MAP space is shared with all pmaps. */ if (pmap == kernel_pmap) pmap_kenter_pt2tab(va, PTE2_KPT(pa)); else { pte2p = pmap_pt2tab_entry(pmap, va); pt2tab_store(pte2p, PTE2_KPT_NG(pa)); } return (pa); } /* * Initialize the pmap module. 
* Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { vm_size_t s; pt2_entry_t *pte2p, pte2; u_int i, pte1_idx, pv_npg; PDEBUG(1, printf("%s: phys_start = %#x\n", __func__, PHYSADDR)); /* * Initialize the vm page array entries for kernel pmap's * L2 page table pages allocated in advance. */ pte1_idx = pte1_index(KERNBASE - PT2MAP_SIZE); pte2p = kern_pt2tab_entry(KERNBASE - PT2MAP_SIZE); for (i = 0; i < nkpt2pg + NPG_IN_PT2TAB; i++, pte2p++) { vm_paddr_t pa; vm_page_t m; pte2 = pte2_load(pte2p); KASSERT(pte2_is_valid(pte2), ("%s: no valid entry", __func__)); pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size], ("%s: L2 page table page is out of range", __func__)); m->pindex = pte1_idx; m->phys_addr = pa; pte1_idx += NPT2_IN_PG; } /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * Are large page mappings enabled? */ TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled); if (sp_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("%s: can't assign to pagesizes[1]", __func__)); pagesizes[1] = PTE1_SIZE; } /* * Calculate the size of the pv head table for sections. * Handle the possibility that "vm_phys_segs[...].end" is zero. * Note that the table is only for sections which could be promoted. */ first_managed_pa = pte1_trunc(vm_phys_segs[0].start); pv_npg = (pte1_trunc(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE) - first_managed_pa) / PTE1_SIZE + 1; /* * Allocate memory for the pv head table for sections. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("%s: not enough kvm for pv chunks", __func__); pmap_pte2list_init(&pv_vafree, pv_chunkbase, pv_maxchunks); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { u_int anychanged; pt2_entry_t *epte2p, *pte2p, pte2; vm_page_t m; vm_paddr_t pa; anychanged = 0; pte2p = pt2map_entry(sva); epte2p = pte2p + count; while (pte2p < epte2p) { m = *ma++; pa = VM_PAGE_TO_PHYS(m); pte2 = pte2_load(pte2p); if ((pte2_pa(pte2) != pa) || (pte2_attr(pte2) != vm_page_pte2_attr(m))) { anychanged++; pte2_store(pte2p, PTE2_KERN(pa, PTE2_AP_KRW, vm_page_pte2_attr(m))); } pte2p++; } if (__predict_false(anychanged)) tlb_flush_range(sva, count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. 
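 * Unlike pmap_qenter() above, no attempt is made to skip the final TLB
 * flush; the whole range is always flushed once the entries are cleared.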
*/ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } tlb_flush_range(sva, va - sva); } /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap == vmspace_pmap(curthread->td_proc->p_vmspace))); } /* * If the given pmap is not the current or kernel pmap, the returned * pte2 must be released by passing it to pmap_pte2_release(). */ static pt2_entry_t * pmap_pte2(pmap_t pmap, vm_offset_t va) { pt1_entry_t pte1; vm_paddr_t pt2pg_pa; pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) panic("%s: attempt to map PTE1", __func__); if (pte1_is_link(pte1)) { /* Are we current address space or kernel? */ if (pmap_is_current(pmap)) return (pt2map_entry(va)); /* Note that L2 page table size is not equal to PAGE_SIZE. */ pt2pg_pa = trunc_page(pte1_link_pa(pte1)); mtx_lock(&PMAP2mutex); if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) { pte2_store(PMAP2, PTE2_KPT(pt2pg_pa)); tlb_flush((vm_offset_t)PADDR2); } return (PADDR2 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); } return (NULL); } /* * Releases a pte2 that was obtained from pmap_pte2(). * Be prepared for the pte2p being NULL. */ static __inline void pmap_pte2_release(pt2_entry_t *pte2p) { if ((pt2_entry_t *)(trunc_page((vm_offset_t)pte2p)) == PADDR2) { mtx_unlock(&PMAP2mutex); } } /* * Super fast pmap_pte2 routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire tlb flush for checking a single mapping. * * If the given pmap is not the current pmap, pvh_global_lock * must be held and curthread pinned to a CPU. */ static pt2_entry_t * pmap_pte2_quick(pmap_t pmap, vm_offset_t va) { pt1_entry_t pte1; vm_paddr_t pt2pg_pa; pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) panic("%s: attempt to map PTE1", __func__); if (pte1_is_link(pte1)) { /* Are we current address space or kernel? */ if (pmap_is_current(pmap)) return (pt2map_entry(va)); rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("%s: curthread not pinned", __func__)); /* Note that L2 page table size is not equal to PAGE_SIZE. */ pt2pg_pa = trunc_page(pte1_link_pa(pte1)); if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) { pte2_store(PMAP1, PTE2_KPT(pt2pg_pa)); #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif tlb_flush_local((vm_offset_t)PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); tlb_flush_local((vm_offset_t)PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR1 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); } return (NULL); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa; pt1_entry_t pte1; pt2_entry_t *pte2p; PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) pa = pte1_pa(pte1) | (va & PTE1_OFFSET); else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, va); pa = pte2_pa(pte2_load(pte2p)) | (va & PTE2_OFFSET); pmap_pte2_release(pte2p); } else pa = 0; PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. 
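 */

/*
 * An illustrative sketch (not part of the original file) of how a
 * hypothetical caller might use the routine defined below; the held
 * page must be released again under its page lock.
 */
static __inline boolean_t
pmap_va_is_writable_example(pmap_t pmap, vm_offset_t va)
{
	vm_page_t m;

	m = pmap_extract_and_hold(pmap, va, VM_PROT_WRITE);
	if (m == NULL)
		return (FALSE);
	/* ... the caller would access the held page here ... */
	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_unlock(m);
	return (TRUE);
}

/*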
*/ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { vm_paddr_t pa, lockpa; pt1_entry_t pte1; pt2_entry_t pte2, *pte2p; vm_page_t m; lockpa = 0; m = NULL; PMAP_LOCK(pmap); retry: pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) { if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, va); pte2 = pte2_load(pte2p); pmap_pte2_release(pte2p); if (pte2_is_valid(pte2) && (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) { pa = pte2_pa(pte2); if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } PA_UNLOCK_COND(lockpa); PMAP_UNLOCK(pmap); return (m); } /* * Grow the number of kernel L2 page table entries, if needed. */ void pmap_growkernel(vm_offset_t addr) { vm_page_t m; vm_paddr_t pt2pg_pa, pt2_pa; pt1_entry_t pte1; pt2_entry_t pte2; PDEBUG(1, printf("%s: addr = %#x\n", __func__, addr)); /* * All the time kernel_vm_end is first KVA for which underlying * L2 page table is either not allocated or linked from L1 page table * (not considering sections). Except for two possible cases: * * (1) in the very beginning as long as pmap_growkernel() was * not called, it could be first unused KVA (which is not * rounded up to PTE1_SIZE), * * (2) when all KVA space is mapped and kernel_map->max_offset * address is not rounded up to PTE1_SIZE. (For example, * it could be 0xFFFFFFFF.) */ kernel_vm_end = pte1_roundup(kernel_vm_end); mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, PTE1_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { pte1 = pte1_load(kern_pte1(kernel_vm_end)); if (pte1_is_valid(pte1)) { kernel_vm_end += PTE1_SIZE; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } /* * kernel_vm_end_new is used in pmap_pinit() when kernel * mappings are entered to new pmap all at once to avoid race * between pmap_kenter_pte1() and kernel_vm_end increase. * The same aplies to pmap_kenter_pt2tab(). */ kernel_vm_end_new = kernel_vm_end + PTE1_SIZE; pte2 = pt2tab_load(kern_pt2tab_entry(kernel_vm_end)); if (!pte2_is_valid(pte2)) { /* * Install new PT2s page into kernel PT2TAB. */ m = vm_page_alloc(NULL, pte1_index(kernel_vm_end) & ~PT2PG_MASK, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) panic("%s: no memory to grow kernel", __func__); /* * QQQ: To link all new L2 page tables from L1 page * table now and so pmap_kenter_pte1() them * at once together with pmap_kenter_pt2tab() * could be nice speed up. However, * pmap_growkernel() does not happen so often... * QQQ: The other TTBR is another option. 
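 *
 * Note on the ordering below: the new PT2 page is installed into the
 * kernel PT2TAB first (pmap_pt2pg_init()), then linked from the L1
 * page table (pmap_kenter_pte1()), and only afterwards is
 * kernel_vm_end advanced to kernel_vm_end_new.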
*/ pt2pg_pa = pmap_pt2pg_init(kernel_pmap, kernel_vm_end, m); } else pt2pg_pa = pte2_pa(pte2); pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(kernel_vm_end)); pmap_kenter_pte1(kernel_vm_end, PTE1_LINK(pt2_pa)); kernel_vm_end = kernel_vm_end_new; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = vm_max_kernel_address - KERNBASE; return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = vm_max_kernel_address - kernel_vm_end; return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /*********************************************** * * Pmap allocation/deallocation routines. * ***********************************************/ /* * Initialize the pmap for the swapper process. */ void pmap_pinit0(pmap_t pmap) { PDEBUG(1, printf("%s: pmap = %p\n", __func__, pmap)); PMAP_LOCK_INIT(pmap); /* * Kernel page table directory and pmap stuff around is already * initialized, we are using it right now and here. So, finish * only PMAP structures initialization for process0 ... * * Since the L1 page table and PT2TAB is shared with the kernel pmap, * which is already included in the list "allpmaps", this pmap does * not need to be inserted into that list. */ pmap->pm_pt1 = kern_pt1; pmap->pm_pt2tab = kern_pt2tab; CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); CPU_SET(0, &pmap->pm_active); } static __inline void pte1_copy_nosync(pt1_entry_t *spte1p, pt1_entry_t *dpte1p, vm_offset_t sva, vm_offset_t eva) { u_int idx, count; idx = pte1_index(sva); count = (pte1_index(eva) - idx + 1) * sizeof(pt1_entry_t); bcopy(spte1p + idx, dpte1p + idx, count); } static __inline void pt2tab_copy_nosync(pt2_entry_t *spte2p, pt2_entry_t *dpte2p, vm_offset_t sva, vm_offset_t eva) { u_int idx, count; idx = pt2tab_index(sva); count = (pt2tab_index(eva) - idx + 1) * sizeof(pt2_entry_t); bcopy(spte2p + idx, dpte2p + idx, count); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { pt1_entry_t *pte1p; pt2_entry_t *pte2p; vm_paddr_t pa, pt2tab_pa; u_int i; PDEBUG(6, printf("%s: pmap = %p, pm_pt1 = %p\n", __func__, pmap, pmap->pm_pt1)); /* * No need to allocate L2 page table space yet but we do need * a valid L1 page table and PT2TAB table. * * Install shared kernel mappings to these tables. It's a little * tricky as some parts of KVA are reserved for vectors, devices, * and whatever else. These parts are supposed to be above * vm_max_kernel_address. Thus two regions should be installed: * * (1) . * * QQQ: The second region should be stable enough to be installed * only once in time when the tables are allocated. * QQQ: Maybe copy of both regions at once could be faster ... * QQQ: Maybe the other TTBR is an option. * * Finally, install own PT2TAB table to these tables. */ if (pmap->pm_pt1 == NULL) { pmap->pm_pt1 = (pt1_entry_t *)kmem_alloc_contig(kernel_arena, NB_IN_PT1, M_NOWAIT | M_ZERO, 0, -1UL, NB_IN_PT1, 0, pt_memattr); if (pmap->pm_pt1 == NULL) return (0); } if (pmap->pm_pt2tab == NULL) { /* * QQQ: (1) PT2TAB must be contiguous. 
If PT2TAB is one page * only, what should be the only size for 32 bit systems, * then we could allocate it with vm_page_alloc() and all * the stuff needed as other L2 page table pages. * (2) Note that a process PT2TAB is special L2 page table * page. Its mapping in kernel_arena is permanent and can * be used no matter which process is current. Its mapping * in PT2MAP can be used only for current process. */ pmap->pm_pt2tab = (pt2_entry_t *)kmem_alloc_attr(kernel_arena, NB_IN_PT2TAB, M_NOWAIT | M_ZERO, 0, -1UL, pt_memattr); if (pmap->pm_pt2tab == NULL) { /* * QQQ: As struct pmap is allocated from UMA with * UMA_ZONE_NOFREE flag, it's important to leave * no allocation in pmap if initialization failed. */ kmem_free(kernel_arena, (vm_offset_t)pmap->pm_pt1, NB_IN_PT1); pmap->pm_pt1 = NULL; return (0); } /* * QQQ: Each L2 page table page vm_page_t has pindex set to * pte1 index of virtual address mapped by this page. * It's not valid for non kernel PT2TABs themselves. * The pindex of these pages can not be altered because * of the way how they are allocated now. However, it * should not be a problem. */ } mtx_lock_spin(&allpmaps_lock); /* * To avoid race with pmap_kenter_pte1() and pmap_kenter_pt2tab(), * kernel_vm_end_new is used here instead of kernel_vm_end. */ pte1_copy_nosync(kern_pt1, pmap->pm_pt1, KERNBASE, kernel_vm_end_new - 1); pte1_copy_nosync(kern_pt1, pmap->pm_pt1, vm_max_kernel_address, 0xFFFFFFFF); pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, KERNBASE, kernel_vm_end_new - 1); pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, vm_max_kernel_address, 0xFFFFFFFF); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* * Store PT2MAP PT2 pages (a.k.a. PT2TAB) in PT2TAB itself. * I.e. self reference mapping. The PT2TAB is private, however mapped * into shared PT2MAP space, so the mapping should be not global. */ pt2tab_pa = vtophys(pmap->pm_pt2tab); pte2p = pmap_pt2tab_entry(pmap, (vm_offset_t)PT2MAP); for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) { pt2tab_store(pte2p++, PTE2_KPT_NG(pa)); } /* Insert PT2MAP PT2s into pmap PT1. */ pte1p = pmap_pte1(pmap, (vm_offset_t)PT2MAP); for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) { pte1_store(pte1p++, PTE1_LINK(pa)); } /* * Now synchronize new mapping which was made above. */ pte1_sync_range(pmap->pm_pt1, NB_IN_PT1); pte2_sync_range(pmap->pm_pt2tab, NB_IN_PT2TAB); CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } #ifdef INVARIANTS static boolean_t pt2tab_user_is_empty(pt2_entry_t *tab) { u_int i, end; end = pt2tab_index(VM_MAXUSER_ADDRESS); for (i = 0; i < end; i++) if (tab[i] != 0) return (FALSE); return (TRUE); } #endif /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
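 */

/*
 * A deliberately simplified lifecycle sketch (an illustration only, not
 * part of the original file); in the tree the pmap is embedded in a
 * vmspace and set up and torn down by the vmspace code.  It only shows
 * the required call order and the emptiness precondition.
 */
static __inline boolean_t
pmap_lifecycle_example(pmap_t pmap)
{

	if (pmap_pinit(pmap) == 0)
		return (FALSE);		/* no memory for page tables */
	/* ... pmap_enter()/pmap_remove() traffic would happen here ... */
	pmap_release(pmap);		/* legal only once the map is empty */
	return (TRUE);
}

/*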
*/ void pmap_release(pmap_t pmap) { #ifdef INVARIANTS vm_offset_t start, end; #endif KASSERT(pmap->pm_stats.resident_count == 0, ("%s: pmap resident count %ld != 0", __func__, pmap->pm_stats.resident_count)); KASSERT(pt2tab_user_is_empty(pmap->pm_pt2tab), ("%s: has allocated user PT2(s)", __func__)); KASSERT(CPU_EMPTY(&pmap->pm_active), ("%s: pmap %p is active on some CPU(s)", __func__, pmap)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); #ifdef INVARIANTS start = pte1_index(KERNBASE) * sizeof(pt1_entry_t); end = (pte1_index(0xFFFFFFFF) + 1) * sizeof(pt1_entry_t); bzero((char *)pmap->pm_pt1 + start, end - start); start = pt2tab_index(KERNBASE) * sizeof(pt2_entry_t); end = (pt2tab_index(0xFFFFFFFF) + 1) * sizeof(pt2_entry_t); bzero((char *)pmap->pm_pt2tab + start, end - start); #endif /* * We are leaving PT1 and PT2TAB allocated on released pmap, * so hopefully UMA vmspace_zone will always be inited with * UMA_ZONE_NOFREE flag. */ } /********************************************************* * * L2 table pages and their pages management routines. * *********************************************************/ /* * Virtual interface for L2 page table wire counting. * * Each L2 page table in a page has own counter which counts a number of * valid mappings in a table. Global page counter counts mappings in all * tables in a page plus a single itself mapping in PT2TAB. * * During a promotion we leave the associated L2 page table counter * untouched, so the table (strictly speaking a page which holds it) * is never freed if promoted. * * If a page m->wire_count == 1 then no valid mappings exist in any L2 page * table in the page and the page itself is only mapped in PT2TAB. */ static __inline void pt2_wirecount_init(vm_page_t m) { u_int i; /* * Note: A page m is allocated with VM_ALLOC_WIRED flag and * m->wire_count should be already set correctly. * So, there is no need to set it again herein. */ for (i = 0; i < NPT2_IN_PG; i++) m->md.pt2_wirecount[i] = 0; } static __inline void pt2_wirecount_inc(vm_page_t m, uint32_t pte1_idx) { /* * Note: A just modificated pte2 (i.e. already allocated) * is acquiring one extra reference which must be * explicitly cleared. It influences the KASSERTs herein. * All L2 page tables in a page always belong to the same * pmap, so we allow only one extra reference for the page. 
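 *
 * With 4KB pages this works out to NPT2_IN_PG (4) tables of
 * NPTE2_IN_PT2 (256) entries each, i.e. at most NPTE2_IN_PG (1024)
 * mappings plus the single PT2TAB self-mapping per page, which is the
 * bound the asserts below enforce (modulo the one extra transient
 * reference mentioned above).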
*/ KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] < (NPTE2_IN_PT2 + 1), ("%s: PT2 is overflowing ...", __func__)); KASSERT(m->wire_count <= (NPTE2_IN_PG + 1), ("%s: PT2PG is overflowing ...", __func__)); m->wire_count++; m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]++; } static __inline void pt2_wirecount_dec(vm_page_t m, uint32_t pte1_idx) { KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] != 0, ("%s: PT2 is underflowing ...", __func__)); KASSERT(m->wire_count > 1, ("%s: PT2PG is underflowing ...", __func__)); m->wire_count--; m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]--; } static __inline void pt2_wirecount_set(vm_page_t m, uint32_t pte1_idx, uint16_t count) { KASSERT(count <= NPTE2_IN_PT2, ("%s: invalid count %u", __func__, count)); KASSERT(m->wire_count > m->md.pt2_wirecount[pte1_idx & PT2PG_MASK], ("%s: PT2PG corrupting (%u, %u) ...", __func__, m->wire_count, m->md.pt2_wirecount[pte1_idx & PT2PG_MASK])); m->wire_count -= m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]; m->wire_count += count; m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] = count; KASSERT(m->wire_count <= (NPTE2_IN_PG + 1), ("%s: PT2PG is overflowed (%u) ...", __func__, m->wire_count)); } static __inline uint32_t pt2_wirecount_get(vm_page_t m, uint32_t pte1_idx) { return (m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]); } static __inline boolean_t pt2_is_empty(vm_page_t m, vm_offset_t va) { return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == 0); } static __inline boolean_t pt2_is_full(vm_page_t m, vm_offset_t va) { return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == NPTE2_IN_PT2); } static __inline boolean_t pt2pg_is_empty(vm_page_t m) { return (m->wire_count == 1); } /* * This routine is called if the L2 page table * is not mapped correctly. */ static vm_page_t _pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags) { uint32_t pte1_idx; pt1_entry_t *pte1p; pt2_entry_t pte2; vm_page_t m; vm_paddr_t pt2pg_pa, pt2_pa; pte1_idx = pte1_index(va); pte1p = pmap->pm_pt1 + pte1_idx; KASSERT(pte1_load(pte1p) == 0, ("%s: pm_pt1[%#x] is not zero: %#x", __func__, pte1_idx, pte1_load(pte1p))); pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, va)); if (!pte2_is_valid(pte2)) { /* * Install new PT2s page into pmap PT2TAB. */ m = vm_page_alloc(NULL, pte1_idx & ~PT2PG_MASK, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) { if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); VM_WAIT; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, * the L2 page table page may have been allocated. */ return (NULL); } pmap->pm_stats.resident_count++; pt2pg_pa = pmap_pt2pg_init(pmap, va, m); } else { pt2pg_pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pt2pg_pa); } pt2_wirecount_inc(m, pte1_idx); pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx); pte1_store(pte1p, PTE1_LINK(pt2_pa)); return (m); } static vm_page_t pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags) { u_int pte1_idx; pt1_entry_t *pte1p, pte1; vm_page_t m; pte1_idx = pte1_index(va); retry: pte1p = pmap->pm_pt1 + pte1_idx; pte1 = pte1_load(pte1p); /* * This supports switching from a 1MB page to a * normal 4K page. */ if (pte1_is_section(pte1)) { (void)pmap_demote_pte1(pmap, pte1p, va); /* * Reload pte1 after demotion. * * Note: Demotion can even fail as either PT2 is not find for * the virtual address or PT2PG can not be allocated. */ pte1 = pte1_load(pte1p); } /* * If the L2 page table page is mapped, we just increment the * hold count, and activate it. 
*/ if (pte1_is_link(pte1)) { m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); pt2_wirecount_inc(m, pte1_idx); } else { /* * Here if the PT2 isn't mapped, or if it has * been deallocated. */ m = _pmap_allocpte2(pmap, va, flags); if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) goto retry; } return (m); } static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused L2 page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free) { /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ #ifdef PMAP_DEBUG pmap_zero_page_check(m); #endif m->flags |= PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Unwire L2 page tables page. */ static void pmap_unwire_pt2pg(pmap_t pmap, vm_offset_t va, vm_page_t m) { pt1_entry_t *pte1p, opte1 __unused; pt2_entry_t *pte2p; uint32_t i; KASSERT(pt2pg_is_empty(m), ("%s: pmap %p PT2PG %p wired", __func__, pmap, m)); /* * Unmap all L2 page tables in the page from L1 page table. * * QQQ: Individual L2 page tables (except the last one) can be unmapped * earlier. However, we are doing that this way. */ KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK), ("%s: pmap %p va %#x PT2PG %p bad index", __func__, pmap, va, m)); pte1p = pmap->pm_pt1 + m->pindex; for (i = 0; i < NPT2_IN_PG; i++, pte1p++) { KASSERT(m->md.pt2_wirecount[i] == 0, ("%s: pmap %p PT2 %u (PG %p) wired", __func__, pmap, i, m)); opte1 = pte1_load(pte1p); if (pte1_is_link(opte1)) { pte1_clear(pte1p); /* * Flush intermediate TLB cache. */ pmap_tlb_flush(pmap, (m->pindex + i) << PTE1_SHIFT); } #ifdef INVARIANTS else KASSERT((opte1 == 0) || pte1_is_section(opte1), ("%s: pmap %p va %#x bad pte1 %x at %u", __func__, pmap, va, opte1, i)); #endif } /* * Unmap the page from PT2TAB. */ pte2p = pmap_pt2tab_entry(pmap, va); (void)pt2tab_load_clear(pte2p); pmap_tlb_flush(pmap, pt2map_pt2pg(va)); m->wire_count = 0; pmap->pm_stats.resident_count--; /* * This is a release store so that the ordinary store unmapping * the L2 page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); } /* * Decrements a L2 page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static __inline boolean_t pmap_unwire_pt2(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { pt2_wirecount_dec(m, pte1_index(va)); if (pt2pg_is_empty(m)) { /* * QQQ: Wire count is zero, so whole page should be zero and * we can set PG_ZERO flag to it. * Note that when promotion is enabled, it takes some * more efforts. See pmap_unwire_pt2_all() below. */ pmap_unwire_pt2pg(pmap, va, m); pmap_add_delayed_free_list(m, free); return (TRUE); } else return (FALSE); } /* * Drop a L2 page table page's wire count at once, which is used to record * the number of valid L2 page table entries within the page. If the wire * count drops to zero, then the L2 page table page is unmapped. 
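 */

/*
 * The invariant behind these helpers can be restated as the sketch
 * below (an illustration only, not part of the original file): a PT2
 * page is freeable only once every per-table counter is zero, at which
 * point the page's wire count consists solely of the PT2TAB
 * self-mapping.
 */
static __inline boolean_t
pt2pg_is_freeable_example(vm_page_t m)
{
	uint32_t i, mappings;

	mappings = 0;
	for (i = 0; i < NPT2_IN_PG; i++)
		mappings += pt2_wirecount_get(m, i);
	/* No mappings left and only the PT2TAB reference remains. */
	return (mappings == 0 && pt2pg_is_empty(m));
}

/*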
*/ static __inline void pmap_unwire_pt2_all(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { u_int pte1_idx = pte1_index(va); KASSERT(m->pindex == (pte1_idx & ~PT2PG_MASK), ("%s: PT2 page's pindex is wrong", __func__)); KASSERT(m->wire_count > pt2_wirecount_get(m, pte1_idx), ("%s: bad pt2 wire count %u > %u", __func__, m->wire_count, pt2_wirecount_get(m, pte1_idx))); /* * It's possible that the L2 page table was never used. * It happened in case that a section was created without promotion. */ if (pt2_is_full(m, va)) { pt2_wirecount_set(m, pte1_idx, 0); /* * QQQ: We clear L2 page table now, so when L2 page table page * is going to be freed, we can set it PG_ZERO flag ... * This function is called only on section mappings, so * hopefully it's not to big overload. * * XXX: If pmap is current, existing PT2MAP mapping could be * used for zeroing. */ pmap_zero_page_area(m, page_pt2off(pte1_idx), NB_IN_PT2); } #ifdef INVARIANTS else KASSERT(pt2_is_empty(m, va), ("%s: PT2 is not empty (%u)", __func__, pt2_wirecount_get(m, pte1_idx))); #endif if (pt2pg_is_empty(m)) { pmap_unwire_pt2pg(pmap, va, m); pmap_add_delayed_free_list(m, free); } } /* * After removing a L2 page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static boolean_t pmap_unuse_pt2(pmap_t pmap, vm_offset_t va, struct spglist *free) { pt1_entry_t pte1; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (FALSE); pte1 = pte1_load(pmap_pte1(pmap, va)); mpte = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); return (pmap_unwire_pt2(pmap, va, mpte, free)); } /************************************* * * Page management routines. * *************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 11); CTASSERT(_NPCPV == 336); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ #define PC_FREE10 0x0000fffful /* Free values for index 10 */ static const uint32_t pc_freemask[_NPCM] = { PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE10 }; SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif /* * Is given page managed? 
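 *
 * A page is managed when it is covered by the vm_page array and not
 * marked VPO_UNMANAGED; only managed pages get pv entries and have
 * their referenced and modified state tracked by the pmap layer.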
*/ static __inline boolean_t is_managed(vm_paddr_t pa) { vm_offset_t pgnum; vm_page_t m; pgnum = atop(pa); if (pgnum >= first_page) { m = PHYS_TO_VM_PAGE(pa); if (m == NULL) return (FALSE); if ((m->oflags & VPO_UNMANAGED) == 0) return (TRUE); } return (FALSE); } static __inline boolean_t pte1_is_managed(pt1_entry_t pte1) { return (is_managed(pte1_pa(pte1))); } static __inline boolean_t pte2_is_managed(pt2_entry_t pte2) { return (is_managed(pte2_pa(pte2))); } /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. */ static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap) { struct pch newtail; struct pv_chunk *pc; struct md_page *pvh; pt1_entry_t *pte1p; pmap_t pmap; pt2_entry_t *pte2p, tpte2; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint32_t inuse; int bit, field, freed; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); pmap = NULL; m_pc = NULL; SLIST_INIT(&free); TAILQ_INIT(&newtail); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || SLIST_EMPTY(&free))) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); if (pmap != pc->pc_pmap) { if (pmap != NULL) { if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } } /* * Destroy every non-wired, 4 KB page mapping in the chunk. */ freed = 0; for (field = 0; field < _NPCM; field++) { for (inuse = ~pc->pc_map[field] & pc_freemask[field]; inuse != 0; inuse &= ~(1UL << bit)) { bit = ffs(inuse) - 1; pv = &pc->pc_pventry[field * 32 + bit]; va = pv->pv_va; pte1p = pmap_pte1(pmap, va); if (pte1_is_section(pte1_load(pte1p))) continue; pte2p = pmap_pte2(pmap, va); tpte2 = pte2_load(pte2p); if ((tpte2 & PTE2_W) == 0) tpte2 = pte2_load_clear(pte2p); pmap_pte2_release(pte2p); if ((tpte2 & PTE2_W) != 0) continue; KASSERT(tpte2 != 0, ("pmap_pv_reclaim: pmap %p va %#x zero pte", pmap, va)); pmap_tlb_flush(pmap, va); m = PHYS_TO_VM_PAGE(pte2_pa(tpte2)); if (pte2_is_dirty(tpte2)) vm_page_dirty(m); if ((tpte2 & PTE2_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) { vm_page_aflag_clear(m, PGA_WRITEABLE); } } pc->pc_map[field] |= 1UL << bit; pmap_unuse_pt2(pmap, va, &free); freed++; } } if (freed == 0) { TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } /* Every freed mapping is for a 4 KB page. */ pmap->pm_stats.resident_count -= freed; PV_STAT(pv_entry_frees += freed); PV_STAT(pv_entry_spare += freed); pv_entry_count -= freed; TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != pc_freemask[field]) { TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); /* * One freed pv entry in locked_pmap is * sufficient. */ if (pmap == locked_pmap) goto out; break; } if (field == _NPCM) { PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* Entire chunk is free; return it. 
*/ m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); break; } } out: TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); if (pmap != NULL) { if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. */ m_pc->wire_count = 1; atomic_add_int(&vm_cnt.v_wire_count, 1); } pmap_free_zero_pages(&free); return (m_pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; TAILQ_REMOVE(&pv_chunks, pc, pc_lru); PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, PQ_NONE); vm_page_free(m); pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); } /* * Free the pv_entry back to the free list. */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 32; bit = idx % 32; pc->pc_map[field] |= 1ul << bit; for (idx = 0; idx < _NPCM; idx++) if (pc->pc_map[idx] != pc_freemask[idx]) { /* * 98% of the time, pc is already at the head of the * list. If it isn't already, move it to the head. */ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != pc)) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } /* * Get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entry_max tunable.\n"); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffs(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 32 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the pte2list "pv_vafree" is synchronized by the pvh * global lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_pte2list_alloc() completes. 
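 *
 * If the scan above found no spare entry, a fresh chunk page is
 * allocated here.  When even that fails, the caller either gets NULL
 * back (the "try" case) or pmap_pv_reclaim() tears down other mappings
 * to free entries, after which either the recycled page backs the new
 * chunk or the scan above is retried.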
*/ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } m = pmap_pv_reclaim(pmap); if (m == NULL) goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); pc = (struct pv_chunk *)pmap_pte2list_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } return (pv); } static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { struct md_page *pvh; rw_assert(&pvh_global_lock, RA_WLOCKED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } static void pmap_pv_demote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PTE1_OFFSET) == 0, ("pmap_pv_demote_pte1: pa is not 1mpage aligned")); /* * Transfer the 1mpage's pv entry for this mapping to the first * page's pv list. */ pvh = pa_to_pvh(pa); va = pte1_trunc(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pte1: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTE2_IN_PT2 - 1 pv entries. */ va_last = va + PTE1_SIZE - PAGE_SIZE; do { m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pte1: page %p is not managed", m)); va += PAGE_SIZE; pmap_insert_entry(pmap, va, m); } while (va < va_last); } static void pmap_pv_promote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PTE1_OFFSET) == 0, ("pmap_pv_promote_pte1: pa is not 1mpage aligned")); /* * Transfer the first page's pv entry for this mapping to the * 1mpage's pv list. Aside from avoiding the cost of a call * to get_pv_entry(), a transfer avoids the possibility that * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim() * removes one of the mappings that is being promoted. 
*/ m = PHYS_TO_VM_PAGE(pa); va = pte1_trunc(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pte1: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTE2_IN_PT2 - 1 pv entries. */ va_last = va + PTE1_SIZE - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Create the pv entries for each of the pages within a section. */ static boolean_t pmap_pv_insert_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Tries to promote the NPTE2_IN_PT2, contiguous 4KB page mappings that are * within a single page table page (PT2) to a single 1MB page mapping. * For promotion to occur, two conditions must be met: (1) the 4KB page * mappings must map aligned, contiguous physical memory and (2) the 4KB page * mappings must have identical characteristics. * * Managed (PG_MANAGED) mappings within the kernel address space are not * promoted. The reason is that kernel PTE1s are replicated in each pmap but * pmap_remove_write(), pmap_clear_modify(), and pmap_clear_reference() only * read the PTE1 from the kernel pmap. */ static void pmap_promote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) { pt1_entry_t npte1; pt2_entry_t *fpte2p, fpte2, fpte2_fav; pt2_entry_t *pte2p, pte2; vm_offset_t pteva __unused; vm_page_t m __unused; PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__, pmap, va, pte1_load(pte1p), pte1p)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE2 in the specified PT2. Abort if this PTE2 is * either invalid, unused, or does not map the first 4KB physical page * within a 1MB page. */ fpte2p = pmap_pte2_quick(pmap, pte1_trunc(va)); setpte1: fpte2 = pte2_load(fpte2p); if ((fpte2 & ((PTE2_FRAME & PTE1_OFFSET) | PTE2_A | PTE2_V)) != (PTE2_A | PTE2_V)) { pmap_pte1_p_failures++; CTR3(KTR_PMAP, "%s: failure(1) for va %#x in pmap %p", __func__, va, pmap); return; } if (pte2_is_managed(fpte2) && pmap == kernel_pmap) { pmap_pte1_p_failures++; CTR3(KTR_PMAP, "%s: failure(2) for va %#x in pmap %p", __func__, va, pmap); return; } if ((fpte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) { /* * When page is not modified, PTE2_RO can be set without * a TLB invalidation. * * Note: When modified bit is being set, then in hardware case, * the TLB entry is re-read (updated) from PT2, and in * software case (abort), the PTE2 is read from PT2 and * TLB flushed if changed. The following cmpset() solves * any race with setting this bit in both cases. */ if (!pte2_cmpset(fpte2p, fpte2, fpte2 | PTE2_RO)) goto setpte1; fpte2 |= PTE2_RO; } /* * Examine each of the other PTE2s in the specified PT2. Abort if this * PTE2 maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE2. 
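 *
 * The scan below runs backwards: fpte2_fav starts as the value the
 * last PTE2 is expected to hold (the first frame plus
 * PTE1_SIZE - PTE2_SIZE) and is decreased by PTE2_SIZE on each step,
 * so every PTE2 must map the next physically consecutive 4KB frame
 * with the same attributes.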
*/ fpte2_fav = (fpte2 & (PTE2_FRAME | PTE2_A | PTE2_V)); fpte2_fav += PTE1_SIZE - PTE2_SIZE; /* examine from the end */ for (pte2p = fpte2p + NPTE2_IN_PT2 - 1; pte2p > fpte2p; pte2p--) { setpte2: pte2 = pte2_load(pte2p); if ((pte2 & (PTE2_FRAME | PTE2_A | PTE2_V)) != fpte2_fav) { pmap_pte1_p_failures++; CTR3(KTR_PMAP, "%s: failure(3) for va %#x in pmap %p", __func__, va, pmap); return; } if ((pte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) { /* * When page is not modified, PTE2_RO can be set * without a TLB invalidation. See note above. */ if (!pte2_cmpset(pte2p, pte2, pte2 | PTE2_RO)) goto setpte2; pte2 |= PTE2_RO; pteva = pte1_trunc(va) | (pte2 & PTE1_OFFSET & PTE2_FRAME); CTR3(KTR_PMAP, "%s: protect for va %#x in pmap %p", __func__, pteva, pmap); } if ((pte2 & PTE2_PROMOTE) != (fpte2 & PTE2_PROMOTE)) { pmap_pte1_p_failures++; CTR3(KTR_PMAP, "%s: failure(4) for va %#x in pmap %p", __func__, va, pmap); return; } fpte2_fav -= PTE2_SIZE; } /* * The page table page in its current state will stay in PT2TAB * until the PTE1 mapping the section is demoted by pmap_demote_pte1() * or destroyed by pmap_remove_pte1(). * * Note that L2 page table size is not equal to PAGE_SIZE. */ m = PHYS_TO_VM_PAGE(trunc_page(pte1_link_pa(pte1_load(pte1p)))); KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size], ("%s: PT2 page is out of range", __func__)); KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK), ("%s: PT2 page's pindex is wrong", __func__)); /* * Get pte1 from pte2 format. */ npte1 = (fpte2 & PTE1_FRAME) | ATTR_TO_L1(fpte2) | PTE1_V; /* * Promote the pv entries. */ if (pte2_is_managed(fpte2)) pmap_pv_promote_pte1(pmap, va, pte1_pa(npte1)); /* * Map the section. */ if (pmap == kernel_pmap) pmap_kenter_pte1(va, npte1); else pte1_store(pte1p, npte1); /* * Flush old small mappings. We call single pmap_tlb_flush() in * pmap_demote_pte1() and pmap_remove_pte1(), so we must be sure that * no small mappings survive. We assume that given pmap is current and * don't play game with PTE2_NG. */ pmap_tlb_flush_range(pmap, pte1_trunc(va), PTE1_SIZE); pmap_pte1_promotions++; CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p", __func__, va, pmap); PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n", __func__, pmap, va, npte1, pte1_load(pte1p), pte1p)); } /* * Zero L2 page table page. */ static __inline void pmap_clear_pt2(pt2_entry_t *fpte2p) { pt2_entry_t *pte2p; for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) pte2_clear(pte2p); } /* * Removes a 1MB page mapping from the kernel pmap. */ static void pmap_remove_kernel_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) { vm_page_t m; uint32_t pte1_idx; pt2_entry_t *fpte2p; vm_paddr_t pt2_pa; PMAP_LOCK_ASSERT(pmap, MA_OWNED); m = pmap_pt2_page(pmap, va); if (m == NULL) /* * QQQ: Is this function called only on promoted pte1? * We certainly do section mappings directly * (without promotion) in kernel !!! */ panic("%s: missing pt2 page", __func__); pte1_idx = pte1_index(va); /* * Initialize the L2 page table. */ fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx); pmap_clear_pt2(fpte2p); /* * Remove the mapping. */ pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(m), pte1_idx); pmap_kenter_pte1(va, PTE1_LINK(pt2_pa)); /* * QQQ: We do not need to invalidate PT2MAP mapping * as we did not change it. I.e. the L2 page table page * was and still is mapped the same way. 
*/ } /* * Do the things to unmap a section in a process */ static void pmap_remove_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva, struct spglist *free) { pt1_entry_t opte1; struct md_page *pvh; vm_offset_t eva, va; vm_page_t m; PDEBUG(6, printf("%s(%p): va %#x pte1 %#x at %p\n", __func__, pmap, sva, pte1_load(pte1p), pte1p)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PTE1_OFFSET) == 0, ("%s: sva is not 1mpage aligned", __func__)); /* * Clear and invalidate the mapping. It should occupy one and only TLB * entry. So, pmap_tlb_flush() called with aligned address should be * sufficient. */ opte1 = pte1_load_clear(pte1p); pmap_tlb_flush(pmap, sva); if (pte1_is_wired(opte1)) pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE; pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE; if (pte1_is_managed(opte1)) { pvh = pa_to_pvh(pte1_pa(opte1)); pmap_pvh_free(pvh, pmap, sva); eva = sva + PTE1_SIZE; for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1)); va < eva; va += PAGE_SIZE, m++) { if (pte1_is_dirty(opte1)) vm_page_dirty(m); if (opte1 & PTE1_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { /* * L2 page table(s) can't be removed from kernel map as * kernel counts on it (stuff around pmap_growkernel()). */ pmap_remove_kernel_pte1(pmap, pte1p, sva); } else { /* * Get associated L2 page table page. * It's possible that the page was never allocated. */ m = pmap_pt2_page(pmap, sva); if (m != NULL) pmap_unwire_pt2_all(pmap, sva, m, free); } } /* * Fills L2 page table page with mappings to consecutive physical pages. */ static __inline void pmap_fill_pt2(pt2_entry_t *fpte2p, pt2_entry_t npte2) { pt2_entry_t *pte2p; for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) { pte2_store(pte2p, npte2); npte2 += PTE2_SIZE; } } /* * Tries to demote a 1MB page mapping. If demotion fails, the * 1MB page mapping is invalidated. */ static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) { pt1_entry_t opte1, npte1; pt2_entry_t *fpte2p, npte2; vm_paddr_t pt2pg_pa, pt2_pa; vm_page_t m; struct spglist free; uint32_t pte1_idx, isnew = 0; PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__, pmap, va, pte1_load(pte1p), pte1p)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); opte1 = pte1_load(pte1p); KASSERT(pte1_is_section(opte1), ("%s: opte1 not a section", __func__)); if ((opte1 & PTE1_A) == 0 || (m = pmap_pt2_page(pmap, va)) == NULL) { KASSERT(!pte1_is_wired(opte1), ("%s: PT2 page for a wired mapping is missing", __func__)); /* * Invalidate the 1MB page mapping and return * "failure" if the mapping was never accessed or the * allocation of the new page table page fails. */ if ((opte1 & PTE1_A) == 0 || (m = vm_page_alloc(NULL, pte1_index(va) & ~PT2PG_MASK, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); pmap_remove_pte1(pmap, pte1p, pte1_trunc(va), &free); pmap_free_zero_pages(&free); CTR3(KTR_PMAP, "%s: failure for va %#x in pmap %p", __func__, va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap->pm_stats.resident_count++; isnew = 1; /* * We init all L2 page tables in the page even if * we are going to change everything for one L2 page * table in a while. */ pt2pg_pa = pmap_pt2pg_init(pmap, va, m); } else { if (va < VM_MAXUSER_ADDRESS) { if (pt2_is_empty(m, va)) isnew = 1; /* Demoting section w/o promotion. 
*/ #ifdef INVARIANTS else KASSERT(pt2_is_full(m, va), ("%s: bad PT2 wire" " count %u", __func__, pt2_wirecount_get(m, pte1_index(va)))); #endif } } pt2pg_pa = VM_PAGE_TO_PHYS(m); pte1_idx = pte1_index(va); /* * If the pmap is current, then the PT2MAP can provide access to * the page table page (promoted L2 page tables are not unmapped). * Otherwise, temporarily map the L2 page table page (m) into * the kernel's address space at either PADDR1 or PADDR2. * * Note that L2 page table size is not equal to PAGE_SIZE. */ if (pmap_is_current(pmap)) fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx); else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) { if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) { pte2_store(PMAP1, PTE2_KPT(pt2pg_pa)); #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif tlb_flush_local((vm_offset_t)PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); tlb_flush_local((vm_offset_t)PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; fpte2p = page_pt2((vm_offset_t)PADDR1, pte1_idx); } else { mtx_lock(&PMAP2mutex); if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) { pte2_store(PMAP2, PTE2_KPT(pt2pg_pa)); tlb_flush((vm_offset_t)PADDR2); } fpte2p = page_pt2((vm_offset_t)PADDR2, pte1_idx); } pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx); npte1 = PTE1_LINK(pt2_pa); KASSERT((opte1 & PTE1_A) != 0, ("%s: opte1 is missing PTE1_A", __func__)); KASSERT((opte1 & (PTE1_NM | PTE1_RO)) != PTE1_NM, ("%s: opte1 has PTE1_NM", __func__)); /* * Get pte2 from pte1 format. */ npte2 = pte1_pa(opte1) | ATTR_TO_L2(opte1) | PTE2_V; /* * If the L2 page table page is new, initialize it. If the mapping * has changed attributes, update the page table entries. */ if (isnew != 0) { pt2_wirecount_set(m, pte1_idx, NPTE2_IN_PT2); pmap_fill_pt2(fpte2p, npte2); } else if ((pte2_load(fpte2p) & PTE2_PROMOTE) != (npte2 & PTE2_PROMOTE)) pmap_fill_pt2(fpte2p, npte2); KASSERT(pte2_pa(pte2_load(fpte2p)) == pte2_pa(npte2), ("%s: fpte2p and npte2 map different physical addresses", __func__)); if (fpte2p == PADDR2) mtx_unlock(&PMAP2mutex); /* * Demote the mapping. This pmap is locked. The old PTE1 has * PTE1_A set. If the old PTE1 has not PTE1_RO set, it also * has not PTE1_NM set. Thus, there is no danger of a race with * another processor changing the setting of PTE1_A and/or PTE1_NM * between the read above and the store below. */ if (pmap == kernel_pmap) pmap_kenter_pte1(va, npte1); else pte1_store(pte1p, npte1); /* * Flush old big mapping. The mapping should occupy one and only * TLB entry. So, pmap_tlb_flush() called with aligned address * should be sufficient. */ pmap_tlb_flush(pmap, pte1_trunc(va)); /* * Demote the pv entry. This depends on the earlier demotion * of the mapping. Specifically, the (re)creation of a per- * page pv entry might trigger the execution of pmap_pv_reclaim(), * which might reclaim a newly (re)created per-page pv entry * and destroy the associated mapping. In order to destroy * the mapping, the PTE1 must have already changed from mapping * the 1mpage to referencing the page table page. */ if (pte1_is_managed(opte1)) pmap_pv_demote_pte1(pmap, va, pte1_pa(opte1)); pmap_pte1_demotions++; CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p", __func__, va, pmap); PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n", __func__, pmap, va, npte1, pte1_load(pte1p), pte1p)); return (TRUE); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. 
* * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { pt1_entry_t *pte1p; pt2_entry_t *pte2p; pt2_entry_t npte2, opte2; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte2, om; boolean_t wired; va = trunc_page(va); mpte2 = NULL; wired = (flags & PMAP_ENTER_WIRED) != 0; KASSERT(va <= vm_max_kernel_address, ("%s: toobig", __func__)); KASSERT(va < UPT2V_MIN_ADDRESS || va >= UPT2V_MAX_ADDRESS, ("%s: invalid to pmap_enter page table pages (va: 0x%x)", __func__, va)); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte2 = pmap_allocpte2(pmap, va, flags); if (mpte2 == NULL) { KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, ("pmap_allocpte2 failed with sleep allowed")); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } } pte1p = pmap_pte1(pmap, va); if (pte1_is_section(pte1_load(pte1p))) panic("%s: attempted on 1MB page", __func__); pte2p = pmap_pte2_quick(pmap, va); if (pte2p == NULL) panic("%s: invalid L1 page table entry va=%#x", __func__, va); om = NULL; pa = VM_PAGE_TO_PHYS(m); opte2 = pte2_load(pte2p); opa = pte2_pa(opte2); /* * Mapping has not changed, must be protection or wiring change. */ if (pte2_is_valid(opte2) && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT2 pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT2 page will be also. */ if (wired && !pte2_is_wired(opte2)) pmap->pm_stats.wired_count++; else if (!wired && pte2_is_wired(opte2)) pmap->pm_stats.wired_count--; /* * Remove extra pte2 reference */ if (mpte2) pt2_wirecount_dec(mpte2, pte1_index(va)); if (pte2_is_managed(opte2)) om = m; goto validate; } /* * QQQ: We think that changing physical address on writeable mapping * is not safe. Well, maybe on kernel address space with correct * locking, it can make a sense. However, we have no idea why * anyone should do that on user address space. Are we wrong? */ KASSERT((opa == 0) || (opa == pa) || !pte2_is_valid(opte2) || ((opte2 & PTE2_RO) != 0), ("%s: pmap %p va %#x(%#x) opa %#x pa %#x - gotcha %#x %#x!", __func__, pmap, va, opte2, opa, pa, flags, prot)); pv = NULL; /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { if (pte2_is_wired(opte2)) pmap->pm_stats.wired_count--; if (pte2_is_managed(opte2)) { om = PHYS_TO_VM_PAGE(opa); pv = pmap_pvh_remove(&om->md, pmap, va); } /* * Remove extra pte2 reference */ if (mpte2 != NULL) pt2_wirecount_dec(mpte2, va >> PTE1_SHIFT); } else pmap->pm_stats.resident_count++; /* * Enter on the PV list if part of our managed memory. 
*/ if ((m->oflags & VPO_UNMANAGED) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("%s: managed mapping within the clean submap", __func__)); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } else if (pv != NULL) free_pv_entry(pmap, pv); /* * Increment counters */ if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ npte2 = PTE2(pa, PTE2_NM, vm_page_pte2_attr(m)); if (prot & VM_PROT_WRITE) { if (pte2_is_managed(npte2)) vm_page_aflag_set(m, PGA_WRITEABLE); } else npte2 |= PTE2_RO; if ((prot & VM_PROT_EXECUTE) == 0) npte2 |= PTE2_NX; if (wired) npte2 |= PTE2_W; if (va < VM_MAXUSER_ADDRESS) npte2 |= PTE2_U; if (pmap != kernel_pmap) npte2 |= PTE2_NG; /* * If the mapping or permission bits are different, we need * to update the pte2. * * QQQ: Think again and again what to do * if the mapping is going to be changed! */ if ((opte2 & ~(PTE2_NM | PTE2_A)) != (npte2 & ~(PTE2_NM | PTE2_A))) { /* * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA * is set. Do it now, before the mapping is stored and made * valid for hardware table walk. If done later, there is a race * for other threads of current process in lazy loading case. * Don't do it for kernel memory which is mapped with exec * permission even if the memory isn't going to hold executable * code. The only time when icache sync is needed is after * kernel module is loaded and the relocation info is processed. * And it's done in elf_cpu_load_file(). * * QQQ: (1) Does it exist any better way where * or how to sync icache? * (2) Now, we do it on a page basis. */ if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap && m->md.pat_mode == VM_MEMATTR_WB_WA && (opa != pa || (opte2 & PTE2_NX))) cache_icache_sync_fresh(va, pa, PAGE_SIZE); npte2 |= PTE2_A; if (flags & VM_PROT_WRITE) npte2 &= ~PTE2_NM; if (opte2 & PTE2_V) { /* Change mapping with break-before-make approach. */ opte2 = pte2_load_clear(pte2p); pmap_tlb_flush(pmap, va); pte2_store(pte2p, npte2); if (opte2 & PTE2_A) { if (pte2_is_managed(opte2)) vm_page_aflag_set(om, PGA_REFERENCED); } if (pte2_is_dirty(opte2)) { if (pte2_is_managed(opte2)) vm_page_dirty(om); } if (pte2_is_managed(opte2) && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); } else pte2_store(pte2p, npte2); } #if 0 else { /* * QQQ: In time when both access and not mofified bits are * emulated by software, this should not happen. Some * analysis is need, if this really happen. Missing * tlb flush somewhere could be the reason. */ panic("%s: pmap %p va %#x opte2 %x npte2 %x !!", __func__, pmap, va, opte2, npte2); } #endif /* * If both the L2 page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte2 == NULL || pt2_is_full(mpte2, va)) && sp_enabled && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pte1(pmap, pte1p, va); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Do the things to unmap a page in a process. */ static int pmap_remove_pte2(pmap_t pmap, pt2_entry_t *pte2p, vm_offset_t va, struct spglist *free) { pt2_entry_t opte2; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Clear and invalidate the mapping. 
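 * The pte2 is loaded and cleared first and the TLB entry flushed
 * before the referenced and modified bits are reflected back to the
 * vm_page below.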
*/ opte2 = pte2_load_clear(pte2p); pmap_tlb_flush(pmap, va); KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %#x not link pte2 %#x", __func__, pmap, va, opte2)); if (opte2 & PTE2_W) pmap->pm_stats.wired_count -= 1; pmap->pm_stats.resident_count -= 1; if (pte2_is_managed(opte2)) { m = PHYS_TO_VM_PAGE(pte2_pa(opte2)); if (pte2_is_dirty(opte2)) vm_page_dirty(m); if (opte2 & PTE2_A) vm_page_aflag_set(m, PGA_REFERENCED); pmap_remove_entry(pmap, m, va); } return (pmap_unuse_pt2(pmap, va, free)); } /* * Remove a single page from a process address space. */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free) { pt2_entry_t *pte2p; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("%s: curthread not pinned", __func__)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte2p = pmap_pte2_quick(pmap, va)) == NULL || !pte2_is_valid(pte2_load(pte2p))) return; pmap_remove_pte2(pmap, pte2p, va, free); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t nextva; pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; struct spglist free; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); PMAP_LOCK(pmap); /* * Special handling of removing one page. A very common * operation and easy to short circuit some code. */ if (sva + PAGE_SIZE == eva) { pte1 = pte1_load(pmap_pte1(pmap, sva)); if (pte1_is_link(pte1)) { pmap_remove_page(pmap, sva, &free); goto out; } } for (; sva < eva; sva = nextva) { /* * Calculate address for next L2 page table. */ nextva = pte1_trunc(sva + PTE1_SIZE); if (nextva < sva) nextva = eva; if (pmap->pm_stats.resident_count == 0) break; pte1p = pmap_pte1(pmap, sva); pte1 = pte1_load(pte1p); /* * Weed out invalid mappings. Note: we assume that the L1 page * table is always allocated, and in kernel virtual. */ if (pte1 == 0) continue; if (pte1_is_section(pte1)) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + PTE1_SIZE == nextva && eva >= nextva) { pmap_remove_pte1(pmap, pte1p, sva, &free); continue; } else if (!pmap_demote_pte1(pmap, pte1p, sva)) { /* The large page mapping was destroyed. */ continue; } #ifdef INVARIANTS else { /* Update pte1 after demotion. */ pte1 = pte1_load(pte1p); } #endif } KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" " is not link", __func__, pmap, sva, pte1, pte1p)); /* * Limit our scan to either the end of the va represented * by the current L2 page table page, or to the end of the * range being removed. */ if (nextva > eva) nextva = eva; for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, sva += PAGE_SIZE) { pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2)) continue; if (pmap_remove_pte2(pmap, pte2p, sva, &free)) break; } } out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) 
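 */

/*
 * A small illustration (not part of the original file) of the routine's
 * post-condition as a hypothetical caller might rely on it: once it
 * returns, the page has no mappings left and cannot be writeable
 * through any pmap.
 */
static __inline void
pmap_remove_all_example(vm_page_t m)
{

	pmap_remove_all(m);
	KASSERT((m->aflags & PGA_WRITEABLE) == 0,
	    ("%s: page %p still writeable", __func__, m));
}

/*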
*/ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt2_entry_t *pte2p, opte2; pt1_entry_t *pte1p; vm_offset_t va; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, va); (void)pmap_demote_pte1(pmap, pte1p, va); PMAP_UNLOCK(pmap); } small_mappings: while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pte1p = pmap_pte1(pmap, pv->pv_va); KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found " "a 1mpage in page %p's pv list", __func__, m)); pte2p = pmap_pte2_quick(pmap, pv->pv_va); opte2 = pte2_load_clear(pte2p); pmap_tlb_flush(pmap, pv->pv_va); KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %x zero pte2", __func__, pmap, pv->pv_va)); if (pte2_is_wired(opte2)) pmap->pm_stats.wired_count--; if (opte2 & PTE2_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (pte2_is_dirty(opte2)) vm_page_dirty(m); pmap_unuse_pt2(pmap, pv->pv_va, &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * Just subroutine for pmap_remove_pages() to reasonably satisfy * good coding style, a.k.a. 80 character line width limit hell. */ static __inline void pmap_remove_pte1_quick(pmap_t pmap, pt1_entry_t pte1, pv_entry_t pv, struct spglist *free) { vm_paddr_t pa; vm_page_t m, mt, mpt2pg; struct md_page *pvh; pa = pte1_pa(pte1); m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x", __func__, m, m->phys_addr, pa)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("%s: bad pte1 %#x", __func__, pte1)); if (pte1_is_dirty(pte1)) { for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++) vm_page_dirty(mt); } pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE; pvh = pa_to_pvh(pa); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpt2pg = pmap_pt2_page(pmap, pv->pv_va); if (mpt2pg != NULL) pmap_unwire_pt2_all(pmap, pv->pv_va, mpt2pg, free); } /* * Just subroutine for pmap_remove_pages() to reasonably satisfy * good coding style, a.k.a. 80 character line width limit hell. 
*/ static __inline void pmap_remove_pte2_quick(pmap_t pmap, pt2_entry_t pte2, pv_entry_t pv, struct spglist *free) { vm_paddr_t pa; vm_page_t m; struct md_page *pvh; pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x", __func__, m, m->phys_addr, pa)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("%s: bad pte2 %#x", __func__, pte2)); if (pte2_is_dirty(pte2)) vm_page_dirty(m); pmap->pm_stats.resident_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(pa); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_unuse_pt2(pmap, pv->pv_va, free); } /* * Remove all pages from specified address space this aids process * exit speeds. Also, this code is special cased for current process * only, but can have the more generic (and slightly slower) mode enabled. * This is much faster than pmap_remove in the case of running down * an entire address space. */ void pmap_remove_pages(pmap_t pmap) { pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; pv_entry_t pv; struct pv_chunk *pc, *npc; struct spglist free; int field, idx; int32_t bit; uint32_t inuse, bitmask; boolean_t allfree; /* * Assert that the given pmap is only active on the current * CPU. Unfortunately, we cannot block another CPU from * activating the pmap while this function is executing. */ KASSERT(pmap == vmspace_pmap(curthread->td_proc->p_vmspace), ("%s: non-current pmap %p", __func__, pmap)); #if defined(SMP) && defined(INVARIANTS) { cpuset_t other_cpus; sched_pin(); other_cpus = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &other_cpus); sched_unpin(); KASSERT(CPU_EMPTY(&other_cpus), ("%s: pmap %p active on other cpus", __func__, pmap)); } #endif SLIST_INIT(&free); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { KASSERT(pc->pc_pmap == pmap, ("%s: wrong pmap %p %p", __func__, pmap, pc->pc_pmap)); allfree = TRUE; for (field = 0; field < _NPCM; field++) { inuse = (~(pc->pc_map[field])) & pc_freemask[field]; while (inuse != 0) { bit = ffs(inuse) - 1; bitmask = 1UL << bit; idx = field * 32 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; /* * Note that we cannot remove wired pages * from a process' mapping at this time */ pte1p = pmap_pte1(pmap, pv->pv_va); pte1 = pte1_load(pte1p); if (pte1_is_section(pte1)) { if (pte1_is_wired(pte1)) { allfree = FALSE; continue; } pte1_clear(pte1p); pmap_remove_pte1_quick(pmap, pte1, pv, &free); } else if (pte1_is_link(pte1)) { pte2p = pt2map_entry(pv->pv_va); pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2)) { printf("%s: pmap %p va %#x " "pte2 %#x\n", __func__, pmap, pv->pv_va, pte2); panic("bad pte2"); } if (pte2_is_wired(pte2)) { allfree = FALSE; continue; } pte2_clear(pte2p); pmap_remove_pte2_quick(pmap, pte2, pv, &free); } else { printf("%s: pmap %p va %#x pte1 %#x\n", __func__, pmap, pv->pv_va, pte1); panic("bad pte1"); } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; } } if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } tlb_flush_all_ng_local(); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * This code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No L2 page table pages. * but is *MUCH* faster than pmap_enter... 
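 *
 * Because of these assumptions the routine below never sleeps: the L2
 * page table page is taken with PMAP_ENTER_NOSLEEP and a NULL return
 * signals failure, so callers such as pmap_enter_object() simply skip
 * the page. The mapping is always entered read-only with PTE2_NM set,
 * so a later write has to be resolved through vm_fault().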
*/ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpt2pg) { pt2_entry_t *pte2p, pte2; vm_paddr_t pa; struct spglist free; uint32_t l2prot; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("%s: managed mapping within the clean submap", __func__)); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a L2 page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { u_int pte1_idx; pt1_entry_t pte1, *pte1p; vm_paddr_t pt2_pa; /* * Get L1 page table things. */ pte1_idx = pte1_index(va); pte1p = pmap_pte1(pmap, va); pte1 = pte1_load(pte1p); if (mpt2pg && (mpt2pg->pindex == (pte1_idx & ~PT2PG_MASK))) { /* * Each of NPT2_IN_PG L2 page tables on the page can * come here. Make sure that associated L1 page table * link is established. * * QQQ: It comes that we don't establish all links to * L2 page tables for newly allocated L2 page * tables page. */ KASSERT(!pte1_is_section(pte1), ("%s: pte1 %#x is section", __func__, pte1)); if (!pte1_is_link(pte1)) { pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(mpt2pg), pte1_idx); pte1_store(pte1p, PTE1_LINK(pt2_pa)); } pt2_wirecount_inc(mpt2pg, pte1_idx); } else { /* * If the L2 page table page is mapped, we just * increment the hold count, and activate it. */ if (pte1_is_section(pte1)) { return (NULL); } else if (pte1_is_link(pte1)) { mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); pt2_wirecount_inc(mpt2pg, pte1_idx); } else { mpt2pg = _pmap_allocpte2(pmap, va, PMAP_ENTER_NOSLEEP); if (mpt2pg == NULL) return (NULL); } } } else { mpt2pg = NULL; } /* * This call to pt2map_entry() makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte2_quick(). * But that isn't as quick as pt2map_entry(). */ pte2p = pt2map_entry(va); pte2 = pte2_load(pte2p); if (pte2_is_valid(pte2)) { if (mpt2pg != NULL) { /* * Remove extra pte2 reference */ pt2_wirecount_dec(mpt2pg, pte1_index(va)); mpt2pg = NULL; } return (NULL); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpt2pg != NULL) { SLIST_INIT(&free); if (pmap_unwire_pt2(pmap, va, mpt2pg, &free)) { pmap_tlb_flush(pmap, va); pmap_free_zero_pages(&free); } mpt2pg = NULL; } return (NULL); } /* * Increment counters */ pmap->pm_stats.resident_count++; /* * Now validate mapping with RO protection */ pa = VM_PAGE_TO_PHYS(m); l2prot = PTE2_RO | PTE2_NM; if (va < VM_MAXUSER_ADDRESS) l2prot |= PTE2_U | PTE2_NG; if ((prot & VM_PROT_EXECUTE) == 0) l2prot |= PTE2_NX; else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) { /* * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA * is set. QQQ: For more info, see comments in pmap_enter(). */ cache_icache_sync_fresh(va, pa, PAGE_SIZE); } pte2_store(pte2p, PTE2(pa, l2prot, vm_page_pte2_attr(m))); return (mpt2pg); } void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Tries to create 1MB page mapping. Returns TRUE if successful and * FALSE otherwise. 
Fails if (1) a page table page cannot be allocated without * blocking, (2) a mapping already exists at the specified virtual address, or * (3) a pv entry cannot be allocated without reclaiming another pv entry. */ static boolean_t pmap_enter_pte1(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pt1_entry_t *pte1p; vm_paddr_t pa; uint32_t l1prot; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pte1p = pmap_pte1(pmap, va); if (pte1_is_valid(pte1_load(pte1p))) { CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", __func__, va, pmap); return (FALSE); } if ((m->oflags & VPO_UNMANAGED) == 0) { /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pte1(pmap, va, VM_PAGE_TO_PHYS(m))) { CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", __func__, va, pmap); return (FALSE); } } /* * Increment counters. */ pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE; /* * Map the section. * * QQQ: Why VM_PROT_WRITE is not evaluated and the mapping is * made readonly? */ pa = VM_PAGE_TO_PHYS(m); l1prot = PTE1_RO | PTE1_NM; if (va < VM_MAXUSER_ADDRESS) l1prot |= PTE1_U | PTE1_NG; if ((prot & VM_PROT_EXECUTE) == 0) l1prot |= PTE1_NX; else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) { /* * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA * is set. QQQ: For more info, see comments in pmap_enter(). */ cache_icache_sync_fresh(va, pa, PTE1_SIZE); } pte1_store(pte1p, PTE1(pa, l1prot, ATTR_TO_L1(vm_page_pte2_attr(m)))); pmap_pte1_mappings++; CTR3(KTR_PMAP, "%s: success for va %#lx in pmap %p", __func__, va, pmap); return (TRUE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; vm_page_t m, mpt2pg; vm_pindex_t diff, psize; PDEBUG(6, printf("%s: pmap %p start %#x end %#x m %p prot %#x\n", __func__, pmap, start, end, m_start, prot)); VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpt2pg = NULL; m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PTE1_OFFSET) == 0 && va + PTE1_SIZE <= end && m->psind == 1 && sp_enabled && pmap_enter_pte1(pmap, va, m, prot)) m = &m[PTE1_SIZE / PAGE_SIZE - 1]; else mpt2pg = pmap_enter_quick_locked(pmap, va, m, prot, mpt2pg); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. 
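 *
 * Concretely, the routine below only acts when addr and size are both
 * 1MB aligned, the object is OBJT_DEVICE or OBJT_SG, and the backing
 * pages are physically contiguous with identical memory attributes; it
 * then installs read/write user sections directly. For example (numbers
 * are illustrative only), a 16MB framebuffer mapped at a 1MB-aligned
 * address consumes 16 PTE1 sections here instead of the 4096 PTE2
 * entries, plus L2 tables, that a page-by-page mapping would need.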
*/ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pt1_entry_t *pte1p; vm_paddr_t pa, pte2_pa; vm_page_t p; vm_memattr_t pat_mode; u_int l1attr, l1prot; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("%s: non-device object", __func__)); if ((addr & PTE1_OFFSET) == 0 && (size & PTE1_OFFSET) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("%s: invalid page %p", __func__, p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 1MB page boundary. */ pte2_pa = VM_PAGE_TO_PHYS(p); if (pte2_pa & PTE1_OFFSET) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = pte2_pa + PAGE_SIZE; pa < pte2_pa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("%s: invalid page %p", __func__, p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 1MB pages. * * QQQ: Well, we are mapping a section, so same condition must * be hold like during promotion. It looks that only RW mapping * is done here, so readonly mapping must be done elsewhere. */ l1prot = PTE1_U | PTE1_NG | PTE1_RW | PTE1_M | PTE1_A; l1attr = ATTR_TO_L1(vm_memattr_to_pte2(pat_mode)); PMAP_LOCK(pmap); for (pa = pte2_pa; pa < pte2_pa + size; pa += PTE1_SIZE) { pte1p = pmap_pte1(pmap, addr); if (!pte1_is_valid(pte1_load(pte1p))) { pte1_store(pte1p, PTE1(pa, l1prot, l1attr)); pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE; pmap_pte1_mappings++; } /* Else continue on if the PTE1 is already valid. */ addr += PTE1_SIZE; } PMAP_UNLOCK(pmap); } } /* * Do the things to protect a 1mpage in a process. */ static void pmap_protect_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva, vm_prot_t prot) { pt1_entry_t npte1, opte1; vm_offset_t eva, va; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PTE1_OFFSET) == 0, ("%s: sva is not 1mpage aligned", __func__)); retry: opte1 = npte1 = pte1_load(pte1p); if (pte1_is_managed(opte1)) { eva = sva + PTE1_SIZE; for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1)); va < eva; va += PAGE_SIZE, m++) if (pte1_is_dirty(opte1)) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) npte1 |= PTE1_RO | PTE1_NM; if ((prot & VM_PROT_EXECUTE) == 0) npte1 |= PTE1_NX; /* * QQQ: Herein, execute permission is never set. * It only can be cleared. So, no icache * syncing is needed. */ if (npte1 != opte1) { if (!pte1_cmpset(pte1p, opte1, npte1)) goto retry; pmap_tlb_flush(pmap, sva); } } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { boolean_t pv_lists_locked; vm_offset_t nextva; pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, opte2, npte2; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) return; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = nextva) { /* * Calculate address for next L2 page table. 
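 *
 * The "nextva < sva" test below guards against 32-bit wraparound when
 * sva lies in the last 1MB of the address space. For example, with
 * sva = 0xfff00000, sva + PTE1_SIZE overflows to 0, pte1_trunc() yields
 * a value smaller than sva, and nextva is clamped to eva instead.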
*/ nextva = pte1_trunc(sva + PTE1_SIZE); if (nextva < sva) nextva = eva; pte1p = pmap_pte1(pmap, sva); pte1 = pte1_load(pte1p); /* * Weed out invalid mappings. Note: we assume that L1 page * page table is always allocated, and in kernel virtual. */ if (pte1 == 0) continue; if (pte1_is_section(pte1)) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + PTE1_SIZE == nextva && eva >= nextva) { pmap_protect_pte1(pmap, pte1p, sva, prot); continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pte1(pmap, pte1p, sva)) { /* * The large page mapping * was destroyed. */ continue; } #ifdef INVARIANTS else { /* Update pte1 after demotion */ pte1 = pte1_load(pte1p); } #endif } } KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" " is not link", __func__, pmap, sva, pte1, pte1p)); /* * Limit our scan to either the end of the va represented * by the current L2 page table page, or to the end of the * range being protected. */ if (nextva > eva) nextva = eva; for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, sva += PAGE_SIZE) { vm_page_t m; retry: opte2 = npte2 = pte2_load(pte2p); if (!pte2_is_valid(opte2)) continue; if ((prot & VM_PROT_WRITE) == 0) { if (pte2_is_managed(opte2) && pte2_is_dirty(opte2)) { m = PHYS_TO_VM_PAGE(pte2_pa(opte2)); vm_page_dirty(m); } npte2 |= PTE2_RO | PTE2_NM; } if ((prot & VM_PROT_EXECUTE) == 0) npte2 |= PTE2_NX; /* * QQQ: Herein, execute permission is never set. * It only can be cleared. So, no icache * syncing is needed. */ if (npte2 != opte2) { if (!pte2_cmpset(pte2p, opte2, npte2)) goto retry; pmap_tlb_flush(pmap, sva); } } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * pmap_pvh_wired_mappings: * * Return the updated number "count" of managed mappings that are wired. */ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count) { pmap_t pmap; pt1_entry_t pte1; pt2_entry_t pte2; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); if (pte1_is_section(pte1)) { if (pte1_is_wired(pte1)) count++; } else { KASSERT(pte1_is_link(pte1), ("%s: pte1 %#x is not link", __func__, pte1)); pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); if (pte2_is_wired(pte2)) count++; } PMAP_UNLOCK(pmap); } sched_unpin(); return (count); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); count = pmap_pvh_wired_mappings(&m->md, count); if ((m->flags & PG_FICTITIOUS) == 0) { count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); } rw_wunlock(&pvh_global_lock); return (count); } /* * Returns TRUE if any of the given mappings were used to modify * physical memory. Otherwise, returns FALSE. Both page and 1mpage * mappings are supported. 
*/ static boolean_t pmap_is_modified_pvh(struct md_page *pvh) { pv_entry_t pv; pt1_entry_t pte1; pt2_entry_t pte2; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); if (pte1_is_section(pte1)) { rv = pte1_is_dirty(pte1); } else { KASSERT(pte1_is_link(pte1), ("%s: pte1 %#x is not link", __func__, pte1)); pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); rv = pte2_is_dirty(pte2); } PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTE2s can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt1_entry_t pte1; pt2_entry_t pte2; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, addr)); if (pte1_is_link(pte1)) { pte2 = pte2_load(pt2map_entry(addr)); rv = !pte2_is_valid(pte2) ; } PMAP_UNLOCK(pmap); return (rv); } /* * Returns TRUE if any of the given mappings were referenced and FALSE * otherwise. Both page and 1mpage mappings are supported. */ static boolean_t pmap_is_referenced_pvh(struct md_page *pvh) { pv_entry_t pv; pt1_entry_t pte1; pt2_entry_t pte2; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); if (pte1_is_section(pte1)) { rv = (pte1 & (PTE1_A | PTE1_V)) == (PTE1_A | PTE1_V); } else { pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); rv = (pte2 & (PTE2_A | PTE2_V)) == (PTE2_A | PTE2_V); } PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); rw_wlock(&pvh_global_lock); rv = pmap_is_referenced_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. 
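 *
 * In this implementation the scan below stops once PMAP_TS_REFERENCED_MAX
 * (5) referenced mappings have been counted, and each pv list is rotated
 * as it is visited so that successive calls start with different mappings
 * and the clearing of reference bits is spread across all of them.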
*/ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; pt1_entry_t *pte1p, opte1; pt2_entry_t *pte2p; vm_paddr_t pa; int rtval = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); pa = VM_PAGE_TO_PHYS(m); pvh = pa_to_pvh(pa); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0 || (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, pv->pv_va); opte1 = pte1_load(pte1p); if ((opte1 & PTE1_A) != 0) { /* * Since this reference bit is shared by 256 4KB pages, * it should not be cleared every time it is tested. * Apply a simple "hash" function on the physical page * number, the virtual section number, and the pmap * address to select one 4KB page out of the 256 * on which testing the reference bit will result * in clearing that bit. This function is designed * to avoid the selection of the same 4KB page * for every 1MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the section is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PTE1_SHIFT) ^ (uintptr_t)pmap) & (NPTE2_IN_PG - 1)) == 0 && !pte1_is_wired(opte1)) { pte1_clear_bit(pte1p, PTE1_A); pmap_tlb_flush(pmap, pv->pv_va); } rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); } if (rtval >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, pv->pv_va); KASSERT(pte1_is_link(pte1_load(pte1p)), ("%s: not found a link in page %p's pv list", __func__, m)); pte2p = pmap_pte2_quick(pmap, pv->pv_va); if ((pte2_load(pte2p) & PTE2_A) != 0) { pte2_clear_bit(pte2p, PTE2_A); pmap_tlb_flush(pmap, pv->pv_va); rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval < PMAP_TS_REFERENCED_MAX); out: sched_unpin(); rw_wunlock(&pvh_global_lock); return (rtval); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t nextva; pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; boolean_t pv_lists_locked; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = nextva) { nextva = pte1_trunc(sva + PTE1_SIZE); if (nextva < sva) nextva = eva; pte1p = pmap_pte1(pmap, sva); pte1 = pte1_load(pte1p); /* * Weed out invalid mappings. 
Note: we assume that L1 page * page table is always allocated, and in kernel virtual. */ if (pte1 == 0) continue; if (pte1_is_section(pte1)) { if (!pte1_is_wired(pte1)) panic("%s: pte1 %#x not wired", __func__, pte1); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + PTE1_SIZE == nextva && eva >= nextva) { pte1_clear_bit(pte1p, PTE1_W); pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); /* Repeat sva. */ goto resume; } sched_pin(); } if (!pmap_demote_pte1(pmap, pte1p, sva)) panic("%s: demotion failed", __func__); #ifdef INVARIANTS else { /* Update pte1 after demotion */ pte1 = pte1_load(pte1p); } #endif } } KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" " is not link", __func__, pmap, sva, pte1, pte1p)); /* * Limit our scan to either the end of the va represented * by the current L2 page table page, or to the end of the * range being protected. */ if (nextva > eva) nextva = eva; for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, sva += PAGE_SIZE) { pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2)) continue; if (!pte2_is_wired(pte2)) panic("%s: pte2 %#x is missing PTE2_W", __func__, pte2); /* * PTE2_W must be cleared atomically. Although the pmap * lock synchronizes access to PTE2_W, another processor * could be changing PTE2_NM and/or PTE2_A concurrently. */ pte2_clear_bit(pte2p, PTE2_W); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pt1_entry_t *pte1p; pt2_entry_t *pte2p, opte2; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, va); if (!(pte1_load(pte1p) & PTE1_RO)) (void)pmap_demote_pte1(pmap, pte1p, va); PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, pv->pv_va); KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" " a section in page %p's pv list", __func__, m)); pte2p = pmap_pte2_quick(pmap, pv->pv_va); retry: opte2 = pte2_load(pte2p); if (!(opte2 & PTE2_RO)) { if (!pte2_cmpset(pte2p, opte2, opte2 | (PTE2_RO | PTE2_NM))) goto retry; if (pte2_is_dirty(opte2)) vm_page_dirty(m); pmap_tlb_flush(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. 
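 *
 * Only MADV_DONTNEED and MADV_FREE are acted on; any other advice returns
 * immediately. For MADV_DONTNEED a dirty mapping also dirties the vm_page
 * before its modify bit is cleared, so a later pmap_is_modified() can be
 * avoided. A request typically arrives here from a user madvise(2) call
 * by way of vm_map_madvise(), e.g.:
 *
 *	madvise(buf, len, MADV_DONTNEED);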
*/ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { pt1_entry_t *pte1p, opte1; pt2_entry_t *pte2p, pte2; vm_offset_t pdnxt; vm_page_t m; boolean_t pv_lists_locked; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = pte1_trunc(sva + PTE1_SIZE); if (pdnxt < sva) pdnxt = eva; pte1p = pmap_pte1(pmap, sva); opte1 = pte1_load(pte1p); if (!pte1_is_valid(opte1)) /* XXX */ continue; else if (pte1_is_section(opte1)) { if (!pte1_is_managed(opte1)) continue; if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pte1(pmap, pte1p, sva)) { /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying L2 page * table is fully populated, this removal never * frees a L2 page table page. */ if (!pte1_is_wired(opte1)) { pte2p = pmap_pte2_quick(pmap, sva); KASSERT(pte2_is_valid(pte2_load(pte2p)), ("%s: invalid PTE2", __func__)); pmap_remove_pte2(pmap, pte2p, sva, NULL); } } if (pdnxt > eva) pdnxt = eva; for (pte2p = pmap_pte2_quick(pmap, sva); sva != pdnxt; pte2p++, sva += PAGE_SIZE) { pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2) || !pte2_is_managed(pte2)) continue; else if (pte2_is_dirty(pte2)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. */ m = PHYS_TO_VM_PAGE(pte2_pa(pte2)); vm_page_dirty(m); } pte2_set_bit(pte2p, PTE2_NM); pte2_clear_bit(pte2p, PTE2_A); } else if ((pte2 & PTE2_A) != 0) pte2_clear_bit(pte2p, PTE2_A); else continue; pmap_tlb_flush(pmap, sva); } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pt1_entry_t *pte1p, opte1; pt2_entry_t *pte2p, opte2; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("%s: page %p is exclusive busy", __func__, m)); /* * If the page is not PGA_WRITEABLE, then no PTE2s can have PTE2_NM * cleared. If the object containing the page is locked and the page * is not exclusive busied, then PGA_WRITEABLE cannot be concurrently * set. */ if ((m->flags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, va); opte1 = pte1_load(pte1p); if (!(opte1 & PTE1_RO)) { if (pmap_demote_pte1(pmap, pte1p, va) && !pte1_is_wired(opte1)) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. 
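 *
 * Note that after the demotion only the PTE2 covering this particular
 * page is switched to read-only with its modify bit cleared (the va
 * adjustment below selects it); the other 255 pages of the former
 * section keep their permissions, which leaves the section eligible for
 * a later repromotion.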
*/ va += VM_PAGE_TO_PHYS(m) - pte1_pa(opte1); pte2p = pmap_pte2_quick(pmap, va); opte2 = pte2_load(pte2p); if ((opte2 & PTE2_V)) { pte2_set_bit(pte2p, PTE2_NM | PTE2_RO); vm_page_dirty(m); pmap_tlb_flush(pmap, va); } } } PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, pv->pv_va); KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" " a section in page %p's pv list", __func__, m)); pte2p = pmap_pte2_quick(pmap, pv->pv_va); if (pte2_is_dirty(pte2_load(pte2p))) { pte2_set_bit(pte2p, PTE2_NM); pmap_tlb_flush(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { struct sysmaps *sysmaps; vm_memattr_t oma; vm_paddr_t pa; oma = m->md.pat_mode; m->md.pat_mode = ma; CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m, VM_PAGE_TO_PHYS(m), oma, ma); if ((m->flags & PG_FICTITIOUS) != 0) return; #if 0 /* * If "m" is a normal page, flush it from the cache. * * First, try to find an existing mapping of the page by sf * buffer. sf_buf_invalidate_cache() modifies mapping and * flushes the cache. */ if (sf_buf_invalidate_cache(m, oma)) return; #endif /* * If page is not mapped by sf buffer, map the page * transient and do invalidation. */ if (ma != oma) { pa = VM_PAGE_TO_PHYS(m); sched_pin(); sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("%s: CMAP2 busy", __func__); pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(pa, PTE2_AP_KRW, vm_memattr_to_pte2(ma))); dcache_wbinv_poc((vm_offset_t)sysmaps->CADDR2, pa, PAGE_SIZE); pte2_clear(sysmaps->CMAP2); tlb_flush((vm_offset_t)sysmaps->CADDR2); sched_unpin(); mtx_unlock(&sysmaps->lock); } } /* * Miscellaneous support routines follow */ /* * Returns TRUE if the given page is mapped individually or as part of * a 1mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. 
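 *
 * The per-CPU sysmaps window provides the transient KVA; the pattern
 * used by this and the following helpers is roughly:
 *
 *	sched_pin();                       stay on this CPU's window
 *	mtx_lock(&sysmaps->lock);
 *	pte2_store(sysmaps->CMAP2, ...);   map the page at CADDR2
 *	... zero or copy through CADDR2 ...
 *	pte2_clear(sysmaps->CMAP2);
 *	tlb_flush((vm_offset_t)sysmaps->CADDR2);
 *	sched_unpin();
 *	mtx_unlock(&sysmaps->lock);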
*/ void pmap_zero_page(vm_page_t m) { struct sysmaps *sysmaps; sched_pin(); sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (pte2_load(sysmaps->CMAP2) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); pagezero(sysmaps->CADDR2); pte2_clear(sysmaps->CMAP2); tlb_flush((vm_offset_t)sysmaps->CADDR2); sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { struct sysmaps *sysmaps; sched_pin(); sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (pte2_load(sysmaps->CMAP2) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); if (off == 0 && size == PAGE_SIZE) pagezero(sysmaps->CADDR2); else bzero(sysmaps->CADDR2 + off, size); pte2_clear(sysmaps->CMAP2); tlb_flush((vm_offset_t)sysmaps->CADDR2); sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (pte2_load(CMAP3) != 0) panic("%s: CMAP3 busy", __func__); sched_pin(); pte2_store(CMAP3, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); pagezero(CADDR3); pte2_clear(CMAP3); tlb_flush((vm_offset_t)CADDR3); sched_unpin(); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
*/ void pmap_copy_page(vm_page_t src, vm_page_t dst) { struct sysmaps *sysmaps; sched_pin(); sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (pte2_load(sysmaps->CMAP1) != 0) panic("%s: CMAP1 busy", __func__); if (pte2_load(sysmaps->CMAP2) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(sysmaps->CMAP1, PTE2_KERN_NG(VM_PAGE_TO_PHYS(src), PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(src))); pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(dst), PTE2_AP_KRW, vm_page_pte2_attr(dst))); bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); pte2_clear(sysmaps->CMAP1); tlb_flush((vm_offset_t)sysmaps->CADDR1); pte2_clear(sysmaps->CMAP2); tlb_flush((vm_offset_t)sysmaps->CADDR2); sched_unpin(); mtx_unlock(&sysmaps->lock); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { struct sysmaps *sysmaps; vm_page_t a_pg, b_pg; char *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; sched_pin(); sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP1 != 0) panic("pmap_copy_pages: CMAP1 busy"); if (*sysmaps->CMAP2 != 0) panic("pmap_copy_pages: CMAP2 busy"); while (xfersize > 0) { a_pg = ma[a_offset >> PAGE_SHIFT]; a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); b_pg = mb[b_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); pte2_store(sysmaps->CMAP1, PTE2_KERN_NG(VM_PAGE_TO_PHYS(a_pg), PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(a_pg))); tlb_flush_local((vm_offset_t)sysmaps->CADDR1); pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(b_pg), PTE2_AP_KRW, vm_page_pte2_attr(b_pg))); tlb_flush_local((vm_offset_t)sysmaps->CADDR2); a_cp = sysmaps->CADDR1 + a_pg_offset; b_cp = sysmaps->CADDR2 + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } pte2_clear(sysmaps->CMAP1); tlb_flush((vm_offset_t)sysmaps->CADDR1); pte2_clear(sysmaps->CMAP2); tlb_flush((vm_offset_t)sysmaps->CADDR2); sched_unpin(); mtx_unlock(&sysmaps->lock); } vm_offset_t pmap_quick_enter_page(vm_page_t m) { pt2_entry_t *pte2p; vm_offset_t qmap_addr; critical_enter(); qmap_addr = PCPU_GET(qmap_addr); pte2p = pt2map_entry(qmap_addr); KASSERT(pte2_load(pte2p) == 0, ("%s: PTE2 busy", __func__)); pte2_store(pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); return (qmap_addr); } void pmap_quick_remove_page(vm_offset_t addr) { pt2_entry_t *pte2p; vm_offset_t qmap_addr; qmap_addr = PCPU_GET(qmap_addr); pte2p = pt2map_entry(qmap_addr); KASSERT(addr == qmap_addr, ("%s: invalid address", __func__)); KASSERT(pte2_load(pte2p) != 0, ("%s: PTE2 not in use", __func__)); pte2_clear(pte2p); tlb_flush(qmap_addr); critical_exit(); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
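 *
 * This implementation copies only when dst_addr equals src_addr and the
 * source pmap is the current one (typically the fork path). Sections are
 * shared by copying the PTE1 with PTE1_W cleared, and managed 4KB
 * mappings are copied with the wired and accessed bits cleared and
 * PTE2_NM set, mirroring a freshly entered mapping.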
*/ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { struct spglist free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t nextva; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; rw_wlock(&pvh_global_lock); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } sched_pin(); for (addr = src_addr; addr < end_addr; addr = nextva) { pt2_entry_t *src_pte2p, *dst_pte2p; vm_page_t dst_mpt2pg, src_mpt2pg; pt1_entry_t src_pte1; u_int pte1_idx; KASSERT(addr < VM_MAXUSER_ADDRESS, ("%s: invalid to pmap_copy page tables", __func__)); nextva = pte1_trunc(addr + PTE1_SIZE); if (nextva < addr) nextva = end_addr; pte1_idx = pte1_index(addr); src_pte1 = src_pmap->pm_pt1[pte1_idx]; if (pte1_is_section(src_pte1)) { if ((addr & PTE1_OFFSET) != 0 || (addr + PTE1_SIZE) > end_addr) continue; if (dst_pmap->pm_pt1[pte1_idx] == 0 && (!pte1_is_managed(src_pte1) || pmap_pv_insert_pte1(dst_pmap, addr, pte1_pa(src_pte1)))) { dst_pmap->pm_pt1[pte1_idx] = src_pte1 & ~PTE1_W; dst_pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE; pmap_pte1_mappings++; } continue; } else if (!pte1_is_link(src_pte1)) continue; src_mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(src_pte1)); /* * We leave PT2s to be linked from PT1 even if they are not * referenced until all PT2s in a page are without reference. * * QQQ: It could be changed ... */ #if 0 /* single_pt2_link_is_cleared */ KASSERT(pt2_wirecount_get(src_mpt2pg, pte1_idx) > 0, ("%s: source page table page is unused", __func__)); #else if (pt2_wirecount_get(src_mpt2pg, pte1_idx) == 0) continue; #endif if (nextva > end_addr) nextva = end_addr; src_pte2p = pt2map_entry(addr); while (addr < nextva) { pt2_entry_t temp_pte2; temp_pte2 = pte2_load(src_pte2p); /* * we only virtual copy managed pages */ if (pte2_is_managed(temp_pte2)) { dst_mpt2pg = pmap_allocpte2(dst_pmap, addr, PMAP_ENTER_NOSLEEP); if (dst_mpt2pg == NULL) goto out; dst_pte2p = pmap_pte2_quick(dst_pmap, addr); if (!pte2_is_valid(pte2_load(dst_pte2p)) && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(pte2_pa(temp_pte2)))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ temp_pte2 &= ~(PTE2_W | PTE2_A); temp_pte2 |= PTE2_NM; pte2_store(dst_pte2p, temp_pte2); dst_pmap->pm_stats.resident_count++; } else { SLIST_INIT(&free); if (pmap_unwire_pt2(dst_pmap, addr, dst_mpt2pg, &free)) { pmap_tlb_flush(dst_pmap, addr); pmap_free_zero_pages(&free); } goto out; } if (pt2_wirecount_get(dst_mpt2pg, pte1_idx) >= pt2_wirecount_get(src_mpt2pg, pte1_idx)) break; } addr += PAGE_SIZE; src_pte2p++; } } out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more section mappings. 
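 *
 * A worked example (values are illustrative only): with PTE1_SIZE = 1MB,
 * suppose the object offset gives pte1_offset = 0x40000 and the caller
 * proposed *addr = 0x20180000. Since (*addr & PTE1_OFFSET) = 0x80000 is
 * greater than 0x40000, the routine below rounds up and adjusts *addr to
 * 0x20240000, so that the offset within a 1MB virtual section matches
 * the offset within the object; the adjustment is skipped unless the
 * mapping is large enough to contain at least one fully aligned 1MB run.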
*/ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t pte1_offset; if (size < PTE1_SIZE) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); pte1_offset = offset & PTE1_OFFSET; if (size - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) < PTE1_SIZE || (*addr & PTE1_OFFSET) == pte1_offset) return; if ((*addr & PTE1_OFFSET) < pte1_offset) *addr = pte1_trunc(*addr) + pte1_offset; else *addr = pte1_roundup(*addr) + pte1_offset; } void pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int cpuid, ttb; PDEBUG(9, printf("%s: td = %08x\n", __func__, (uint32_t)td)); critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); cpuid = PCPU_GET(cpuid); #if defined(SMP) CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); CPU_SET(cpuid, &pmap->pm_active); #endif ttb = pmap_ttb_get(pmap); /* * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_pagedir = ttb; cp15_ttbr_set(ttb); PCPU_SET(curpmap, pmap); critical_exit(); } /* * Perform the pmap work for mincore. */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; vm_paddr_t pa; boolean_t managed; int val; PMAP_LOCK(pmap); retry: pte1p = pmap_pte1(pmap, addr); pte1 = pte1_load(pte1p); if (pte1_is_section(pte1)) { pa = trunc_page(pte1_pa(pte1) | (addr & PTE1_OFFSET)); managed = pte1_is_managed(pte1); val = MINCORE_SUPER | MINCORE_INCORE; if (pte1_is_dirty(pte1)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if (pte1 & PTE1_A) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, addr); pte2 = pte2_load(pte2p); pmap_pte2_release(pte2p); pa = pte2_pa(pte2); managed = pte2_is_managed(pte2); val = MINCORE_INCORE; if (pte2_is_dirty(pte2)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if (pte2 & PTE2_A) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } else { managed = FALSE; val = 0; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa) { vm_offset_t sva; uint32_t l2attr; KASSERT((size & PAGE_MASK) == 0, ("%s: device mapping not page-sized", __func__)); sva = va; l2attr = vm_memattr_to_pte2(VM_MEMATTR_DEVICE); while (size != 0) { pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, l2attr); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } tlb_flush_range(sva, va - sva); } void pmap_kremove_device(vm_offset_t va, vm_size_t size) { vm_offset_t sva; KASSERT((size & PAGE_MASK) == 0, ("%s: device mapping not page-sized", __func__)); sva = va; while (size != 0) { pmap_kremove(va); va += PAGE_SIZE; size -= PAGE_SIZE; } tlb_flush_range(sva, va - sva); } void pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb) { pcb->pcb_pagedir = pmap_ttb_get(pmap); } /* * Clean L1 data cache range by physical address. * The range must be within a single page. 
*/ static void pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, uint32_t attr) { struct sysmaps *sysmaps; KASSERT(((pa & PAGE_MASK) + size) <= PAGE_SIZE, ("%s: not on single page", __func__)); sched_pin(); sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP3) panic("%s: CMAP3 busy", __func__); pte2_store(sysmaps->CMAP3, PTE2_KERN_NG(pa, PTE2_AP_KRW, attr)); dcache_wb_pou((vm_offset_t)sysmaps->CADDR3 + (pa & PAGE_MASK), size); pte2_clear(sysmaps->CMAP3); tlb_flush((vm_offset_t)sysmaps->CADDR3); sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * Sync instruction cache range which is not mapped yet. */ void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size) { uint32_t len, offset; vm_page_t m; /* Write back d-cache on given address range. */ offset = pa & PAGE_MASK; for ( ; size != 0; size -= len, pa += len, offset = 0) { len = min(PAGE_SIZE - offset, size); m = PHYS_TO_VM_PAGE(pa); KASSERT(m != NULL, ("%s: vm_page_t is null for %#x", __func__, pa)); pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m)); } /* * I-cache is VIPT. Only way how to flush all virtual mappings * on given physical address is to invalidate all i-cache. */ icache_inv_all(); } void pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t size) { /* Write back d-cache on given address range. */ if (va >= VM_MIN_KERNEL_ADDRESS) { dcache_wb_pou(va, size); } else { uint32_t len, offset; vm_paddr_t pa; vm_page_t m; offset = va & PAGE_MASK; for ( ; size != 0; size -= len, va += len, offset = 0) { pa = pmap_extract(pmap, va); /* offset is preserved */ len = min(PAGE_SIZE - offset, size); m = PHYS_TO_VM_PAGE(pa); KASSERT(m != NULL, ("%s: vm_page_t is null for %#x", __func__, pa)); pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m)); } } /* * I-cache is VIPT. Only way how to flush all virtual mappings * on given physical address is to invalidate all i-cache. */ icache_inv_all(); } /* * The implementation of pmap_fault() uses IN_RANGE2() macro which * depends on the fact that given range size is a power of 2. */ CTASSERT(powerof2(NB_IN_PT1)); CTASSERT(powerof2(PT2MAP_SIZE)); #define IN_RANGE2(addr, start, size) \ ((vm_offset_t)(start) == ((vm_offset_t)(addr) & ~((size) - 1))) /* * Handle access and R/W emulation faults. */ int pmap_fault(pmap_t pmap, vm_offset_t far, uint32_t fsr, int idx, bool usermode) { pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; if (pmap == NULL) pmap = kernel_pmap; /* * In kernel, we should never get abort with FAR which is in range of * pmap->pm_pt1 or PT2MAP address spaces. If it happens, stop here * and print out a useful abort message and even get to the debugger * otherwise it likely ends with never ending loop of aborts. */ if (__predict_false(IN_RANGE2(far, pmap->pm_pt1, NB_IN_PT1))) { /* * All L1 tables should always be mapped and present. * However, we check only current one herein. For user mode, * only permission abort from malicious user is not fatal. * And alignment abort as it may have higher priority. */ if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_PERM_L2)) { CTR4(KTR_PMAP, "%s: pmap %#x pm_pt1 %#x far %#x", __func__, pmap, pmap->pm_pt1, far); panic("%s: pm_pt1 abort", __func__); } return (KERN_INVALID_ADDRESS); } if (__predict_false(IN_RANGE2(far, PT2MAP, PT2MAP_SIZE))) { /* * PT2MAP should be always mapped and present in current * L1 table. However, only existing L2 tables are mapped * in PT2MAP. For user mode, only L2 translation abort and * permission abort from malicious user is not fatal. 
* And alignment abort as it may have higher priority. */ if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_TRAN_L2 && idx != FAULT_PERM_L2)) { CTR4(KTR_PMAP, "%s: pmap %#x PT2MAP %#x far %#x", __func__, pmap, PT2MAP, far); panic("%s: PT2MAP abort", __func__); } return (KERN_INVALID_ADDRESS); } /* * Accesss bits for page and section. Note that the entry * is not in TLB yet, so TLB flush is not necessary. * * QQQ: This is hardware emulation, we do not call userret() * for aborts from user mode. * We do not lock PMAP, so cmpset() is a need. Hopefully, * no one removes the mapping when we are here. */ if (idx == FAULT_ACCESS_L2) { pte2p = pt2map_entry(far); pte2_seta: pte2 = pte2_load(pte2p); if (pte2_is_valid(pte2)) { if (!pte2_cmpset(pte2p, pte2, pte2 | PTE2_A)) { goto pte2_seta; } return (KERN_SUCCESS); } } if (idx == FAULT_ACCESS_L1) { pte1p = pmap_pte1(pmap, far); pte1_seta: pte1 = pte1_load(pte1p); if (pte1_is_section(pte1)) { if (!pte1_cmpset(pte1p, pte1, pte1 | PTE1_A)) { goto pte1_seta; } return (KERN_SUCCESS); } } /* * Handle modify bits for page and section. Note that the modify * bit is emulated by software. So PTEx_RO is software read only * bit and PTEx_NM flag is real hardware read only bit. * * QQQ: This is hardware emulation, we do not call userret() * for aborts from user mode. * We do not lock PMAP, so cmpset() is a need. Hopefully, * no one removes the mapping when we are here. */ if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L2)) { pte2p = pt2map_entry(far); pte2_setrw: pte2 = pte2_load(pte2p); if (pte2_is_valid(pte2) && !(pte2 & PTE2_RO) && (pte2 & PTE2_NM)) { if (!pte2_cmpset(pte2p, pte2, pte2 & ~PTE2_NM)) { goto pte2_setrw; } tlb_flush(trunc_page(far)); return (KERN_SUCCESS); } } if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L1)) { pte1p = pmap_pte1(pmap, far); pte1_setrw: pte1 = pte1_load(pte1p); if (pte1_is_section(pte1) && !(pte1 & PTE1_RO) && (pte1 & PTE1_NM)) { if (!pte1_cmpset(pte1p, pte1, pte1 & ~PTE1_NM)) { goto pte1_setrw; } tlb_flush(pte1_trunc(far)); return (KERN_SUCCESS); } } /* * QQQ: The previous code, mainly fast handling of access and * modify bits aborts, could be moved to ASM. Now we are * starting to deal with not fast aborts. */ #ifdef INVARIANTS /* * Read an entry in PT2TAB associated with both pmap and far. * It's safe because PT2TAB is always mapped. * * QQQ: We do not lock PMAP, so false positives could happen if * the mapping is removed concurrently. */ pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, far)); if (pte2_is_valid(pte2)) { /* * Now, when we know that L2 page table is allocated, * we can use PT2MAP to get L2 page table entry. */ pte2 = pte2_load(pt2map_entry(far)); if (pte2_is_valid(pte2)) { /* * If L2 page table entry is valid, make sure that * L1 page table entry is valid too. Note that we * leave L2 page entries untouched when promoted. */ pte1 = pte1_load(pmap_pte1(pmap, far)); if (!pte1_is_valid(pte1)) { panic("%s: missing L1 page entry (%p, %#x)", __func__, pmap, far); } } } #endif return (KERN_FAILURE); } #if defined(PMAP_DEBUG) /* * Reusing of KVA used in pmap_zero_page function !!! 
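 *
 * That is, pmap_zero_page_check() below borrows the same per-CPU
 * CMAP2/CADDR2 window and therefore follows the same sched_pin() and
 * sysmaps-lock protocol as pmap_zero_page().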
*/ static void pmap_zero_page_check(vm_page_t m) { uint32_t *p, *end; struct sysmaps *sysmaps; sched_pin(); sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (pte2_load(sysmaps->CMAP2) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(sysmaps->CMAP2, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); end = (uint32_t*)(sysmaps->CADDR2 + PAGE_SIZE); for (p = (uint32_t*)sysmaps->CADDR2; p < end; p++) if (*p != 0) panic("%s: page %p not zero, va: %p", __func__, m, sysmaps->CADDR2); pte2_clear(sysmaps->CMAP2); tlb_flush((vm_offset_t)sysmaps->CADDR2); sched_unpin(); mtx_unlock(&sysmaps->lock); } int pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte2 = 0; int i, j, index; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid || p->p_vmspace == NULL) continue; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPTE1_IN_PT1; i++) { pt1_entry_t pte1; pt2_entry_t *pte2p, pte2; vm_offset_t base, va; vm_paddr_t pa; vm_page_t m; base = i << PTE1_SHIFT; pte1 = pte1_load(&pmap->pm_pt1[i]); if (pte1_is_section(pte1)) { /* * QQQ: Do something here! */ } else if (pte1_is_link(pte1)) { for (j = 0; j < NPTE2_IN_PT2; j++) { va = base + (j << PAGE_SHIFT); if (va >= VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return (npte2); } pte2p = pmap_pte2(pmap, va); pte2 = pte2_load(pte2p); pmap_pte2_release(pte2p); if (!pte2_is_valid(pte2)) continue; pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pa: 0x%x, h: %d, w:" " %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte2++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } sx_sunlock(&allproc_lock); return (npte2); } #endif #ifdef DDB static pt2_entry_t * pmap_pte2_ddb(pmap_t pmap, vm_offset_t va) { pt1_entry_t pte1; vm_paddr_t pt2pg_pa; pte1 = pte1_load(pmap_pte1(pmap, va)); if (!pte1_is_link(pte1)) return (NULL); if (pmap_is_current(pmap)) return (pt2map_entry(va)); /* Note that L2 page table size is not equal to PAGE_SIZE. 
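 *
 * An ARMv6 short-descriptor L2 table holds 256 four-byte entries (1KB),
 * so four L2 tables share one 4KB page and a page holds NPTE2_IN_PG
 * PTE2s. The code below therefore maps the whole containing page through
 * PMAP3/PADDR3 and indexes it with arm32_btop(va) & (NPTE2_IN_PG - 1).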
*/ pt2pg_pa = trunc_page(pte1_link_pa(pte1)); if (pte2_pa(pte2_load(PMAP3)) != pt2pg_pa) { pte2_store(PMAP3, PTE2_KPT(pt2pg_pa)); #ifdef SMP PMAP3cpu = PCPU_GET(cpuid); #endif tlb_flush_local((vm_offset_t)PADDR3); } #ifdef SMP else if (PMAP3cpu != PCPU_GET(cpuid)) { PMAP3cpu = PCPU_GET(cpuid); tlb_flush_local((vm_offset_t)PADDR3); } #endif return (PADDR3 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); } static void dump_pmap(pmap_t pmap) { printf("pmap %p\n", pmap); printf(" pm_pt1: %p\n", pmap->pm_pt1); printf(" pm_pt2tab: %p\n", pmap->pm_pt2tab); printf(" pm_active: 0x%08lX\n", pmap->pm_active.__bits[0]); } DB_SHOW_COMMAND(pmaps, pmap_list_pmaps) { pmap_t pmap; LIST_FOREACH(pmap, &allpmaps, pm_list) { dump_pmap(pmap); } } static int pte2_class(pt2_entry_t pte2) { int cls; cls = (pte2 >> 2) & 0x03; cls |= (pte2 >> 4) & 0x04; return (cls); } static void dump_section(pmap_t pmap, uint32_t pte1_idx) { } static void dump_link(pmap_t pmap, uint32_t pte1_idx, boolean_t invalid_ok) { uint32_t i; vm_offset_t va; pt2_entry_t *pte2p, pte2; vm_page_t m; va = pte1_idx << PTE1_SHIFT; pte2p = pmap_pte2_ddb(pmap, va); for (i = 0; i < NPTE2_IN_PT2; i++, pte2p++, va += PAGE_SIZE) { pte2 = pte2_load(pte2p); if (pte2 == 0) continue; if (!pte2_is_valid(pte2)) { printf(" 0x%08X: 0x%08X", va, pte2); if (!invalid_ok) printf(" - not valid !!!"); printf("\n"); continue; } m = PHYS_TO_VM_PAGE(pte2_pa(pte2)); printf(" 0x%08X: 0x%08X, TEX%d, s:%d, g:%d, m:%p", va , pte2, pte2_class(pte2), !!(pte2 & PTE2_S), !(pte2 & PTE2_NG), m); if (m != NULL) { printf(" v:%d h:%d w:%d f:0x%04X\n", m->valid, m->hold_count, m->wire_count, m->flags); } else { printf("\n"); } } } static __inline boolean_t is_pv_chunk_space(vm_offset_t va) { if ((((vm_offset_t)pv_chunkbase) <= va) && (va < ((vm_offset_t)pv_chunkbase + PAGE_SIZE * pv_maxchunks))) return (TRUE); return (FALSE); } DB_SHOW_COMMAND(pmap, pmap_pmap_print) { /* XXX convert args. */ pmap_t pmap = (pmap_t)addr; pt1_entry_t pte1; pt2_entry_t pte2; vm_offset_t va, eva; vm_page_t m; uint32_t i; boolean_t invalid_ok, dump_link_ok, dump_pv_chunk; if (have_addr) { pmap_t pm; LIST_FOREACH(pm, &allpmaps, pm_list) if (pm == pmap) break; if (pm == NULL) { printf("given pmap %p is not in allpmaps list\n", pmap); return; } } else pmap = PCPU_GET(curpmap); eva = (modif[0] == 'u') ? VM_MAXUSER_ADDRESS : 0xFFFFFFFF; dump_pv_chunk = FALSE; /* XXX evaluate from modif[] */ printf("pmap: 0x%08X\n", (uint32_t)pmap); printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP); printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab); for(i = 0; i < NPTE1_IN_PT1; i++) { pte1 = pte1_load(&pmap->pm_pt1[i]); if (pte1 == 0) continue; va = i << PTE1_SHIFT; if (va >= eva) break; if (pte1_is_section(pte1)) { printf("0x%08X: Section 0x%08X, s:%d g:%d\n", va, pte1, !!(pte1 & PTE1_S), !(pte1 & PTE1_NG)); dump_section(pmap, i); } else if (pte1_is_link(pte1)) { dump_link_ok = TRUE; invalid_ok = FALSE; pte2 = pte2_load(pmap_pt2tab_entry(pmap, va)); m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X m: %p", va, pte1, pte2, m); if (is_pv_chunk_space(va)) { printf(" - pv_chunk space"); if (dump_pv_chunk) invalid_ok = TRUE; else dump_link_ok = FALSE; } else if (m != NULL) printf(" w:%d w2:%u", m->wire_count, pt2_wirecount_get(m, pte1_index(va))); if (pte2 == 0) printf(" !!! pt2tab entry is ZERO"); else if (pte2_pa(pte1) != pte2_pa(pte2)) printf(" !!! 
pt2tab entry is DIFFERENT - m: %p", PHYS_TO_VM_PAGE(pte2_pa(pte2))); printf("\n"); if (dump_link_ok) dump_link(pmap, i, invalid_ok); } else printf("0x%08X: Invalid entry 0x%08X\n", va, pte1); } } static void dump_pt2tab(pmap_t pmap) { uint32_t i; pt2_entry_t pte2; vm_offset_t va; vm_paddr_t pa; vm_page_t m; printf("PT2TAB:\n"); for (i = 0; i < PT2TAB_ENTRIES; i++) { pte2 = pte2_load(&pmap->pm_pt2tab[i]); if (!pte2_is_valid(pte2)) continue; va = i << PT2TAB_SHIFT; pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); printf(" 0x%08X: 0x%08X, TEX%d, s:%d, m:%p", va, pte2, pte2_class(pte2), !!(pte2 & PTE2_S), m); if (m != NULL) printf(" , h: %d, w: %d, f: 0x%04X pidx: %lld", m->hold_count, m->wire_count, m->flags, m->pindex); printf("\n"); } } DB_SHOW_COMMAND(pmap_pt2tab, pmap_pt2tab_print) { /* XXX convert args. */ pmap_t pmap = (pmap_t)addr; pt1_entry_t pte1; pt2_entry_t pte2; vm_offset_t va; uint32_t i, start; if (have_addr) { printf("supported only on current pmap\n"); return; } pmap = PCPU_GET(curpmap); printf("curpmap: 0x%08X\n", (uint32_t)pmap); printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP); printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab); start = pte1_index((vm_offset_t)PT2MAP); for (i = start; i < (start + NPT2_IN_PT2TAB); i++) { pte1 = pte1_load(&pmap->pm_pt1[i]); if (pte1 == 0) continue; va = i << PTE1_SHIFT; if (pte1_is_section(pte1)) { printf("0x%08X: Section 0x%08X, s:%d\n", va, pte1, !!(pte1 & PTE1_S)); dump_section(pmap, i); } else if (pte1_is_link(pte1)) { pte2 = pte2_load(pmap_pt2tab_entry(pmap, va)); printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X\n", va, pte1, pte2); if (pte2 == 0) printf(" !!! pt2tab entry is ZERO\n"); } else printf("0x%08X: Invalid entry 0x%08X\n", va, pte1); } dump_pt2tab(pmap); } #endif Index: head/sys/arm/at91/at91_mci.c =================================================================== --- head/sys/arm/at91/at91_mci.c (revision 298054) +++ head/sys/arm/at91/at91_mci.c (revision 298055) @@ -1,1415 +1,1415 @@ /*- * Copyright (c) 2006 Bernd Walter. All rights reserved. * Copyright (c) 2006 M. Warner Losh. All rights reserved. * Copyright (c) 2010 Greg Ansley. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #include "mmcbr_if.h" #include "opt_at91.h" /* * About running the MCI bus above 25MHz * * Historically, the MCI bus has been run at 30MHz on systems with a 60MHz * master clock, in part due to a bug in dev/mmc.c making always request * 30MHz, and in part over clocking the bus because 15MHz was too slow. * Fixing that bug causes the mmc driver to request a 25MHz clock (as it * should) and the logic in at91_mci_update_ios() picks the highest speed that * doesn't exceed that limit. With a 60MHz MCK that would be 15MHz, and * that's a real performance buzzkill when you've been getting away with 30MHz * all along. * * By defining AT91_MCI_ALLOW_OVERCLOCK (or setting the allow_overclock=1 * device hint or sysctl) you can enable logic in at91_mci_update_ios() to * overlcock the SD bus a little by running it at MCK / 2 when the requested * speed is 25MHz and the next highest speed is 15MHz or less. This appears * to work on virtually all SD cards, since it is what this driver has been * doing prior to the introduction of this option, where the overclocking vs * underclocking decision was automaticly "overclock". Modern SD cards can * run at 45mhz/1-bit in standard mode (high speed mode enable commands not * sent) without problems. * * Speaking of high-speed mode, the rm9200 manual says the MCI device supports * the SD v1.0 specification and can run up to 50MHz. This is interesting in * that the SD v1.0 spec caps the speed at 25MHz; high speed mode was added in * the v1.10 spec. Furthermore, high speed mode doesn't just crank up the * clock, it alters the signal timing. The rm9200 MCI device doesn't support * these altered timings. So while speeds over 25MHz may work, they only work * in what the SD spec calls "default" speed mode, and it amounts to violating * the spec by overclocking the bus. * * If you also enable 4-wire mode it's possible transfers faster than 25MHz * will fail. On the AT91RM9200, due to bugs in the bus contention logic, if * you have the USB host device and OHCI driver enabled will fail. Even * underclocking to 15MHz, intermittant overrun and underrun errors occur. * Note that you don't even need to have usb devices attached to the system, * the errors begin to occur as soon as the OHCI driver sets the register bit * to enable periodic transfers. It appears (based on brief investigation) * that the usb host controller uses so much ASB bandwidth that sometimes the * DMA for MCI transfers doesn't get a bus grant in time and data gets * dropped. Adding even a modicum of network activity changes the symptom * from intermittant to very frequent. Members of the AT91SAM9 family have * corrected this problem, or are at least better about their use of the bus. */ #ifndef AT91_MCI_ALLOW_OVERCLOCK #define AT91_MCI_ALLOW_OVERCLOCK 1 #endif /* * Allocate 2 bounce buffers we'll use to endian-swap the data due to the rm9200 * erratum. We use a pair of buffers because when reading that lets us begin * endian-swapping the data in the first buffer while the DMA is reading into * the second buffer. 
(We can't use the same trick for writing because we might * not get all the data in the 2nd buffer swapped before the hardware needs it; * dealing with that would add complexity to the driver.) * * The buffers are sized at 16K each due to the way the busdma cache sync * operations work on arm. A dcache_inv_range() operation on a range larger * than 16K gets turned into a dcache_wbinv_all(). That needlessly flushes the * entire data cache, impacting overall system performance. */ #define BBCOUNT 2 #define BBSIZE (16*1024) #define MAX_BLOCKS ((BBSIZE*BBCOUNT)/512) static int mci_debug; struct at91_mci_softc { void *intrhand; /* Interrupt handle */ device_t dev; int sc_cap; #define CAP_HAS_4WIRE 1 /* Has 4 wire bus */ #define CAP_NEEDS_BYTESWAP 2 /* broken hardware needing bounce */ #define CAP_MCI1_REV2XX 4 /* MCI 1 rev 2.x */ int flags; #define PENDING_CMD 0x01 #define PENDING_STOP 0x02 #define CMD_MULTIREAD 0x10 #define CMD_MULTIWRITE 0x20 int has_4wire; int allow_overclock; struct resource *irq_res; /* IRQ resource */ struct resource *mem_res; /* Memory resource */ struct mtx sc_mtx; bus_dma_tag_t dmatag; struct mmc_host host; int bus_busy; struct mmc_request *req; struct mmc_command *curcmd; bus_dmamap_t bbuf_map[BBCOUNT]; char * bbuf_vaddr[BBCOUNT]; /* bounce bufs in KVA space */ uint32_t bbuf_len[BBCOUNT]; /* len currently queued for bounce buf */ uint32_t bbuf_curidx; /* which bbuf is the active DMA buffer */ uint32_t xfer_offset; /* offset so far into caller's buf */ }; /* bus entry points */ static int at91_mci_probe(device_t dev); static int at91_mci_attach(device_t dev); static int at91_mci_detach(device_t dev); static void at91_mci_intr(void *); /* helper routines */ static int at91_mci_activate(device_t dev); static void at91_mci_deactivate(device_t dev); static int at91_mci_is_mci1rev2xx(void); #define AT91_MCI_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define AT91_MCI_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define AT91_MCI_LOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ "mci", MTX_DEF) #define AT91_MCI_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); #define AT91_MCI_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); #define AT91_MCI_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); static inline uint32_t RD4(struct at91_mci_softc *sc, bus_size_t off) { return (bus_read_4(sc->mem_res, off)); } static inline void WR4(struct at91_mci_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } static void at91_bswap_buf(struct at91_mci_softc *sc, void * dptr, void * sptr, uint32_t memsize) { uint32_t * dst = (uint32_t *)dptr; uint32_t * src = (uint32_t *)sptr; uint32_t i; /* * If the hardware doesn't need byte-swapping, let bcopy() do the * work. Use bounce buffer even if we don't need byteswap, since * buffer may straddle a page boundry, and we don't handle * multi-segment transfers in hardware. Seen from 'bsdlabel -w' which * uses raw geom access to the volume. Greg Ansley (gja (at) * ansley.com) */ if (!(sc->sc_cap & CAP_NEEDS_BYTESWAP)) { memcpy(dptr, sptr, memsize); return; } /* * Nice performance boost for slightly unrolling this loop. * (But very little extra boost for further unrolling it.) */ for (i = 0; i < memsize; i += 16) { *dst++ = bswap32(*src++); *dst++ = bswap32(*src++); *dst++ = bswap32(*src++); *dst++ = bswap32(*src++); } /* Mop up the last 1-3 words, if any. 
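 * Since the unrolled loop above advances 16 bytes per pass, at most 12
 * bytes (three 32-bit words) can remain; (memsize & 0x0F) is exactly that
 * remainder, and the loop below swaps it one word at a time (the swap
 * already presumes memsize is a multiple of 4).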
*/ for (i = 0; i < (memsize & 0x0F); i += 4) { *dst++ = bswap32(*src++); } } static void at91_mci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; *(bus_addr_t *)arg = segs[0].ds_addr; } static void at91_mci_pdc_disable(struct at91_mci_softc *sc) { WR4(sc, PDC_PTCR, PDC_PTCR_TXTDIS | PDC_PTCR_RXTDIS); WR4(sc, PDC_RPR, 0); WR4(sc, PDC_RCR, 0); WR4(sc, PDC_RNPR, 0); WR4(sc, PDC_RNCR, 0); WR4(sc, PDC_TPR, 0); WR4(sc, PDC_TCR, 0); WR4(sc, PDC_TNPR, 0); WR4(sc, PDC_TNCR, 0); } /* * Reset the controller, then restore most of the current state. * * This is called after detecting an error. It's also called after stopping a * multi-block write, to un-wedge the device so that it will handle the NOTBUSY * signal correctly. See comments in at91_mci_stop_done() for more details. */ static void at91_mci_reset(struct at91_mci_softc *sc) { uint32_t mr; uint32_t sdcr; uint32_t dtor; uint32_t imr; at91_mci_pdc_disable(sc); /* save current state */ imr = RD4(sc, MCI_IMR); mr = RD4(sc, MCI_MR) & 0x7fff; sdcr = RD4(sc, MCI_SDCR); dtor = RD4(sc, MCI_DTOR); /* reset the controller */ WR4(sc, MCI_IDR, 0xffffffff); WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* restore state */ WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN); WR4(sc, MCI_MR, mr); WR4(sc, MCI_SDCR, sdcr); WR4(sc, MCI_DTOR, dtor); WR4(sc, MCI_IER, imr); /* * Make sure sdio interrupts will fire. Not sure why reading * SR ensures that, but this is in the linux driver. */ RD4(sc, MCI_SR); } static void at91_mci_init(device_t dev) { struct at91_mci_softc *sc = device_get_softc(dev); uint32_t val; WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */ WR4(sc, MCI_IDR, 0xffffffff); /* Turn off interrupts */ WR4(sc, MCI_DTOR, MCI_DTOR_DTOMUL_1M | 1); val = MCI_MR_PDCMODE; val |= 0x34a; /* PWSDIV = 3; CLKDIV = 74 */ // if (sc->sc_cap & CAP_MCI1_REV2XX) // val |= MCI_MR_RDPROOF | MCI_MR_WRPROOF; WR4(sc, MCI_MR, val); #ifndef AT91_MCI_SLOT_B WR4(sc, MCI_SDCR, 0); /* SLOT A, 1 bit bus */ #else /* * XXX Really should add second "unit" but nobody using using * a two slot card that we know of. XXX */ WR4(sc, MCI_SDCR, 1); /* SLOT B, 1 bit bus */ #endif /* * Enable controller, including power-save. The slower clock * of the power-save mode is only in effect when there is no * transfer in progress, so it can be left in this mode all * the time. */ WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN); } static void at91_mci_fini(device_t dev) { struct at91_mci_softc *sc = device_get_softc(dev); WR4(sc, MCI_IDR, 0xffffffff); /* Turn off interrupts */ at91_mci_pdc_disable(sc); WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */ } static int at91_mci_probe(device_t dev) { #ifdef FDT if (!ofw_bus_is_compatible(dev, "atmel,hsmci")) return (ENXIO); #endif device_set_desc(dev, "MCI mmc/sd host bridge"); return (0); } static int at91_mci_attach(device_t dev) { struct at91_mci_softc *sc = device_get_softc(dev); struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; device_t child; int err, i; sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); sc->dev = dev; sc->sc_cap = 0; if (at91_is_rm92()) sc->sc_cap |= CAP_NEEDS_BYTESWAP; /* * MCI1 Rev 2 controllers need some workarounds, flag if so. */ if (at91_mci_is_mci1rev2xx()) sc->sc_cap |= CAP_MCI1_REV2XX; err = at91_mci_activate(dev); if (err) goto out; AT91_MCI_LOCK_INIT(sc); at91_mci_fini(dev); at91_mci_init(dev); /* * Allocate DMA tags and maps and bounce buffers. 
* * The parms in the tag_create call cause the dmamem_alloc call to * create each bounce buffer as a single contiguous buffer of BBSIZE * bytes aligned to a 4096 byte boundary. * * Do not use DMA_COHERENT for these buffers because that maps the * memory as non-cachable, which prevents cache line burst fills/writes, * which is something we need since we're trying to overlap the * byte-swapping with the DMA operations. */ err = bus_dma_tag_create(bus_get_dma_tag(dev), 4096, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BBSIZE, 1, BBSIZE, 0, NULL, NULL, &sc->dmatag); if (err != 0) goto out; for (i = 0; i < BBCOUNT; ++i) { err = bus_dmamem_alloc(sc->dmatag, (void **)&sc->bbuf_vaddr[i], BUS_DMA_NOWAIT, &sc->bbuf_map[i]); if (err != 0) goto out; } /* * Activate the interrupt */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, at91_mci_intr, sc, &sc->intrhand); if (err) { AT91_MCI_LOCK_DESTROY(sc); goto out; } /* * Allow 4-wire to be initially set via #define. * Allow a device hint to override that. * Allow a sysctl to override that. */ #if defined(AT91_MCI_HAS_4WIRE) && AT91_MCI_HAS_4WIRE != 0 sc->has_4wire = 1; #endif resource_int_value(device_get_name(dev), device_get_unit(dev), "4wire", &sc->has_4wire); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "4wire", CTLFLAG_RW, &sc->has_4wire, 0, "has 4 wire SD Card bus"); if (sc->has_4wire) sc->sc_cap |= CAP_HAS_4WIRE; sc->allow_overclock = AT91_MCI_ALLOW_OVERCLOCK; resource_int_value(device_get_name(dev), device_get_unit(dev), "allow_overclock", &sc->allow_overclock); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "allow_overclock", CTLFLAG_RW, &sc->allow_overclock, 0, "Allow up to 30MHz clock for 25MHz request when next highest speed 15MHz or less."); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "debug", CTLFLAG_RWTUN, &mci_debug, 0, "enable debug output"); /* * Our real min freq is master_clock/512, but upper driver layers are * going to set the min speed during card discovery, and the right speed * for that is 400kHz, so advertise a safe value just under that. * * For max speed, while the rm9200 manual says the max is 50mhz, it also * says it supports only the SD v1.0 spec, which means the real limit is * 25mhz. On the other hand, historical use has been to slightly violate * the standard by running the bus at 30MHz. For more information on * that, see the comments at the top of this file. 
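 *
 * For example, with the 60MHz MCK discussed at the top of this file,
 * at91_master_clock / 2 below is 30MHz and is then clamped to 25MHz;
 * whether the bus is actually driven faster than that is decided later,
 * in at91_mci_update_ios(), based on the allow_overclock setting.
 *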
*/ sc->host.f_min = 375000; sc->host.f_max = at91_master_clock / 2; if (sc->host.f_max > 25000000) sc->host.f_max = 25000000; sc->host.host_ocr = MMC_OCR_320_330 | MMC_OCR_330_340; sc->host.caps = 0; if (sc->sc_cap & CAP_HAS_4WIRE) sc->host.caps |= MMC_CAP_4_BIT_DATA; child = device_add_child(dev, "mmc", 0); device_set_ivars(dev, &sc->host); err = bus_generic_attach(dev); out: if (err) at91_mci_deactivate(dev); return (err); } static int at91_mci_detach(device_t dev) { struct at91_mci_softc *sc = device_get_softc(dev); at91_mci_fini(dev); at91_mci_deactivate(dev); bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[0], sc->bbuf_map[0]); bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[1], sc->bbuf_map[1]); bus_dma_tag_destroy(sc->dmatag); return (EBUSY); /* XXX */ } static int at91_mci_activate(device_t dev) { struct at91_mci_softc *sc; int rid; sc = device_get_softc(dev); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) goto errout; rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res == NULL) goto errout; return (0); errout: at91_mci_deactivate(dev); return (ENOMEM); } static void at91_mci_deactivate(device_t dev) { struct at91_mci_softc *sc; sc = device_get_softc(dev); if (sc->intrhand) bus_teardown_intr(dev, sc->irq_res, sc->intrhand); - sc->intrhand = 0; + sc->intrhand = NULL; bus_generic_detach(sc->dev); if (sc->mem_res) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem_res), sc->mem_res); - sc->mem_res = 0; + sc->mem_res = NULL; if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); - sc->irq_res = 0; + sc->irq_res = NULL; return; } static int at91_mci_is_mci1rev2xx(void) { switch (soc_info.type) { case AT91_T_SAM9260: case AT91_T_SAM9263: case AT91_T_CAP9: case AT91_T_SAM9G10: case AT91_T_SAM9G20: case AT91_T_SAM9RL: return(1); default: return (0); } } static int at91_mci_update_ios(device_t brdev, device_t reqdev) { struct at91_mci_softc *sc; struct mmc_ios *ios; uint32_t clkdiv; uint32_t freq; sc = device_get_softc(brdev); ios = &sc->host.ios; /* * Calculate our closest available clock speed that doesn't exceed the * requested speed. * * When overclocking is allowed, the requested clock is 25MHz, the * computed frequency is 15MHz or smaller and clockdiv is 1, use * clockdiv of 0 to double that. If less than 12.5MHz, double * regardless of the overclocking setting. * * Whatever we come up with, store it back into ios->clock so that the * upper layer drivers can report the actual speed of the bus. */ if (ios->clock == 0) { WR4(sc, MCI_CR, MCI_CR_MCIDIS); clkdiv = 0; } else { WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN); if ((at91_master_clock % (ios->clock * 2)) == 0) clkdiv = ((at91_master_clock / ios->clock) / 2) - 1; else clkdiv = (at91_master_clock / ios->clock) / 2; freq = at91_master_clock / ((clkdiv+1) * 2); if (clkdiv == 1 && ios->clock == 25000000 && freq <= 15000000) { if (sc->allow_overclock || freq <= 12500000) { clkdiv = 0; freq = at91_master_clock / ((clkdiv+1) * 2); } } ios->clock = freq; } if (ios->bus_width == bus_width_4) WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) | MCI_SDCR_SDCBUS); else WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) & ~MCI_SDCR_SDCBUS); WR4(sc, MCI_MR, (RD4(sc, MCI_MR) & ~MCI_MR_CLKDIV) | clkdiv); /* Do we need a settle time here? 
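 *
 * The divider selection above reduces to a pure function of the master
 * clock, the requested speed, and the allow_overclock knob.  A minimal
 * sketch (not in this file; the name is local to the sketch, and it
 * assumes want is non-zero -- the zero case just disables the controller
 * above):
 *
 *	static uint32_t
 *	mci_pick_clkdiv(uint32_t mck, uint32_t want, int allow_overclock)
 *	{
 *		uint32_t clkdiv, freq;
 *
 *		if ((mck % (want * 2)) == 0)
 *			clkdiv = ((mck / want) / 2) - 1;
 *		else
 *			clkdiv = (mck / want) / 2;
 *		freq = mck / ((clkdiv + 1) * 2);
 *		if (clkdiv == 1 && want == 25000000 && freq <= 15000000 &&
 *		    (allow_overclock || freq <= 12500000))
 *			clkdiv = 0;
 *		return (clkdiv);
 *	}
 *
 * With mck = 60MHz and want = 25MHz this yields clkdiv = 1 (15MHz), and
 * drops to clkdiv = 0 (30MHz) only when overclocking is allowed, matching
 * the discussion at the top of this file.
 *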
*/ /* XXX We need to turn the device on/off here with a GPIO pin */ return (0); } static void at91_mci_start_cmd(struct at91_mci_softc *sc, struct mmc_command *cmd) { uint32_t cmdr, mr; struct mmc_data *data; sc->curcmd = cmd; data = cmd->data; /* XXX Upper layers don't always set this */ cmd->mrq = sc->req; /* Begin setting up command register. */ cmdr = cmd->opcode; if (sc->host.ios.bus_mode == opendrain) cmdr |= MCI_CMDR_OPDCMD; /* Set up response handling. Allow max timeout for responses. */ if (MMC_RSP(cmd->flags) == MMC_RSP_NONE) cmdr |= MCI_CMDR_RSPTYP_NO; else { cmdr |= MCI_CMDR_MAXLAT; if (cmd->flags & MMC_RSP_136) cmdr |= MCI_CMDR_RSPTYP_136; else cmdr |= MCI_CMDR_RSPTYP_48; } /* * If there is no data transfer, just set up the right interrupt mask * and start the command. * * The interrupt mask needs to be CMDRDY plus all non-data-transfer * errors. It's important to leave the transfer-related errors out, to * avoid spurious timeout or crc errors on a STOP command following a * multiblock read. When a multiblock read is in progress, sending a * STOP in the middle of a block occasionally triggers such errors, but * we're totally disinterested in them because we've already gotten all * the data we wanted without error before sending the STOP command. */ if (data == NULL) { uint32_t ier = MCI_SR_CMDRDY | MCI_SR_RTOE | MCI_SR_RENDE | MCI_SR_RCRCE | MCI_SR_RDIRE | MCI_SR_RINDE; at91_mci_pdc_disable(sc); if (cmd->opcode == MMC_STOP_TRANSMISSION) cmdr |= MCI_CMDR_TRCMD_STOP; /* Ignore response CRC on CMD2 and ACMD41, per standard. */ if (cmd->opcode == MMC_SEND_OP_COND || cmd->opcode == ACMD_SD_SEND_OP_COND) ier &= ~MCI_SR_RCRCE; if (mci_debug) printf("CMDR %x (opcode %d) ARGR %x no data\n", cmdr, cmd->opcode, cmd->arg); WR4(sc, MCI_ARGR, cmd->arg); WR4(sc, MCI_CMDR, cmdr); WR4(sc, MCI_IDR, 0xffffffff); WR4(sc, MCI_IER, ier); return; } /* There is data, set up the transfer-related parts of the command. */ if (data->flags & MMC_DATA_READ) cmdr |= MCI_CMDR_TRDIR; if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE)) cmdr |= MCI_CMDR_TRCMD_START; if (data->flags & MMC_DATA_STREAM) cmdr |= MCI_CMDR_TRTYP_STREAM; else if (data->flags & MMC_DATA_MULTI) { cmdr |= MCI_CMDR_TRTYP_MULTIPLE; sc->flags |= (data->flags & MMC_DATA_READ) ? CMD_MULTIREAD : CMD_MULTIWRITE; } /* * Disable PDC until we're ready. * * Set block size and turn on PDC mode for dma xfer. * Note that the block size is the smaller of the amount of data to be * transferred, or 512 bytes. The 512 size is fixed by the standard; * smaller blocks are possible, but never larger. */ WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS); mr = RD4(sc,MCI_MR) & ~MCI_MR_BLKLEN; mr |= min(data->len, 512) << 16; WR4(sc, MCI_MR, mr | MCI_MR_PDCMODE|MCI_MR_PDCPADV); /* * Set up DMA. * * Use bounce buffers even if we don't need to byteswap, because doing * multi-block IO with large DMA buffers is way fast (compared to * single-block IO), even after incurring the overhead of also copying * from/to the caller's buffers (which may be in non-contiguous physical * pages). * * In an ideal non-byteswap world we could create a dma tag that allows * for discontiguous segments and do the IO directly from/to the * caller's buffer(s), using ENDRX/ENDTX interrupts to chain the * discontiguous buffers through the PDC. Someday. * * If a read is bigger than 2k, split it in half so that we can start * byte-swapping the first half while the second half is on the wire. 
* It would be best if we could split it into 8k chunks, but we can't * always keep up with the byte-swapping due to other system activity, * and if an RXBUFF interrupt happens while we're still handling the * byte-swap from the prior buffer (IE, we haven't returned from * handling the prior interrupt yet), then data will get dropped on the * floor and we can't easily recover from that. The right fix for that * would be to have the interrupt handling only keep the DMA flowing and * enqueue filled buffers to be byte-swapped in a non-interrupt context. * Even that won't work on the write side of things though; in that * context we have to have all the data ready to go before starting the * dma. * * XXX what about stream transfers? */ sc->xfer_offset = 0; sc->bbuf_curidx = 0; if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE)) { uint32_t len; uint32_t remaining = data->len; bus_addr_t paddr; int err; if (remaining > (BBCOUNT*BBSIZE)) panic("IO read size exceeds MAXDATA\n"); if (data->flags & MMC_DATA_READ) { if (remaining > 2048) // XXX len = remaining / 2; else len = remaining; err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0], sc->bbuf_vaddr[0], len, at91_mci_getaddr, &paddr, BUS_DMA_NOWAIT); if (err != 0) panic("IO read dmamap_load failed\n"); bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0], BUS_DMASYNC_PREREAD); WR4(sc, PDC_RPR, paddr); WR4(sc, PDC_RCR, len / 4); sc->bbuf_len[0] = len; remaining -= len; if (remaining == 0) { sc->bbuf_len[1] = 0; } else { len = remaining; err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1], sc->bbuf_vaddr[1], len, at91_mci_getaddr, &paddr, BUS_DMA_NOWAIT); if (err != 0) panic("IO read dmamap_load failed\n"); bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1], BUS_DMASYNC_PREREAD); WR4(sc, PDC_RNPR, paddr); WR4(sc, PDC_RNCR, len / 4); sc->bbuf_len[1] = len; remaining -= len; } WR4(sc, PDC_PTCR, PDC_PTCR_RXTEN); } else { len = min(BBSIZE, remaining); at91_bswap_buf(sc, sc->bbuf_vaddr[0], data->data, len); err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0], sc->bbuf_vaddr[0], len, at91_mci_getaddr, &paddr, BUS_DMA_NOWAIT); if (err != 0) panic("IO write dmamap_load failed\n"); bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0], BUS_DMASYNC_PREWRITE); /* * Erratum workaround: PDC transfer length on a write * must not be smaller than 12 bytes (3 words); only * blklen bytes (set above) are actually transferred. */ WR4(sc, PDC_TPR,paddr); WR4(sc, PDC_TCR, (len < 12) ? 3 : len / 4); sc->bbuf_len[0] = len; remaining -= len; if (remaining == 0) { sc->bbuf_len[1] = 0; } else { len = remaining; at91_bswap_buf(sc, sc->bbuf_vaddr[1], ((char *)data->data)+BBSIZE, len); err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1], sc->bbuf_vaddr[1], len, at91_mci_getaddr, &paddr, BUS_DMA_NOWAIT); if (err != 0) panic("IO write dmamap_load failed\n"); bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1], BUS_DMASYNC_PREWRITE); WR4(sc, PDC_TNPR, paddr); WR4(sc, PDC_TNCR, (len < 12) ? 3 : len / 4); sc->bbuf_len[1] = len; remaining -= len; } /* do not enable PDC xfer until CMDRDY asserted */ } data->xfer_len = 0; /* XXX what's this? appears to be unused. 
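 *
 * The read path above splits one transfer across the two bounce buffers.
 * The sizing rule reduces to the following (a sketch, not in this file;
 * the name is local to it):
 *
 *	static void
 *	mci_split_read(uint32_t total, uint32_t *first, uint32_t *second)
 *	{
 *		*first = (total > 2048) ? total / 2 : total;
 *		*second = total - *first;
 *	}
 *
 * so an 8192-byte read becomes two 4096-byte segments, programmed into
 * RPR/RCR and RNPR/RNCR respectively, which lets the byte swap of the
 * first half overlap the DMA of the second half as described above.
 *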
*/ } if (mci_debug) printf("CMDR %x (opcode %d) ARGR %x with data len %d\n", cmdr, cmd->opcode, cmd->arg, cmd->data->len); WR4(sc, MCI_ARGR, cmd->arg); WR4(sc, MCI_CMDR, cmdr); WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_CMDRDY); } static void at91_mci_next_operation(struct at91_mci_softc *sc) { struct mmc_request *req; req = sc->req; if (req == NULL) return; if (sc->flags & PENDING_CMD) { sc->flags &= ~PENDING_CMD; at91_mci_start_cmd(sc, req->cmd); return; } else if (sc->flags & PENDING_STOP) { sc->flags &= ~PENDING_STOP; at91_mci_start_cmd(sc, req->stop); return; } WR4(sc, MCI_IDR, 0xffffffff); sc->req = NULL; sc->curcmd = NULL; //printf("req done\n"); req->done(req); } static int at91_mci_request(device_t brdev, device_t reqdev, struct mmc_request *req) { struct at91_mci_softc *sc = device_get_softc(brdev); AT91_MCI_LOCK(sc); if (sc->req != NULL) { AT91_MCI_UNLOCK(sc); return (EBUSY); } //printf("new req\n"); sc->req = req; sc->flags = PENDING_CMD; if (sc->req->stop) sc->flags |= PENDING_STOP; at91_mci_next_operation(sc); AT91_MCI_UNLOCK(sc); return (0); } static int at91_mci_get_ro(device_t brdev, device_t reqdev) { return (0); } static int at91_mci_acquire_host(device_t brdev, device_t reqdev) { struct at91_mci_softc *sc = device_get_softc(brdev); int err = 0; AT91_MCI_LOCK(sc); while (sc->bus_busy) msleep(sc, &sc->sc_mtx, PZERO, "mciah", hz / 5); sc->bus_busy++; AT91_MCI_UNLOCK(sc); return (err); } static int at91_mci_release_host(device_t brdev, device_t reqdev) { struct at91_mci_softc *sc = device_get_softc(brdev); AT91_MCI_LOCK(sc); sc->bus_busy--; wakeup(sc); AT91_MCI_UNLOCK(sc); return (0); } static void at91_mci_read_done(struct at91_mci_softc *sc, uint32_t sr) { struct mmc_command *cmd = sc->curcmd; char * dataptr = (char *)cmd->data->data; uint32_t curidx = sc->bbuf_curidx; uint32_t len = sc->bbuf_len[curidx]; /* * We arrive here when a DMA transfer for a read is done, whether it's * a single or multi-block read. * * We byte-swap the buffer that just completed, and if that is the * last buffer that's part of this read then we move on to the next * operation, otherwise we wait for another ENDRX for the next bufer. */ bus_dmamap_sync(sc->dmatag, sc->bbuf_map[curidx], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->dmatag, sc->bbuf_map[curidx]); at91_bswap_buf(sc, dataptr + sc->xfer_offset, sc->bbuf_vaddr[curidx], len); if (mci_debug) { printf("read done sr %x curidx %d len %d xfer_offset %d\n", sr, curidx, len, sc->xfer_offset); } sc->xfer_offset += len; sc->bbuf_curidx = !curidx; /* swap buffers */ /* * If we've transferred all the data, move on to the next operation. * * If we're still transferring the last buffer, RNCR is already zero but * we have to write a zero anyway to clear the ENDRX status so we don't * re-interrupt until the last buffer is done. */ if (sc->xfer_offset == cmd->data->len) { WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS); cmd->error = MMC_ERR_NONE; at91_mci_next_operation(sc); } else { WR4(sc, PDC_RNCR, 0); WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_ENDRX); } } static void at91_mci_write_done(struct at91_mci_softc *sc, uint32_t sr) { struct mmc_command *cmd = sc->curcmd; /* * We arrive here when the entire DMA transfer for a write is done, * whether it's a single or multi-block write. If it's multi-block we * have to immediately move on to the next operation which is to send * the stop command. 
If it's a single-block transfer we need to wait * for NOTBUSY, but if that's already asserted we can avoid another * interrupt and just move on to completing the request right away. */ WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS); bus_dmamap_sync(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx]); if ((cmd->data->flags & MMC_DATA_MULTI) || (sr & MCI_SR_NOTBUSY)) { cmd->error = MMC_ERR_NONE; at91_mci_next_operation(sc); } else { WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY); } } static void at91_mci_notbusy(struct at91_mci_softc *sc) { struct mmc_command *cmd = sc->curcmd; /* * We arrive here by either completion of a single-block write, or * completion of the stop command that ended a multi-block write (and, * I suppose, after a card-select or erase, but I haven't tested * those). Anyway, we're done and it's time to move on to the next * command. */ cmd->error = MMC_ERR_NONE; at91_mci_next_operation(sc); } static void at91_mci_stop_done(struct at91_mci_softc *sc, uint32_t sr) { struct mmc_command *cmd = sc->curcmd; /* * We arrive here after receiving CMDRDY for a MMC_STOP_TRANSMISSION * command. Depending on the operation being stopped, we may have to * do some unusual things to work around hardware bugs. */ /* * This is known to be true of at91rm9200 hardware; it may or may not * apply to more recent chips: * * After stopping a multi-block write, the NOTBUSY bit in MCI_SR does * not properly reflect the actual busy state of the card as signaled * on the DAT0 line; it always claims the card is not-busy. If we * believe that and let operations continue, following commands will * fail with response timeouts (except of course MMC_SEND_STATUS -- it * indicates the card is busy in the PRG state, which was the smoking * gun that showed MCI_SR NOTBUSY was not tracking DAT0 correctly). * * The atmel docs are emphatic: "This flag [NOTBUSY] must be used only * for Write Operations." I guess technically since we sent a stop * it's not a write operation anymore. But then just what did they * think it meant for the stop command to have "...an optional busy * signal transmitted on the data line" according to the SD spec? * * I tried a variety of things to un-wedge the MCI and get the status * register to reflect NOTBUSY correctly again, but the only thing * that worked was a full device reset. It feels like an awfully big * hammer, but doing a full reset after every multiblock write is * still faster than doing single-block IO (by almost two orders of * magnitude: 20KB/sec improves to about 1.8MB/sec best case). * * After doing the reset, wait for a NOTBUSY interrupt before * continuing with the next operation. * * This workaround breaks multiwrite on the rev2xx parts, but some other * workaround is needed. */ if ((sc->flags & CMD_MULTIWRITE) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) { at91_mci_reset(sc); WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY); return; } /* * This is known to be true of at91rm9200 hardware; it may or may not * apply to more recent chips: * * After stopping a multi-block read, loop to read and discard any * data that coasts in after we sent the stop command. The docs don't * say anything about it, but empirical testing shows that 1-3 * additional words of data get buffered up in some unmentioned * internal fifo and if we don't read and discard them here they end * up on the front of the next read DMA transfer we do. * * This appears to be unnecessary for rev2xx parts. 
*/ if ((sc->flags & CMD_MULTIREAD) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) { uint32_t sr; int count = 0; do { sr = RD4(sc, MCI_SR); if (sr & MCI_SR_RXRDY) { RD4(sc, MCI_RDR); ++count; } } while (sr & MCI_SR_RXRDY); at91_mci_reset(sc); } cmd->error = MMC_ERR_NONE; at91_mci_next_operation(sc); } static void at91_mci_cmdrdy(struct at91_mci_softc *sc, uint32_t sr) { struct mmc_command *cmd = sc->curcmd; int i; if (cmd == NULL) return; /* * We get here at the end of EVERY command. We retrieve the command * response (if any) then decide what to do next based on the command. */ if (cmd->flags & MMC_RSP_PRESENT) { for (i = 0; i < ((cmd->flags & MMC_RSP_136) ? 4 : 1); i++) { cmd->resp[i] = RD4(sc, MCI_RSPR + i * 4); if (mci_debug) printf("RSPR[%d] = %x sr=%x\n", i, cmd->resp[i], sr); } } /* * If this was a stop command, go handle the various special * conditions (read: bugs) that have to be dealt with following a stop. */ if (cmd->opcode == MMC_STOP_TRANSMISSION) { at91_mci_stop_done(sc, sr); return; } /* * If this command can continue to assert BUSY beyond the response then * we need to wait for NOTBUSY before the command is really done. * * Note that this may not work properly on the at91rm9200. It certainly * doesn't work for the STOP command that follows a multi-block write, * so post-stop CMDRDY is handled separately; see the special handling * in at91_mci_stop_done(). * * Beside STOP, there are other R1B-type commands that use the busy * signal after CMDRDY: CMD7 (card select), CMD28-29 (write protect), * CMD38 (erase). I haven't tested any of them, but I rather expect * them all to have the same sort of problem with MCI_SR not actually * reflecting the state of the DAT0-line busy indicator. So this code * may need to grow some sort of special handling for them too. (This * just in: CMD7 isn't a problem right now because dev/mmc.c incorrectly * sets the response flags to R1 rather than R1B.) XXX */ if ((cmd->flags & MMC_RSP_BUSY)) { WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY); return; } /* * If there is a data transfer with this command, then... * - If it's a read, we need to wait for ENDRX. * - If it's a write, now is the time to enable the PDC, and we need * to wait for a BLKE that follows a TXBUFE, because if we're doing * a split transfer we get a BLKE after the first half (when TPR/TCR * get loaded from TNPR/TNCR). So first we wait for the TXBUFE, and * the handling for that interrupt will then invoke the wait for the * subsequent BLKE which indicates actual completion. */ if (cmd->data) { uint32_t ier; if (cmd->data->flags & MMC_DATA_READ) { ier = MCI_SR_ENDRX; } else { ier = MCI_SR_TXBUFE; WR4(sc, PDC_PTCR, PDC_PTCR_TXTEN); } WR4(sc, MCI_IER, MCI_SR_ERROR | ier); return; } /* * If we made it to here, we don't need to wait for anything more for * the current command, move on to the next command (will complete the * request if there is no next command). */ cmd->error = MMC_ERR_NONE; at91_mci_next_operation(sc); } static void at91_mci_intr(void *arg) { struct at91_mci_softc *sc = (struct at91_mci_softc*)arg; struct mmc_command *cmd = sc->curcmd; uint32_t sr, isr; AT91_MCI_LOCK(sc); sr = RD4(sc, MCI_SR); isr = sr & RD4(sc, MCI_IMR); if (mci_debug) printf("i 0x%x sr 0x%x\n", isr, sr); /* * All interrupts are one-shot; disable it now. * The next operation will re-enable whatever interrupts it wants. 
*/ WR4(sc, MCI_IDR, isr); if (isr & MCI_SR_ERROR) { if (isr & (MCI_SR_RTOE | MCI_SR_DTOE)) cmd->error = MMC_ERR_TIMEOUT; else if (isr & (MCI_SR_RCRCE | MCI_SR_DCRCE)) cmd->error = MMC_ERR_BADCRC; else if (isr & (MCI_SR_OVRE | MCI_SR_UNRE)) cmd->error = MMC_ERR_FIFO; else cmd->error = MMC_ERR_FAILED; /* * CMD8 is used to probe for SDHC cards, a standard SD card * will get a response timeout; don't report it because it's a * normal and expected condition. One might argue that all * error reporting should be left to higher levels, but when * they report at all it's always EIO, which isn't very * helpful. XXX bootverbose? */ if (cmd->opcode != 8) { device_printf(sc->dev, "IO error; status MCI_SR = 0x%b cmd opcode = %d%s\n", sr, MCI_SR_BITSTRING, cmd->opcode, (cmd->opcode != 12) ? "" : (sc->flags & CMD_MULTIREAD) ? " after read" : " after write"); /* XXX not sure RTOE needs a full reset, just a retry */ at91_mci_reset(sc); } at91_mci_next_operation(sc); } else { if (isr & MCI_SR_TXBUFE) { // printf("TXBUFE\n"); /* * We need to wait for a BLKE that follows TXBUFE * (intermediate BLKEs might happen after ENDTXes if * we're chaining multiple buffers). If BLKE is also * asserted at the time we get TXBUFE, we can avoid * another interrupt and process it right away, below. */ if (sr & MCI_SR_BLKE) isr |= MCI_SR_BLKE; else WR4(sc, MCI_IER, MCI_SR_BLKE); } if (isr & MCI_SR_RXBUFF) { // printf("RXBUFF\n"); } if (isr & MCI_SR_ENDTX) { // printf("ENDTX\n"); } if (isr & MCI_SR_ENDRX) { // printf("ENDRX\n"); at91_mci_read_done(sc, sr); } if (isr & MCI_SR_NOTBUSY) { // printf("NOTBUSY\n"); at91_mci_notbusy(sc); } if (isr & MCI_SR_DTIP) { // printf("Data transfer in progress\n"); } if (isr & MCI_SR_BLKE) { // printf("Block transfer end\n"); at91_mci_write_done(sc, sr); } if (isr & MCI_SR_TXRDY) { // printf("Ready to transmit\n"); } if (isr & MCI_SR_RXRDY) { // printf("Ready to receive\n"); } if (isr & MCI_SR_CMDRDY) { // printf("Command ready\n"); at91_mci_cmdrdy(sc, sr); } } AT91_MCI_UNLOCK(sc); } static int at91_mci_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) { struct at91_mci_softc *sc = device_get_softc(bus); switch (which) { default: return (EINVAL); case MMCBR_IVAR_BUS_MODE: *(int *)result = sc->host.ios.bus_mode; break; case MMCBR_IVAR_BUS_WIDTH: *(int *)result = sc->host.ios.bus_width; break; case MMCBR_IVAR_CHIP_SELECT: *(int *)result = sc->host.ios.chip_select; break; case MMCBR_IVAR_CLOCK: *(int *)result = sc->host.ios.clock; break; case MMCBR_IVAR_F_MIN: *(int *)result = sc->host.f_min; break; case MMCBR_IVAR_F_MAX: *(int *)result = sc->host.f_max; break; case MMCBR_IVAR_HOST_OCR: *(int *)result = sc->host.host_ocr; break; case MMCBR_IVAR_MODE: *(int *)result = sc->host.mode; break; case MMCBR_IVAR_OCR: *(int *)result = sc->host.ocr; break; case MMCBR_IVAR_POWER_MODE: *(int *)result = sc->host.ios.power_mode; break; case MMCBR_IVAR_VDD: *(int *)result = sc->host.ios.vdd; break; case MMCBR_IVAR_CAPS: if (sc->has_4wire) { sc->sc_cap |= CAP_HAS_4WIRE; sc->host.caps |= MMC_CAP_4_BIT_DATA; } else { sc->sc_cap &= ~CAP_HAS_4WIRE; sc->host.caps &= ~MMC_CAP_4_BIT_DATA; } *(int *)result = sc->host.caps; break; case MMCBR_IVAR_MAX_DATA: /* * Something is wrong with the 2x parts and multiblock, so * just do 1 block at a time for now, which really kills * performance. 
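 *
 * For the non-rev2xx case below, MAX_BLOCKS works out to
 * (BBSIZE * BBCOUNT) / 512 = (16384 * 2) / 512 = 64 blocks per request,
 * i.e. one request can fill both 16KB bounce buffers.
 *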
*/ if (sc->sc_cap & CAP_MCI1_REV2XX) *(int *)result = 1; else *(int *)result = MAX_BLOCKS; break; } return (0); } static int at91_mci_write_ivar(device_t bus, device_t child, int which, uintptr_t value) { struct at91_mci_softc *sc = device_get_softc(bus); switch (which) { default: return (EINVAL); case MMCBR_IVAR_BUS_MODE: sc->host.ios.bus_mode = value; break; case MMCBR_IVAR_BUS_WIDTH: sc->host.ios.bus_width = value; break; case MMCBR_IVAR_CHIP_SELECT: sc->host.ios.chip_select = value; break; case MMCBR_IVAR_CLOCK: sc->host.ios.clock = value; break; case MMCBR_IVAR_MODE: sc->host.mode = value; break; case MMCBR_IVAR_OCR: sc->host.ocr = value; break; case MMCBR_IVAR_POWER_MODE: sc->host.ios.power_mode = value; break; case MMCBR_IVAR_VDD: sc->host.ios.vdd = value; break; /* These are read-only */ case MMCBR_IVAR_CAPS: case MMCBR_IVAR_HOST_OCR: case MMCBR_IVAR_F_MIN: case MMCBR_IVAR_F_MAX: case MMCBR_IVAR_MAX_DATA: return (EINVAL); } return (0); } static device_method_t at91_mci_methods[] = { /* device_if */ DEVMETHOD(device_probe, at91_mci_probe), DEVMETHOD(device_attach, at91_mci_attach), DEVMETHOD(device_detach, at91_mci_detach), /* Bus interface */ DEVMETHOD(bus_read_ivar, at91_mci_read_ivar), DEVMETHOD(bus_write_ivar, at91_mci_write_ivar), /* mmcbr_if */ DEVMETHOD(mmcbr_update_ios, at91_mci_update_ios), DEVMETHOD(mmcbr_request, at91_mci_request), DEVMETHOD(mmcbr_get_ro, at91_mci_get_ro), DEVMETHOD(mmcbr_acquire_host, at91_mci_acquire_host), DEVMETHOD(mmcbr_release_host, at91_mci_release_host), DEVMETHOD_END }; static driver_t at91_mci_driver = { "at91_mci", at91_mci_methods, sizeof(struct at91_mci_softc), }; static devclass_t at91_mci_devclass; #ifdef FDT DRIVER_MODULE(at91_mci, simplebus, at91_mci_driver, at91_mci_devclass, NULL, NULL); #else DRIVER_MODULE(at91_mci, atmelarm, at91_mci_driver, at91_mci_devclass, NULL, NULL); #endif DRIVER_MODULE(mmc, at91_mci, mmc_driver, mmc_devclass, NULL, NULL); MODULE_DEPEND(at91_mci, mmc, 1, 1, 1); Index: head/sys/arm/at91/at91_pio.c =================================================================== --- head/sys/arm/at91/at91_pio.c (revision 298054) +++ head/sys/arm/at91/at91_pio.c (revision 298055) @@ -1,654 +1,654 @@ /*- * Copyright (c) 2006 M. Warner Losh. All rights reserved. * Copyright (C) 2012 Ian Lepore. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #define MAX_CHANGE 64 struct at91_pio_softc { device_t dev; /* Myself */ void *intrhand; /* Interrupt handle */ struct resource *irq_res; /* IRQ resource */ struct resource *mem_res; /* Memory resource */ struct sx sc_mtx; /* basically a perimeter lock */ struct cdev *cdev; struct selinfo selp; int buflen; uint8_t buf[MAX_CHANGE]; int flags; #define OPENED 1 }; static inline uint32_t RD4(struct at91_pio_softc *sc, bus_size_t off) { return (bus_read_4(sc->mem_res, off)); } static inline void WR4(struct at91_pio_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } #define AT91_PIO_LOCK(_sc) sx_xlock(&(_sc)->sc_mtx) #define AT91_PIO_UNLOCK(_sc) sx_xunlock(&(_sc)->sc_mtx) #define AT91_PIO_LOCK_INIT(_sc) \ sx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev)) #define AT91_PIO_LOCK_DESTROY(_sc) sx_destroy(&_sc->sc_mtx); #define AT91_PIO_ASSERT_LOCKED(_sc) sx_assert(&_sc->sc_mtx, SA_XLOCKED); #define AT91_PIO_ASSERT_UNLOCKED(_sc) sx_assert(&_sc->sc_mtx, SA_UNLOCKED); #define CDEV2SOFTC(dev) ((dev)->si_drv1) static devclass_t at91_pio_devclass; /* bus entry points */ static int at91_pio_probe(device_t dev); static int at91_pio_attach(device_t dev); static int at91_pio_detach(device_t dev); static void at91_pio_intr(void *); /* helper routines */ static int at91_pio_activate(device_t dev); static void at91_pio_deactivate(device_t dev); /* cdev routines */ static d_open_t at91_pio_open; static d_close_t at91_pio_close; static d_read_t at91_pio_read; static d_poll_t at91_pio_poll; static d_ioctl_t at91_pio_ioctl; static struct cdevsw at91_pio_cdevsw = { .d_version = D_VERSION, .d_open = at91_pio_open, .d_close = at91_pio_close, .d_read = at91_pio_read, .d_poll = at91_pio_poll, .d_ioctl = at91_pio_ioctl }; static int at91_pio_probe(device_t dev) { const char *name; #ifdef FDT if (!ofw_bus_is_compatible(dev, "atmel,at91rm9200-gpio")) return (ENXIO); #endif switch (device_get_unit(dev)) { case 0: name = "PIOA"; break; case 1: name = "PIOB"; break; case 2: name = "PIOC"; break; case 3: name = "PIOD"; break; case 4: name = "PIOE"; break; case 5: name = "PIOF"; break; default: name = "PIO"; break; } device_set_desc(dev, name); return (0); } static int at91_pio_attach(device_t dev) { struct at91_pio_softc *sc; int err; sc = device_get_softc(dev); sc->dev = dev; err = at91_pio_activate(dev); if (err) goto out; if (bootverbose) device_printf(dev, "ABSR: %#x OSR: %#x PSR:%#x ODSR: %#x\n", RD4(sc, PIO_ABSR), RD4(sc, PIO_OSR), RD4(sc, PIO_PSR), RD4(sc, PIO_ODSR)); AT91_PIO_LOCK_INIT(sc); /* * Activate the interrupt, but disable all interrupts in the hardware. 
*/ WR4(sc, PIO_IDR, 0xffffffff); err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC, NULL, at91_pio_intr, sc, &sc->intrhand); if (err) { AT91_PIO_LOCK_DESTROY(sc); goto out; } sc->cdev = make_dev(&at91_pio_cdevsw, device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "pio%d", device_get_unit(dev)); if (sc->cdev == NULL) { err = ENOMEM; goto out; } sc->cdev->si_drv1 = sc; out: if (err) at91_pio_deactivate(dev); return (err); } static int at91_pio_detach(device_t dev) { return (EBUSY); /* XXX */ } static int at91_pio_activate(device_t dev) { struct at91_pio_softc *sc; int rid; sc = device_get_softc(dev); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) goto errout; rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | RF_SHAREABLE); if (sc->irq_res == NULL) goto errout; return (0); errout: at91_pio_deactivate(dev); return (ENOMEM); } static void at91_pio_deactivate(device_t dev) { struct at91_pio_softc *sc; sc = device_get_softc(dev); if (sc->intrhand) bus_teardown_intr(dev, sc->irq_res, sc->intrhand); - sc->intrhand = 0; + sc->intrhand = NULL; bus_generic_detach(sc->dev); if (sc->mem_res) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem_res), sc->mem_res); - sc->mem_res = 0; + sc->mem_res = NULL; if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); - sc->irq_res = 0; + sc->irq_res = NULL; } static void at91_pio_intr(void *xsc) { struct at91_pio_softc *sc = xsc; uint32_t status; int i; /* Reading the status also clears the interrupt. */ status = RD4(sc, PIO_ISR) & RD4(sc, PIO_IMR); if (status != 0) { AT91_PIO_LOCK(sc); for (i = 0; status != 0 && sc->buflen < MAX_CHANGE; ++i) { if (status & 1) sc->buf[sc->buflen++] = (uint8_t)i; status >>= 1; } AT91_PIO_UNLOCK(sc); wakeup(sc); selwakeup(&sc->selp); } } static int at91_pio_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct at91_pio_softc *sc; sc = CDEV2SOFTC(dev); AT91_PIO_LOCK(sc); if (!(sc->flags & OPENED)) { sc->flags |= OPENED; } AT91_PIO_UNLOCK(sc); return (0); } static int at91_pio_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { struct at91_pio_softc *sc; sc = CDEV2SOFTC(dev); AT91_PIO_LOCK(sc); sc->flags &= ~OPENED; AT91_PIO_UNLOCK(sc); return (0); } static int at91_pio_poll(struct cdev *dev, int events, struct thread *td) { struct at91_pio_softc *sc; int revents = 0; sc = CDEV2SOFTC(dev); AT91_PIO_LOCK(sc); if (events & (POLLIN | POLLRDNORM)) { if (sc->buflen != 0) revents |= events & (POLLIN | POLLRDNORM); else selrecord(td, &sc->selp); } AT91_PIO_UNLOCK(sc); return (revents); } static int at91_pio_read(struct cdev *dev, struct uio *uio, int flag) { struct at91_pio_softc *sc; int err, ret, len; sc = CDEV2SOFTC(dev); AT91_PIO_LOCK(sc); err = 0; ret = 0; while (uio->uio_resid) { while (sc->buflen == 0 && err == 0) err = msleep(sc, &sc->sc_mtx, PCATCH | PZERO, "prd", 0); if (err != 0) break; len = MIN(sc->buflen, uio->uio_resid); err = uiomove(sc->buf, len, uio); if (err != 0) break; /* * If we read the whole thing no datacopy is needed, * otherwise we move the data down. */ ret += len; if (sc->buflen == len) sc->buflen = 0; else { bcopy(sc->buf + len, sc->buf, sc->buflen - len); sc->buflen -= len; } /* If there's no data left, end the read. 
*/ if (sc->buflen == 0) break; } AT91_PIO_UNLOCK(sc); return (err); } static void at91_pio_bang32(struct at91_pio_softc *sc, uint32_t bits, uint32_t datapin, uint32_t clockpin) { int i; for (i = 0; i < 32; i++) { if (bits & 0x80000000) WR4(sc, PIO_SODR, datapin); else WR4(sc, PIO_CODR, datapin); bits <<= 1; WR4(sc, PIO_CODR, clockpin); WR4(sc, PIO_SODR, clockpin); } } static void at91_pio_bang(struct at91_pio_softc *sc, uint8_t bits, uint32_t bitcount, uint32_t datapin, uint32_t clockpin) { int i; for (i = 0; i < bitcount; i++) { if (bits & 0x80) WR4(sc, PIO_SODR, datapin); else WR4(sc, PIO_CODR, datapin); bits <<= 1; WR4(sc, PIO_CODR, clockpin); WR4(sc, PIO_SODR, clockpin); } } static int at91_pio_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct at91_pio_softc *sc; struct at91_gpio_cfg *cfg; struct at91_gpio_info *info; struct at91_gpio_bang *bang; struct at91_gpio_bang_many *bangmany; uint32_t i, num; uint8_t many[1024], *walker; int err; int bitcount; sc = CDEV2SOFTC(dev); switch(cmd) { case AT91_GPIO_SET: /* turn bits on */ WR4(sc, PIO_SODR, *(uint32_t *)data); return (0); case AT91_GPIO_CLR: /* turn bits off */ WR4(sc, PIO_CODR, *(uint32_t *)data); return (0); case AT91_GPIO_READ: /* Get the status of input bits */ *(uint32_t *)data = RD4(sc, PIO_PDSR); return (0); case AT91_GPIO_CFG: /* Configure AT91_GPIO pins */ cfg = (struct at91_gpio_cfg *)data; if (cfg->cfgmask & AT91_GPIO_CFG_INPUT) { WR4(sc, PIO_OER, cfg->iomask & ~cfg->input); WR4(sc, PIO_ODR, cfg->iomask & cfg->input); } if (cfg->cfgmask & AT91_GPIO_CFG_HI_Z) { WR4(sc, PIO_MDDR, cfg->iomask & ~cfg->hi_z); WR4(sc, PIO_MDER, cfg->iomask & cfg->hi_z); } if (cfg->cfgmask & AT91_GPIO_CFG_PULLUP) { WR4(sc, PIO_PUDR, cfg->iomask & ~cfg->pullup); WR4(sc, PIO_PUER, cfg->iomask & cfg->pullup); } if (cfg->cfgmask & AT91_GPIO_CFG_GLITCH) { WR4(sc, PIO_IFDR, cfg->iomask & ~cfg->glitch); WR4(sc, PIO_IFER, cfg->iomask & cfg->glitch); } if (cfg->cfgmask & AT91_GPIO_CFG_GPIO) { WR4(sc, PIO_PDR, cfg->iomask & ~cfg->gpio); WR4(sc, PIO_PER, cfg->iomask & cfg->gpio); } if (cfg->cfgmask & AT91_GPIO_CFG_PERIPH) { WR4(sc, PIO_ASR, cfg->iomask & ~cfg->periph); WR4(sc, PIO_BSR, cfg->iomask & cfg->periph); } if (cfg->cfgmask & AT91_GPIO_CFG_INTR) { WR4(sc, PIO_IDR, cfg->iomask & ~cfg->intr); WR4(sc, PIO_IER, cfg->iomask & cfg->intr); } return (0); case AT91_GPIO_BANG: bang = (struct at91_gpio_bang *)data; at91_pio_bang32(sc, bang->bits, bang->datapin, bang->clockpin); return (0); case AT91_GPIO_BANG_MANY: bangmany = (struct at91_gpio_bang_many *)data; walker = (uint8_t *)bangmany->bits; bitcount = bangmany->numbits; while (bitcount > 0) { num = MIN((bitcount + 7) / 8, sizeof(many)); err = copyin(walker, many, num); if (err) return err; for (i = 0; i < num && bitcount > 0; i++, bitcount -= 8) if (bitcount >= 8) at91_pio_bang(sc, many[i], 8, bangmany->datapin, bangmany->clockpin); else at91_pio_bang(sc, many[i], bitcount, bangmany->datapin, bangmany->clockpin); walker += num; } return (0); case AT91_GPIO_INFO: /* Learn about this device's AT91_GPIO bits */ info = (struct at91_gpio_info *)data; info->output_status = RD4(sc, PIO_ODSR); info->input_status = RD4(sc, PIO_OSR); info->highz_status = RD4(sc, PIO_MDSR); info->pullup_status = RD4(sc, PIO_PUSR); info->glitch_status = RD4(sc, PIO_IFSR); info->enabled_status = RD4(sc, PIO_PSR); info->periph_status = RD4(sc, PIO_ABSR); info->intr_status = RD4(sc, PIO_IMR); memset(info->extra_status, 0, sizeof(info->extra_status)); return (0); } return (ENOTTY); } /* * The 
following functions are called early in the boot process, so * don't use bus_space, as that isn't yet available when we need to use * them. */ void at91_pio_use_periph_a(uint32_t pio, uint32_t periph_a_mask, int use_pullup) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); PIO[PIO_ASR / 4] = periph_a_mask; PIO[PIO_PDR / 4] = periph_a_mask; if (use_pullup) PIO[PIO_PUER / 4] = periph_a_mask; else PIO[PIO_PUDR / 4] = periph_a_mask; } void at91_pio_use_periph_b(uint32_t pio, uint32_t periph_b_mask, int use_pullup) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); PIO[PIO_BSR / 4] = periph_b_mask; PIO[PIO_PDR / 4] = periph_b_mask; if (use_pullup) PIO[PIO_PUER / 4] = periph_b_mask; else PIO[PIO_PUDR / 4] = periph_b_mask; } void at91_pio_use_gpio(uint32_t pio, uint32_t gpio_mask) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); PIO[PIO_PER / 4] = gpio_mask; } void at91_pio_gpio_input(uint32_t pio, uint32_t input_enable_mask) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); PIO[PIO_ODR / 4] = input_enable_mask; } void at91_pio_gpio_output(uint32_t pio, uint32_t output_enable_mask, int use_pullup) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); PIO[PIO_OER / 4] = output_enable_mask; if (use_pullup) PIO[PIO_PUER / 4] = output_enable_mask; else PIO[PIO_PUDR / 4] = output_enable_mask; } void at91_pio_gpio_high_z(uint32_t pio, uint32_t high_z_mask, int enable) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); if (enable) PIO[PIO_MDER / 4] = high_z_mask; else PIO[PIO_MDDR / 4] = high_z_mask; } void at91_pio_gpio_set(uint32_t pio, uint32_t data_mask) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); PIO[PIO_SODR / 4] = data_mask; } void at91_pio_gpio_clear(uint32_t pio, uint32_t data_mask) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); PIO[PIO_CODR / 4] = data_mask; } uint32_t at91_pio_gpio_get(uint32_t pio, uint32_t data_mask) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); return (PIO[PIO_PDSR / 4] & data_mask); } void at91_pio_gpio_set_deglitch(uint32_t pio, uint32_t data_mask, int use_deglitch) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); if (use_deglitch) PIO[PIO_IFER / 4] = data_mask; else PIO[PIO_IFDR / 4] = data_mask; } void at91_pio_gpio_pullup(uint32_t pio, uint32_t data_mask, int do_pullup) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); if (do_pullup) PIO[PIO_PUER / 4] = data_mask; else PIO[PIO_PUDR / 4] = data_mask; } void at91_pio_gpio_set_interrupt(uint32_t pio, uint32_t data_mask, int enable_interrupt) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); if (enable_interrupt) PIO[PIO_IER / 4] = data_mask; else PIO[PIO_IDR / 4] = data_mask; } uint32_t at91_pio_gpio_clear_interrupt(uint32_t pio) { uint32_t *PIO = (uint32_t *)(AT91_BASE + pio); /* Reading this register will clear the interrupts. 
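 *
 * Like the other helpers in this early-boot group, this pokes the PIO
 * registers directly at AT91_BASE + pio because bus_space is not yet
 * available.  A typical board-setup use of these helpers -- with a purely
 * hypothetical bank offset and pin mask -- configures a pin as a GPIO
 * output without a pull-up and drives it high:
 *
 *	uint32_t pio = 0x600;
 *	uint32_t led = 1u << 9;
 *
 *	at91_pio_use_gpio(pio, led);
 *	at91_pio_gpio_output(pio, led, 0);
 *	at91_pio_gpio_set(pio, led);
 *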
*/ return (PIO[PIO_ISR / 4]); } static void at91_pio_new_pass(device_t dev) { device_printf(dev, "Pass %d\n", bus_current_pass); } static device_method_t at91_pio_methods[] = { /* Device interface */ DEVMETHOD(device_probe, at91_pio_probe), DEVMETHOD(device_attach, at91_pio_attach), DEVMETHOD(device_detach, at91_pio_detach), DEVMETHOD(bus_new_pass, at91_pio_new_pass), DEVMETHOD_END }; static driver_t at91_pio_driver = { "at91_pio", at91_pio_methods, sizeof(struct at91_pio_softc), }; #ifdef FDT EARLY_DRIVER_MODULE(at91_pio, at91_pinctrl, at91_pio_driver, at91_pio_devclass, NULL, NULL, BUS_PASS_INTERRUPT); #else DRIVER_MODULE(at91_pio, atmelarm, at91_pio_driver, at91_pio_devclass, NULL, NULL); #endif Index: head/sys/arm/at91/at91_pmc.c =================================================================== --- head/sys/arm/at91/at91_pmc.c (revision 298054) +++ head/sys/arm/at91/at91_pmc.c (revision 298055) @@ -1,720 +1,720 @@ /*- * Copyright (c) 2006 M. Warner Losh. All rights reserved. * Copyright (c) 2010 Greg Ansley. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif static struct at91_pmc_softc { bus_space_tag_t sc_st; bus_space_handle_t sc_sh; struct resource *mem_res; /* Memory resource */ device_t dev; } *pmc_softc; static uint32_t pllb_init; MALLOC_DECLARE(M_PMC); MALLOC_DEFINE(M_PMC, "at91_pmc_clocks", "AT91 PMC Clock descriptors"); #define AT91_PMC_BASE 0xffffc00 static void at91_pmc_set_pllb_mode(struct at91_pmc_clock *, int); static void at91_pmc_set_upll_mode(struct at91_pmc_clock *, int); static void at91_pmc_set_sys_mode(struct at91_pmc_clock *, int); static void at91_pmc_set_periph_mode(struct at91_pmc_clock *, int); static void at91_pmc_clock_alias(const char *name, const char *alias); static struct at91_pmc_clock slck = { .name = "slck", /* 32,768 Hz slow clock */ .hz = 32768, .refcnt = 1, .id = 0, .primary = 1, }; /* * NOTE: Clocks for "ordinary peripheral" devices e.g. spi0, udp0, uhp0 etc. * are now created automatically. Only "system" clocks need be defined here. 
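 *
 * Peripheral clocks are registered through at91_pmc_clock_add(), defined
 * later in this file; passing a NULL parent makes the new clock a child of
 * mck.  A consumer that needed such a clock explicitly might do, with a
 * hypothetical name and peripheral id:
 *
 *	struct at91_pmc_clock *clk;
 *
 *	clk = at91_pmc_clock_add("foo_clk", 23, NULL);
 *	at91_pmc_clock_enable(clk);
 *	...
 *	at91_pmc_clock_disable(clk);
 *	at91_pmc_clock_deref(clk);
 *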
*/ static struct at91_pmc_clock main_ck = { .name = "main", /* Main clock */ .refcnt = 0, .id = 1, .primary = 1, .pmc_mask = PMC_IER_MOSCS, }; static struct at91_pmc_clock plla = { .name = "plla", /* PLLA Clock, used for CPU clocking */ .parent = &main_ck, .refcnt = 1, .id = 0, .primary = 1, .pll = 1, .pmc_mask = PMC_IER_LOCKA, }; static struct at91_pmc_clock pllb = { .name = "pllb", /* PLLB Clock, used for USB functions */ .parent = &main_ck, .refcnt = 0, .id = 0, .primary = 1, .pll = 1, .pmc_mask = PMC_IER_LOCKB, .set_mode = &at91_pmc_set_pllb_mode, }; /* Used by USB on at91sam9g45 */ static struct at91_pmc_clock upll = { .name = "upll", /* UTMI PLL, used for USB functions on 9G45 family */ .parent = &main_ck, .refcnt = 0, .id = 0, .primary = 1, .pll = 1, .pmc_mask = (1 << 6), .set_mode = &at91_pmc_set_upll_mode, }; static struct at91_pmc_clock udpck = { .name = "udpck", .parent = &pllb, .pmc_mask = PMC_SCER_UDP, .set_mode = at91_pmc_set_sys_mode }; static struct at91_pmc_clock uhpck = { .name = "uhpck", .parent = &pllb, .pmc_mask = PMC_SCER_UHP, .set_mode = at91_pmc_set_sys_mode }; static struct at91_pmc_clock mck = { .name = "mck", /* Master (Peripheral) Clock */ .pmc_mask = PMC_IER_MCKRDY, .refcnt = 0, }; static struct at91_pmc_clock cpu = { .name = "cpu", /* CPU Clock */ .parent = &plla, .pmc_mask = PMC_SCER_PCK, .refcnt = 0, }; /* "+32" or the automatic peripheral clocks */ static struct at91_pmc_clock *clock_list[16+32] = { &slck, &main_ck, &plla, &pllb, &upll, &udpck, &uhpck, &mck, &cpu }; static inline uint32_t RD4(struct at91_pmc_softc *sc, bus_size_t off) { if (sc == NULL) { uint32_t *p = (uint32_t *)(AT91_BASE + AT91_PMC_BASE + off); return *p; } return (bus_read_4(sc->mem_res, off)); } static inline void WR4(struct at91_pmc_softc *sc, bus_size_t off, uint32_t val) { if (sc == NULL) { uint32_t *p = (uint32_t *)(AT91_BASE + AT91_PMC_BASE + off); *p = val; } else bus_write_4(sc->mem_res, off, val); } /* * The following is unused currently since we don't ever set the PLLA * frequency of the device. If we did, we'd have to also pay attention * to the ICPLLA bit in the PMC_PLLICPR register for frequencies lower * than ~600MHz, which the PMC code doesn't do right now. */ uint32_t at91_pmc_800mhz_plla_outb(int freq) { uint32_t outa; /* * Set OUTA, per the data sheet. See Table 46-16 titled * PLLA Frequency Regarding ICPLLA and OUTA in the SAM9X25 doc, * Table 46-17 in the SAM9G20 doc, or Table 46-16 in the SAM9G45 doc. * Note: the frequencies overlap by 5MHz, so we add 3 here to * center shoot the transition. */ freq /= 1000000; /* MHz */ if (freq >= 800) freq = 800; freq += 3; /* Allow for overlap. */ outa = 3 - ((freq / 50) & 3); /* 750 / 50 = 7, see table */ return (1 << 29)| (outa << 14); } uint32_t at91_pmc_800mhz_pllb_outb(int freq) { return (0); } void at91_pmc_set_pllb_mode(struct at91_pmc_clock *clk, int on) { struct at91_pmc_softc *sc = pmc_softc; uint32_t value; value = on ? pllb_init : 0; /* * Only write to the register if the value is changing. Besides being * good common sense, this works around RM9200 Errata #26 (CKGR_PLL[AB]R * must not be written with the same value currently in the register). 
*/ if (RD4(sc, CKGR_PLLBR) != value) { WR4(sc, CKGR_PLLBR, value); while (on && (RD4(sc, PMC_SR) & PMC_IER_LOCKB) != PMC_IER_LOCKB) continue; } } static void at91_pmc_set_upll_mode(struct at91_pmc_clock *clk, int on) { struct at91_pmc_softc *sc = pmc_softc; uint32_t value; if (on) { on = PMC_IER_LOCKU; value = CKGR_UCKR_UPLLEN | CKGR_UCKR_BIASEN; } else value = 0; WR4(sc, CKGR_UCKR, RD4(sc, CKGR_UCKR) | value); while ((RD4(sc, PMC_SR) & PMC_IER_LOCKU) != on) continue; WR4(sc, PMC_USB, PMC_USB_USBDIV(9) | PMC_USB_USBS); WR4(sc, PMC_SCER, PMC_SCER_UHP_SAM9); } static void at91_pmc_set_sys_mode(struct at91_pmc_clock *clk, int on) { struct at91_pmc_softc *sc = pmc_softc; WR4(sc, on ? PMC_SCER : PMC_SCDR, clk->pmc_mask); if (on) while ((RD4(sc, PMC_SCSR) & clk->pmc_mask) != clk->pmc_mask) continue; else while ((RD4(sc, PMC_SCSR) & clk->pmc_mask) == clk->pmc_mask) continue; } static void at91_pmc_set_periph_mode(struct at91_pmc_clock *clk, int on) { struct at91_pmc_softc *sc = pmc_softc; WR4(sc, on ? PMC_PCER : PMC_PCDR, clk->pmc_mask); if (on) while ((RD4(sc, PMC_PCSR) & clk->pmc_mask) != clk->pmc_mask) continue; else while ((RD4(sc, PMC_PCSR) & clk->pmc_mask) == clk->pmc_mask) continue; } struct at91_pmc_clock * at91_pmc_clock_add(const char *name, uint32_t irq, struct at91_pmc_clock *parent) { struct at91_pmc_clock *clk; int i, buflen; clk = malloc(sizeof(*clk), M_PMC, M_NOWAIT | M_ZERO); if (clk == NULL) goto err; buflen = strlen(name) + 1; clk->name = malloc(buflen, M_PMC, M_NOWAIT); if (clk->name == NULL) goto err; strlcpy(clk->name, name, buflen); clk->pmc_mask = 1 << irq; clk->set_mode = &at91_pmc_set_periph_mode; if (parent == NULL) clk->parent = &mck; else clk->parent = parent; for (i = 0; i < sizeof(clock_list) / sizeof(clock_list[0]); i++) { if (clock_list[i] == NULL) { clock_list[i] = clk; return (clk); } } err: if (clk != NULL) { if (clk->name != NULL) free(clk->name, M_PMC); free(clk, M_PMC); } panic("could not allocate pmc clock '%s'", name); return (NULL); } static void at91_pmc_clock_alias(const char *name, const char *alias) { struct at91_pmc_clock *clk, *alias_clk; clk = at91_pmc_clock_ref(name); if (clk) alias_clk = at91_pmc_clock_add(alias, 0, clk->parent); if (clk && alias_clk) { alias_clk->hz = clk->hz; alias_clk->pmc_mask = clk->pmc_mask; alias_clk->set_mode = clk->set_mode; } } struct at91_pmc_clock * at91_pmc_clock_ref(const char *name) { int i; for (i = 0; i < sizeof(clock_list) / sizeof(clock_list[0]); i++) { if (clock_list[i] == NULL) break; if (strcmp(name, clock_list[i]->name) == 0) return (clock_list[i]); } return (NULL); } void at91_pmc_clock_deref(struct at91_pmc_clock *clk) { if (clk == NULL) return; } void at91_pmc_clock_enable(struct at91_pmc_clock *clk) { if (clk == NULL) return; /* XXX LOCKING? XXX */ if (clk->parent) at91_pmc_clock_enable(clk->parent); if (clk->refcnt++ == 0 && clk->set_mode) clk->set_mode(clk, 1); } void at91_pmc_clock_disable(struct at91_pmc_clock *clk) { if (clk == NULL) return; /* XXX LOCKING? 
XXX */ if (--clk->refcnt == 0 && clk->set_mode) clk->set_mode(clk, 0); if (clk->parent) at91_pmc_clock_disable(clk->parent); } static int at91_pmc_pll_rate(struct at91_pmc_clock *clk, uint32_t reg) { uint32_t mul, div, freq; freq = clk->parent->hz; div = (reg >> clk->pll_div_shift) & clk->pll_div_mask; mul = (reg >> clk->pll_mul_shift) & clk->pll_mul_mask; #if 0 printf("pll = (%d / %d) * %d = %d\n", freq, div, mul + 1, (freq/div) * (mul+1)); #endif if (div != 0 && mul != 0) { freq /= div; freq *= mul + 1; } else freq = 0; clk->hz = freq; return (freq); } static uint32_t at91_pmc_pll_calc(struct at91_pmc_clock *clk, uint32_t out_freq) { uint32_t i, div = 0, mul = 0, diff = 1 << 30; unsigned ret = 0x3e00; if (out_freq > clk->pll_max_out) goto fail; for (i = 1; i < 256; i++) { int32_t diff1; uint32_t input, mul1; input = clk->parent->hz / i; if (input < clk->pll_min_in) break; if (input > clk->pll_max_in) continue; mul1 = out_freq / input; if (mul1 > (clk->pll_mul_mask + 1)) continue; if (mul1 == 0) break; diff1 = out_freq - input * mul1; if (diff1 < 0) diff1 = -diff1; if (diff > diff1) { diff = diff1; div = i; mul = mul1; if (diff == 0) break; } } if (diff > (out_freq >> PMC_PLL_SHIFT_TOL)) goto fail; if (clk->set_outb != NULL) ret |= clk->set_outb(out_freq); return (ret | ((mul - 1) << clk->pll_mul_shift) | (div << clk->pll_div_shift)); fail: return (0); } #if !defined(AT91C_MAIN_CLOCK) static const unsigned int at91_main_clock_tbl[] = { 3000000, 3276800, 3686400, 3840000, 4000000, 4433619, 4915200, 5000000, 5242880, 6000000, 6144000, 6400000, 6553600, 7159090, 7372800, 7864320, 8000000, 9830400, 10000000, 11059200, 12000000, 12288000, 13560000, 14318180, 14745600, 16000000, 17344700, 18432000, 20000000 }; #define MAIN_CLOCK_TBL_LEN (sizeof(at91_main_clock_tbl) / sizeof(*at91_main_clock_tbl)) #endif static unsigned int at91_pmc_sense_main_clock(void) { #if !defined(AT91C_MAIN_CLOCK) unsigned int ckgr_val; unsigned int diff, matchdiff, freq; int i; ckgr_val = (RD4(NULL, CKGR_MCFR) & CKGR_MCFR_MAINF_MASK) << 11; /* * Clocks up to 50MHz can be connected to some models. If * the frequency is >= 21MHz, assume that the slow clock can * measure it correctly, and that any error can be adequately * compensated for by roudning to the nearest 500Hz. Users * with fast, or odd-ball clocks will need to set * AT91C_MAIN_CLOCK in the kernel config file. */ if (ckgr_val >= 21000000) return ((ckgr_val + 250) / 500 * 500); /* * Try to find the standard frequency that match best. */ freq = at91_main_clock_tbl[0]; matchdiff = abs(ckgr_val - at91_main_clock_tbl[0]); for (i = 1; i < MAIN_CLOCK_TBL_LEN; i++) { diff = abs(ckgr_val - at91_main_clock_tbl[i]); if (diff < matchdiff) { freq = at91_main_clock_tbl[i]; matchdiff = diff; } } return (freq); #else return (AT91C_MAIN_CLOCK); #endif } void at91_pmc_init_clock(void) { struct at91_pmc_softc *sc = NULL; unsigned int main_clock; uint32_t mckr; uint32_t mdiv; soc_info.soc_data->soc_clock_init(); main_clock = at91_pmc_sense_main_clock(); if (at91_is_sam9() || at91_is_sam9xe()) { uhpck.pmc_mask = PMC_SCER_UHP_SAM9; udpck.pmc_mask = PMC_SCER_UDP_SAM9; } /* There is no pllb on AT91SAM9G45 */ if (at91_cpu_is(AT91_T_SAM9G45)) { uhpck.parent = &upll; uhpck.pmc_mask = PMC_SCER_UHP_SAM9; } mckr = RD4(sc, PMC_MCKR); main_ck.hz = main_clock; /* * Note: this means outa calc code for plla never used since * we never change it. If we did, we'd also have to mind * ICPLLA to get the charge pump current right. 
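 * For reference, at91_pmc_pll_rate() above just decodes DIV/MUL from the
 * register and applies hz = (parent / DIV) * (MUL + 1).  For example, with
 * an 18.432 MHz main crystal and a boot loader that programmed DIV = 9 and
 * MUL = 96 (values are illustrative of a SAM9-class part):
 *
 *	18432000 / 9 = 2048000;  2048000 * (96 + 1) = 198656000
 *
 * i.e. a PLLA of roughly 198.7 MHz, which the MCK/CPU divider logic below
 * then scales down.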
*/ at91_pmc_pll_rate(&plla, RD4(sc, CKGR_PLLAR)); if (at91_cpu_is(AT91_T_SAM9G45) && (mckr & PMC_MCKR_PLLADIV2)) plla.hz /= 2; /* * Initialize the usb clock. This sets up pllb, but disables the * actual clock. XXX except for the if 0 :( */ if (!at91_cpu_is(AT91_T_SAM9G45)) { pllb_init = at91_pmc_pll_calc(&pllb, 48000000 * 2) | 0x10000000; at91_pmc_pll_rate(&pllb, pllb_init); #if 0 /* Turn off USB clocks */ at91_pmc_set_periph_mode(&ohci_clk, 0); at91_pmc_set_periph_mode(&udc_clk, 0); #endif } if (at91_is_rm92()) { WR4(sc, PMC_SCDR, PMC_SCER_UHP | PMC_SCER_UDP); WR4(sc, PMC_SCER, PMC_SCER_MCKUDP); } else WR4(sc, PMC_SCDR, PMC_SCER_UHP_SAM9 | PMC_SCER_UDP_SAM9); /* * MCK and CPU derive from one of the primary clocks. Initialize * this relationship. */ mck.parent = clock_list[mckr & 0x3]; mck.parent->refcnt++; cpu.hz = mck.hz = mck.parent->hz / (1 << ((mckr & PMC_MCKR_PRES_MASK) >> 2)); mdiv = (mckr & PMC_MCKR_MDIV_MASK) >> 8; if (at91_is_sam9() || at91_is_sam9xe()) { /* * On AT91SAM9G45 when mdiv == 3 we need to divide * MCK by 3 but not, for example, on 9g20. */ if (!at91_cpu_is(AT91_T_SAM9G45) || mdiv <= 2) mdiv *= 2; if (mdiv > 0) mck.hz /= mdiv; } else mck.hz /= (1 + mdiv); /* Only found on SAM9G20 */ if (at91_cpu_is(AT91_T_SAM9G20)) cpu.hz /= (mckr & PMC_MCKR_PDIV) ? 2 : 1; at91_master_clock = mck.hz; /* These clocks are referenced by "special" names */ at91_pmc_clock_alias("ohci0", "ohci_clk"); at91_pmc_clock_alias("udp0", "udp_clk"); /* Turn off "Programmable" clocks */ WR4(sc, PMC_SCDR, PMC_SCER_PCK0 | PMC_SCER_PCK1 | PMC_SCER_PCK2 | PMC_SCER_PCK3); /* XXX kludge, turn on all peripherals */ WR4(sc, PMC_PCER, 0xffffffff); /* Disable all interrupts for PMC */ WR4(sc, PMC_IDR, 0xffffffff); } static void at91_pmc_deactivate(device_t dev) { struct at91_pmc_softc *sc; sc = device_get_softc(dev); bus_generic_detach(sc->dev); if (sc->mem_res) bus_release_resource(dev, SYS_RES_IOPORT, rman_get_rid(sc->mem_res), sc->mem_res); - sc->mem_res = 0; + sc->mem_res = NULL; } static int at91_pmc_activate(device_t dev) { struct at91_pmc_softc *sc; int rid; sc = device_get_softc(dev); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) goto errout; return (0); errout: at91_pmc_deactivate(dev); return (ENOMEM); } static int at91_pmc_probe(device_t dev) { #ifdef FDT if (!ofw_bus_is_compatible(dev, "atmel,at91rm9200-pmc") && !ofw_bus_is_compatible(dev, "atmel,at91sam9260-pmc") && !ofw_bus_is_compatible(dev, "atmel,at91sam9g45-pmc") && !ofw_bus_is_compatible(dev, "atmel,at91sam9x5-pmc")) return (ENXIO); #endif device_set_desc(dev, "PMC"); return (0); } static int at91_pmc_attach(device_t dev) { int err; pmc_softc = device_get_softc(dev); pmc_softc->dev = dev; if ((err = at91_pmc_activate(dev)) != 0) return (err); /* * Configure main clock frequency.
*/ at91_pmc_init_clock(); /* * Display info about clocks previously computed */ device_printf(dev, "Primary: %d Hz PLLA: %d MHz CPU: %d MHz MCK: %d MHz\n", main_ck.hz, plla.hz / 1000000, cpu.hz / 1000000, mck.hz / 1000000); return (0); } static device_method_t at91_pmc_methods[] = { DEVMETHOD(device_probe, at91_pmc_probe), DEVMETHOD(device_attach, at91_pmc_attach), DEVMETHOD_END }; static driver_t at91_pmc_driver = { "at91_pmc", at91_pmc_methods, sizeof(struct at91_pmc_softc), }; static devclass_t at91_pmc_devclass; #ifdef FDT EARLY_DRIVER_MODULE(at91_pmc, simplebus, at91_pmc_driver, at91_pmc_devclass, NULL, NULL, BUS_PASS_CPU); #else EARLY_DRIVER_MODULE(at91_pmc, atmelarm, at91_pmc_driver, at91_pmc_devclass, NULL, NULL, BUS_PASS_CPU); #endif Index: head/sys/arm/at91/at91_rtc.c =================================================================== --- head/sys/arm/at91/at91_rtc.c (revision 298054) +++ head/sys/arm/at91/at91_rtc.c (revision 298055) @@ -1,364 +1,364 @@ /*- * Copyright (c) 2006 M. Warner Losh. All rights reserved. * Copyright (c) 2012 Ian Lepore. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Driver for the at91 on-chip realtime clock. * * This driver does not currently support alarms, just date and time. * * The RTC on the AT91RM9200 resets when the core rests, so it is useless as a * source of time (except when the CPU clock is powered down to save power, * which we don't currently do). On AT91SAM9 chips, the RTC survives chip * reset, and there's provisions for it to keep time via battery backup if the * system loses power. On those systems, we use it as a RTC. We tell the two * apart because the century field is 19 on AT91RM9200 on reset, or on AT91SAM9 * chips that haven't had their time properly set. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "clock_if.h" /* * The driver has all the infrastructure to use interrupts but doesn't actually * have any need to do so right now. There's a non-zero cost for installing the * handler because the RTC shares the system interrupt (IRQ 1), and thus will * get called a lot for no reason at all. 
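 * To experiment with the interrupt path anyway, change the define below to
 * AT91_RTC_USE_INTERRUPTS (i.e. drop the _NOT suffix); the
 * AT91_RTC_USE_INTERRUPTS #ifdef blocks in this file then allocate the IRQ
 * resource and install/tear down the filter handler.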
*/ #define AT91_RTC_USE_INTERRUPTS_NOT struct at91_rtc_softc { device_t dev; /* Myself */ void *intrhand; /* Interrupt handle */ struct resource *irq_res; /* IRQ resource */ struct resource *mem_res; /* Memory resource */ struct mtx sc_mtx; /* basically a perimeter lock */ }; static inline uint32_t RD4(struct at91_rtc_softc *sc, bus_size_t off) { return bus_read_4(sc->mem_res, off); } static inline void WR4(struct at91_rtc_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } #define AT91_RTC_LOCK(_sc) mtx_lock_spin(&(_sc)->sc_mtx) #define AT91_RTC_UNLOCK(_sc) mtx_unlock_spin(&(_sc)->sc_mtx) #define AT91_RTC_LOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ "rtc", MTX_SPIN) #define AT91_RTC_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); #define AT91_RTC_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); #define AT91_RTC_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); static devclass_t at91_rtc_devclass; /* bus entry points */ static int at91_rtc_probe(device_t dev); static int at91_rtc_attach(device_t dev); static int at91_rtc_detach(device_t dev); /* helper routines */ static int at91_rtc_activate(device_t dev); static void at91_rtc_deactivate(device_t dev); #ifdef AT91_RTC_USE_INTERRUPTS static int at91_rtc_intr(void *xsc) { struct at91_rtc_softc *sc; uint32_t status; sc = xsc; /* Must clear the status bits after reading them to re-arm. */ status = RD4(sc, RTC_SR); WR4(sc, RTC_SCCR, status); if (status == 0) return; AT91_RTC_LOCK(sc); /* Do something here */ AT91_RTC_UNLOCK(sc); wakeup(sc); return (FILTER_HANDLED); } #endif static int at91_rtc_probe(device_t dev) { device_set_desc(dev, "RTC"); return (0); } static int at91_rtc_attach(device_t dev) { struct at91_rtc_softc *sc = device_get_softc(dev); int err; sc->dev = dev; err = at91_rtc_activate(dev); if (err) goto out; AT91_RTC_LOCK_INIT(sc); /* * Disable all interrupts in the hardware. * Clear all bits in the status register. * Set 24-hour-clock mode. */ WR4(sc, RTC_IDR, 0xffffffff); WR4(sc, RTC_SCCR, 0x1f); WR4(sc, RTC_MR, 0); #ifdef AT91_RTC_USE_INTERRUPTS err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC, at91_rtc_intr, NULL, sc, &sc->intrhand); if (err) { AT91_RTC_LOCK_DESTROY(sc); goto out; } #endif /* * Read the calendar register. If the century is 19 then the clock has * never been set. Try to store an invalid value into the register, * which will turn on the error bit in RTC_VER, and our getclock code * knows to return EINVAL if any error bits are on. */ if (RTC_CALR_CEN(RD4(sc, RTC_CALR)) == 19) WR4(sc, RTC_CALR, 0); /* * Register as a time of day clock with 1-second resolution. */ clock_register(dev, 1000000); out: if (err) at91_rtc_deactivate(dev); return (err); } /* * Cannot support detach, since there's no clock_unregister function. 
*/ static int at91_rtc_detach(device_t dev) { return (EBUSY); } static int at91_rtc_activate(device_t dev) { struct at91_rtc_softc *sc; int rid; sc = device_get_softc(dev); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) goto errout; #ifdef AT91_RTC_USE_INTERRUPTS rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | RF_SHAREABLE); if (sc->irq_res == NULL) goto errout; #endif return (0); errout: at91_rtc_deactivate(dev); return (ENOMEM); } static void at91_rtc_deactivate(device_t dev) { struct at91_rtc_softc *sc; sc = device_get_softc(dev); #ifdef AT91_RTC_USE_INTERRUPTS WR4(sc, RTC_IDR, 0xffffffff); if (sc->intrhand) bus_teardown_intr(dev, sc->irq_res, sc->intrhand); - sc->intrhand = 0; + sc->intrhand = NULL; #endif bus_generic_detach(sc->dev); if (sc->mem_res) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem_res), sc->mem_res); - sc->mem_res = 0; + sc->mem_res = NULL; #ifdef AT91_RTC_USE_INTERRUPTS if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); - sc->irq_res = 0; + sc->irq_res = NULL; #endif return; } /* * Get the time of day clock and return it in ts. * Return 0 on success, an error number otherwise. */ static int at91_rtc_gettime(device_t dev, struct timespec *ts) { struct clocktime ct; uint32_t calr, calr2, timr, timr2; struct at91_rtc_softc *sc; sc = device_get_softc(dev); /* If the error bits are set we can't return useful values. */ if (RD4(sc, RTC_VER) & (RTC_VER_NVTIM | RTC_VER_NVCAL)) return EINVAL; /* * The RTC hardware can update registers while the CPU is reading them. * The manual advises reading until you obtain the same values twice. * Interleaving the reads (rather than timr, timr2, calr, calr2 order) * also ensures we don't miss a midnight rollover/carry between reads. */ do { timr = RD4(sc, RTC_TIMR); calr = RD4(sc, RTC_CALR); timr2 = RD4(sc, RTC_TIMR); calr2 = RD4(sc, RTC_CALR); } while (timr != timr2 || calr != calr2); ct.nsec = 0; ct.sec = RTC_TIMR_SEC(timr); ct.min = RTC_TIMR_MIN(timr); ct.hour = RTC_TIMR_HR(timr); ct.year = RTC_CALR_CEN(calr) * 100 + RTC_CALR_YEAR(calr); ct.mon = RTC_CALR_MON(calr); ct.day = RTC_CALR_DAY(calr); ct.dow = -1; return clock_ct_to_ts(&ct, ts); } /* * Set the time of day clock based on the value of the struct timespec arg. * Return 0 on success, an error number otherwise. */ static int at91_rtc_settime(device_t dev, struct timespec *ts) { struct at91_rtc_softc *sc; struct clocktime ct; int rv; sc = device_get_softc(dev); clock_ts_to_ct(ts, &ct); /* * Can't set the clock unless a second has elapsed since we last did so. */ while ((RD4(sc, RTC_SR) & RTC_SR_SECEV) == 0) cpu_spinwait(); /* * Stop the clocks for an update; wait until hardware is ready. * Clear the update-ready status after it gets asserted (the manual says * to do this before updating the value registers). */ WR4(sc, RTC_CR, RTC_CR_UPDCAL | RTC_CR_UPDTIM); while ((RD4(sc, RTC_SR) & RTC_SR_ACKUPD) == 0) cpu_spinwait(); WR4(sc, RTC_SCCR, RTC_SR_ACKUPD); /* * Set the values in the hardware, then check whether the hardware was * happy with them so we can return the correct status. */ WR4(sc, RTC_TIMR, RTC_TIMR_MK(ct.hour, ct.min, ct.sec)); WR4(sc, RTC_CALR, RTC_CALR_MK(ct.year, ct.mon, ct.day, ct.dow+1)); if (RD4(sc, RTC_VER) & (RTC_VER_NVTIM | RTC_VER_NVCAL)) rv = EINVAL; else rv = 0; /* * Restart the clocks (turn off the update bits). * Clear the second-event bit (because the manual says to). 
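 * (The full update protocol above is: wait for SECEV, request the update
 * with UPDCAL|UPDTIM, wait for ACKUPD and acknowledge it, write TIMR/CALR
 * and check RTC_VER for validity, then clear the update bits again and
 * acknowledge SECEV.)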
*/ WR4(sc, RTC_CR, RD4(sc, RTC_CR) & ~(RTC_CR_UPDCAL | RTC_CR_UPDTIM)); WR4(sc, RTC_SCCR, RTC_SR_SECEV); return (0); } static device_method_t at91_rtc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, at91_rtc_probe), DEVMETHOD(device_attach, at91_rtc_attach), DEVMETHOD(device_detach, at91_rtc_detach), /* clock interface */ DEVMETHOD(clock_gettime, at91_rtc_gettime), DEVMETHOD(clock_settime, at91_rtc_settime), DEVMETHOD_END }; static driver_t at91_rtc_driver = { "at91_rtc", at91_rtc_methods, sizeof(struct at91_rtc_softc), }; DRIVER_MODULE(at91_rtc, atmelarm, at91_rtc_driver, at91_rtc_devclass, 0, 0); Index: head/sys/arm/at91/at91_ssc.c =================================================================== --- head/sys/arm/at91/at91_ssc.c (revision 298054) +++ head/sys/arm/at91/at91_ssc.c (revision 298055) @@ -1,270 +1,270 @@ /*- * Copyright (c) 2006 M. Warner Losh. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include struct at91_ssc_softc { device_t dev; /* Myself */ void *intrhand; /* Interrupt handle */ struct resource *irq_res; /* IRQ resource */ struct resource *mem_res; /* Memory resource */ struct mtx sc_mtx; /* basically a perimeter lock */ struct cdev *cdev; int flags; #define OPENED 1 }; static inline uint32_t RD4(struct at91_ssc_softc *sc, bus_size_t off) { return bus_read_4(sc->mem_res, off); } static inline void WR4(struct at91_ssc_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } #define AT91_SSC_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define AT91_SSC_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define AT91_SSC_LOCK_INIT(_sc) \ mtx_init(&(_sc)->sc_mtx, device_get_nameunit((_sc)->dev), \ "ssc", MTX_DEF) #define AT91_SSC_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx); #define AT91_SSC_ASSERT_LOCKED(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED); #define AT91_SSC_ASSERT_UNLOCKED(_sc) mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED); #define CDEV2SOFTC(dev) ((dev)->si_drv1) static devclass_t at91_ssc_devclass; /* bus entry points */ static int at91_ssc_probe(device_t dev); static int at91_ssc_attach(device_t dev); static int at91_ssc_detach(device_t dev); static void at91_ssc_intr(void *); /* helper routines */ static int at91_ssc_activate(device_t dev); static void at91_ssc_deactivate(device_t dev); /* cdev routines */ static d_open_t at91_ssc_open; static d_close_t at91_ssc_close; static d_read_t at91_ssc_read; static d_write_t at91_ssc_write; static struct cdevsw at91_ssc_cdevsw = { .d_version = D_VERSION, .d_open = at91_ssc_open, .d_close = at91_ssc_close, .d_read = at91_ssc_read, .d_write = at91_ssc_write, }; static int at91_ssc_probe(device_t dev) { device_set_desc(dev, "SSC"); return (0); } static int at91_ssc_attach(device_t dev) { struct at91_ssc_softc *sc = device_get_softc(dev); int err; sc->dev = dev; err = at91_ssc_activate(dev); if (err) goto out; AT91_SSC_LOCK_INIT(sc); /* * Activate the interrupt */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, at91_ssc_intr, sc, &sc->intrhand); if (err) { AT91_SSC_LOCK_DESTROY(sc); goto out; } sc->cdev = make_dev(&at91_ssc_cdevsw, device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "ssc%d", device_get_unit(dev)); if (sc->cdev == NULL) { err = ENOMEM; goto out; } sc->cdev->si_drv1 = sc; // Init for TSC needs WR4(sc, SSC_CR, SSC_CR_SWRST); WR4(sc, SSC_CMR, 0); // clock divider unused WR4(sc, SSC_RCMR, SSC_RCMR_CKS_RK | SSC_RCMR_CKO_NONE | SSC_RCMR_START_FALL_EDGE_RF); WR4(sc, SSC_RFMR, 0x1f | SSC_RFMR_MSFBF | SSC_RFMR_FSOS_NONE); WR4(sc, SSC_TCMR, SSC_TCMR_CKS_TK | SSC_TCMR_CKO_NONE | SSC_RCMR_START_CONT); WR4(sc, SSC_TFMR, 0x1f | SSC_TFMR_DATDEF | SSC_TFMR_MSFBF | SSC_TFMR_FSOS_NEG_PULSE); out: if (err) at91_ssc_deactivate(dev); return (err); } static int at91_ssc_detach(device_t dev) { return (EBUSY); /* XXX */ } static int at91_ssc_activate(device_t dev) { struct at91_ssc_softc *sc; int rid; sc = device_get_softc(dev); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) goto errout; rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res == NULL) goto errout; return (0); errout: at91_ssc_deactivate(dev); return (ENOMEM); } static void at91_ssc_deactivate(device_t dev) { struct at91_ssc_softc *sc; sc = device_get_softc(dev); if (sc->intrhand) bus_teardown_intr(dev, sc->irq_res, 
sc->intrhand); - sc->intrhand = 0; + sc->intrhand = NULL; bus_generic_detach(sc->dev); if (sc->mem_res) bus_release_resource(dev, SYS_RES_IOPORT, rman_get_rid(sc->mem_res), sc->mem_res); - sc->mem_res = 0; + sc->mem_res = NULL; if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); - sc->irq_res = 0; + sc->irq_res = NULL; return; } static void at91_ssc_intr(void *xsc) { struct at91_ssc_softc *sc = xsc; wakeup(sc); return; } static int at91_ssc_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct at91_ssc_softc *sc; sc = CDEV2SOFTC(dev); AT91_SSC_LOCK(sc); if (!(sc->flags & OPENED)) { sc->flags |= OPENED; } AT91_SSC_UNLOCK(sc); return (0); } static int at91_ssc_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { struct at91_ssc_softc *sc; sc = CDEV2SOFTC(dev); AT91_SSC_LOCK(sc); sc->flags &= ~OPENED; AT91_SSC_UNLOCK(sc); return (0); } static int at91_ssc_read(struct cdev *dev, struct uio *uio, int flag) { return EIO; } static int at91_ssc_write(struct cdev *dev, struct uio *uio, int flag) { return EIO; } static device_method_t at91_ssc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, at91_ssc_probe), DEVMETHOD(device_attach, at91_ssc_attach), DEVMETHOD(device_detach, at91_ssc_detach), { 0, 0 } }; static driver_t at91_ssc_driver = { "at91_ssc", at91_ssc_methods, sizeof(struct at91_ssc_softc), }; DRIVER_MODULE(at91_ssc, atmelarm, at91_ssc_driver, at91_ssc_devclass, 0, 0); Index: head/sys/arm/at91/at91_twi.c =================================================================== --- head/sys/arm/at91/at91_twi.c (revision 298054) +++ head/sys/arm/at91/at91_twi.c (revision 298055) @@ -1,429 +1,429 @@ /*- * Copyright (c) 2006 M. Warner Losh. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iicbus_if.h" #ifdef FDT #include #include #include #endif #define TWI_SLOW_CLOCK 1500 #define TWI_FAST_CLOCK 45000 #define TWI_FASTEST_CLOCK 90000 struct at91_twi_softc { device_t dev; /* Myself */ void *intrhand; /* Interrupt handle */ struct resource *irq_res; /* IRQ resource */ struct resource *mem_res; /* Memory resource */ struct mtx sc_mtx; /* basically a perimeter lock */ volatile uint32_t flags; uint32_t cwgr; int sc_started; int twi_addr; device_t iicbus; }; static inline uint32_t RD4(struct at91_twi_softc *sc, bus_size_t off) { return bus_read_4(sc->mem_res, off); } static inline void WR4(struct at91_twi_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } #define AT91_TWI_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define AT91_TWI_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define AT91_TWI_LOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ "twi", MTX_DEF) #define AT91_TWI_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); #define AT91_TWI_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); #define AT91_TWI_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); #define TWI_DEF_CLK 100000 static devclass_t at91_twi_devclass; /* bus entry points */ static int at91_twi_probe(device_t dev); static int at91_twi_attach(device_t dev); static int at91_twi_detach(device_t dev); static void at91_twi_intr(void *); /* helper routines */ static int at91_twi_activate(device_t dev); static void at91_twi_deactivate(device_t dev); static int at91_twi_probe(device_t dev) { #ifdef FDT /* XXXX need a whole list, since there's at least 4 different ones */ if (!ofw_bus_is_compatible(dev, "atmel,at91sam9g20-i2c")) return (ENXIO); #endif device_set_desc(dev, "TWI"); return (0); } static int at91_twi_attach(device_t dev) { struct at91_twi_softc *sc = device_get_softc(dev); int err; sc->dev = dev; err = at91_twi_activate(dev); if (err) goto out; AT91_TWI_LOCK_INIT(sc); #ifdef FDT /* * Disable devices need to hold their resources, so return now and not attach * the iicbus, setup interrupt handlers, etc. 
*/ if (!ofw_bus_status_okay(dev)) return 0; #endif /* * Activate the interrupt */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, at91_twi_intr, sc, &sc->intrhand); if (err) { AT91_TWI_LOCK_DESTROY(sc); goto out; } sc->cwgr = TWI_CWGR_CKDIV(8 * at91_master_clock / TWI_FASTEST_CLOCK) | TWI_CWGR_CHDIV(TWI_CWGR_DIV(TWI_DEF_CLK)) | TWI_CWGR_CLDIV(TWI_CWGR_DIV(TWI_DEF_CLK)); WR4(sc, TWI_CR, TWI_CR_SWRST); WR4(sc, TWI_CR, TWI_CR_MSEN | TWI_CR_SVDIS); WR4(sc, TWI_CWGR, sc->cwgr); if ((sc->iicbus = device_add_child(dev, "iicbus", -1)) == NULL) device_printf(dev, "could not allocate iicbus instance\n"); /* probe and attach the iicbus */ bus_generic_attach(dev); out: if (err) at91_twi_deactivate(dev); return (err); } static int at91_twi_detach(device_t dev) { struct at91_twi_softc *sc; int rv; sc = device_get_softc(dev); at91_twi_deactivate(dev); if (sc->iicbus && (rv = device_delete_child(dev, sc->iicbus)) != 0) return (rv); AT91_TWI_LOCK_DESTROY(sc); return (0); } static int at91_twi_activate(device_t dev) { struct at91_twi_softc *sc; int rid; sc = device_get_softc(dev); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) goto errout; rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res == NULL) goto errout; return (0); errout: at91_twi_deactivate(dev); return (ENOMEM); } static void at91_twi_deactivate(device_t dev) { struct at91_twi_softc *sc; sc = device_get_softc(dev); if (sc->intrhand) bus_teardown_intr(dev, sc->irq_res, sc->intrhand); - sc->intrhand = 0; + sc->intrhand = NULL; bus_generic_detach(sc->dev); if (sc->mem_res) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem_res), sc->mem_res); - sc->mem_res = 0; + sc->mem_res = NULL; if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); - sc->irq_res = 0; + sc->irq_res = NULL; return; } static void at91_twi_intr(void *xsc) { struct at91_twi_softc *sc = xsc; uint32_t status; status = RD4(sc, TWI_SR); if (status == 0) return; AT91_TWI_LOCK(sc); sc->flags |= status & (TWI_SR_OVRE | TWI_SR_UNRE | TWI_SR_NACK); if (status & TWI_SR_RXRDY) sc->flags |= TWI_SR_RXRDY; if (status & TWI_SR_TXRDY) sc->flags |= TWI_SR_TXRDY; if (status & TWI_SR_TXCOMP) sc->flags |= TWI_SR_TXCOMP; WR4(sc, TWI_IDR, status); wakeup(sc); AT91_TWI_UNLOCK(sc); return; } static int at91_twi_wait(struct at91_twi_softc *sc, uint32_t bit) { int err = 0; int counter = 100000; uint32_t sr; AT91_TWI_ASSERT_LOCKED(sc); while (!((sr = RD4(sc, TWI_SR)) & bit) && counter-- > 0 && !(sr & TWI_SR_NACK)) continue; if (counter <= 0) err = EBUSY; else if (sr & TWI_SR_NACK) err = ENXIO; // iic nack convention return (err); } static int at91_twi_rst_card(device_t dev, u_char speed, u_char addr, u_char *oldaddr) { struct at91_twi_softc *sc; int clk; sc = device_get_softc(dev); AT91_TWI_LOCK(sc); if (oldaddr) *oldaddr = sc->twi_addr; sc->twi_addr = addr; /* * speeds are for 1.5kb/s, 45kb/s and 90kb/s. 
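 * (IIC_SLOW, IIC_FAST and IIC_FASTEST respectively; anything else falls
 * back to the fastest setting in the switch below.)
 *
 * For context, a client on the attached iicbus hands requests to
 * at91_twi_transfer() further below as standard iic_msg arrays; a rough,
 * hypothetical register read (the 0x50 slave address and register number
 * are made up) looks like:
 *
 *	uint8_t reg = 0x02, val;
 *	struct iic_msg msgs[2] = {
 *		{ 0x50 << 1, IIC_M_WR, 1, &reg },
 *		{ 0x50 << 1, IIC_M_RD, 1, &val },
 *	};
 *	error = iicbus_transfer(iicbus_dev, msgs, 2);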
*/ switch (speed) { case IIC_SLOW: clk = TWI_SLOW_CLOCK; break; case IIC_FAST: clk = TWI_FAST_CLOCK; break; case IIC_UNKNOWN: case IIC_FASTEST: default: clk = TWI_FASTEST_CLOCK; break; } sc->cwgr = TWI_CWGR_CKDIV(1) | TWI_CWGR_CHDIV(TWI_CWGR_DIV(clk)) | TWI_CWGR_CLDIV(TWI_CWGR_DIV(clk)); WR4(sc, TWI_CR, TWI_CR_SWRST); WR4(sc, TWI_CR, TWI_CR_MSEN | TWI_CR_SVDIS); WR4(sc, TWI_CWGR, sc->cwgr); AT91_TWI_UNLOCK(sc); return 0; } static int at91_twi_callback(device_t dev, int index, caddr_t data) { int error = 0; switch (index) { case IIC_REQUEST_BUS: break; case IIC_RELEASE_BUS: break; default: error = EINVAL; } return (error); } static int at91_twi_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs) { struct at91_twi_softc *sc; int i, len, err; uint32_t rdwr; uint8_t *buf; uint32_t sr; sc = device_get_softc(dev); err = 0; AT91_TWI_LOCK(sc); for (i = 0; i < nmsgs; i++) { /* * The linux atmel driver doesn't use the internal device * address feature of twi. A separate i2c message needs to * be written to use this. * See http://lists.arm.linux.org.uk/pipermail/linux-arm-kernel/2004-September/024411.html * for details. Upon reflection, we could use this as an * optimization, but it is unclear the code bloat will * result in faster/better operations. */ rdwr = (msgs[i].flags & IIC_M_RD) ? TWI_MMR_MREAD : 0; WR4(sc, TWI_MMR, TWI_MMR_DADR(msgs[i].slave) | rdwr); len = msgs[i].len; buf = msgs[i].buf; /* zero byte transfers aren't allowed */ if (len == 0 || buf == NULL) { err = EINVAL; goto out; } if (len == 1 && msgs[i].flags & IIC_M_RD) WR4(sc, TWI_CR, TWI_CR_START | TWI_CR_STOP); else WR4(sc, TWI_CR, TWI_CR_START); if (msgs[i].flags & IIC_M_RD) { sr = RD4(sc, TWI_SR); while (!(sr & TWI_SR_TXCOMP)) { if ((sr = RD4(sc, TWI_SR)) & TWI_SR_RXRDY) { len--; *buf++ = RD4(sc, TWI_RHR) & 0xff; if (len == 1) WR4(sc, TWI_CR, TWI_CR_STOP); } } if (len > 0 || (sr & TWI_SR_NACK)) { err = ENXIO; // iic nack convention goto out; } } else { while (len--) { if ((err = at91_twi_wait(sc, TWI_SR_TXRDY))) goto out; WR4(sc, TWI_THR, *buf++); } WR4(sc, TWI_CR, TWI_CR_STOP); } if ((err = at91_twi_wait(sc, TWI_SR_TXCOMP))) break; } out: if (err) { WR4(sc, TWI_CR, TWI_CR_SWRST); WR4(sc, TWI_CR, TWI_CR_MSEN | TWI_CR_SVDIS); WR4(sc, TWI_CWGR, sc->cwgr); } AT91_TWI_UNLOCK(sc); return (err); } static device_method_t at91_twi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, at91_twi_probe), DEVMETHOD(device_attach, at91_twi_attach), DEVMETHOD(device_detach, at91_twi_detach), /* iicbus interface */ DEVMETHOD(iicbus_callback, at91_twi_callback), DEVMETHOD(iicbus_reset, at91_twi_rst_card), DEVMETHOD(iicbus_transfer, at91_twi_transfer), DEVMETHOD_END }; static driver_t at91_twi_driver = { "at91_twi", at91_twi_methods, sizeof(struct at91_twi_softc), }; #ifdef FDT DRIVER_MODULE(at91_twi, simplebus, at91_twi_driver, at91_twi_devclass, NULL, NULL); #else DRIVER_MODULE(at91_twi, atmelarm, at91_twi_driver, at91_twi_devclass, NULL, NULL); #endif DRIVER_MODULE(iicbus, at91_twi, iicbus_driver, iicbus_devclass, NULL, NULL); MODULE_DEPEND(at91_twi, iicbus, 1, 1, 1); Index: head/sys/arm/at91/if_ate.c =================================================================== --- head/sys/arm/at91/if_ate.c (revision 298054) +++ head/sys/arm/at91/if_ate.c (revision 298055) @@ -1,1531 +1,1531 @@ /*- * Copyright (c) 2006 M. Warner Losh. All rights reserved. * Copyright (c) 2009 Greg Ansley. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* TODO * * 1) Turn on the clock in pmc? Turn off? * 2) GPIO initializtion in board setup code. */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #include #include #include #include #include "opt_at91.h" #include #include #include #ifdef FDT #include #include #include #endif #include "miibus_if.h" /* * Driver-specific flags. */ #define ATE_FLAG_DETACHING 0x01 #define ATE_FLAG_MULTICAST 0x02 /* * Old EMAC assumes whole packet fits in one buffer; * new EBACB assumes all receive buffers are 128 bytes */ #define RX_BUF_SIZE(sc) (sc->is_emacb ? 128 : MCLBYTES) /* * EMACB has an 11 bit counter for Rx/Tx Descriptors * for max total of 1024 decriptors each. */ #define ATE_MAX_RX_DESCR 1024 #define ATE_MAX_TX_DESCR 1024 /* How many buffers to allocate */ #define ATE_MAX_TX_BUFFERS 4 /* We have ping-pong tx buffers */ /* How much memory to use for rx buffers */ #define ATE_RX_MEMORY (ATE_MAX_RX_DESCR * 128) /* Actual number of descriptors we allocate */ #define ATE_NUM_RX_DESCR ATE_MAX_RX_DESCR #define ATE_NUM_TX_DESCR ATE_MAX_TX_BUFFERS #if ATE_NUM_TX_DESCR > ATE_MAX_TX_DESCR #error "Can't have more TX buffers that descriptors" #endif #if ATE_NUM_RX_DESCR > ATE_MAX_RX_DESCR #error "Can't have more RX buffers that descriptors" #endif /* Wrap indexes the same way the hardware does */ #define NEXT_RX_IDX(sc, cur) \ ((sc->rx_descs[cur].addr & ETH_WRAP_BIT) ? 0 : (cur + 1)) #define NEXT_TX_IDX(sc, cur) \ ((sc->tx_descs[cur].status & ETHB_TX_WRAP) ? 
0 : (cur + 1)) struct ate_softc { struct ifnet *ifp; /* ifnet pointer */ struct mtx sc_mtx; /* Basically a perimeter lock */ device_t dev; /* Myself */ device_t miibus; /* My child miibus */ struct resource *irq_res; /* IRQ resource */ struct resource *mem_res; /* Memory resource */ struct callout tick_ch; /* Tick callout */ struct ifmib_iso_8802_3 mibdata; /* Stuff for network mgmt */ bus_dma_tag_t mtag; /* bus dma tag for mbufs */ bus_dma_tag_t rx_tag; bus_dma_tag_t rx_desc_tag; bus_dmamap_t rx_desc_map; bus_dmamap_t rx_map[ATE_MAX_RX_DESCR]; bus_addr_t rx_desc_phys; /* PA of rx descriptors */ eth_rx_desc_t *rx_descs; /* VA of rx descriptors */ void *rx_buf[ATE_NUM_RX_DESCR]; /* RX buffer space */ int rxhead; /* Current RX map/desc index */ uint32_t rx_buf_size; /* Size of Rx buffers */ bus_dma_tag_t tx_desc_tag; bus_dmamap_t tx_desc_map; bus_dmamap_t tx_map[ATE_MAX_TX_BUFFERS]; bus_addr_t tx_desc_phys; /* PA of tx descriptors */ eth_tx_desc_t *tx_descs; /* VA of tx descriptors */ int txhead; /* Current TX map/desc index */ int txtail; /* Current TX map/desc index */ struct mbuf *sent_mbuf[ATE_MAX_TX_BUFFERS]; /* Sent mbufs */ void *intrhand; /* Interrupt handle */ int flags; int if_flags; int use_rmii; int is_emacb; /* SAM9x hardware version */ }; static inline uint32_t RD4(struct ate_softc *sc, bus_size_t off) { return (bus_read_4(sc->mem_res, off)); } static inline void WR4(struct ate_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } static inline void BARRIER(struct ate_softc *sc, bus_size_t off, bus_size_t len, int flags) { bus_barrier(sc->mem_res, off, len, flags); } #define ATE_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define ATE_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define ATE_LOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ MTX_NETWORK_LOCK, MTX_DEF) #define ATE_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); #define ATE_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); #define ATE_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); static devclass_t ate_devclass; /* * ifnet entry points. */ static void ateinit_locked(void *); static void atestart_locked(struct ifnet *); static void ateinit(void *); static void atestart(struct ifnet *); static void atestop(struct ate_softc *); static int ateioctl(struct ifnet * ifp, u_long, caddr_t); /* * Bus entry points. */ static int ate_probe(device_t dev); static int ate_attach(device_t dev); static int ate_detach(device_t dev); static void ate_intr(void *); /* * Helper routines. */ static int ate_activate(device_t dev); static void ate_deactivate(struct ate_softc *sc); static int ate_ifmedia_upd(struct ifnet *ifp); static void ate_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static int ate_get_mac(struct ate_softc *sc, u_char *eaddr); static void ate_set_mac(struct ate_softc *sc, u_char *eaddr); static void ate_rxfilter(struct ate_softc *sc); static int ate_miibus_readreg(device_t dev, int phy, int reg); static int ate_miibus_writereg(device_t dev, int phy, int reg, int data); /* * The AT91 family of products has the ethernet interface called EMAC. * However, it isn't self identifying. It is anticipated that the parent bus * code will take care to only add ate devices where they really are. As * such, we do nothing here to identify the device and just set its name. * However, FDT makes it self-identifying. 
*/ static int ate_probe(device_t dev) { #ifdef FDT if (!ofw_bus_is_compatible(dev, "cdns,at91rm9200-emac") && !ofw_bus_is_compatible(dev, "cdns,emac") && !ofw_bus_is_compatible(dev, "cdns,at32ap7000-macb")) return (ENXIO); #endif device_set_desc(dev, "EMAC"); return (0); } #ifdef FDT /* * We have to know if we're using MII or RMII attachment * for the MACB to talk to the PHY correctly. With FDT, * we must use rmii if there's a property phy-mode * equal to "rmii". Otherwise MII mode is used. */ static void ate_set_rmii(struct ate_softc *sc) { phandle_t node; char prop[10]; ssize_t len; node = ofw_bus_get_node(sc->dev); memset(prop, 0, sizeof(prop)); len = OF_getproplen(node, "phy-mode"); if (len != 4) return; if (OF_getprop(node, "phy-mode", prop, len) != len) return; if (strncmp(prop, "rmii", 4) == 0) sc->use_rmii = 1; } #else /* * We have to know if we're using MII or RMII attachment * for the MACB to talk to the PHY correctly. Without FDT, * there's no good way to do this. So, if the config file * has 'option AT91_ATE_USE_RMII', then we'll force RMII. * Otherwise, we'll use what the bootloader set up. Either * it set up RMII or MII, in which case we'll get it right, * or it did nothing, and we'll fall back to MII and the * option would override if present. */ static void ate_set_rmii(struct ate_softc *sc) { /* Default to what boot rom did */ if (!sc->is_emacb) sc->use_rmii = (RD4(sc, ETH_CFG) & ETH_CFG_RMII) == ETH_CFG_RMII; else sc->use_rmii = (RD4(sc, ETHB_UIO) & ETHB_UIO_RMII) == ETHB_UIO_RMII; #ifdef AT91_ATE_USE_RMII /* Compile time override */ sc->use_rmii = 1; #endif } #endif static int ate_attach(device_t dev) { struct ate_softc *sc; struct ifnet *ifp = NULL; struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; u_char eaddr[ETHER_ADDR_LEN]; uint32_t rnd; int rid, err; sc = device_get_softc(dev); sc->dev = dev; ATE_LOCK_INIT(sc); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) { device_printf(dev, "could not allocate memory resources.\n"); err = ENOMEM; goto out; } rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(dev, "could not allocate interrupt resources.\n"); err = ENOMEM; goto out; } /* New or old version, chooses buffer size. */ #ifdef FDT sc->is_emacb = ofw_bus_is_compatible(dev, "cdns,at32ap7000-macb"); #else sc->is_emacb = at91_is_sam9() || at91_is_sam9xe(); #endif sc->rx_buf_size = RX_BUF_SIZE(sc); err = ate_activate(dev); if (err) goto out; ate_set_rmii(sc); /* Sysctls */ sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "rmii", CTLFLAG_RW, &sc->use_rmii, 0, "rmii in use"); /* Calling atestop before ifp is set is OK. */ ATE_LOCK(sc); atestop(sc); ATE_UNLOCK(sc); callout_init_mtx(&sc->tick_ch, &sc->sc_mtx, 0); if ((err = ate_get_mac(sc, eaddr)) != 0) { /* No MAC address configured. Generate a random one. */ if (bootverbose) device_printf(dev, "Generating random ethernet address.\n"); rnd = arc4random(); /* * Set OUI to convenient locally assigned address. 'b' * is 0x62, which has the locally assigned bit set, and * the broadcast/multicast bit clear.
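 * The generated address therefore always looks like 62:73:64:xx:xx:xx,
 * i.e. 'b' 's' 'd' followed by three bytes taken from arc4random().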
*/ eaddr[0] = 'b'; eaddr[1] = 's'; eaddr[2] = 'd'; eaddr[3] = (rnd >> 16) & 0xff; eaddr[4] = (rnd >> 8) & 0xff; eaddr[5] = (rnd >> 0) & 0xff; } sc->ifp = ifp = if_alloc(IFT_ETHER); err = mii_attach(dev, &sc->miibus, ifp, ate_ifmedia_upd, ate_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (err != 0) { device_printf(dev, "attaching PHYs failed\n"); goto out; } /* * XXX: Clear the isolate bit, or we won't get up, * at least on the HL201 */ ate_miibus_writereg(dev, 0, 0, 0x3000); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_MTU; /* The hw bits already set. */ ifp->if_start = atestart; ifp->if_ioctl = ateioctl; ifp->if_init = ateinit; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; IFQ_SET_READY(&ifp->if_snd); ifp->if_linkmib = &sc->mibdata; ifp->if_linkmiblen = sizeof(sc->mibdata); sc->mibdata.dot3Compliance = DOT3COMPLIANCE_COLLS; sc->if_flags = ifp->if_flags; ether_ifattach(ifp, eaddr); /* Activate the interrupt. */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ate_intr, sc, &sc->intrhand); if (err) { device_printf(dev, "could not establish interrupt handler.\n"); ether_ifdetach(ifp); goto out; } out: if (err) ate_detach(dev); return (err); } static int ate_detach(device_t dev) { struct ate_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); KASSERT(sc != NULL, ("[ate: %d]: sc is NULL", __LINE__)); ifp = sc->ifp; if (device_is_attached(dev)) { ATE_LOCK(sc); sc->flags |= ATE_FLAG_DETACHING; atestop(sc); ATE_UNLOCK(sc); callout_drain(&sc->tick_ch); ether_ifdetach(ifp); } if (sc->miibus != NULL) { device_delete_child(dev, sc->miibus); sc->miibus = NULL; } bus_generic_detach(sc->dev); ate_deactivate(sc); if (sc->intrhand != NULL) { bus_teardown_intr(dev, sc->irq_res, sc->intrhand); sc->intrhand = NULL; } if (ifp != NULL) { if_free(ifp); sc->ifp = NULL; } if (sc->mem_res != NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rman_get_rid(sc->mem_res), sc->mem_res); sc->mem_res = NULL; } if (sc->irq_res != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); sc->irq_res = NULL; } ATE_LOCK_DESTROY(sc); return (0); } static void ate_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; *(bus_addr_t *)arg = segs[0].ds_addr; } static void ate_load_rx_buf(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct ate_softc *sc; if (error != 0) return; sc = (struct ate_softc *)arg; bus_dmamap_sync(sc->rx_desc_tag, sc->rx_desc_map, BUS_DMASYNC_PREWRITE); sc->rx_descs[sc->rxhead].addr = segs[0].ds_addr; sc->rx_descs[sc->rxhead].status = 0; bus_dmamap_sync(sc->rx_desc_tag, sc->rx_desc_map, BUS_DMASYNC_POSTWRITE); } static uint32_t ate_mac_hash(const uint8_t *buf) { uint32_t index = 0; for (int i = 0; i < 48; i++) { index ^= ((buf[i >> 3] >> (i & 7)) & 1) << (i % 6); } return (index); } /* * Compute the multicast filter for this device. */ static int ate_setmcast(struct ate_softc *sc) { uint32_t index; uint32_t mcaf[2]; u_char *af = (u_char *) mcaf; struct ifmultiaddr *ifma; struct ifnet *ifp; ifp = sc->ifp; if ((ifp->if_flags & IFF_PROMISC) != 0) return (0); if ((ifp->if_flags & IFF_ALLMULTI) != 0) { WR4(sc, ETH_HSL, 0xffffffff); WR4(sc, ETH_HSH, 0xffffffff); return (1); } /* Compute the multicast hash. 
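 * ate_mac_hash() above folds the 48 address bits into a 6-bit index by
 * XORing bit i of the address into bit (i % 6) of the index; the loop
 * below then sets the corresponding bit in the 64-bit mcaf[] pair, which
 * ends up in the ETH_HSL/ETH_HSH hash registers.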
*/ mcaf[0] = 0; mcaf[1] = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; index = ate_mac_hash(LLADDR((struct sockaddr_dl *) ifma->ifma_addr)); af[index >> 3] |= 1 << (index & 7); } if_maddr_runlock(ifp); /* * Write the hash to the hash register. This card can also * accept unicast packets as well as multicast packets using this * register for easier bridging operations, but we don't take * advantage of that. Locks here are to avoid LOR with the * if_maddr_rlock, but might not be strictly necessary. */ WR4(sc, ETH_HSL, mcaf[0]); WR4(sc, ETH_HSH, mcaf[1]); return (mcaf[0] || mcaf[1]); } static int ate_activate(device_t dev) { struct ate_softc *sc; int i; sc = device_get_softc(dev); /* Allocate DMA tags and maps for TX mbufs */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, busdma_lock_mutex, &sc->sc_mtx, &sc->mtag)) goto errout; for (i = 0; i < ATE_MAX_TX_BUFFERS; i++) { if ( bus_dmamap_create(sc->mtag, 0, &sc->tx_map[i])) goto errout; } /* DMA tag and map for the RX descriptors. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), sizeof(eth_rx_desc_t), 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, ATE_NUM_RX_DESCR * sizeof(eth_rx_desc_t), 1, ATE_NUM_RX_DESCR * sizeof(eth_rx_desc_t), 0, busdma_lock_mutex, &sc->sc_mtx, &sc->rx_desc_tag)) goto errout; if (bus_dmamem_alloc(sc->rx_desc_tag, (void **)&sc->rx_descs, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &sc->rx_desc_map) != 0) goto errout; if (bus_dmamap_load(sc->rx_desc_tag, sc->rx_desc_map, sc->rx_descs, ATE_NUM_RX_DESCR * sizeof(eth_rx_desc_t), ate_getaddr, &sc->rx_desc_phys, 0) != 0) goto errout; /* Allocate DMA tags and maps for RX. buffers */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, sc->rx_buf_size, 1, sc->rx_buf_size, 0, busdma_lock_mutex, &sc->sc_mtx, &sc->rx_tag)) goto errout; /* * Allocate our RX buffers. * This chip has a RX structure that's filled in. * XXX On MACB (SAM9 part) we should receive directly into mbuf * to avoid the copy. XXX */ sc->rxhead = 0; for (sc->rxhead = 0; sc->rxhead < ATE_RX_MEMORY/sc->rx_buf_size; sc->rxhead++) { if (bus_dmamem_alloc(sc->rx_tag, (void **)&sc->rx_buf[sc->rxhead], BUS_DMA_NOWAIT, &sc->rx_map[sc->rxhead]) != 0) goto errout; if (bus_dmamap_load(sc->rx_tag, sc->rx_map[sc->rxhead], sc->rx_buf[sc->rxhead], sc->rx_buf_size, ate_load_rx_buf, sc, 0) != 0) { printf("bus_dmamem_load\n"); goto errout; } bus_dmamap_sync(sc->rx_tag, sc->rx_map[sc->rxhead], BUS_DMASYNC_PREREAD); } /* * For the last buffer, set the wrap bit so the controller * restarts from the first descriptor. */ sc->rx_descs[--sc->rxhead].addr |= ETH_WRAP_BIT; sc->rxhead = 0; /* Flush the memory for the EMAC rx descriptor. */ bus_dmamap_sync(sc->rx_desc_tag, sc->rx_desc_map, BUS_DMASYNC_PREWRITE); /* Write the descriptor queue address. */ WR4(sc, ETH_RBQP, sc->rx_desc_phys); /* * DMA tag and map for the TX descriptors. 
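 * Only ATE_MAX_TX_BUFFERS entries are allocated here, matching the
 * ping-pong transmit scheme noted with the defines above; the last entry
 * gets ETHB_TX_WRAP set so the hardware cycles back to descriptor 0.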
*/ if (bus_dma_tag_create(bus_get_dma_tag(dev), sizeof(eth_tx_desc_t), 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, ATE_MAX_TX_BUFFERS * sizeof(eth_tx_desc_t), 1, ATE_MAX_TX_BUFFERS * sizeof(eth_tx_desc_t), 0, busdma_lock_mutex, &sc->sc_mtx, &sc->tx_desc_tag) != 0) goto errout; if (bus_dmamem_alloc(sc->tx_desc_tag, (void **)&sc->tx_descs, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &sc->tx_desc_map) != 0) goto errout; if (bus_dmamap_load(sc->tx_desc_tag, sc->tx_desc_map, sc->tx_descs, ATE_MAX_TX_BUFFERS * sizeof(eth_tx_desc_t), ate_getaddr, &sc->tx_desc_phys, 0) != 0) goto errout; /* Initilize descriptors; mark all empty */ for (i = 0; i < ATE_MAX_TX_BUFFERS; i++) { sc->tx_descs[i].addr =0; sc->tx_descs[i].status = ETHB_TX_USED; sc->sent_mbuf[i] = NULL; } /* Mark last entry to cause wrap when indexing through */ sc->tx_descs[ATE_MAX_TX_BUFFERS - 1].status = ETHB_TX_WRAP | ETHB_TX_USED; /* Flush the memory for the EMAC tx descriptor. */ bus_dmamap_sync(sc->tx_desc_tag, sc->tx_desc_map, BUS_DMASYNC_PREWRITE); sc->txhead = sc->txtail = 0; if (sc->is_emacb) { /* Write the descriptor queue address. */ WR4(sc, ETHB_TBQP, sc->tx_desc_phys); /* EMACB: Enable transceiver input clock */ WR4(sc, ETHB_UIO, RD4(sc, ETHB_UIO) | ETHB_UIO_CLKE); } return (0); errout: return (ENOMEM); } static void ate_deactivate(struct ate_softc *sc) { int i; KASSERT(sc != NULL, ("[ate, %d]: sc is NULL!", __LINE__)); if (sc->mtag != NULL) { for (i = 0; i < ATE_MAX_TX_BUFFERS; i++) { if (sc->sent_mbuf[i] != NULL) { bus_dmamap_sync(sc->mtag, sc->tx_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mtag, sc->tx_map[i]); m_freem(sc->sent_mbuf[i]); } bus_dmamap_destroy(sc->mtag, sc->tx_map[i]); sc->sent_mbuf[i] = NULL; sc->tx_map[i] = NULL; } bus_dma_tag_destroy(sc->mtag); } if (sc->rx_desc_tag != NULL) { if (sc->rx_descs != NULL) { if (sc->rx_desc_phys != 0) { bus_dmamap_sync(sc->rx_desc_tag, sc->rx_desc_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rx_desc_tag, sc->rx_desc_map); sc->rx_desc_phys = 0; } } } if (sc->rx_tag != NULL) { for (i = 0; sc->rx_buf[i] != NULL; i++) { if (sc->rx_descs[i].addr != 0) { bus_dmamap_sync(sc->rx_tag, sc->rx_map[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rx_tag, sc->rx_map[i]); sc->rx_descs[i].addr = 0; } bus_dmamem_free(sc->rx_tag, sc->rx_buf[i], sc->rx_map[i]); sc->rx_buf[i] = NULL; } bus_dma_tag_destroy(sc->rx_tag); } if (sc->rx_desc_tag != NULL) { if (sc->rx_descs != NULL) bus_dmamem_free(sc->rx_desc_tag, sc->rx_descs, sc->rx_desc_map); bus_dma_tag_destroy(sc->rx_desc_tag); sc->rx_descs = NULL; sc->rx_desc_tag = NULL; } if (sc->is_emacb) WR4(sc, ETHB_UIO, RD4(sc, ETHB_UIO) & ~ETHB_UIO_CLKE); } /* * Change media according to request. */ static int ate_ifmedia_upd(struct ifnet *ifp) { struct ate_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->miibus); ATE_LOCK(sc); mii_mediachg(mii); ATE_UNLOCK(sc); return (0); } /* * Notify the world which media we're using. */ static void ate_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ate_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->miibus); ATE_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ATE_UNLOCK(sc); } static void ate_stat_update(struct ate_softc *sc, int active) { uint32_t reg; /* * The speed and full/half-duplex state needs to be reflected * in the ETH_CFG register. 
*/ reg = RD4(sc, ETH_CFG); reg &= ~(ETH_CFG_SPD | ETH_CFG_FD); if (IFM_SUBTYPE(active) != IFM_10_T) reg |= ETH_CFG_SPD; if (active & IFM_FDX) reg |= ETH_CFG_FD; WR4(sc, ETH_CFG, reg); } static void ate_tick(void *xsc) { struct ate_softc *sc = xsc; struct ifnet *ifp = sc->ifp; struct mii_data *mii; int active; uint32_t c; /* * The KB920x boot loader tests ETH_SR & ETH_SR_LINK and will ask * the MII if there's a link if this bit is clear. Not sure if we * should do the same thing here or not. */ ATE_ASSERT_LOCKED(sc); if (sc->miibus != NULL) { mii = device_get_softc(sc->miibus); active = mii->mii_media_active; mii_tick(mii); if (mii->mii_media_status & IFM_ACTIVE && active != mii->mii_media_active) ate_stat_update(sc, mii->mii_media_active); } /* * Update the stats as best we can. When we're done, clear * the status counters and start over. We're supposed to read these * registers often enough that they won't overflow. Hopefully * once a second is often enough. Some don't map well to * the dot3Stats mib, so for those we just count them as general * errors. Stats for iframes, ibutes, oframes and obytes are * collected elsewhere. These registers zero on a read to prevent * races. For all the collision stats, also update the collision * stats for the interface. */ sc->mibdata.dot3StatsAlignmentErrors += RD4(sc, ETH_ALE); sc->mibdata.dot3StatsFCSErrors += RD4(sc, ETH_SEQE); c = RD4(sc, ETH_SCOL); if_inc_counter(ifp, IFCOUNTER_COLLISIONS, c); sc->mibdata.dot3StatsSingleCollisionFrames += c; c = RD4(sc, ETH_MCOL); sc->mibdata.dot3StatsMultipleCollisionFrames += c; if_inc_counter(ifp, IFCOUNTER_COLLISIONS, c); sc->mibdata.dot3StatsSQETestErrors += RD4(sc, ETH_SQEE); sc->mibdata.dot3StatsDeferredTransmissions += RD4(sc, ETH_DTE); c = RD4(sc, ETH_LCOL); sc->mibdata.dot3StatsLateCollisions += c; if_inc_counter(ifp, IFCOUNTER_COLLISIONS, c); c = RD4(sc, ETH_ECOL); sc->mibdata.dot3StatsExcessiveCollisions += c; if_inc_counter(ifp, IFCOUNTER_COLLISIONS, c); sc->mibdata.dot3StatsCarrierSenseErrors += RD4(sc, ETH_CSE); sc->mibdata.dot3StatsFrameTooLongs += RD4(sc, ETH_ELR); sc->mibdata.dot3StatsInternalMacReceiveErrors += RD4(sc, ETH_DRFC); /* * Not sure where to lump these, so count them against the errors * for the interface. */ if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, RD4(sc, ETH_TUE)); if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, RD4(sc, ETH_CDE) + RD4(sc, ETH_RJB) + RD4(sc, ETH_USF)); /* Schedule another timeout one second from now. */ callout_reset(&sc->tick_ch, hz, ate_tick, sc); } static void ate_set_mac(struct ate_softc *sc, u_char *eaddr) { WR4(sc, ETH_SA1L, (eaddr[3] << 24) | (eaddr[2] << 16) | (eaddr[1] << 8) | eaddr[0]); WR4(sc, ETH_SA1H, (eaddr[5] << 8) | (eaddr[4])); } static int ate_get_mac(struct ate_softc *sc, u_char *eaddr) { bus_size_t sa_low_reg[] = { ETH_SA1L, ETH_SA2L, ETH_SA3L, ETH_SA4L }; bus_size_t sa_high_reg[] = { ETH_SA1H, ETH_SA2H, ETH_SA3H, ETH_SA4H }; uint32_t low, high; int i; /* * The boot loader may setup the MAC with an address(es), grab the * first MAC address from the SA[1-4][HL] registers. 
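 * ate_set_mac() above packs the address least-significant byte first, and the
 * same layout is assumed when reading it back: SAxL holds bytes 0-3 and the
 * low 16 bits of SAxH hold bytes 4-5.  For example, a hypothetical address
 * 00:11:22:33:44:55 would read back as SAxL == 0x33221100 and
 * SAxH == 0x00005544.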
*/ for (i = 0; i < 4; i++) { low = RD4(sc, sa_low_reg[i]); high = RD4(sc, sa_high_reg[i]); if ((low | (high & 0xffff)) != 0) { eaddr[0] = low & 0xff; eaddr[1] = (low >> 8) & 0xff; eaddr[2] = (low >> 16) & 0xff; eaddr[3] = (low >> 24) & 0xff; eaddr[4] = high & 0xff; eaddr[5] = (high >> 8) & 0xff; return (0); } } return (ENXIO); } static void ate_intr(void *xsc) { struct ate_softc *sc = xsc; struct ifnet *ifp = sc->ifp; struct mbuf *mb; eth_rx_desc_t *rxdhead; uint32_t status, reg, idx; int remain, count, done; status = RD4(sc, ETH_ISR); if (status == 0) return; if (status & ETH_ISR_RCOM) { bus_dmamap_sync(sc->rx_desc_tag, sc->rx_desc_map, BUS_DMASYNC_POSTREAD); rxdhead = &sc->rx_descs[sc->rxhead]; while (rxdhead->addr & ETH_CPU_OWNER) { if (!sc->is_emacb) { /* * Simulate SAM9 FIRST/LAST bits for RM9200. * RM9200 EMAC has only one Rx buffer per packet. * But sometimes we are handed a zero length packet. */ if ((rxdhead->status & ETH_LEN_MASK) == 0) rxdhead->status = 0; /* Mark error */ else rxdhead->status |= ETH_BUF_FIRST | ETH_BUF_LAST; } if ((rxdhead->status & ETH_BUF_FIRST) == 0) { /* Something went wrong during RX so release back to EMAC all buffers of invalid packets. */ rxdhead->status = 0; rxdhead->addr &= ~ETH_CPU_OWNER; sc->rxhead = NEXT_RX_IDX(sc, sc->rxhead); rxdhead = &sc->rx_descs[sc->rxhead]; continue; } /* Find end of packet or start of next */ idx = sc->rxhead; if ((sc->rx_descs[idx].status & ETH_BUF_LAST) == 0) { idx = NEXT_RX_IDX(sc, idx); while ((sc->rx_descs[idx].addr & ETH_CPU_OWNER) && ((sc->rx_descs[idx].status & (ETH_BUF_FIRST|ETH_BUF_LAST))== 0)) idx = NEXT_RX_IDX(sc, idx); } /* Packet NOT yet completely in memory; we are done */ if ((sc->rx_descs[idx].addr & ETH_CPU_OWNER) == 0 || ((sc->rx_descs[idx].status & (ETH_BUF_FIRST|ETH_BUF_LAST))== 0)) break; /* Packets with no end descriptor are invalid. */ if ((sc->rx_descs[idx].status & ETH_BUF_LAST) == 0) { rxdhead->status &= ~ETH_BUF_FIRST; continue; } /* FCS is not copied into mbuf. */ remain = (sc->rx_descs[idx].status & ETH_LEN_MASK) - 4; /* Get an appropriately sized mbuf. */ mb = m_get2(remain + ETHER_ALIGN, M_NOWAIT, MT_DATA, M_PKTHDR); if (mb == NULL) { if_inc_counter(sc->ifp, IFCOUNTER_IQDROPS, 1); rxdhead->status = 0; continue; } mb->m_data += ETHER_ALIGN; mb->m_pkthdr.rcvif = ifp; WR4(sc, ETH_RSR, RD4(sc, ETH_RSR)); /* Reset status */ /* Now we process the buffers that make up the packet */ do { /* Last buffer may just be 1-4 bytes of FCS so remain * may be zero for the last descriptor. */ if (remain > 0) { /* Make sure we get the current bytes */ bus_dmamap_sync(sc->rx_tag, sc->rx_map[sc->rxhead], BUS_DMASYNC_POSTREAD); count = MIN(remain, sc->rx_buf_size); /* XXX Performance robbing copy. Could * receive directly to mbufs if not an * RM9200. And even then we could likely * copy just the protocol headers.
XXX */ m_append(mb, count, sc->rx_buf[sc->rxhead]); remain -= count; } done = (rxdhead->status & ETH_BUF_LAST) != 0; /* Return the descriptor to the EMAC */ rxdhead->status = 0; rxdhead->addr &= ~ETH_CPU_OWNER; bus_dmamap_sync(sc->rx_desc_tag, sc->rx_desc_map, BUS_DMASYNC_PREWRITE); /* Move on to next descriptor with wrap */ sc->rxhead = NEXT_RX_IDX(sc, sc->rxhead); rxdhead = &sc->rx_descs[sc->rxhead]; } while (!done); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); (*ifp->if_input)(ifp, mb); } } if (status & ETH_ISR_TCOM) { bus_dmamap_sync(sc->tx_desc_tag, sc->tx_desc_map, BUS_DMASYNC_POSTREAD); ATE_LOCK(sc); /* XXX TSR register should be cleared */ if (!sc->is_emacb) { /* Simulate Transmit descriptor table */ /* First packet done */ if (sc->txtail < sc->txhead) sc->tx_descs[sc->txtail].status |= ETHB_TX_USED; /* Second Packet done */ if (sc->txtail + 1 < sc->txhead && RD4(sc, ETH_TSR) & ETH_TSR_IDLE) sc->tx_descs[sc->txtail + 1].status |= ETHB_TX_USED; } while ((sc->tx_descs[sc->txtail].status & ETHB_TX_USED) && sc->sent_mbuf[sc->txtail] != NULL) { bus_dmamap_sync(sc->mtag, sc->tx_map[sc->txtail], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mtag, sc->tx_map[sc->txtail]); m_freem(sc->sent_mbuf[sc->txtail]); sc->tx_descs[sc->txtail].addr = 0; sc->sent_mbuf[sc->txtail] = NULL; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); sc->txtail = NEXT_TX_IDX(sc, sc->txtail); } /* Flush descriptors to EMAC */ bus_dmamap_sync(sc->tx_desc_tag, sc->tx_desc_map, BUS_DMASYNC_PREWRITE); /* * We're no longer busy, so clear the busy flag and call the * start routine to xmit more packets. */ sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; atestart_locked(sc->ifp); ATE_UNLOCK(sc); } if (status & ETH_ISR_RBNA) { /* Workaround RM9200 Errata #11 */ if (bootverbose) device_printf(sc->dev, "RBNA workaround\n"); reg = RD4(sc, ETH_CTL); WR4(sc, ETH_CTL, reg & ~ETH_CTL_RE); BARRIER(sc, ETH_CTL, 4, BUS_SPACE_BARRIER_WRITE); WR4(sc, ETH_CTL, reg | ETH_CTL_RE); } /* XXX need to work around SAM9260 errata 43.2.4.1: * disable the mac, reset tx buffer, enable mac on TUND */ } /* * Reset and initialize the chip. */ static void ateinit_locked(void *xsc) { struct ate_softc *sc = xsc; struct ifnet *ifp = sc->ifp; struct mii_data *mii; uint8_t eaddr[ETHER_ADDR_LEN]; uint32_t reg; ATE_ASSERT_LOCKED(sc); /* * XXX TODO(3) * we need to turn on the EMAC clock in the pmc. With the * default boot loader, this is already turned on. However, we * need to think about how best to turn it on/off as the interface * is brought up/down, as well as dealing with the mii bus... * * We also need to multiplex the pins correctly (in board_xxx.c). */ /* * There are two different ways that the mii bus is connected * to this chip mii or rmii. */ if (!sc->is_emacb) { /* RM9200 */ reg = RD4(sc, ETH_CFG); if (sc->use_rmii) reg |= ETH_CFG_RMII; else reg &= ~ETH_CFG_RMII; WR4(sc, ETH_CFG, reg); } else { /* SAM9 */ reg = ETHB_UIO_CLKE; reg |= (sc->use_rmii) ? ETHB_UIO_RMII : 0; WR4(sc, ETHB_UIO, reg); } ate_rxfilter(sc); /* * Set the chip MAC address. */ bcopy(IF_LLADDR(ifp), eaddr, ETHER_ADDR_LEN); ate_set_mac(sc, eaddr); /* Make sure we know state of TX queue */ sc->txhead = sc->txtail = 0; if (sc->is_emacb) { /* Write the descriptor queue address. */ WR4(sc, ETHB_TBQP, sc->tx_desc_phys); } /* * Turn on MACs and interrupt processing. */ WR4(sc, ETH_CTL, RD4(sc, ETH_CTL) | ETH_CTL_TE | ETH_CTL_RE); WR4(sc, ETH_IER, ETH_ISR_RCOM | ETH_ISR_TCOM | ETH_ISR_RBNA); /* Enable big packets. 
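 * ETH_CFG_BIG lets the MAC accept frames larger than the standard maximum;
 * the SIOCSIFCAP handler below toggles the same bit when IFCAP_VLAN_MTU is
 * changed, e.g. (sketch of that path):
 *
 *	WR4(sc, ETH_CFG, RD4(sc, ETH_CFG) | ETH_CFG_BIG);	(enable)
 *	WR4(sc, ETH_CFG, RD4(sc, ETH_CFG) & ~ETH_CFG_BIG);	(disable)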
*/ WR4(sc, ETH_CFG, RD4(sc, ETH_CFG) | ETH_CFG_BIG); /* * Set 'running' flag, and clear output active flag * and attempt to start the output. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mii = device_get_softc(sc->miibus); mii_pollstat(mii); ate_stat_update(sc, mii->mii_media_active); atestart_locked(ifp); callout_reset(&sc->tick_ch, hz, ate_tick, sc); } /* * Dequeue packets and transmit. */ static void atestart_locked(struct ifnet *ifp) { struct ate_softc *sc = ifp->if_softc; struct mbuf *m, *mdefrag; bus_dma_segment_t segs[1]; int nseg, e; ATE_ASSERT_LOCKED(sc); if (ifp->if_drv_flags & IFF_DRV_OACTIVE) return; while (sc->tx_descs[sc->txhead].status & ETHB_TX_USED) { /* * Check to see if there's room to put another packet into the * xmit queue. The old EMAC version has a ping-pong buffer for * xmit packets. We use OACTIVE to indicate "we can stuff more * into our buffers (clear) or not (set)." */ /* RM9200 has only two hardware entries */ if (!sc->is_emacb && (RD4(sc, ETH_TSR) & ETH_TSR_BNQ) == 0) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m); - if (m == 0) + if (m == NULL) break; e = bus_dmamap_load_mbuf_sg(sc->mtag, sc->tx_map[sc->txhead], m, segs, &nseg, 0); if (e == EFBIG) { mdefrag = m_defrag(m, M_NOWAIT); if (mdefrag == NULL) { IFQ_DRV_PREPEND(&ifp->if_snd, m); return; } m = mdefrag; e = bus_dmamap_load_mbuf_sg(sc->mtag, sc->tx_map[sc->txhead], m, segs, &nseg, 0); } if (e != 0) { m_freem(m); continue; } /* * There's a small race between the loop in ate_intr finishing * and the check above to see if the packet was finished, as well * as when atestart gets called via other paths. Lose the race * gracefully and free the mbuf... */ if (sc->sent_mbuf[sc->txhead] != NULL) { bus_dmamap_sync(sc->mtag, sc->tx_map[sc->txtail], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mtag, sc->tx_map[sc->txtail]); m_free(sc->sent_mbuf[sc->txhead]); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } sc->sent_mbuf[sc->txhead] = m; bus_dmamap_sync(sc->mtag, sc->tx_map[sc->txhead], BUS_DMASYNC_PREWRITE); /* Tell the hardware to xmit the packet. */ if (!sc->is_emacb) { WR4(sc, ETH_TAR, segs[0].ds_addr); BARRIER(sc, ETH_TAR, 4, BUS_SPACE_BARRIER_WRITE); WR4(sc, ETH_TCR, segs[0].ds_len); } else { bus_dmamap_sync(sc->tx_desc_tag, sc->tx_desc_map, BUS_DMASYNC_POSTWRITE); sc->tx_descs[sc->txhead].addr = segs[0].ds_addr; sc->tx_descs[sc->txhead].status = segs[0].ds_len | (sc->tx_descs[sc->txhead].status & ETHB_TX_WRAP) | ETHB_TX_BUF_LAST; bus_dmamap_sync(sc->tx_desc_tag, sc->tx_desc_map, BUS_DMASYNC_PREWRITE); WR4(sc, ETH_CTL, RD4(sc, ETH_CTL) | ETHB_CTL_TGO); } sc->txhead = NEXT_TX_IDX(sc, sc->txhead); /* Tap off here if there is a bpf listener. */ BPF_MTAP(ifp, m); } if ((sc->tx_descs[sc->txhead].status & ETHB_TX_USED) == 0) ifp->if_drv_flags |= IFF_DRV_OACTIVE; } static void ateinit(void *xsc) { struct ate_softc *sc = xsc; ATE_LOCK(sc); ateinit_locked(sc); ATE_UNLOCK(sc); } static void atestart(struct ifnet *ifp) { struct ate_softc *sc = ifp->if_softc; ATE_LOCK(sc); atestart_locked(ifp); ATE_UNLOCK(sc); } /* * Turn off interrupts, and stop the NIC. Can be called with sc->ifp NULL, * so be careful. */ static void atestop(struct ate_softc *sc) { struct ifnet *ifp; int i; ATE_ASSERT_LOCKED(sc); ifp = sc->ifp; if (ifp) { //ifp->if_timer = 0; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } callout_stop(&sc->tick_ch); /* * Enable some parts of the MAC that are needed always (like the * MII bus. 
This turns off the RE and TE bits, which will remain * off until ateinit() is called to turn them on. With RE and TE * turned off, there's no DMA to worry about after this write. */ WR4(sc, ETH_CTL, ETH_CTL_MPE); /* * Turn off all the configured options and revert to defaults. */ /* Make sure that the MDIO clk is less than * 2.5 MHz. Can no longer default to /32 since * SAM9 family may have MCK > 80 MHz */ if (at91_master_clock <= 2000000) WR4(sc, ETH_CFG, ETH_CFG_CLK_8); else if (at91_master_clock <= 4000000) WR4(sc, ETH_CFG, ETH_CFG_CLK_16); else if (at91_master_clock <= 800000) WR4(sc, ETH_CFG, ETH_CFG_CLK_32); else WR4(sc, ETH_CFG, ETH_CFG_CLK_64); /* * Turn off all the interrupts, and ack any pending ones by reading * the ISR. */ WR4(sc, ETH_IDR, 0xffffffff); RD4(sc, ETH_ISR); /* * Clear out the Transmit and Receiver Status registers of any * errors they may be reporting */ WR4(sc, ETH_TSR, 0xffffffff); WR4(sc, ETH_RSR, 0xffffffff); /* Release TX resources. */ for (i = 0; i < ATE_MAX_TX_BUFFERS; i++) { if (sc->sent_mbuf[i] != NULL) { bus_dmamap_sync(sc->mtag, sc->tx_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mtag, sc->tx_map[i]); m_freem(sc->sent_mbuf[i]); sc->sent_mbuf[i] = NULL; } } /* Turn off transceiver input clock */ if (sc->is_emacb) WR4(sc, ETHB_UIO, RD4(sc, ETHB_UIO) & ~ETHB_UIO_CLKE); /* * XXX we should power down the EMAC if it isn't in use, after * putting it into loopback mode. This saves about 400uA according * to the datasheet. */ } static void ate_rxfilter(struct ate_softc *sc) { struct ifnet *ifp; uint32_t reg; int enabled; KASSERT(sc != NULL, ("[ate, %d]: sc is NULL!", __LINE__)); ATE_ASSERT_LOCKED(sc); ifp = sc->ifp; /* Wipe out old filter settings. */ reg = RD4(sc, ETH_CFG); reg &= ~(ETH_CFG_CAF | ETH_CFG_MTI | ETH_CFG_UNI); reg |= ETH_CFG_NBC; sc->flags &= ~ATE_FLAG_MULTICAST; /* Set new parameters.
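 * (The bits below are assumed to mean: CAF copies all frames to the host,
 * i.e. promiscuous mode; MTI enables the multicast hash filter programmed by
 * ate_setmcast(); NBC, when set, rejects broadcast frames.  The code that
 * follows maps the ifnet flags onto those bits.)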
*/ if ((ifp->if_flags & IFF_BROADCAST) != 0) reg &= ~ETH_CFG_NBC; if ((ifp->if_flags & IFF_PROMISC) != 0) { reg |= ETH_CFG_CAF; } else { enabled = ate_setmcast(sc); if (enabled != 0) { reg |= ETH_CFG_MTI; sc->flags |= ATE_FLAG_MULTICAST; } } WR4(sc, ETH_CFG, reg); } static int ateioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ate_softc *sc = ifp->if_softc; struct mii_data *mii; struct ifreq *ifr = (struct ifreq *)data; int drv_flags, flags; int mask, error, enabled; error = 0; flags = ifp->if_flags; drv_flags = ifp->if_drv_flags; switch (cmd) { case SIOCSIFFLAGS: ATE_LOCK(sc); if ((flags & IFF_UP) != 0) { if ((drv_flags & IFF_DRV_RUNNING) != 0) { if (((flags ^ sc->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) ate_rxfilter(sc); } else { if ((sc->flags & ATE_FLAG_DETACHING) == 0) ateinit_locked(sc); } } else if ((drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; atestop(sc); } sc->if_flags = flags; ATE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if ((drv_flags & IFF_DRV_RUNNING) != 0) { ATE_LOCK(sc); enabled = ate_setmcast(sc); if (enabled != (sc->flags & ATE_FLAG_MULTICAST)) ate_rxfilter(sc); ATE_UNLOCK(sc); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: mii = device_get_softc(sc->miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd); break; case SIOCSIFCAP: mask = ifp->if_capenable ^ ifr->ifr_reqcap; if (mask & IFCAP_VLAN_MTU) { ATE_LOCK(sc); if (ifr->ifr_reqcap & IFCAP_VLAN_MTU) { WR4(sc, ETH_CFG, RD4(sc, ETH_CFG) | ETH_CFG_BIG); ifp->if_capenable |= IFCAP_VLAN_MTU; } else { WR4(sc, ETH_CFG, RD4(sc, ETH_CFG) & ~ETH_CFG_BIG); ifp->if_capenable &= ~IFCAP_VLAN_MTU; } ATE_UNLOCK(sc); } default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void ate_child_detached(device_t dev, device_t child) { struct ate_softc *sc; sc = device_get_softc(dev); if (child == sc->miibus) sc->miibus = NULL; } /* * MII bus support routines. 
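 * These are the MIIBUS_READREG()/MIIBUS_WRITEREG() back-ends used by mii(4);
 * e.g. the periodic link check in ate_tick() reaches ate_miibus_readreg()
 * via mii_tick().  Each access writes a command word to ETH_MAN and then
 * busy-waits on ETH_SR_IDLE before the result (or the next command) is valid.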
*/ static int ate_miibus_readreg(device_t dev, int phy, int reg) { struct ate_softc *sc; int val; /* * XXX if we implement agressive power savings, then we need * XXX to make sure that the clock to the emac is on here */ sc = device_get_softc(dev); DELAY(1); /* Hangs w/o this delay really 30.5us atm */ WR4(sc, ETH_MAN, ETH_MAN_REG_RD(phy, reg)); while ((RD4(sc, ETH_SR) & ETH_SR_IDLE) == 0) continue; val = RD4(sc, ETH_MAN) & ETH_MAN_VALUE_MASK; return (val); } static int ate_miibus_writereg(device_t dev, int phy, int reg, int data) { struct ate_softc *sc; /* * XXX if we implement agressive power savings, then we need * XXX to make sure that the clock to the emac is on here */ sc = device_get_softc(dev); WR4(sc, ETH_MAN, ETH_MAN_REG_WR(phy, reg, data)); while ((RD4(sc, ETH_SR) & ETH_SR_IDLE) == 0) continue; return (0); } static device_method_t ate_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ate_probe), DEVMETHOD(device_attach, ate_attach), DEVMETHOD(device_detach, ate_detach), /* Bus interface */ DEVMETHOD(bus_child_detached, ate_child_detached), /* MII interface */ DEVMETHOD(miibus_readreg, ate_miibus_readreg), DEVMETHOD(miibus_writereg, ate_miibus_writereg), DEVMETHOD_END }; static driver_t ate_driver = { "ate", ate_methods, sizeof(struct ate_softc), }; #ifdef FDT DRIVER_MODULE(ate, simplebus, ate_driver, ate_devclass, NULL, NULL); #else DRIVER_MODULE(ate, atmelarm, ate_driver, ate_devclass, NULL, NULL); #endif DRIVER_MODULE(miibus, ate, miibus_driver, miibus_devclass, NULL, NULL); MODULE_DEPEND(ate, miibus, 1, 1, 1); MODULE_DEPEND(ate, ether, 1, 1, 1); Index: head/sys/arm/cavium/cns11xx/if_ece.c =================================================================== --- head/sys/arm/cavium/cns11xx/if_ece.c (revision 298054) +++ head/sys/arm/cavium/cns11xx/if_ece.c (revision 298055) @@ -1,1955 +1,1955 @@ /*- * Copyright (c) 2009 Yohanes Nugroho * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #include #include #include #include #include #include #include #include #include /* "device miibus" required. 
See GENERIC if you get errors here. */ #include "miibus_if.h" static uint8_t vlan0_mac[ETHER_ADDR_LEN] = {0x00, 0xaa, 0xbb, 0xcc, 0xdd, 0x19}; /* * Boot loader expects the hardware state to be the same when we * restart the device (warm boot), so we need to save the initial * config values. */ int initial_switch_config; int initial_cpu_config; int initial_port0_config; int initial_port1_config; static inline uint32_t read_4(struct ece_softc *sc, bus_size_t off) { return (bus_read_4(sc->mem_res, off)); } static inline void write_4(struct ece_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } #define ECE_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define ECE_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define ECE_LOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ MTX_NETWORK_LOCK, MTX_DEF) #define ECE_TXLOCK(_sc) mtx_lock(&(_sc)->sc_mtx_tx) #define ECE_TXUNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx_tx) #define ECE_TXLOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx_tx, device_get_nameunit(_sc->dev), \ "ECE TX Lock", MTX_DEF) #define ECE_CLEANUPLOCK(_sc) mtx_lock(&(_sc)->sc_mtx_cleanup) #define ECE_CLEANUPUNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx_cleanup) #define ECE_CLEANUPLOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx_cleanup, device_get_nameunit(_sc->dev), \ "ECE cleanup Lock", MTX_DEF) #define ECE_RXLOCK(_sc) mtx_lock(&(_sc)->sc_mtx_rx) #define ECE_RXUNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx_rx) #define ECE_RXLOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx_rx, device_get_nameunit(_sc->dev), \ "ECE RX Lock", MTX_DEF) #define ECE_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); #define ECE_TXLOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx_tx); #define ECE_RXLOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx_rx); #define ECE_CLEANUPLOCK_DESTROY(_sc) \ mtx_destroy(&_sc->sc_mtx_cleanup); #define ECE_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); #define ECE_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); static devclass_t ece_devclass; /* ifnet entry points */ static void eceinit_locked(void *); static void ecestart_locked(struct ifnet *); static void eceinit(void *); static void ecestart(struct ifnet *); static void ecestop(struct ece_softc *); static int eceioctl(struct ifnet * ifp, u_long, caddr_t); /* bus entry points */ static int ece_probe(device_t dev); static int ece_attach(device_t dev); static int ece_detach(device_t dev); static void ece_intr(void *); static void ece_intr_qf(void *); static void ece_intr_status(void *xsc); /* helper routines */ static int ece_activate(device_t dev); static void ece_deactivate(device_t dev); static int ece_ifmedia_upd(struct ifnet *ifp); static void ece_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static int ece_get_mac(struct ece_softc *sc, u_char *eaddr); static void ece_set_mac(struct ece_softc *sc, u_char *eaddr); static int configure_cpu_port(struct ece_softc *sc); static int configure_lan_port(struct ece_softc *sc, int phy_type); static void set_pvid(struct ece_softc *sc, int port0, int port1, int cpu); static void set_vlan_vid(struct ece_softc *sc, int vlan); static void set_vlan_member(struct ece_softc *sc, int vlan); static void set_vlan_tag(struct ece_softc *sc, int vlan); static int hardware_init(struct ece_softc *sc); static void ece_intr_rx_locked(struct ece_softc *sc, int count); static void ece_free_desc_dma_tx(struct ece_softc *sc); static void ece_free_desc_dma_rx(struct ece_softc *sc); static void ece_intr_task(void *arg, int pending __unused); static void ece_tx_task(void *arg, int pending __unused); static void 
ece_cleanup_task(void *arg, int pending __unused); static int ece_allocate_dma(struct ece_softc *sc); static void ece_intr_tx(void *xsc); static void clear_mac_entries(struct ece_softc *ec, int include_this_mac); static uint32_t read_mac_entry(struct ece_softc *ec, uint8_t *mac_result, int first); /*PHY related functions*/ static inline int phy_read(struct ece_softc *sc, int phy, int reg) { int val; int ii; int status; write_4(sc, PHY_CONTROL, PHY_RW_OK); write_4(sc, PHY_CONTROL, (PHY_ADDRESS(phy)|PHY_READ_COMMAND | PHY_REGISTER(reg))); for (ii = 0; ii < 0x1000; ii++) { status = read_4(sc, PHY_CONTROL); if (status & PHY_RW_OK) { /* Clear the rw_ok status, and clear other * bits value. */ write_4(sc, PHY_CONTROL, PHY_RW_OK); val = PHY_GET_DATA(status); return (val); } } return (0); } static inline void phy_write(struct ece_softc *sc, int phy, int reg, int data) { int ii; write_4(sc, PHY_CONTROL, PHY_RW_OK); write_4(sc, PHY_CONTROL, PHY_ADDRESS(phy) | PHY_REGISTER(reg) | PHY_WRITE_COMMAND | PHY_DATA(data)); for (ii = 0; ii < 0x1000; ii++) { if (read_4(sc, PHY_CONTROL) & PHY_RW_OK) { /* Clear the rw_ok status, and clear other * bits value. */ write_4(sc, PHY_CONTROL, PHY_RW_OK); return; } } } static int get_phy_type(struct ece_softc *sc) { uint16_t phy0_id = 0, phy1_id = 0; /* * Use SMI (MDC/MDIO) to read Link Partner's PHY Identifier * Register 1. */ phy0_id = phy_read(sc, 0, 0x2); phy1_id = phy_read(sc, 1, 0x2); if ((phy0_id == 0xFFFF) && (phy1_id == 0x000F)) return (ASIX_GIGA_PHY); else if ((phy0_id == 0x0243) && (phy1_id == 0x0243)) return (TWO_SINGLE_PHY); else if ((phy0_id == 0xFFFF) && (phy1_id == 0x0007)) return (VSC8601_GIGA_PHY); else if ((phy0_id == 0x0243) && (phy1_id == 0xFFFF)) return (IC_PLUS_PHY); return (NOT_FOUND_PHY); } static int ece_probe(device_t dev) { device_set_desc(dev, "Econa Ethernet Controller"); return (0); } static int ece_attach(device_t dev) { struct ece_softc *sc; struct ifnet *ifp = NULL; struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; u_char eaddr[ETHER_ADDR_LEN]; int err; int i, rid; uint32_t rnd; err = 0; sc = device_get_softc(dev); sc->dev = dev; rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) goto out; power_on_network_interface(); rid = 0; sc->irq_res_status = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res_status == NULL) goto out; rid = 1; /*TSTC: Fm-Switch-Tx-Complete*/ sc->irq_res_tx = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res_tx == NULL) goto out; rid = 2; /*FSRC: Fm-Switch-Rx-Complete*/ sc->irq_res_rec = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res_rec == NULL) goto out; rid = 4; /*FSQF: Fm-Switch-Queue-Full*/ sc->irq_res_qf = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res_qf == NULL) goto out; err = ece_activate(dev); if (err) goto out; /* Sysctls */ sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); ECE_LOCK_INIT(sc); callout_init_mtx(&sc->tick_ch, &sc->sc_mtx, 0); if ((err = ece_get_mac(sc, eaddr)) != 0) { /* No MAC address configured. Generate the random one. */ if (bootverbose) device_printf(dev, "Generating random ethernet address.\n"); rnd = arc4random(); /*from if_ae.c/if_ate.c*/ /* * Set OUI to convenient locally assigned address. 'b' * is 0x62, which has the locally assigned bit set, and * the broadcast/multicast bit clear. 
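 * (Bit 0 of the first octet is the multicast/broadcast flag and bit 1 is the
 * locally-administered flag; 0x62 is binary 0110 0010, so the generated
 * address is unicast and locally administered.)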
*/ eaddr[0] = 'b'; eaddr[1] = 's'; eaddr[2] = 'd'; eaddr[3] = (rnd >> 16) & 0xff; eaddr[4] = (rnd >> 8) & 0xff; eaddr[5] = rnd & 0xff; for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = vlan0_mac[i]; } ece_set_mac(sc, eaddr); sc->ifp = ifp = if_alloc(IFT_ETHER); /* Only one PHY at address 0 in this device. */ err = mii_attach(dev, &sc->miibus, ifp, ece_ifmedia_upd, ece_ifmedia_sts, BMSR_DEFCAPMASK, 0, MII_OFFSET_ANY, 0); if (err != 0) { device_printf(dev, "attaching PHYs failed\n"); goto out; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_hwassist = (CSUM_IP | CSUM_TCP | CSUM_UDP); ifp->if_capenable = ifp->if_capabilities; ifp->if_start = ecestart; ifp->if_ioctl = eceioctl; ifp->if_init = eceinit; ifp->if_snd.ifq_drv_maxlen = ECE_MAX_TX_BUFFERS - 1; IFQ_SET_MAXLEN(&ifp->if_snd, ECE_MAX_TX_BUFFERS - 1); IFQ_SET_READY(&ifp->if_snd); /* Create local taskq. */ TASK_INIT(&sc->sc_intr_task, 0, ece_intr_task, sc); TASK_INIT(&sc->sc_tx_task, 1, ece_tx_task, ifp); TASK_INIT(&sc->sc_cleanup_task, 2, ece_cleanup_task, sc); sc->sc_tq = taskqueue_create_fast("ece_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->sc_tq); if (sc->sc_tq == NULL) { device_printf(sc->dev, "could not create taskqueue\n"); goto out; } ether_ifattach(ifp, eaddr); /* * Activate interrupts */ err = bus_setup_intr(dev, sc->irq_res_rec, INTR_TYPE_NET | INTR_MPSAFE, NULL, ece_intr, sc, &sc->intrhand); if (err) { ether_ifdetach(ifp); ECE_LOCK_DESTROY(sc); goto out; } err = bus_setup_intr(dev, sc->irq_res_status, INTR_TYPE_NET | INTR_MPSAFE, NULL, ece_intr_status, sc, &sc->intrhand_status); if (err) { ether_ifdetach(ifp); ECE_LOCK_DESTROY(sc); goto out; } err = bus_setup_intr(dev, sc->irq_res_qf, INTR_TYPE_NET | INTR_MPSAFE, NULL,ece_intr_qf, sc, &sc->intrhand_qf); if (err) { ether_ifdetach(ifp); ECE_LOCK_DESTROY(sc); goto out; } err = bus_setup_intr(dev, sc->irq_res_tx, INTR_TYPE_NET | INTR_MPSAFE, NULL, ece_intr_tx, sc, &sc->intrhand_tx); if (err) { ether_ifdetach(ifp); ECE_LOCK_DESTROY(sc); goto out; } ECE_TXLOCK_INIT(sc); ECE_RXLOCK_INIT(sc); ECE_CLEANUPLOCK_INIT(sc); /* Enable all interrupt sources. */ write_4(sc, INTERRUPT_MASK, 0x00000000); /* Enable port 0. */ write_4(sc, PORT_0_CONFIG, read_4(sc, PORT_0_CONFIG) & ~(PORT_DISABLE)); taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); out: if (err) ece_deactivate(dev); if (err && ifp) if_free(ifp); return (err); } static int ece_detach(device_t dev) { struct ece_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->ifp; ecestop(sc); if (ifp != NULL) { ether_ifdetach(ifp); if_free(ifp); } ece_deactivate(dev); return (0); } static void ece_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { u_int32_t *paddr; KASSERT(nsegs == 1, ("wrong number of segments, should be 1")); paddr = arg; *paddr = segs->ds_addr; } static int ece_alloc_desc_dma_tx(struct ece_softc *sc) { int i; int error; /* Allocate a busdma tag and DMA safe memory for TX/RX descriptors. */ error = bus_dma_tag_create(sc->sc_parent_tag, /* parent */ 16, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ sizeof(eth_tx_desc_t)*ECE_MAX_TX_BUFFERS, /* max size */ 1, /*nsegments */ sizeof(eth_tx_desc_t)*ECE_MAX_TX_BUFFERS, 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sc->dmatag_data_tx); /* dmat */ /* Allocate memory for TX ring. 
*/ error = bus_dmamem_alloc(sc->dmatag_data_tx, (void**)&(sc->desc_tx), BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &(sc->dmamap_ring_tx)); if (error) { if_printf(sc->ifp, "failed to allocate DMA memory\n"); bus_dma_tag_destroy(sc->dmatag_data_tx); sc->dmatag_data_tx = 0; return (ENXIO); } /* Load Ring DMA. */ error = bus_dmamap_load(sc->dmatag_data_tx, sc->dmamap_ring_tx, sc->desc_tx, sizeof(eth_tx_desc_t)*ECE_MAX_TX_BUFFERS, ece_getaddr, &(sc->ring_paddr_tx), BUS_DMA_NOWAIT); if (error) { if_printf(sc->ifp, "can't load descriptor\n"); bus_dmamem_free(sc->dmatag_data_tx, sc->desc_tx, sc->dmamap_ring_tx); sc->desc_tx = NULL; bus_dma_tag_destroy(sc->dmatag_data_tx); sc->dmatag_data_tx = 0; return (ENXIO); } /* Allocate a busdma tag for mbufs. Alignment is 2 bytes */ error = bus_dma_tag_create(sc->sc_parent_tag, /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ MCLBYTES*MAX_FRAGMENT, /* maxsize */ MAX_FRAGMENT, /* nsegments */ MCLBYTES, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sc->dmatag_ring_tx); /* dmat */ if (error) { if_printf(sc->ifp, "failed to create busdma tag for mbufs\n"); return (ENXIO); } for (i = 0; i < ECE_MAX_TX_BUFFERS; i++) { /* Create dma map for each descriptor. */ error = bus_dmamap_create(sc->dmatag_ring_tx, 0, &(sc->tx_desc[i].dmamap)); if (error) { if_printf(sc->ifp, "failed to create map for mbuf\n"); return (ENXIO); } } return (0); } static void ece_free_desc_dma_tx(struct ece_softc *sc) { int i; for (i = 0; i < ECE_MAX_TX_BUFFERS; i++) { if (sc->tx_desc[i].buff) { m_freem(sc->tx_desc[i].buff); sc->tx_desc[i].buff= 0; } } if (sc->ring_paddr_tx) { bus_dmamap_unload(sc->dmatag_data_tx, sc->dmamap_ring_tx); sc->ring_paddr_tx = 0; } if (sc->desc_tx) { bus_dmamem_free(sc->dmatag_data_tx, sc->desc_tx, sc->dmamap_ring_tx); sc->desc_tx = NULL; } if (sc->dmatag_data_tx) { bus_dma_tag_destroy(sc->dmatag_data_tx); sc->dmatag_data_tx = 0; } if (sc->dmatag_ring_tx) { for (i = 0; i < ECE_MAX_TX_BUFFERS; i++) { bus_dmamap_destroy(sc->dmatag_ring_tx, sc->tx_desc[i].dmamap); sc->tx_desc[i].dmamap = 0; } bus_dma_tag_destroy(sc->dmatag_ring_tx); sc->dmatag_ring_tx = 0; } } static int ece_alloc_desc_dma_rx(struct ece_softc *sc) { int error; int i; /* Allocate a busdma tag and DMA safe memory for RX descriptors. */ error = bus_dma_tag_create(sc->sc_parent_tag, /* parent */ 16, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ /* maxsize, nsegments */ sizeof(eth_rx_desc_t)*ECE_MAX_RX_BUFFERS, 1, /* maxsegsz, flags */ sizeof(eth_rx_desc_t)*ECE_MAX_RX_BUFFERS, 0, NULL, NULL, /* lockfunc, lockfuncarg */ &sc->dmatag_data_rx); /* dmat */ /* Allocate RX ring. */ error = bus_dmamem_alloc(sc->dmatag_data_rx, (void**)&(sc->desc_rx), BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &(sc->dmamap_ring_rx)); if (error) { if_printf(sc->ifp, "failed to allocate DMA memory\n"); return (ENXIO); } /* Load dmamap. */ error = bus_dmamap_load(sc->dmatag_data_rx, sc->dmamap_ring_rx, sc->desc_rx, sizeof(eth_rx_desc_t)*ECE_MAX_RX_BUFFERS, ece_getaddr, &(sc->ring_paddr_rx), BUS_DMA_NOWAIT); if (error) { if_printf(sc->ifp, "can't load descriptor\n"); bus_dmamem_free(sc->dmatag_data_rx, sc->desc_rx, sc->dmamap_ring_rx); bus_dma_tag_destroy(sc->dmatag_data_rx); sc->desc_rx = NULL; return (ENXIO); } /* Allocate a busdma tag for mbufs.
*/ error = bus_dma_tag_create(sc->sc_parent_tag,/* parent */ 16, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ MCLBYTES, 1, /* maxsize, nsegments */ MCLBYTES, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sc->dmatag_ring_rx); /* dmat */ if (error) { if_printf(sc->ifp, "failed to create busdma tag for mbufs\n"); return (ENXIO); } for (i = 0; i < ECE_MAX_RX_BUFFERS; i++) { error = bus_dmamap_create(sc->dmatag_ring_rx, 0, &sc->rx_desc[i].dmamap); if (error) { if_printf(sc->ifp, "failed to create map for mbuf\n"); return (ENXIO); } } error = bus_dmamap_create(sc->dmatag_ring_rx, 0, &sc->rx_sparemap); if (error) { if_printf(sc->ifp, "failed to create spare map\n"); return (ENXIO); } return (0); } static void ece_free_desc_dma_rx(struct ece_softc *sc) { int i; for (i = 0; i < ECE_MAX_RX_BUFFERS; i++) { if (sc->rx_desc[i].buff) { m_freem(sc->rx_desc[i].buff); sc->rx_desc[i].buff = NULL; } } if (sc->ring_paddr_rx) { bus_dmamap_unload(sc->dmatag_data_rx, sc->dmamap_ring_rx); sc->ring_paddr_rx = 0; } if (sc->desc_rx) { bus_dmamem_free(sc->dmatag_data_rx, sc->desc_rx, sc->dmamap_ring_rx); sc->desc_rx = NULL; } if (sc->dmatag_data_rx) { bus_dma_tag_destroy(sc->dmatag_data_rx); sc->dmatag_data_rx = NULL; } if (sc->dmatag_ring_rx) { for (i = 0; i < ECE_MAX_RX_BUFFERS; i++) bus_dmamap_destroy(sc->dmatag_ring_rx, sc->rx_desc[i].dmamap); bus_dmamap_destroy(sc->dmatag_ring_rx, sc->rx_sparemap); bus_dma_tag_destroy(sc->dmatag_ring_rx); sc->dmatag_ring_rx = NULL; } } static int ece_new_rxbuf(struct ece_softc *sc, struct rx_desc_info* descinfo) { struct mbuf *new_mbuf; bus_dma_segment_t seg[1]; bus_dmamap_t map; int error; int nsegs; bus_dma_tag_t tag; tag = sc->dmatag_ring_rx; new_mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (new_mbuf == NULL) return (ENOBUFS); new_mbuf->m_len = new_mbuf->m_pkthdr.len = MCLBYTES; error = bus_dmamap_load_mbuf_sg(tag, sc->rx_sparemap, new_mbuf, seg, &nsegs, BUS_DMA_NOWAIT); KASSERT(nsegs == 1, ("Too many segments returned!")); if (nsegs != 1 || error) { m_free(new_mbuf); return (ENOBUFS); } if (descinfo->buff != NULL) { bus_dmamap_sync(tag, descinfo->dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(tag, descinfo->dmamap); } map = descinfo->dmamap; descinfo->dmamap = sc->rx_sparemap; sc->rx_sparemap = map; bus_dmamap_sync(tag, descinfo->dmamap, BUS_DMASYNC_PREREAD); descinfo->buff = new_mbuf; descinfo->desc->data_ptr = seg->ds_addr; descinfo->desc->length = seg->ds_len - 2; return (0); } static int ece_allocate_dma(struct ece_softc *sc) { eth_tx_desc_t *desctx; eth_rx_desc_t *descrx; int i; int error; /* Create parent tag for tx and rx */ error = bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, 0,/* maxsize, nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->sc_parent_tag); ece_alloc_desc_dma_tx(sc); for (i = 0; i < ECE_MAX_TX_BUFFERS; i++) { desctx = (eth_tx_desc_t *)(&sc->desc_tx[i]); memset(desctx, 0, sizeof(eth_tx_desc_t)); desctx->length = MAX_PACKET_LEN; desctx->cown = 1; if (i == ECE_MAX_TX_BUFFERS - 1) desctx->eor = 1; } ece_alloc_desc_dma_rx(sc); for (i = 0; i < ECE_MAX_RX_BUFFERS; i++) { descrx = &(sc->desc_rx[i]); memset(descrx, 0, sizeof(eth_rx_desc_t)); sc->rx_desc[i].desc = descrx; sc->rx_desc[i].buff = 0; ece_new_rxbuf(sc, &(sc->rx_desc[i])); if (i == ECE_MAX_RX_BUFFERS - 1)
descrx->eor = 1; } sc->tx_prod = 0; sc->tx_cons = 0; sc->last_rx = 0; sc->desc_curr_tx = 0; return (0); } static int ece_activate(device_t dev) { struct ece_softc *sc; int err; uint32_t mac_port_config; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; initial_switch_config = read_4(sc, SWITCH_CONFIG); initial_cpu_config = read_4(sc, CPU_PORT_CONFIG); initial_port0_config = read_4(sc, MAC_PORT_0_CONFIG); initial_port1_config = read_4(sc, MAC_PORT_1_CONFIG); /* Disable Port 0 */ mac_port_config = read_4(sc, MAC_PORT_0_CONFIG); mac_port_config |= (PORT_DISABLE); write_4(sc, MAC_PORT_0_CONFIG, mac_port_config); /* Disable Port 1 */ mac_port_config = read_4(sc, MAC_PORT_1_CONFIG); mac_port_config |= (PORT_DISABLE); write_4(sc, MAC_PORT_1_CONFIG, mac_port_config); err = ece_allocate_dma(sc); if (err) { if_printf(sc->ifp, "failed allocating dma\n"); goto out; } write_4(sc, TS_DESCRIPTOR_POINTER, sc->ring_paddr_tx); write_4(sc, TS_DESCRIPTOR_BASE_ADDR, sc->ring_paddr_tx); write_4(sc, FS_DESCRIPTOR_POINTER, sc->ring_paddr_rx); write_4(sc, FS_DESCRIPTOR_BASE_ADDR, sc->ring_paddr_rx); write_4(sc, FS_DMA_CONTROL, 1); return (0); out: return (ENXIO); } static void ece_deactivate(device_t dev) { struct ece_softc *sc; sc = device_get_softc(dev); if (sc->intrhand) bus_teardown_intr(dev, sc->irq_res_rec, sc->intrhand); sc->intrhand = 0; if (sc->intrhand_qf) bus_teardown_intr(dev, sc->irq_res_qf, sc->intrhand_qf); sc->intrhand_qf = 0; bus_generic_detach(sc->dev); if (sc->miibus) device_delete_child(sc->dev, sc->miibus); if (sc->mem_res) bus_release_resource(dev, SYS_RES_IOPORT, rman_get_rid(sc->mem_res), sc->mem_res); sc->mem_res = 0; if (sc->irq_res_rec) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res_rec), sc->irq_res_rec); if (sc->irq_res_qf) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res_qf), sc->irq_res_qf); if (sc->irq_res_qf) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res_status), sc->irq_res_status); sc->irq_res_rec = 0; sc->irq_res_qf = 0; sc->irq_res_status = 0; ECE_TXLOCK_DESTROY(sc); ECE_RXLOCK_DESTROY(sc); ece_free_desc_dma_tx(sc); ece_free_desc_dma_rx(sc); return; } /* * Change media according to request. */ static int ece_ifmedia_upd(struct ifnet *ifp) { struct ece_softc *sc = ifp->if_softc; struct mii_data *mii; int error; mii = device_get_softc(sc->miibus); ECE_LOCK(sc); error = mii_mediachg(mii); ECE_UNLOCK(sc); return (error); } /* * Notify the world which media we're using. */ static void ece_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ece_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->miibus); ECE_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ECE_UNLOCK(sc); } static void ece_tick(void *xsc) { struct ece_softc *sc = xsc; struct mii_data *mii; int active; mii = device_get_softc(sc->miibus); active = mii->mii_media_active; mii_tick(mii); /* * Schedule another timeout one second from now. 
*/ callout_reset(&sc->tick_ch, hz, ece_tick, sc); } static uint32_t read_mac_entry(struct ece_softc *ec, uint8_t *mac_result, int first) { uint32_t ii; struct arl_table_entry_t entry; uint32_t *entry_val; write_4(ec, ARL_TABLE_ACCESS_CONTROL_0, 0); write_4(ec, ARL_TABLE_ACCESS_CONTROL_1, 0); write_4(ec, ARL_TABLE_ACCESS_CONTROL_2, 0); if (first) write_4(ec, ARL_TABLE_ACCESS_CONTROL_0, 0x1); else write_4(ec, ARL_TABLE_ACCESS_CONTROL_0, 0x2); for (ii = 0; ii < 0x1000; ii++) if (read_4(ec, ARL_TABLE_ACCESS_CONTROL_1) & (0x1)) break; entry_val = (uint32_t*) (&entry); entry_val[0] = read_4(ec, ARL_TABLE_ACCESS_CONTROL_1); entry_val[1] = read_4(ec, ARL_TABLE_ACCESS_CONTROL_2); if (mac_result) memcpy(mac_result, entry.mac_addr, ETHER_ADDR_LEN); return (entry.table_end); } static uint32_t write_arl_table_entry(struct ece_softc *ec, uint32_t filter, uint32_t vlan_mac, uint32_t vlan_gid, uint32_t age_field, uint32_t port_map, const uint8_t *mac_addr) { uint32_t ii; uint32_t *entry_val; struct arl_table_entry_t entry; memset(&entry, 0, sizeof(entry)); entry.filter = filter; entry.vlan_mac = vlan_mac; entry.vlan_gid = vlan_gid; entry.age_field = age_field; entry.port_map = port_map; memcpy(entry.mac_addr, mac_addr, ETHER_ADDR_LEN); entry_val = (uint32_t*) (&entry); write_4(ec, ARL_TABLE_ACCESS_CONTROL_0, 0); write_4(ec, ARL_TABLE_ACCESS_CONTROL_1, 0); write_4(ec, ARL_TABLE_ACCESS_CONTROL_2, 0); write_4(ec, ARL_TABLE_ACCESS_CONTROL_1, entry_val[0]); write_4(ec, ARL_TABLE_ACCESS_CONTROL_2, entry_val[1]); write_4(ec, ARL_TABLE_ACCESS_CONTROL_0, ARL_WRITE_COMMAND); for (ii = 0; ii < 0x1000; ii++) if (read_4(ec, ARL_TABLE_ACCESS_CONTROL_1) & ARL_COMMAND_COMPLETE) return (1); /* Write OK. */ /* Write failed. */ return (0); } static void remove_mac_entry(struct ece_softc *sc, uint8_t *mac) { /* Invalid age_field mean erase this entry. */ write_arl_table_entry(sc, 0, 1, VLAN0_GROUP_ID, INVALID_ENTRY, VLAN0_GROUP, mac); } static void add_mac_entry(struct ece_softc *sc, uint8_t *mac) { write_arl_table_entry(sc, 0, 1, VLAN0_GROUP_ID, NEW_ENTRY, VLAN0_GROUP, mac); } /** * The behavior of ARL table reading and deletion is not well defined * in the documentation. To be safe, all mac addresses are put to a * list, then deleted. * */ static void clear_mac_entries(struct ece_softc *ec, int include_this_mac) { int table_end; struct mac_list * temp; struct mac_list * mac_list_header; struct mac_list * current; char mac[ETHER_ADDR_LEN]; - current = 0; - mac_list_header = 0; + current = NULL; + mac_list_header = NULL; table_end = read_mac_entry(ec, mac, 1); while (!table_end) { if (!include_this_mac && memcmp(mac, vlan0_mac, ETHER_ADDR_LEN) == 0) { /* Read next entry. */ table_end = read_mac_entry(ec, mac, 0); continue; } temp = (struct mac_list*)malloc(sizeof(struct mac_list), M_DEVBUF, M_NOWAIT | M_ZERO); memcpy(temp->mac_addr, mac, ETHER_ADDR_LEN); temp->next = 0; if (mac_list_header) { current->next = temp; current = temp; } else { mac_list_header = temp; current = temp; } /* Read next Entry */ table_end = read_mac_entry(ec, mac, 0); } current = mac_list_header; while (current) { remove_mac_entry(ec, current->mac_addr); temp = current; current = current->next; free(temp, M_DEVBUF); } } static int configure_lan_port(struct ece_softc *sc, int phy_type) { uint32_t sw_config; uint32_t mac_port_config; /* * Configure switch */ sw_config = read_4(sc, SWITCH_CONFIG); /* Enable fast aging. */ sw_config |= FAST_AGING; /* Enable IVL learning. */ sw_config |= IVL_LEARNING; /* Disable hardware NAT. 
*/ sw_config &= ~(HARDWARE_NAT); sw_config |= SKIP_L2_LOOKUP_PORT_0 | SKIP_L2_LOOKUP_PORT_1| NIC_MODE; write_4(sc, SWITCH_CONFIG, sw_config); sw_config = read_4(sc, SWITCH_CONFIG); mac_port_config = read_4(sc, MAC_PORT_0_CONFIG); if (!(mac_port_config & 0x1) || (mac_port_config & 0x2)) if_printf(sc->ifp, "Link Down\n"); else write_4(sc, MAC_PORT_0_CONFIG, mac_port_config); return (0); } static void set_pvid(struct ece_softc *sc, int port0, int port1, int cpu) { uint32_t val; val = read_4(sc, VLAN_PORT_PVID) & (~(0x7 << 0)); write_4(sc, VLAN_PORT_PVID, val); val = read_4(sc, VLAN_PORT_PVID) | ((port0) & 0x07); write_4(sc, VLAN_PORT_PVID, val); val = read_4(sc, VLAN_PORT_PVID) & (~(0x7 << 4)); write_4(sc, VLAN_PORT_PVID, val); val = read_4(sc, VLAN_PORT_PVID) | (((port1) & 0x07) << 4); write_4(sc, VLAN_PORT_PVID, val); val = read_4(sc, VLAN_PORT_PVID) & (~(0x7 << 8)); write_4(sc, VLAN_PORT_PVID, val); val = read_4(sc, VLAN_PORT_PVID) | (((cpu) & 0x07) << 8); write_4(sc, VLAN_PORT_PVID, val); } /* VLAN related functions */ static void set_vlan_vid(struct ece_softc *sc, int vlan) { const uint32_t regs[] = { VLAN_VID_0_1, VLAN_VID_0_1, VLAN_VID_2_3, VLAN_VID_2_3, VLAN_VID_4_5, VLAN_VID_4_5, VLAN_VID_6_7, VLAN_VID_6_7 }; const int vids[] = { VLAN0_VID, VLAN1_VID, VLAN2_VID, VLAN3_VID, VLAN4_VID, VLAN5_VID, VLAN6_VID, VLAN7_VID }; uint32_t val; uint32_t reg; int vid; reg = regs[vlan]; vid = vids[vlan]; if (vlan & 1) { val = read_4(sc, reg); write_4(sc, reg, val & (~(0xFFF << 0))); val = read_4(sc, reg); write_4(sc, reg, val|((vid & 0xFFF) << 0)); } else { val = read_4(sc, reg); write_4(sc, reg, val & (~(0xFFF << 12))); val = read_4(sc, reg); write_4(sc, reg, val|((vid & 0xFFF) << 12)); } } static void set_vlan_member(struct ece_softc *sc, int vlan) { unsigned char shift; uint32_t val; int group; const int groups[] = { VLAN0_GROUP, VLAN1_GROUP, VLAN2_GROUP, VLAN3_GROUP, VLAN4_GROUP, VLAN5_GROUP, VLAN6_GROUP, VLAN7_GROUP }; group = groups[vlan]; shift = vlan*3; val = read_4(sc, VLAN_MEMBER_PORT_MAP) & (~(0x7 << shift)); write_4(sc, VLAN_MEMBER_PORT_MAP, val); val = read_4(sc, VLAN_MEMBER_PORT_MAP); write_4(sc, VLAN_MEMBER_PORT_MAP, val | ((group & 0x7) << shift)); } static void set_vlan_tag(struct ece_softc *sc, int vlan) { unsigned char shift; uint32_t val; int tag = 0; shift = vlan*3; val = read_4(sc, VLAN_TAG_PORT_MAP) & (~(0x7 << shift)); write_4(sc, VLAN_TAG_PORT_MAP, val); val = read_4(sc, VLAN_TAG_PORT_MAP); write_4(sc, VLAN_TAG_PORT_MAP, val | ((tag & 0x7) << shift)); } static int configure_cpu_port(struct ece_softc *sc) { uint32_t cpu_port_config; int i; cpu_port_config = read_4(sc, CPU_PORT_CONFIG); /* SA learning Disable */ cpu_port_config |= (SA_LEARNING_DISABLE); /* set data offset + 2 */ cpu_port_config &= ~(1U << 31); write_4(sc, CPU_PORT_CONFIG, cpu_port_config); if (!write_arl_table_entry(sc, 0, 1, VLAN0_GROUP_ID, STATIC_ENTRY, VLAN0_GROUP, vlan0_mac)) return (1); set_pvid(sc, PORT0_PVID, PORT1_PVID, CPU_PORT_PVID); for (i = 0; i < 8; i++) { set_vlan_vid(sc, i); set_vlan_member(sc, i); set_vlan_tag(sc, i); } /* disable all interrupt status sources */ write_4(sc, INTERRUPT_MASK, 0xffff1fff); /* clear previous interrupt sources */ write_4(sc, INTERRUPT_STATUS, 0x00001FFF); write_4(sc, TS_DMA_CONTROL, 0); write_4(sc, FS_DMA_CONTROL, 0); return (0); } static int hardware_init(struct ece_softc *sc) { int status = 0; static int gw_phy_type; gw_phy_type = get_phy_type(sc); /* Currently only ic_plus phy is supported. 
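 * get_phy_type() above makes that decision by reading PHY register 2 (the
 * upper half of the PHY identifier) at MDIO addresses 0 and 1; the
 * 0x0243/0xFFFF pair is the one treated as IC_PLUS_PHY.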
*/ if (gw_phy_type != IC_PLUS_PHY) { device_printf(sc->dev, "PHY type is not supported (%d)\n", gw_phy_type); return (-1); } status = configure_lan_port(sc, gw_phy_type); configure_cpu_port(sc); return (0); } static void set_mac_address(struct ece_softc *sc, const char *mac, int mac_len) { /* Invalid age_field mean erase this entry. */ write_arl_table_entry(sc, 0, 1, VLAN0_GROUP_ID, INVALID_ENTRY, VLAN0_GROUP, mac); memcpy(vlan0_mac, mac, ETHER_ADDR_LEN); write_arl_table_entry(sc, 0, 1, VLAN0_GROUP_ID, STATIC_ENTRY, VLAN0_GROUP, mac); } static void ece_set_mac(struct ece_softc *sc, u_char *eaddr) { memcpy(vlan0_mac, eaddr, ETHER_ADDR_LEN); set_mac_address(sc, eaddr, ETHER_ADDR_LEN); } /* * TODO: the device doesn't have MAC stored, we should read the * configuration stored in FLASH, but the format depends on the * bootloader used.* */ static int ece_get_mac(struct ece_softc *sc, u_char *eaddr) { return (ENXIO); } static void ece_intr_rx_locked(struct ece_softc *sc, int count) { struct ifnet *ifp = sc->ifp; struct mbuf *mb; struct rx_desc_info *rxdesc; eth_rx_desc_t *desc; int fssd_curr; int fssd; int i; int idx; int rxcount; uint32_t status; fssd_curr = read_4(sc, FS_DESCRIPTOR_POINTER); fssd = (fssd_curr - (uint32_t)sc->ring_paddr_rx)>>4; desc = sc->rx_desc[sc->last_rx].desc; /* Prepare to read the data in the ring. */ bus_dmamap_sync(sc->dmatag_ring_rx, sc->dmamap_ring_rx, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (fssd > sc->last_rx) rxcount = fssd - sc->last_rx; else if (fssd < sc->last_rx) rxcount = (ECE_MAX_RX_BUFFERS - sc->last_rx) + fssd; else { if (desc->cown == 0) return; else rxcount = ECE_MAX_RX_BUFFERS; } for (i= 0; i < rxcount; i++) { status = desc->cown; if (!status) break; idx = sc->last_rx; rxdesc = &sc->rx_desc[idx]; mb = rxdesc->buff; if (desc->length < ETHER_MIN_LEN - ETHER_CRC_LEN || desc->length > ETHER_MAX_LEN - ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); desc->cown = 0; desc->length = MCLBYTES - 2; /* Invalid packet, skip and process next * packet. */ continue; } if (ece_new_rxbuf(sc, rxdesc) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); desc->cown = 0; desc->length = MCLBYTES - 2; break; } /** * The device will write to addrress + 2 So we need to adjust * the address after the packet is received. */ mb->m_data += 2; mb->m_len = mb->m_pkthdr.len = desc->length; mb->m_flags |= M_PKTHDR; mb->m_pkthdr.rcvif = ifp; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) { /*check for valid checksum*/ if ( (!desc->l4f) && (desc->prot != 3)) { mb->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; mb->m_pkthdr.csum_flags |= CSUM_IP_VALID; mb->m_pkthdr.csum_data = 0xffff; } } ECE_RXUNLOCK(sc); (*ifp->if_input)(ifp, mb); ECE_RXLOCK(sc); desc->cown = 0; desc->length = MCLBYTES - 2; bus_dmamap_sync(sc->dmatag_ring_rx, sc->dmamap_ring_rx, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (sc->last_rx == ECE_MAX_RX_BUFFERS - 1) sc->last_rx = 0; else sc->last_rx++; desc = sc->rx_desc[sc->last_rx].desc; } /* Sync updated flags. 
*/ bus_dmamap_sync(sc->dmatag_ring_rx, sc->dmamap_ring_rx, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return; } static void ece_intr_task(void *arg, int pending __unused) { struct ece_softc *sc = arg; ECE_RXLOCK(sc); ece_intr_rx_locked(sc, -1); ECE_RXUNLOCK(sc); } static void ece_intr(void *xsc) { struct ece_softc *sc = xsc; struct ifnet *ifp = sc->ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { write_4(sc, FS_DMA_CONTROL, 0); return; } taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task); } static void ece_intr_status(void *xsc) { struct ece_softc *sc = xsc; struct ifnet *ifp = sc->ifp; int stat; stat = read_4(sc, INTERRUPT_STATUS); write_4(sc, INTERRUPT_STATUS, stat); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if ((stat & ERROR_MASK) != 0) if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } } static void ece_cleanup_locked(struct ece_softc *sc) { eth_tx_desc_t *desc; if (sc->tx_cons == sc->tx_prod) return; /* Prepare to read the ring (owner bit). */ bus_dmamap_sync(sc->dmatag_ring_tx, sc->dmamap_ring_tx, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); while (sc->tx_cons != sc->tx_prod) { desc = sc->tx_desc[sc->tx_cons].desc; if (desc->cown != 0) { struct tx_desc_info *td = &(sc->tx_desc[sc->tx_cons]); /* We are finished with this descriptor ... */ bus_dmamap_sync(sc->dmatag_data_tx, td->dmamap, BUS_DMASYNC_POSTWRITE); /* ... and unload, so we can reuse. */ bus_dmamap_unload(sc->dmatag_data_tx, td->dmamap); m_freem(td->buff); td->buff = 0; sc->tx_cons = (sc->tx_cons + 1) % ECE_MAX_TX_BUFFERS; } else { break; } } } static void ece_cleanup_task(void *arg, int pending __unused) { struct ece_softc *sc = arg; ECE_CLEANUPLOCK(sc); ece_cleanup_locked(sc); ECE_CLEANUPUNLOCK(sc); } static void ece_intr_tx(void *xsc) { struct ece_softc *sc = xsc; struct ifnet *ifp = sc->ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { /* This should not happen, stop DMA. */ write_4(sc, FS_DMA_CONTROL, 0); return; } taskqueue_enqueue(sc->sc_tq, &sc->sc_cleanup_task); } static void ece_intr_qf(void *xsc) { struct ece_softc *sc = xsc; struct ifnet *ifp = sc->ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { /* This should not happen, stop DMA. */ write_4(sc, FS_DMA_CONTROL, 0); return; } taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task); write_4(sc, FS_DMA_CONTROL, 1); } /* * Reset and initialize the chip */ static void eceinit_locked(void *xsc) { struct ece_softc *sc = xsc; struct ifnet *ifp = sc->ifp; struct mii_data *mii; uint32_t cfg_reg; uint32_t cpu_port_config; uint32_t mac_port_config; while (1) { cfg_reg = read_4(sc, BIST_RESULT_TEST_0); if ((cfg_reg & (1<<17))) break; DELAY(100); } /* Set to default values. */ write_4(sc, SWITCH_CONFIG, 0x007AA7A1); write_4(sc, MAC_PORT_0_CONFIG, 0x00423D00); write_4(sc, MAC_PORT_1_CONFIG, 0x00423D80); write_4(sc, CPU_PORT_CONFIG, 0x004C0000); hardware_init(sc); mac_port_config = read_4(sc, MAC_PORT_0_CONFIG); /* Enable Port 0 */ mac_port_config &= (~(PORT_DISABLE)); write_4(sc, MAC_PORT_0_CONFIG, mac_port_config); cpu_port_config = read_4(sc, CPU_PORT_CONFIG); /* Enable CPU. */ cpu_port_config &= ~(PORT_DISABLE); write_4(sc, CPU_PORT_CONFIG, cpu_port_config); /* * Set 'running' flag, and clear output active flag * and attempt to start the output */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mii = device_get_softc(sc->miibus); mii_pollstat(mii); /* Enable DMA. 
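 * Writing 1 to FS_DMA_CONTROL starts the receive-side DMA engine; the
 * interrupt handlers above write 0 to the same register to park it when the
 * interface is not IFF_DRV_RUNNING, and ece_intr_qf() re-arms it after a
 * queue-full condition.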
*/ write_4(sc, FS_DMA_CONTROL, 1); callout_reset(&sc->tick_ch, hz, ece_tick, sc); } static inline int ece_encap(struct ece_softc *sc, struct mbuf *m0) { struct ifnet *ifp; bus_dma_segment_t segs[MAX_FRAGMENT]; bus_dmamap_t mapp; - eth_tx_desc_t *desc = 0; + eth_tx_desc_t *desc = NULL; int csum_flags; int desc_no; int error; int nsegs; int seg; ifp = sc->ifp; /* Fetch unused map */ mapp = sc->tx_desc[sc->tx_prod].dmamap; error = bus_dmamap_load_mbuf_sg(sc->dmatag_ring_tx, mapp, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { bus_dmamap_unload(sc->dmatag_ring_tx, mapp); return ((error != 0) ? error : -1); } desc = &(sc->desc_tx[sc->desc_curr_tx]); sc->tx_desc[sc->tx_prod].desc = desc; sc->tx_desc[sc->tx_prod].buff = m0; desc_no = sc->desc_curr_tx; for (seg = 0; seg < nsegs; seg++) { if (desc->cown == 0 ) { if_printf(ifp, "ERROR: descriptor is still used\n"); return (-1); } desc->length = segs[seg].ds_len; desc->data_ptr = segs[seg].ds_addr; if (seg == 0) { desc->fs = 1; } else { desc->fs = 0; } if (seg == nsegs - 1) { desc->ls = 1; } else { desc->ls = 0; } csum_flags = m0->m_pkthdr.csum_flags; desc->fr = 1; desc->pmap = 1; desc->insv = 0; desc->ico = 0; desc->tco = 0; desc->uco = 0; desc->interrupt = 1; if (csum_flags & CSUM_IP) { desc->ico = 1; if (csum_flags & CSUM_TCP) desc->tco = 1; if (csum_flags & CSUM_UDP) desc->uco = 1; } desc++; sc->desc_curr_tx = (sc->desc_curr_tx + 1) % ECE_MAX_TX_BUFFERS; if (sc->desc_curr_tx == 0) { desc = (eth_tx_desc_t *)&(sc->desc_tx[0]); } } desc = sc->tx_desc[sc->tx_prod].desc; sc->tx_prod = (sc->tx_prod + 1) % ECE_MAX_TX_BUFFERS; /* * After all descriptors are set, we set the flags to start the * sending proces. */ for (seg = 0; seg < nsegs; seg++) { desc->cown = 0; desc++; desc_no = (desc_no + 1) % ECE_MAX_TX_BUFFERS; if (desc_no == 0) desc = (eth_tx_desc_t *)&(sc->desc_tx[0]); } bus_dmamap_sync(sc->dmatag_data_tx, mapp, BUS_DMASYNC_PREWRITE); return (0); } /* * dequeu packets and transmit */ static void ecestart_locked(struct ifnet *ifp) { struct ece_softc *sc; struct mbuf *m0; uint32_t queued = 0; sc = ifp->if_softc; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; bus_dmamap_sync(sc->dmatag_ring_tx, sc->dmamap_ring_tx, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (;;) { /* Get packet from the queue */ IF_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; if (ece_encap(sc, m0)) { IF_PREPEND(&ifp->if_snd, m0); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } queued++; BPF_MTAP(ifp, m0); } if (queued) { bus_dmamap_sync(sc->dmatag_ring_tx, sc->dmamap_ring_tx, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); write_4(sc, TS_DMA_CONTROL, 1); } } static void eceinit(void *xsc) { struct ece_softc *sc = xsc; ECE_LOCK(sc); eceinit_locked(sc); ECE_UNLOCK(sc); } static void ece_tx_task(void *arg, int pending __unused) { struct ifnet *ifp; ifp = (struct ifnet *)arg; ecestart(ifp); } static void ecestart(struct ifnet *ifp) { struct ece_softc *sc = ifp->if_softc; ECE_TXLOCK(sc); ecestart_locked(ifp); ECE_TXUNLOCK(sc); } /* * Turn off interrupts, and stop the nic. Can be called with sc->ifp * NULL so be careful. 
*/ static void ecestop(struct ece_softc *sc) { struct ifnet *ifp = sc->ifp; uint32_t mac_port_config; write_4(sc, TS_DMA_CONTROL, 0); write_4(sc, FS_DMA_CONTROL, 0); if (ifp) ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->tick_ch); /*Disable Port 0 */ mac_port_config = read_4(sc, MAC_PORT_0_CONFIG); mac_port_config |= (PORT_DISABLE); write_4(sc, MAC_PORT_0_CONFIG, mac_port_config); /*Disable Port 1 */ mac_port_config = read_4(sc, MAC_PORT_1_CONFIG); mac_port_config |= (PORT_DISABLE); write_4(sc, MAC_PORT_1_CONFIG, mac_port_config); /* Disable all interrupt status sources. */ write_4(sc, INTERRUPT_MASK, 0x00001FFF); /* Clear previous interrupt sources. */ write_4(sc, INTERRUPT_STATUS, 0x00001FFF); write_4(sc, SWITCH_CONFIG, initial_switch_config); write_4(sc, CPU_PORT_CONFIG, initial_cpu_config); write_4(sc, MAC_PORT_0_CONFIG, initial_port0_config); write_4(sc, MAC_PORT_1_CONFIG, initial_port1_config); clear_mac_entries(sc, 1); } static void ece_restart(struct ece_softc *sc) { struct ifnet *ifp = sc->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* Enable port 0. */ write_4(sc, PORT_0_CONFIG, read_4(sc, PORT_0_CONFIG) & ~(PORT_DISABLE)); write_4(sc, INTERRUPT_MASK, 0x00000000); write_4(sc, FS_DMA_CONTROL, 1); callout_reset(&sc->tick_ch, hz, ece_tick, sc); } static void set_filter(struct ece_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; uint32_t mac_port_config; ifp = sc->ifp; clear_mac_entries(sc, 0); if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { mac_port_config = read_4(sc, MAC_PORT_0_CONFIG); mac_port_config &= ~(DISABLE_BROADCAST_PACKET); mac_port_config &= ~(DISABLE_MULTICAST_PACKET); write_4(sc, MAC_PORT_0_CONFIG, mac_port_config); return; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; add_mac_entry(sc, LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); } if_maddr_runlock(ifp); } static int eceioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ece_softc *sc = ifp->if_softc; struct mii_data *mii; struct ifreq *ifr = (struct ifreq *)data; int mask, error = 0; switch (cmd) { case SIOCSIFFLAGS: ECE_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0 && ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ecestop(sc); } else { /* Reinitialize card on any parameter change. */ if ((ifp->if_flags & IFF_UP) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) ece_restart(sc); } ECE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: ECE_LOCK(sc); set_filter(sc); ECE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: mii = device_get_softc(sc->miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd); break; case SIOCSIFCAP: mask = ifp->if_capenable ^ ifr->ifr_reqcap; if (mask & IFCAP_VLAN_MTU) { ECE_LOCK(sc); ECE_UNLOCK(sc); } default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void ece_child_detached(device_t dev, device_t child) { struct ece_softc *sc; sc = device_get_softc(dev); if (child == sc->miibus) sc->miibus = NULL; } /* * MII bus support routines. 
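/*
 * set_filter() above extracts the 48-bit station address from each AF_LINK
 * ifmultiaddr with the LLADDR() macro from <net/if_dl.h>.  A small
 * user-space sketch of what that access amounts to; print_lladdr and the
 * sample MAC are illustrative only.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <net/if_dl.h>
#include <stdio.h>
#include <string.h>

static void
print_lladdr(struct sockaddr_dl *sdl)
{
	const unsigned char *mac = (const unsigned char *)LLADDR(sdl);
	int i;

	for (i = 0; i < 6; i++)
		printf("%02x%s", mac[i], i == 5 ? "\n" : ":");
}

int
main(void)
{
	static const unsigned char mac[6] =
	    { 0x00, 0x0c, 0x29, 0xaa, 0xbb, 0xcc };
	struct sockaddr_dl sdl;

	memset(&sdl, 0, sizeof(sdl));
	sdl.sdl_family = AF_LINK;
	sdl.sdl_nlen = 0;		/* no interface name stored */
	sdl.sdl_alen = 6;
	memcpy(LLADDR(&sdl), mac, 6);
	print_lladdr(&sdl);
	return (0);
}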
*/ static int ece_miibus_readreg(device_t dev, int phy, int reg) { struct ece_softc *sc; sc = device_get_softc(dev); return (phy_read(sc, phy, reg)); } static int ece_miibus_writereg(device_t dev, int phy, int reg, int data) { struct ece_softc *sc; sc = device_get_softc(dev); phy_write(sc, phy, reg, data); return (0); } static device_method_t ece_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ece_probe), DEVMETHOD(device_attach, ece_attach), DEVMETHOD(device_detach, ece_detach), /* Bus interface */ DEVMETHOD(bus_child_detached, ece_child_detached), /* MII interface */ DEVMETHOD(miibus_readreg, ece_miibus_readreg), DEVMETHOD(miibus_writereg, ece_miibus_writereg), { 0, 0 } }; static driver_t ece_driver = { "ece", ece_methods, sizeof(struct ece_softc), }; DRIVER_MODULE(ece, econaarm, ece_driver, ece_devclass, 0, 0); DRIVER_MODULE(miibus, ece, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(ece, miibus, 1, 1, 1); MODULE_DEPEND(ece, ether, 1, 1, 1); Index: head/sys/arm/xscale/ixp425/ixp425_qmgr.c =================================================================== --- head/sys/arm/xscale/ixp425/ixp425_qmgr.c (revision 298054) +++ head/sys/arm/xscale/ixp425/ixp425_qmgr.c (revision 298055) @@ -1,1104 +1,1104 @@ /*- * Copyright (c) 2006 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ /*- * Copyright (c) 2001-2005, Intel Corporation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Intel XScale Queue Manager support. * * Each IXP4XXX device has a hardware block that implements a priority * queue manager that is shared between the XScale cpu and the backend * devices (such as the NPE). Queues are accessed by reading/writing * special memory locations. The queue contents are mapped into a shared * SRAM region with entries managed in a circular buffer. The XScale * processor can receive interrupts based on queue contents (a condition * code determines when interrupts should be delivered). * * The code here basically replaces the qmgr class in the Intel Access * Library (IAL). */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * State per AQM hw queue. * This structure holds q configuration and dispatch state. */ struct qmgrInfo { int qSizeInWords; /* queue size in words */ uint32_t qOflowStatBitMask; /* overflow status mask */ int qWriteCount; /* queue write count */ bus_size_t qAccRegAddr; /* access register */ bus_size_t qUOStatRegAddr; /* status register */ bus_size_t qConfigRegAddr; /* config register */ int qSizeInEntries; /* queue size in entries */ uint32_t qUflowStatBitMask; /* underflow status mask */ int qReadCount; /* queue read count */ /* XXX union */ uint32_t qStatRegAddr; uint32_t qStatBitsOffset; uint32_t qStat0BitMask; uint32_t qStat1BitMask; uint32_t intRegCheckMask; /* interrupt reg check mask */ void (*cb)(int, void *); /* callback function */ void *cbarg; /* callback argument */ int priority; /* dispatch priority */ #if 0 /* NB: needed only for A0 parts */ u_int statusWordOffset; /* status word offset */ uint32_t statusMask; /* status mask */ uint32_t statusCheckValue; /* status check value */ #endif }; struct ixpqmgr_softc { device_t sc_dev; bus_space_tag_t sc_iot; bus_space_handle_t sc_ioh; struct resource *sc_irq1; /* IRQ resource */ void *sc_ih1; /* interrupt handler */ int sc_rid1; /* resource id for irq */ struct resource *sc_irq2; void *sc_ih2; int sc_rid2; struct qmgrInfo qinfo[IX_QMGR_MAX_NUM_QUEUES]; /* * This array contains a list of queue identifiers ordered by * priority. The table is split logically between queue * identifiers 0-31 and 32-63. To optimize lookups bit masks * are kept for the first-32 and last-32 q's. When the * table needs to be rebuilt mark rebuildTable and it'll * happen after the next interrupt. 
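/*
 * A typical consumer of this queue manager (for example an NPE driver)
 * configures a queue, registers a callback and then moves 32-bit entries
 * through it.  A rough usage sketch; the queue id, sizes and watermarks are
 * illustrative values, and the ixpqmgr_* prototypes are assumed to be in
 * scope via the qmgr header.
 */
static void
example_qcb(int qId, void *arg)
{
	uint32_t entry;

	/* Drain the queue; a zero entry means it has been emptied. */
	while (ixpqmgr_qread(qId, &entry) == 0 && entry != 0)
		/* hand "entry" (usually a buffer pointer) to the driver */;
}

static void
example_qsetup(void *sc)
{
	/*
	 * 128-entry queue, nearly-empty/nearly-full watermarks of 2/2,
	 * notify (interrupt) when the queue becomes not-empty.
	 */
	ixpqmgr_qconfig(6, 128, 2, 2, IX_QMGR_Q_SOURCE_ID_NOT_E,
	    example_qcb, sc);
	ixpqmgr_qwrite(6, 0x12345678);	/* enqueue one entry */
}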
*/ int priorityTable[IX_QMGR_MAX_NUM_QUEUES]; uint32_t lowPriorityTableFirstHalfMask; uint32_t uppPriorityTableFirstHalfMask; int rebuildTable; /* rebuild priorityTable */ uint32_t aqmFreeSramAddress; /* SRAM free space */ }; static int qmgr_debug; SYSCTL_INT(_debug, OID_AUTO, qmgr, CTLFLAG_RWTUN, &qmgr_debug, 0, "IXP4XX Q-Manager debug msgs"); #define DPRINTF(dev, fmt, ...) do { \ if (qmgr_debug) printf(fmt, __VA_ARGS__); \ } while (0) #define DPRINTFn(n, dev, fmt, ...) do { \ if (qmgr_debug >= n) printf(fmt, __VA_ARGS__); \ } while (0) static struct ixpqmgr_softc *ixpqmgr_sc = NULL; static void ixpqmgr_rebuild(struct ixpqmgr_softc *); static void ixpqmgr_intr(void *); static void aqm_int_enable(struct ixpqmgr_softc *sc, int qId); static void aqm_int_disable(struct ixpqmgr_softc *sc, int qId); static void aqm_qcfg(struct ixpqmgr_softc *sc, int qId, u_int ne, u_int nf); static void aqm_srcsel_write(struct ixpqmgr_softc *sc, int qId, int sourceId); static void aqm_reset(struct ixpqmgr_softc *sc); static void dummyCallback(int qId, void *arg) { /* XXX complain */ } static uint32_t aqm_reg_read(struct ixpqmgr_softc *sc, bus_size_t off) { DPRINTFn(9, sc->sc_dev, "%s(0x%x)\n", __func__, (int)off); return bus_space_read_4(sc->sc_iot, sc->sc_ioh, off); } static void aqm_reg_write(struct ixpqmgr_softc *sc, bus_size_t off, uint32_t val) { DPRINTFn(9, sc->sc_dev, "%s(0x%x, 0x%x)\n", __func__, (int)off, val); bus_space_write_4(sc->sc_iot, sc->sc_ioh, off, val); } static int ixpqmgr_probe(device_t dev) { device_set_desc(dev, "IXP4XX Q-Manager"); return 0; } static int ixpqmgr_attach(device_t dev) { struct ixpqmgr_softc *sc = device_get_softc(dev); struct ixp425_softc *sa = device_get_softc(device_get_parent(dev)); int i, err; ixpqmgr_sc = sc; sc->sc_dev = dev; sc->sc_iot = sa->sc_iot; if (bus_space_map(sc->sc_iot, IXP425_QMGR_HWBASE, IXP425_QMGR_SIZE, 0, &sc->sc_ioh)) panic("%s: Cannot map registers", device_get_name(dev)); /* NB: we only use the lower 32 q's */ /* Set up QMGR interrupts */ sc->sc_rid1 = 0; sc->sc_irq1 = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->sc_rid1, IXP425_INT_QUE1_32, IXP425_INT_QUE1_32, 1, RF_ACTIVE); sc->sc_rid2 = 1; sc->sc_irq2 = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->sc_rid2, IXP425_INT_QUE33_64, IXP425_INT_QUE33_64, 1, RF_ACTIVE); if (sc->sc_irq1 == NULL || sc->sc_irq2 == NULL) panic("Unable to allocate the qmgr irqs.\n"); err = bus_setup_intr(dev, sc->sc_irq1, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixpqmgr_intr, NULL, &sc->sc_ih1); if (err) { device_printf(dev, "failed to set up qmgr irq=%d\n", IXP425_INT_QUE1_32); return (ENXIO); } err = bus_setup_intr(dev, sc->sc_irq2, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixpqmgr_intr, NULL, &sc->sc_ih2); if (err) { device_printf(dev, "failed to set up qmgr irq=%d\n", IXP425_INT_QUE33_64); return (ENXIO); } /* NB: softc is pre-zero'd */ for (i = 0; i < IX_QMGR_MAX_NUM_QUEUES; i++) { struct qmgrInfo *qi = &sc->qinfo[i]; qi->cb = dummyCallback; qi->priority = IX_QMGR_Q_PRIORITY_0; /* default priority */ /* * There are two interrupt registers, 32 bits each. One * for the lower queues(0-31) and one for the upper * queues(32-63). Therefore need to mod by 32 i.e the * min upper queue identifier. */ qi->intRegCheckMask = (1<<(i%(IX_QMGR_MIN_QUEUPP_QID))); /* * Register addresses and bit masks are calculated and * stored here to optimize QRead, QWrite and QStatusGet * functions. 
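/*
 * The intRegCheckMask computed above maps queue ids 0-63 onto a single bit
 * in one of the two 32-bit interrupt registers.  A stand-alone fragment
 * demonstrating that mapping; MIN_QUEUPP_QID mirrors IX_QMGR_MIN_QUEUPP_QID
 * (i.e. 32).
 */
#include <stdint.h>
#include <stdio.h>

#define MIN_QUEUPP_QID	32

int
main(void)
{
	int qid;

	for (qid = 0; qid < 64; qid += 31) {
		uint32_t mask = 1u << (qid % MIN_QUEUPP_QID);

		printf("q%-2d -> QINTREG%d bit mask 0x%08x\n",
		    qid, qid / MIN_QUEUPP_QID, mask);
	}
	return (0);
}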
*/ /* AQM Queue access reg addresses, per queue */ qi->qAccRegAddr = IX_QMGR_Q_ACCESS_ADDR_GET(i); qi->qAccRegAddr = IX_QMGR_Q_ACCESS_ADDR_GET(i); qi->qConfigRegAddr = IX_QMGR_Q_CONFIG_ADDR_GET(i); /* AQM Queue lower-group (0-31), only */ if (i < IX_QMGR_MIN_QUEUPP_QID) { /* AQM Q underflow/overflow status reg address, per queue */ qi->qUOStatRegAddr = IX_QMGR_QUEUOSTAT0_OFFSET + ((i / IX_QMGR_QUEUOSTAT_NUM_QUE_PER_WORD) * sizeof(uint32_t)); /* AQM Q underflow status bit masks for status reg per queue */ qi->qUflowStatBitMask = (IX_QMGR_UNDERFLOW_BIT_OFFSET + 1) << ((i & (IX_QMGR_QUEUOSTAT_NUM_QUE_PER_WORD - 1)) * (32 / IX_QMGR_QUEUOSTAT_NUM_QUE_PER_WORD)); /* AQM Q overflow status bit masks for status reg, per queue */ qi->qOflowStatBitMask = (IX_QMGR_OVERFLOW_BIT_OFFSET + 1) << ((i & (IX_QMGR_QUEUOSTAT_NUM_QUE_PER_WORD - 1)) * (32 / IX_QMGR_QUEUOSTAT_NUM_QUE_PER_WORD)); /* AQM Q lower-group (0-31) status reg addresses, per queue */ qi->qStatRegAddr = IX_QMGR_QUELOWSTAT0_OFFSET + ((i / IX_QMGR_QUELOWSTAT_NUM_QUE_PER_WORD) * sizeof(uint32_t)); /* AQM Q lower-group (0-31) status register bit offset */ qi->qStatBitsOffset = (i & (IX_QMGR_QUELOWSTAT_NUM_QUE_PER_WORD - 1)) * (32 / IX_QMGR_QUELOWSTAT_NUM_QUE_PER_WORD); } else { /* AQM Q upper-group (32-63), only */ qi->qUOStatRegAddr = 0; /* XXX */ /* AQM Q upper-group (32-63) Nearly Empty status reg bitmasks */ qi->qStat0BitMask = (1 << (i - IX_QMGR_MIN_QUEUPP_QID)); /* AQM Q upper-group (32-63) Full status register bitmasks */ qi->qStat1BitMask = (1 << (i - IX_QMGR_MIN_QUEUPP_QID)); } } sc->aqmFreeSramAddress = 0x100; /* Q buffer space starts at 0x2100 */ ixpqmgr_rebuild(sc); /* build initial priority table */ aqm_reset(sc); /* reset h/w */ return (0); } static int ixpqmgr_detach(device_t dev) { struct ixpqmgr_softc *sc = device_get_softc(dev); aqm_reset(sc); /* disable interrupts */ bus_teardown_intr(dev, sc->sc_irq1, sc->sc_ih1); bus_teardown_intr(dev, sc->sc_irq2, sc->sc_ih2); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_rid1, sc->sc_irq1); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_rid2, sc->sc_irq2); bus_space_unmap(sc->sc_iot, sc->sc_ioh, IXP425_QMGR_SIZE); return (0); } int ixpqmgr_qconfig(int qId, int qEntries, int ne, int nf, int srcSel, qconfig_hand_t *cb, void *cbarg) { struct ixpqmgr_softc *sc = ixpqmgr_sc; struct qmgrInfo *qi = &sc->qinfo[qId]; DPRINTF(sc->sc_dev, "%s(%u, %u, %u, %u, %u, %p, %p)\n", __func__, qId, qEntries, ne, nf, srcSel, cb, cbarg); /* NB: entry size is always 1 */ qi->qSizeInWords = qEntries; qi->qReadCount = 0; qi->qWriteCount = 0; qi->qSizeInEntries = qEntries; /* XXX kept for code clarity */ if (cb == NULL) { /* Reset to dummy callback */ qi->cb = dummyCallback; - qi->cbarg = 0; + qi->cbarg = NULL; } else { qi->cb = cb; qi->cbarg = cbarg; } /* Write the config register; NB must be AFTER qinfo setup */ aqm_qcfg(sc, qId, ne, nf); /* * Account for space just allocated to queue. 
*/ sc->aqmFreeSramAddress += (qi->qSizeInWords * sizeof(uint32_t)); /* Set the interrupt source if this queue is in the range 0-31 */ if (qId < IX_QMGR_MIN_QUEUPP_QID) aqm_srcsel_write(sc, qId, srcSel); if (cb != NULL) /* Enable the interrupt */ aqm_int_enable(sc, qId); sc->rebuildTable = TRUE; return 0; /* XXX */ } int ixpqmgr_qwrite(int qId, uint32_t entry) { struct ixpqmgr_softc *sc = ixpqmgr_sc; struct qmgrInfo *qi = &sc->qinfo[qId]; DPRINTFn(3, sc->sc_dev, "%s(%u, 0x%x) writeCount %u size %u\n", __func__, qId, entry, qi->qWriteCount, qi->qSizeInEntries); /* write the entry */ aqm_reg_write(sc, qi->qAccRegAddr, entry); /* NB: overflow is available for lower queues only */ if (qId < IX_QMGR_MIN_QUEUPP_QID) { int qSize = qi->qSizeInEntries; /* * Increment the current number of entries in the queue * and check for overflow . */ if (qi->qWriteCount++ == qSize) { /* check for overflow */ uint32_t status = aqm_reg_read(sc, qi->qUOStatRegAddr); int qPtrs; /* * Read the status twice because the status may * not be immediately ready after the write operation */ if ((status & qi->qOflowStatBitMask) || ((status = aqm_reg_read(sc, qi->qUOStatRegAddr)) & qi->qOflowStatBitMask)) { /* * The queue is full, clear the overflow status bit if set. */ aqm_reg_write(sc, qi->qUOStatRegAddr, status & ~qi->qOflowStatBitMask); qi->qWriteCount = qSize; DPRINTFn(5, sc->sc_dev, "%s(%u, 0x%x) Q full, overflow status cleared\n", __func__, qId, entry); return ENOSPC; } /* * No overflow occured : someone is draining the queue * and the current counter needs to be * updated from the current number of entries in the queue */ /* calculate number of words in q */ qPtrs = aqm_reg_read(sc, qi->qConfigRegAddr); DPRINTFn(2, sc->sc_dev, "%s(%u, 0x%x) Q full, no overflow status, qConfig 0x%x\n", __func__, qId, entry, qPtrs); qPtrs = (qPtrs - (qPtrs >> 7)) & 0x7f; if (qPtrs == 0) { /* * The queue may be full at the time of the * snapshot. Next access will check * the overflow status again. */ qi->qWriteCount = qSize; } else { /* convert the number of words to a number of entries */ qi->qWriteCount = qPtrs & (qSize - 1); } } } return 0; } int ixpqmgr_qread(int qId, uint32_t *entry) { struct ixpqmgr_softc *sc = ixpqmgr_sc; struct qmgrInfo *qi = &sc->qinfo[qId]; bus_size_t off = qi->qAccRegAddr; *entry = aqm_reg_read(sc, off); /* * Reset the current read count : next access to the read function * will force a underflow status check. */ qi->qReadCount = 0; /* Check if underflow occurred on the read */ if (*entry == 0 && qId < IX_QMGR_MIN_QUEUPP_QID) { /* get the queue status */ uint32_t status = aqm_reg_read(sc, qi->qUOStatRegAddr); if (status & qi->qUflowStatBitMask) { /* clear underflow status */ aqm_reg_write(sc, qi->qUOStatRegAddr, status &~ qi->qUflowStatBitMask); return ENOSPC; } } return 0; } int ixpqmgr_qreadm(int qId, uint32_t n, uint32_t *p) { struct ixpqmgr_softc *sc = ixpqmgr_sc; struct qmgrInfo *qi = &sc->qinfo[qId]; uint32_t entry; bus_size_t off = qi->qAccRegAddr; entry = aqm_reg_read(sc, off); while (--n) { if (entry == 0) { /* if we read a NULL entry, stop. We have underflowed */ break; } *p++ = entry; /* store */ entry = aqm_reg_read(sc, off); } *p = entry; /* * Reset the current read count : next access to the read function * will force a underflow status check. 
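/*
 * ixpqmgr_qwrite() above recovers the queue occupancy from the QUECONFIG
 * word with (qPtrs - (qPtrs >> 7)) & 0x7f.  The fragment below spells that
 * out, assuming the write pointer sits in bits 6:0 and the read pointer in
 * bits 13:7, both counted in words modulo 128; the ambiguous "0 could mean
 * full" case is handled by the driver code itself.
 */
#include <stdint.h>
#include <assert.h>

static unsigned
queue_words(uint32_t qcfg)
{
	unsigned wrptr = qcfg & 0x7f;		/* bits 6:0  */
	unsigned rdptr = (qcfg >> 7) & 0x7f;	/* bits 13:7 */

	return ((wrptr - rdptr) & 0x7f);	/* occupancy, mod 128 */
}

int
main(void)
{
	/* write ptr 0x05, read ptr 0x7e: 7 words in flight (wrapped). */
	assert(queue_words((0x7e << 7) | 0x05) == 7);
	/* equal pointers read back as empty (or exactly full). */
	assert(queue_words((0x10 << 7) | 0x10) == 0);
	return (0);
}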
*/ qi->qReadCount = 0; /* Check if underflow occurred on the read */ if (entry == 0 && qId < IX_QMGR_MIN_QUEUPP_QID) { /* get the queue status */ uint32_t status = aqm_reg_read(sc, qi->qUOStatRegAddr); if (status & qi->qUflowStatBitMask) { /* clear underflow status */ aqm_reg_write(sc, qi->qUOStatRegAddr, status &~ qi->qUflowStatBitMask); return ENOSPC; } } return 0; } uint32_t ixpqmgr_getqstatus(int qId) { #define QLOWSTATMASK \ ((1 << (32 / IX_QMGR_QUELOWSTAT_NUM_QUE_PER_WORD)) - 1) struct ixpqmgr_softc *sc = ixpqmgr_sc; const struct qmgrInfo *qi = &sc->qinfo[qId]; uint32_t status; if (qId < IX_QMGR_MIN_QUEUPP_QID) { /* read the status of a queue in the range 0-31 */ status = aqm_reg_read(sc, qi->qStatRegAddr); /* mask out the status bits relevant only to this queue */ status = (status >> qi->qStatBitsOffset) & QLOWSTATMASK; } else { /* read status of a queue in the range 32-63 */ status = 0; if (aqm_reg_read(sc, IX_QMGR_QUEUPPSTAT0_OFFSET)&qi->qStat0BitMask) status |= IX_QMGR_Q_STATUS_NE_BIT_MASK; /* nearly empty */ if (aqm_reg_read(sc, IX_QMGR_QUEUPPSTAT1_OFFSET)&qi->qStat1BitMask) status |= IX_QMGR_Q_STATUS_F_BIT_MASK; /* full */ } return status; #undef QLOWSTATMASK } uint32_t ixpqmgr_getqconfig(int qId) { struct ixpqmgr_softc *sc = ixpqmgr_sc; return aqm_reg_read(sc, IX_QMGR_Q_CONFIG_ADDR_GET(qId)); } void ixpqmgr_dump(void) { struct ixpqmgr_softc *sc = ixpqmgr_sc; int i, a; /* status registers */ printf("0x%04x: %08x %08x %08x %08x\n" , 0x400 , aqm_reg_read(sc, 0x400) , aqm_reg_read(sc, 0x400+4) , aqm_reg_read(sc, 0x400+8) , aqm_reg_read(sc, 0x400+12) ); printf("0x%04x: %08x %08x %08x %08x\n" , 0x410 , aqm_reg_read(sc, 0x410) , aqm_reg_read(sc, 0x410+4) , aqm_reg_read(sc, 0x410+8) , aqm_reg_read(sc, 0x410+12) ); printf("0x%04x: %08x %08x %08x %08x\n" , 0x420 , aqm_reg_read(sc, 0x420) , aqm_reg_read(sc, 0x420+4) , aqm_reg_read(sc, 0x420+8) , aqm_reg_read(sc, 0x420+12) ); printf("0x%04x: %08x %08x %08x %08x\n" , 0x430 , aqm_reg_read(sc, 0x430) , aqm_reg_read(sc, 0x430+4) , aqm_reg_read(sc, 0x430+8) , aqm_reg_read(sc, 0x430+12) ); /* q configuration registers */ for (a = 0x2000; a < 0x20ff; a += 32) printf("0x%04x: %08x %08x %08x %08x %08x %08x %08x %08x\n" , a , aqm_reg_read(sc, a) , aqm_reg_read(sc, a+4) , aqm_reg_read(sc, a+8) , aqm_reg_read(sc, a+12) , aqm_reg_read(sc, a+16) , aqm_reg_read(sc, a+20) , aqm_reg_read(sc, a+24) , aqm_reg_read(sc, a+28) ); /* allocated SRAM */ for (i = 0x100; i < sc->aqmFreeSramAddress; i += 32) { a = 0x2000 + i; printf("0x%04x: %08x %08x %08x %08x %08x %08x %08x %08x\n" , a , aqm_reg_read(sc, a) , aqm_reg_read(sc, a+4) , aqm_reg_read(sc, a+8) , aqm_reg_read(sc, a+12) , aqm_reg_read(sc, a+16) , aqm_reg_read(sc, a+20) , aqm_reg_read(sc, a+24) , aqm_reg_read(sc, a+28) ); } for (i = 0; i < 16; i++) { printf("Q[%2d] config 0x%08x status 0x%02x " "Q[%2d] config 0x%08x status 0x%02x\n" , i, ixpqmgr_getqconfig(i), ixpqmgr_getqstatus(i) , i+16, ixpqmgr_getqconfig(i+16), ixpqmgr_getqstatus(i+16) ); } } void ixpqmgr_notify_enable(int qId, int srcSel) { struct ixpqmgr_softc *sc = ixpqmgr_sc; #if 0 /* Calculate the checkMask and checkValue for this q */ aqm_calc_statuscheck(sc, qId, srcSel); #endif /* Set the interrupt source if this queue is in the range 0-31 */ if (qId < IX_QMGR_MIN_QUEUPP_QID) aqm_srcsel_write(sc, qId, srcSel); /* Enable the interrupt */ aqm_int_enable(sc, qId); } void ixpqmgr_notify_disable(int qId) { struct ixpqmgr_softc *sc = ixpqmgr_sc; aqm_int_disable(sc, qId); } /* * Rebuild the priority table used by the dispatcher. 
*/ static void ixpqmgr_rebuild(struct ixpqmgr_softc *sc) { int q, pri; int lowQuePriorityTableIndex, uppQuePriorityTableIndex; struct qmgrInfo *qi; sc->lowPriorityTableFirstHalfMask = 0; sc->uppPriorityTableFirstHalfMask = 0; lowQuePriorityTableIndex = 0; uppQuePriorityTableIndex = 32; for (pri = 0; pri < IX_QMGR_NUM_PRIORITY_LEVELS; pri++) { /* low priority q's */ for (q = 0; q < IX_QMGR_MIN_QUEUPP_QID; q++) { qi = &sc->qinfo[q]; if (qi->priority == pri) { /* * Build the priority table bitmask which match the * queues of the first half of the priority table. */ if (lowQuePriorityTableIndex < 16) { sc->lowPriorityTableFirstHalfMask |= qi->intRegCheckMask; } sc->priorityTable[lowQuePriorityTableIndex++] = q; } } /* high priority q's */ for (; q < IX_QMGR_MAX_NUM_QUEUES; q++) { qi = &sc->qinfo[q]; if (qi->priority == pri) { /* * Build the priority table bitmask which match the * queues of the first half of the priority table . */ if (uppQuePriorityTableIndex < 48) { sc->uppPriorityTableFirstHalfMask |= qi->intRegCheckMask; } sc->priorityTable[uppQuePriorityTableIndex++] = q; } } } sc->rebuildTable = FALSE; } /* * Count the number of leading zero bits in a word, * and return the same value than the CLZ instruction. * Note this is similar to the standard ffs function but * it counts zero's from the MSB instead of the LSB. * * word (in) return value (out) * 0x80000000 0 * 0x40000000 1 * ,,, ,,, * 0x00000002 30 * 0x00000001 31 * 0x00000000 32 * * The C version of this function is used as a replacement * for system not providing the equivalent of the CLZ * assembly language instruction. * * Note that this version is big-endian */ static unsigned int _lzcount(uint32_t word) { unsigned int lzcount = 0; if (word == 0) return 32; while ((word & 0x80000000) == 0) { word <<= 1; lzcount++; } return lzcount; } static void ixpqmgr_intr(void *arg) { struct ixpqmgr_softc *sc = ixpqmgr_sc; uint32_t intRegVal; /* Interrupt reg val */ struct qmgrInfo *qi; int priorityTableIndex; /* Priority table index */ int qIndex; /* Current queue being processed */ /* Read the interrupt register */ intRegVal = aqm_reg_read(sc, IX_QMGR_QINTREG0_OFFSET); /* Write back to clear interrupt */ aqm_reg_write(sc, IX_QMGR_QINTREG0_OFFSET, intRegVal); DPRINTFn(5, sc->sc_dev, "%s: ISR0 0x%x ISR1 0x%x\n", __func__, intRegVal, aqm_reg_read(sc, IX_QMGR_QINTREG1_OFFSET)); /* No queue has interrupt register set */ if (intRegVal != 0) { /* get the first queue Id from the interrupt register value */ qIndex = (32 - 1) - _lzcount(intRegVal); DPRINTFn(2, sc->sc_dev, "%s: ISR0 0x%x qIndex %u\n", __func__, intRegVal, qIndex); /* * Optimize for single callback case. */ qi = &sc->qinfo[qIndex]; if (intRegVal == qi->intRegCheckMask) { /* * Only 1 queue event triggered a notification. * Call the callback function for this queue */ qi->cb(qIndex, qi->cbarg); } else { /* * The event is triggered by more than 1 queue, * the queue search will start from the beginning * or the middle of the priority table. * * The search will end when all the bits of the interrupt * register are cleared. There is no need to maintain * a separate value and test it at each iteration. */ if (intRegVal & sc->lowPriorityTableFirstHalfMask) { priorityTableIndex = 0; } else { priorityTableIndex = 16; } /* * Iterate over the priority table until all the bits * of the interrupt register are cleared. 
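/*
 * On toolchains that provide __builtin_clz() (gcc, clang), _lzcount() above
 * is equivalent to the guarded form below.  The guard matters because
 * __builtin_clz(0) is undefined, while the table in the comment requires 32.
 */
#include <stdint.h>
#include <assert.h>

static unsigned int
lzcount_builtin(uint32_t word)
{
	return (word == 0 ? 32 : (unsigned int)__builtin_clz(word));
}

int
main(void)
{
	assert(lzcount_builtin(0x80000000u) == 0);
	assert(lzcount_builtin(0x00000001u) == 31);
	assert(lzcount_builtin(0) == 32);
	return (0);
}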
*/ do { qIndex = sc->priorityTable[priorityTableIndex++]; qi = &sc->qinfo[qIndex]; /* If this queue caused this interrupt to be raised */ if (intRegVal & qi->intRegCheckMask) { /* Call the callback function for this queue */ qi->cb(qIndex, qi->cbarg); /* Clear the interrupt register bit */ intRegVal &= ~qi->intRegCheckMask; } } while (intRegVal); } } /* Rebuild the priority table if needed */ if (sc->rebuildTable) ixpqmgr_rebuild(sc); } #if 0 /* * Generate the parameters used to check if a Q's status matches * the specified source select. We calculate which status word * to check (statusWordOffset), the value to check the status * against (statusCheckValue) and the mask (statusMask) to mask * out all but the bits to check in the status word. */ static void aqm_calc_statuscheck(int qId, IxQMgrSourceId srcSel) { struct qmgrInfo *qi = &qinfo[qId]; uint32_t shiftVal; if (qId < IX_QMGR_MIN_QUEUPP_QID) { switch (srcSel) { case IX_QMGR_Q_SOURCE_ID_E: qi->statusCheckValue = IX_QMGR_Q_STATUS_E_BIT_MASK; qi->statusMask = IX_QMGR_Q_STATUS_E_BIT_MASK; break; case IX_QMGR_Q_SOURCE_ID_NE: qi->statusCheckValue = IX_QMGR_Q_STATUS_NE_BIT_MASK; qi->statusMask = IX_QMGR_Q_STATUS_NE_BIT_MASK; break; case IX_QMGR_Q_SOURCE_ID_NF: qi->statusCheckValue = IX_QMGR_Q_STATUS_NF_BIT_MASK; qi->statusMask = IX_QMGR_Q_STATUS_NF_BIT_MASK; break; case IX_QMGR_Q_SOURCE_ID_F: qi->statusCheckValue = IX_QMGR_Q_STATUS_F_BIT_MASK; qi->statusMask = IX_QMGR_Q_STATUS_F_BIT_MASK; break; case IX_QMGR_Q_SOURCE_ID_NOT_E: qi->statusCheckValue = 0; qi->statusMask = IX_QMGR_Q_STATUS_E_BIT_MASK; break; case IX_QMGR_Q_SOURCE_ID_NOT_NE: qi->statusCheckValue = 0; qi->statusMask = IX_QMGR_Q_STATUS_NE_BIT_MASK; break; case IX_QMGR_Q_SOURCE_ID_NOT_NF: qi->statusCheckValue = 0; qi->statusMask = IX_QMGR_Q_STATUS_NF_BIT_MASK; break; case IX_QMGR_Q_SOURCE_ID_NOT_F: qi->statusCheckValue = 0; qi->statusMask = IX_QMGR_Q_STATUS_F_BIT_MASK; break; default: /* Should never hit */ IX_OSAL_ASSERT(0); break; } /* One nibble of status per queue so need to shift the * check value and mask out to the correct position. */ shiftVal = (qId % IX_QMGR_QUELOWSTAT_NUM_QUE_PER_WORD) * IX_QMGR_QUELOWSTAT_BITS_PER_Q; /* Calculate the which status word to check from the qId, * 8 Qs status per word */ qi->statusWordOffset = qId / IX_QMGR_QUELOWSTAT_NUM_QUE_PER_WORD; qi->statusCheckValue <<= shiftVal; qi->statusMask <<= shiftVal; } else { /* One status word */ qi->statusWordOffset = 0; /* Single bits per queue and int source bit hardwired NE, * Qs start at 32. */ qi->statusMask = 1 << (qId - IX_QMGR_MIN_QUEUPP_QID); qi->statusCheckValue = qi->statusMask; } } #endif static void aqm_int_enable(struct ixpqmgr_softc *sc, int qId) { bus_size_t reg; uint32_t v; if (qId < IX_QMGR_MIN_QUEUPP_QID) reg = IX_QMGR_QUEIEREG0_OFFSET; else reg = IX_QMGR_QUEIEREG1_OFFSET; v = aqm_reg_read(sc, reg); aqm_reg_write(sc, reg, v | (1 << (qId % IX_QMGR_MIN_QUEUPP_QID))); DPRINTF(sc->sc_dev, "%s(%u) 0x%lx: 0x%x => 0x%x\n", __func__, qId, reg, v, aqm_reg_read(sc, reg)); } static void aqm_int_disable(struct ixpqmgr_softc *sc, int qId) { bus_size_t reg; uint32_t v; if (qId < IX_QMGR_MIN_QUEUPP_QID) reg = IX_QMGR_QUEIEREG0_OFFSET; else reg = IX_QMGR_QUEIEREG1_OFFSET; v = aqm_reg_read(sc, reg); aqm_reg_write(sc, reg, v &~ (1 << (qId % IX_QMGR_MIN_QUEUPP_QID))); DPRINTF(sc->sc_dev, "%s(%u) 0x%lx: 0x%x => 0x%x\n", __func__, qId, reg, v, aqm_reg_read(sc, reg)); } static unsigned log2(unsigned n) { unsigned count; /* * N.B. this function will return 0 if supplied 0. 
*/ for (count = 0; n/2; count++) n /= 2; return count; } static __inline unsigned toAqmEntrySize(int entrySize) { /* entrySize 1("00"),2("01"),4("10") */ return log2(entrySize); } static __inline unsigned toAqmBufferSize(unsigned bufferSizeInWords) { /* bufferSize 16("00"),32("01),64("10"),128("11") */ return log2(bufferSizeInWords / IX_QMGR_MIN_BUFFER_SIZE); } static __inline unsigned toAqmWatermark(int watermark) { /* * Watermarks 0("000"),1("001"),2("010"),4("011"), * 8("100"),16("101"),32("110"),64("111") */ return log2(2 * watermark); } static void aqm_qcfg(struct ixpqmgr_softc *sc, int qId, u_int ne, u_int nf) { const struct qmgrInfo *qi = &sc->qinfo[qId]; uint32_t qCfg; uint32_t baseAddress; /* Build config register */ qCfg = ((toAqmEntrySize(1) & IX_QMGR_ENTRY_SIZE_MASK) << IX_QMGR_Q_CONFIG_ESIZE_OFFSET) | ((toAqmBufferSize(qi->qSizeInWords) & IX_QMGR_SIZE_MASK) << IX_QMGR_Q_CONFIG_BSIZE_OFFSET); /* baseAddress, calculated relative to start address */ baseAddress = sc->aqmFreeSramAddress; /* base address must be word-aligned */ KASSERT((baseAddress % IX_QMGR_BASE_ADDR_16_WORD_ALIGN) == 0, ("address not word-aligned")); /* Now convert to a 16 word pointer as required by QUECONFIG register */ baseAddress >>= IX_QMGR_BASE_ADDR_16_WORD_SHIFT; qCfg |= baseAddress << IX_QMGR_Q_CONFIG_BADDR_OFFSET; /* set watermarks */ qCfg |= (toAqmWatermark(ne) << IX_QMGR_Q_CONFIG_NE_OFFSET) | (toAqmWatermark(nf) << IX_QMGR_Q_CONFIG_NF_OFFSET); DPRINTF(sc->sc_dev, "%s(%u, %u, %u) 0x%x => 0x%x @ 0x%x\n", __func__, qId, ne, nf, aqm_reg_read(sc, IX_QMGR_Q_CONFIG_ADDR_GET(qId)), qCfg, IX_QMGR_Q_CONFIG_ADDR_GET(qId)); aqm_reg_write(sc, IX_QMGR_Q_CONFIG_ADDR_GET(qId), qCfg); } static void aqm_srcsel_write(struct ixpqmgr_softc *sc, int qId, int sourceId) { bus_size_t off; uint32_t v; /* * Calculate the register offset; multiple queues split across registers */ off = IX_QMGR_INT0SRCSELREG0_OFFSET + ((qId / IX_QMGR_INTSRC_NUM_QUE_PER_WORD) * sizeof(uint32_t)); v = aqm_reg_read(sc, off); if (off == IX_QMGR_INT0SRCSELREG0_OFFSET && qId == 0) { /* Queue 0 at INT0SRCSELREG should not corrupt the value bit-3 */ v |= 0x7; } else { const uint32_t bpq = 32 / IX_QMGR_INTSRC_NUM_QUE_PER_WORD; uint32_t mask; int qshift; qshift = (qId & (IX_QMGR_INTSRC_NUM_QUE_PER_WORD-1)) * bpq; mask = ((1 << bpq) - 1) << qshift; /* q's status mask */ /* merge sourceId */ v = (v &~ mask) | ((sourceId << qshift) & mask); } DPRINTF(sc->sc_dev, "%s(%u, %u) 0x%x => 0x%x @ 0x%lx\n", __func__, qId, sourceId, aqm_reg_read(sc, off), v, off); aqm_reg_write(sc, off, v); } /* * Reset AQM registers to default values. 
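/*
 * The watermark encoding used by aqm_qcfg() is simply log2(2 * watermark),
 * which maps the legal values 0,1,2,4,...,64 onto the 3-bit codes 0..7.
 * A stand-alone check of that encoding (ilog2 mirrors the local log2()
 * helper above, including its "0 maps to 0" behaviour):
 */
#include <assert.h>

static unsigned
ilog2(unsigned n)
{
	unsigned count;

	for (count = 0; n / 2; count++)
		n /= 2;
	return (count);
}

int
main(void)
{
	assert(ilog2(2 * 0)  == 0);	/* "000" */
	assert(ilog2(2 * 1)  == 1);	/* "001" */
	assert(ilog2(2 * 4)  == 3);	/* "011" */
	assert(ilog2(2 * 64) == 7);	/* "111" */
	return (0);
}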
*/ static void aqm_reset(struct ixpqmgr_softc *sc) { int i; /* Reset queues 0..31 status registers 0..3 */ aqm_reg_write(sc, IX_QMGR_QUELOWSTAT0_OFFSET, IX_QMGR_QUELOWSTAT_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_QUELOWSTAT1_OFFSET, IX_QMGR_QUELOWSTAT_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_QUELOWSTAT2_OFFSET, IX_QMGR_QUELOWSTAT_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_QUELOWSTAT3_OFFSET, IX_QMGR_QUELOWSTAT_RESET_VALUE); /* Reset underflow/overflow status registers 0..1 */ aqm_reg_write(sc, IX_QMGR_QUEUOSTAT0_OFFSET, IX_QMGR_QUEUOSTAT_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_QUEUOSTAT1_OFFSET, IX_QMGR_QUEUOSTAT_RESET_VALUE); /* Reset queues 32..63 nearly empty status registers */ aqm_reg_write(sc, IX_QMGR_QUEUPPSTAT0_OFFSET, IX_QMGR_QUEUPPSTAT0_RESET_VALUE); /* Reset queues 32..63 full status registers */ aqm_reg_write(sc, IX_QMGR_QUEUPPSTAT1_OFFSET, IX_QMGR_QUEUPPSTAT1_RESET_VALUE); /* Reset int0 status flag source select registers 0..3 */ aqm_reg_write(sc, IX_QMGR_INT0SRCSELREG0_OFFSET, IX_QMGR_INT0SRCSELREG_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_INT0SRCSELREG1_OFFSET, IX_QMGR_INT0SRCSELREG_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_INT0SRCSELREG2_OFFSET, IX_QMGR_INT0SRCSELREG_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_INT0SRCSELREG3_OFFSET, IX_QMGR_INT0SRCSELREG_RESET_VALUE); /* Reset queue interrupt enable register 0..1 */ aqm_reg_write(sc, IX_QMGR_QUEIEREG0_OFFSET, IX_QMGR_QUEIEREG_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_QUEIEREG1_OFFSET, IX_QMGR_QUEIEREG_RESET_VALUE); /* Reset queue interrupt register 0..1 */ aqm_reg_write(sc, IX_QMGR_QINTREG0_OFFSET, IX_QMGR_QINTREG_RESET_VALUE); aqm_reg_write(sc, IX_QMGR_QINTREG1_OFFSET, IX_QMGR_QINTREG_RESET_VALUE); /* Reset queue configuration words 0..63 */ for (i = 0; i < IX_QMGR_MAX_NUM_QUEUES; i++) aqm_reg_write(sc, sc->qinfo[i].qConfigRegAddr, IX_QMGR_QUECONFIG_RESET_VALUE); /* XXX zero SRAM to simplify debugging */ for (i = IX_QMGR_QUEBUFFER_SPACE_OFFSET; i < IX_QMGR_AQM_SRAM_SIZE_IN_BYTES; i += sizeof(uint32_t)) aqm_reg_write(sc, i, 0); } static device_method_t ixpqmgr_methods[] = { DEVMETHOD(device_probe, ixpqmgr_probe), DEVMETHOD(device_attach, ixpqmgr_attach), DEVMETHOD(device_detach, ixpqmgr_detach), { 0, 0 } }; static driver_t ixpqmgr_driver = { "ixpqmgr", ixpqmgr_methods, sizeof(struct ixpqmgr_softc), }; static devclass_t ixpqmgr_devclass; DRIVER_MODULE(ixpqmgr, ixp, ixpqmgr_driver, ixpqmgr_devclass, 0, 0); Index: head/sys/arm64/arm64/nexus.c =================================================================== --- head/sys/arm64/arm64/nexus.c (revision 298054) +++ head/sys/arm64/arm64/nexus.c (revision 298055) @@ -1,481 +1,481 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. 
DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * This code implements a `root nexus' for Arm Architecture * machines. The function of the root nexus is to serve as an * attachment point for both processors and buses, and to manage * resources which are common to all of them. In particular, * this code implements the core resource managers for interrupt * requests, DMA requests (which rightfully should be a part of the * ISA code but it's easier to do it here for now), I/O port addresses, * and I/O memory address space. */ #include "opt_acpi.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include "ofw_bus_if.h" #endif #ifdef DEV_ACPI #include #include #endif extern struct bus_space memmap_bus; static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); struct nexus_device { struct resource_list nx_resources; }; #define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) static struct rman mem_rman; static struct rman irq_rman; static int nexus_attach(device_t); #ifdef FDT static device_probe_t nexus_fdt_probe; static device_attach_t nexus_fdt_attach; #endif #ifdef DEV_ACPI static device_probe_t nexus_acpi_probe; static device_attach_t nexus_acpi_attach; #endif static int nexus_print_child(device_t, device_t); static device_t nexus_add_child(device_t, u_int, const char *, int); static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, rman_res_t, rman_res_t, rman_res_t, u_int); static int nexus_activate_resource(device_t, device_t, int, int, struct resource *); static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol); static struct resource_list *nexus_get_reslist(device_t, device_t); static int nexus_set_resource(device_t, device_t, int, int, rman_res_t, rman_res_t); static int nexus_deactivate_resource(device_t, device_t, int, int, struct resource *); static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep); static int nexus_teardown_intr(device_t, device_t, struct resource *, void *); static bus_space_tag_t nexus_get_bus_tag(device_t, device_t); #ifdef SMP static int nexus_bind_intr(device_t, device_t, struct resource *, int); #endif #ifdef FDT static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr); #endif static device_method_t nexus_methods[] = { /* Bus interface */ DEVMETHOD(bus_print_child, nexus_print_child), DEVMETHOD(bus_add_child, nexus_add_child), DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), DEVMETHOD(bus_activate_resource, nexus_activate_resource), DEVMETHOD(bus_config_intr, nexus_config_intr), DEVMETHOD(bus_get_resource_list, 
nexus_get_reslist), DEVMETHOD(bus_set_resource, nexus_set_resource), DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource), DEVMETHOD(bus_setup_intr, nexus_setup_intr), DEVMETHOD(bus_teardown_intr, nexus_teardown_intr), DEVMETHOD(bus_get_bus_tag, nexus_get_bus_tag), #ifdef SMP DEVMETHOD(bus_bind_intr, nexus_bind_intr), #endif { 0, 0 } }; static driver_t nexus_driver = { "nexus", nexus_methods, 1 /* no softc */ }; static int nexus_attach(device_t dev) { mem_rman.rm_start = 0; mem_rman.rm_end = BUS_SPACE_MAXADDR; mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) || rman_manage_region(&mem_rman, 0, BUS_SPACE_MAXADDR)) panic("nexus_attach mem_rman"); irq_rman.rm_start = 0; irq_rman.rm_end = ~0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupts"; if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, 0, ~0)) panic("nexus_attach irq_rman"); bus_generic_probe(dev); bus_generic_attach(dev); return (0); } static int nexus_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += printf("\n"); return (retval); } static device_t nexus_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct nexus_device *ndev; ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); if (!ndev) return (0); resource_list_init(&ndev->nx_resources); child = device_add_child_ordered(bus, order, name, unit); /* should we free this in nexus_child_detached? */ device_set_ivars(child, ndev); return (child); } /* * Allocate a resource on behalf of child. NB: child is usually going to be a * child of one of our descendants, not a direct child of nexus0. * (Exceptions include footbridge.) */ static struct resource * nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct nexus_device *ndev = DEVTONX(child); struct resource *rv; struct resource_list_entry *rle; struct rman *rm; int needactivate = flags & RF_ACTIVE; /* * If this is an allocation of the "default" range for a given * RID, and we know what the resources for this device are * (ie. they aren't maintained by a child bus), then work out * the start/end values. */ if (RMAN_IS_DEFAULT_RANGE(start, end) && (count == 1)) { if (device_get_parent(child) != bus || ndev == NULL) return(NULL); rle = resource_list_find(&ndev->nx_resources, type, *rid); if (rle == NULL) return(NULL); start = rle->start; end = rle->end; count = rle->count; } switch (type) { case SYS_RES_IRQ: rm = &irq_rman; break; case SYS_RES_MEMORY: case SYS_RES_IOPORT: rm = &mem_rman; break; default: return (NULL); } rv = rman_reserve_resource(rm, start, end, count, flags, child); - if (rv == 0) + if (rv == NULL) return (NULL); rman_set_rid(rv, *rid); rman_set_bushandle(rv, rman_get_start(rv)); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { rman_release_resource(rv); return (NULL); } } return (rv); } static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) { return (intr_irq_config(irq, trig, pol)); } static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) { int error; if ((rman_get_flags(res) & RF_SHAREABLE) == 0) flags |= INTR_EXCL; /* We depend here on rman_activate_resource() being idempotent. 
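/*
 * A leaf driver never calls nexus_alloc_resource() directly; it asks its
 * own parent, and the request is passed up the device tree until it reaches
 * nexus0, which carves the range out of mem_rman/irq_rman above.  A rough
 * sketch of the attach-side caller; foo_softc and foo_attach are
 * placeholders, not an existing driver.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <machine/resource.h>

struct foo_softc {
	struct resource	*mem_res;
	struct resource	*irq_res;
	int		 mem_rid, irq_rid;
};

static int
foo_attach(device_t dev)
{
	struct foo_softc *sc = device_get_softc(dev);

	sc->mem_rid = 0;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->mem_rid, RF_ACTIVE);
	sc->irq_rid = 0;
	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &sc->irq_rid, RF_ACTIVE | RF_SHAREABLE);
	if (sc->mem_res == NULL || sc->irq_res == NULL)
		return (ENXIO);
	return (0);
}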
*/ error = rman_activate_resource(res); if (error) return (error); error = arm_setup_intr(device_get_nameunit(child), filt, intr, arg, rman_get_start(res), flags, cookiep); return (error); } static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { return (intr_irq_remove_handler(child, rman_get_start(r), ih)); } #ifdef SMP static int nexus_bind_intr(device_t dev, device_t child, struct resource *irq, int cpu) { return (intr_irq_bind(rman_get_start(irq), cpu)); } #endif static bus_space_tag_t nexus_get_bus_tag(device_t bus __unused, device_t child __unused) { return(&memmap_bus); } static int nexus_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { int err; bus_addr_t paddr; bus_size_t psize; bus_space_handle_t vaddr; if ((err = rman_activate_resource(r)) != 0) return (err); /* * If this is a memory resource, map it into the kernel. */ if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { paddr = (bus_addr_t)rman_get_start(r); psize = (bus_size_t)rman_get_size(r); err = bus_space_map(&memmap_bus, paddr, psize, 0, &vaddr); if (err != 0) { rman_deactivate_resource(r); return (err); } rman_set_bustag(r, &memmap_bus); rman_set_virtual(r, (void *)vaddr); rman_set_bushandle(r, vaddr); } return (0); } static struct resource_list * nexus_get_reslist(device_t dev, device_t child) { struct nexus_device *ndev = DEVTONX(child); return (&ndev->nx_resources); } static int nexus_set_resource(device_t dev, device_t child, int type, int rid, rman_res_t start, rman_res_t count) { struct nexus_device *ndev = DEVTONX(child); struct resource_list *rl = &ndev->nx_resources; /* XXX this should return a success/failure indicator */ resource_list_add(rl, type, rid, start, start + count - 1, count); return(0); } static int nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { bus_size_t psize; bus_space_handle_t vaddr; psize = (bus_size_t)rman_get_size(r); vaddr = rman_get_bushandle(r); if (vaddr != 0) { bus_space_unmap(&memmap_bus, vaddr, psize); rman_set_virtual(r, NULL); rman_set_bushandle(r, 0); } return (rman_deactivate_resource(r)); } #ifdef FDT static device_method_t nexus_fdt_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_fdt_probe), DEVMETHOD(device_attach, nexus_fdt_attach), /* OFW interface */ DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr), }; #define nexus_baseclasses nexus_fdt_baseclasses DEFINE_CLASS_1(nexus, nexus_fdt_driver, nexus_fdt_methods, 1, nexus_driver); #undef nexus_baseclasses static devclass_t nexus_fdt_devclass; EARLY_DRIVER_MODULE(nexus_fdt, root, nexus_fdt_driver, nexus_fdt_devclass, 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST); static int nexus_fdt_probe(device_t dev) { if (OF_peer(0) == 0) return (ENXIO); device_quiet(dev); return (BUS_PROBE_DEFAULT); } static int nexus_fdt_attach(device_t dev) { nexus_add_child(dev, 10, "ofwbus", 0); return (nexus_attach(dev)); } static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr) { int irq; if (icells == 3) { irq = intr[1]; if (intr[0] == 0) irq += 32; /* SPI */ else irq += 16; /* PPI */ } else irq = intr[0]; return (irq); } #endif #ifdef DEV_ACPI static device_method_t nexus_acpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_acpi_probe), DEVMETHOD(device_attach, nexus_acpi_attach), }; #define nexus_baseclasses nexus_acpi_baseclasses DEFINE_CLASS_1(nexus, nexus_acpi_driver, nexus_acpi_methods, 1, nexus_driver); #undef nexus_baseclasses 
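/*
 * nexus_ofw_map_intr() above decodes a standard 3-cell GIC interrupt
 * specifier: cell 0 selects SPI (0) or PPI (nonzero), cell 1 is the
 * interrupt number within that space, so an FDT property of <0 5 4>
 * becomes hardware IRQ 5 + 32 = 37.  A stand-alone check of that mapping:
 */
#include <stdint.h>
#include <assert.h>

static int
gic_cells_to_irq(const uint32_t *cells, int ncells)
{
	if (ncells == 3)
		return ((int)cells[1] + (cells[0] == 0 ? 32 : 16));
	return ((int)cells[0]);
}

int
main(void)
{
	const uint32_t spi[3] = { 0, 5, 4 };	/* SPI 5, level-high */
	const uint32_t ppi[3] = { 1, 13, 4 };	/* PPI 13 (e.g. arch timer) */

	assert(gic_cells_to_irq(spi, 3) == 37);
	assert(gic_cells_to_irq(ppi, 3) == 29);
	return (0);
}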
static devclass_t nexus_acpi_devclass; EARLY_DRIVER_MODULE(nexus_acpi, root, nexus_acpi_driver, nexus_acpi_devclass, 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST); static int nexus_acpi_probe(device_t dev) { if (acpi_identify() != 0) return (ENXIO); device_quiet(dev); return (BUS_PROBE_LOW_PRIORITY); } static int nexus_acpi_attach(device_t dev) { nexus_add_child(dev, 10, "acpi", 0); return (nexus_attach(dev)); } #endif