diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 44d93494d830..62e782304fca 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -1,935 +1,931 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Functions to provide access to special i386 instructions. * This in included in sys/systm.h, and that file should be * used in preference to this. 
*/ #ifdef __i386__ #include #else /* !__i386__ */ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ struct region_descriptor; #define readb(va) (*(volatile uint8_t *) (va)) #define readw(va) (*(volatile uint16_t *) (va)) #define readl(va) (*(volatile uint32_t *) (va)) #define readq(va) (*(volatile uint64_t *) (va)) #define writeb(va, d) (*(volatile uint8_t *) (va) = (d)) #define writew(va, d) (*(volatile uint16_t *) (va) = (d)) #define writel(va, d) (*(volatile uint32_t *) (va) = (d)) #define writeq(va, d) (*(volatile uint64_t *) (va) = (d)) static __inline void breakpoint(void) { __asm __volatile("int $3"); } #define bsfl(mask) __builtin_ctz(mask) #define bsfq(mask) __builtin_ctzl(mask) -#define bsrl(mask) (__builtin_clz(mask) ^ 0x1f) - -#define bsrq(mask) (__builtin_clzl(mask) ^ 0x3f) - static __inline void clflush(u_long addr) { __asm __volatile("clflush %0" : : "m" (*(char *)addr)); } static __inline void clflushopt(u_long addr) { __asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr)); } static __inline void clwb(u_long addr) { __asm __volatile("clwb %0" : : "m" (*(char *)addr)); } static __inline void clts(void) { __asm __volatile("clts"); } static __inline void disable_intr(void) { __asm __volatile("cli" : : : "memory"); } static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } static __inline void enable_intr(void) { __asm __volatile("sti"); } static __inline void halt(void) { __asm __volatile("hlt"); } static __inline u_char inb(u_int port) { u_char data; __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline u_int inl(u_int port) { u_int data; __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void insb(u_int port, void *addr, size_t count) { __asm __volatile("rep; insb" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insw(u_int port, void *addr, size_t count) { __asm __volatile("rep; insw" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insl(u_int port, void *addr, size_t count) { __asm __volatile("rep; insl" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void invd(void) { __asm __volatile("invd"); } static __inline u_short inw(u_int port) { u_short data; __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void outb(u_int port, u_char data) { __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outl(u_int port, u_int data) { __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outsb(u_int port, const void *addr, size_t count) { __asm __volatile("rep; outsb" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsw(u_int port, const void *addr, size_t count) { __asm __volatile("rep; outsw" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsl(u_int port, const void *addr, size_t count) { __asm __volatile("rep; outsl" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outw(u_int port, u_short data) { __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port)); } static __inline u_long popcntq(u_long mask) { u_long result; __asm __volatile("popcntq %1,%0" : "=r" (result) : "rm" (mask)); return 
(result); } static __inline void lfence(void) { __asm __volatile("lfence" : : : "memory"); } static __inline void mfence(void) { __asm __volatile("mfence" : : : "memory"); } static __inline void sfence(void) { __asm __volatile("sfence" : : : "memory"); } static __inline void ia32_pause(void) { __asm __volatile("pause"); } static __inline u_long read_rflags(void) { u_long rf; __asm __volatile("pushfq; popq %0" : "=r" (rf)); return (rf); } static __inline uint64_t rdmsr(u_int msr) { uint32_t low, high; __asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr)); return (low | ((uint64_t)high << 32)); } static __inline uint32_t rdmsr32(u_int msr) { uint32_t low; __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "rdx"); return (low); } static __inline uint64_t rdpmc(u_int pmc) { uint32_t low, high; __asm __volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (pmc)); return (low | ((uint64_t)high << 32)); } static __inline uint64_t rdtsc(void) { uint32_t low, high; __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); return (low | ((uint64_t)high << 32)); } static __inline uint64_t rdtsc_ordered_lfence(void) { lfence(); return (rdtsc()); } static __inline uint64_t rdtsc_ordered_mfence(void) { mfence(); return (rdtsc()); } static __inline uint64_t rdtscp(void) { uint32_t low, high; __asm __volatile("rdtscp" : "=a" (low), "=d" (high) : : "ecx"); return (low | ((uint64_t)high << 32)); } static __inline uint64_t rdtscp_aux(uint32_t *aux) { uint32_t low, high; __asm __volatile("rdtscp" : "=a" (low), "=d" (high), "=c" (*aux)); return (low | ((uint64_t)high << 32)); } static __inline uint32_t rdtsc32(void) { uint32_t rv; __asm __volatile("rdtsc" : "=a" (rv) : : "edx"); return (rv); } static __inline uint32_t rdtscp32(void) { uint32_t rv; __asm __volatile("rdtscp" : "=a" (rv) : : "ecx", "edx"); return (rv); } static __inline void wbinvd(void) { __asm __volatile("wbinvd"); } static __inline void write_rflags(u_long rf) { __asm __volatile("pushq %0; popfq" : : "r" (rf)); } static __inline void wrmsr(u_int msr, uint64_t newval) { uint32_t low, high; low = newval; high = newval >> 32; __asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr)); } static __inline void load_cr0(u_long data) { __asm __volatile("movq %0,%%cr0" : : "r" (data)); } static __inline u_long rcr0(void) { u_long data; __asm __volatile("movq %%cr0,%0" : "=r" (data)); return (data); } static __inline u_long rcr2(void) { u_long data; __asm __volatile("movq %%cr2,%0" : "=r" (data)); return (data); } static __inline void load_cr3(u_long data) { __asm __volatile("movq %0,%%cr3" : : "r" (data) : "memory"); } static __inline u_long rcr3(void) { u_long data; __asm __volatile("movq %%cr3,%0" : "=r" (data)); return (data); } static __inline void load_cr4(u_long data) { __asm __volatile("movq %0,%%cr4" : : "r" (data)); } static __inline u_long rcr4(void) { u_long data; __asm __volatile("movq %%cr4,%0" : "=r" (data)); return (data); } static __inline u_long rxcr(u_int reg) { u_int low, high; __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); return (low | ((uint64_t)high << 32)); } static __inline void load_xcr(u_int reg, u_long val) { u_int low, high; low = val; high = val >> 32; __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); } /* * Global TLB flush (except for thise for pages marked PG_G) */ static __inline void invltlb(void) { load_cr3(rcr3()); } #ifndef CR4_PGE #define CR4_PGE 0x00000080 /* Page global enable */ #endif /* * Perform the guaranteed invalidation of all TLB entries. 
This * includes the global entries, and entries in all PCIDs, not only the * current context. The function works both on non-PCID CPUs and CPUs * with the PCID turned off or on. See IA-32 SDM Vol. 3a 4.10.4.1 * Operations that Invalidate TLBs and Paging-Structure Caches. */ static __inline void invltlb_glob(void) { uint64_t cr4; cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* * Although preemption at this point could be detrimental to * performance, it would not lead to an error. PG_G is simply * ignored if CR4.PGE is clear. Moreover, in case this block * is re-entered, the load_cr4() either above or below will * modify CR4.PGE flushing the TLB. */ load_cr4(cr4 | CR4_PGE); } /* * TLB flush for an individual page (even if it has PG_G). * Only works on 486+ CPUs (i386 does not have PG_G). */ static __inline void invlpg(u_long addr) { __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); } #define INVPCID_ADDR 0 #define INVPCID_CTX 1 #define INVPCID_CTXGLOB 2 #define INVPCID_ALLCTX 3 struct invpcid_descr { uint64_t pcid:12 __packed; uint64_t pad:52 __packed; uint64_t addr; } __packed; static __inline void invpcid(struct invpcid_descr *d, int type) { __asm __volatile("invpcid (%0),%1" : : "r" (d), "r" ((u_long)type) : "memory"); } static __inline u_short rfs(void) { u_short sel; __asm __volatile("movw %%fs,%0" : "=rm" (sel)); return (sel); } static __inline u_short rgs(void) { u_short sel; __asm __volatile("movw %%gs,%0" : "=rm" (sel)); return (sel); } static __inline u_short rss(void) { u_short sel; __asm __volatile("movw %%ss,%0" : "=rm" (sel)); return (sel); } static __inline void load_ds(u_short sel) { __asm __volatile("movw %0,%%ds" : : "rm" (sel)); } static __inline void load_es(u_short sel) { __asm __volatile("movw %0,%%es" : : "rm" (sel)); } static __inline void cpu_monitor(const void *addr, u_long extensions, u_int hints) { __asm __volatile("monitor" : : "a" (addr), "c" (extensions), "d" (hints)); } static __inline void cpu_mwait(u_long extensions, u_int hints) { __asm __volatile("mwait" : : "a" (hints), "c" (extensions)); } static __inline uint32_t rdpkru(void) { uint32_t res; __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx"); return (res); } static __inline void wrpkru(uint32_t mask) { __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0)); } #ifdef _KERNEL /* This is defined in but is too painful to get to */ #ifndef MSR_FSBASE #define MSR_FSBASE 0xc0000100 #endif static __inline void load_fs(u_short sel) { /* Preserve the fsbase value across the selector load */ __asm __volatile("rdmsr; movw %0,%%fs; wrmsr" : : "rm" (sel), "c" (MSR_FSBASE) : "eax", "edx"); } #ifndef MSR_GSBASE #define MSR_GSBASE 0xc0000101 #endif static __inline void load_gs(u_short sel) { /* * Preserve the gsbase value across the selector load. * Note that we have to disable interrupts because the gsbase * being trashed happens to be the kernel gsbase at the time. 
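 * (Descriptive aside, derived from the asm below: the rdmsr/wrmsr pair
 * addresses MSR_GSBASE through %ecx ("c" (MSR_GSBASE)) and carries the
 * 64-bit base value in the %edx:%eax register pair, which is why "eax"
 * and "edx" appear in the clobber list.)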
*/ __asm __volatile("pushfq; cli; rdmsr; movw %0,%%gs; wrmsr; popfq" : : "rm" (sel), "c" (MSR_GSBASE) : "eax", "edx"); } #else /* Usable by userland */ static __inline void load_fs(u_short sel) { __asm __volatile("movw %0,%%fs" : : "rm" (sel)); } static __inline void load_gs(u_short sel) { __asm __volatile("movw %0,%%gs" : : "rm" (sel)); } #endif static __inline uint64_t rdfsbase(void) { uint64_t x; __asm __volatile("rdfsbase %0" : "=r" (x)); return (x); } static __inline void wrfsbase(uint64_t x) { __asm __volatile("wrfsbase %0" : : "r" (x)); } static __inline uint64_t rdgsbase(void) { uint64_t x; __asm __volatile("rdgsbase %0" : "=r" (x)); return (x); } static __inline void wrgsbase(uint64_t x) { __asm __volatile("wrgsbase %0" : : "r" (x)); } static __inline void bare_lgdt(struct region_descriptor *addr) { __asm __volatile("lgdt (%0)" : : "r" (addr)); } static __inline void sgdt(struct region_descriptor *addr) { char *loc; loc = (char *)addr; __asm __volatile("sgdt %0" : "=m" (*loc) : : "memory"); } static __inline void lidt(struct region_descriptor *addr) { __asm __volatile("lidt (%0)" : : "r" (addr)); } static __inline void sidt(struct region_descriptor *addr) { char *loc; loc = (char *)addr; __asm __volatile("sidt %0" : "=m" (*loc) : : "memory"); } static __inline void lldt(u_short sel) { __asm __volatile("lldt %0" : : "r" (sel)); } static __inline u_short sldt(void) { u_short sel; __asm __volatile("sldt %0" : "=r" (sel)); return (sel); } static __inline void ltr(u_short sel) { __asm __volatile("ltr %0" : : "r" (sel)); } static __inline uint32_t read_tr(void) { u_short sel; __asm __volatile("str %0" : "=r" (sel)); return (sel); } static __inline uint64_t rdr0(void) { uint64_t data; __asm __volatile("movq %%dr0,%0" : "=r" (data)); return (data); } static __inline void load_dr0(uint64_t dr0) { __asm __volatile("movq %0,%%dr0" : : "r" (dr0)); } static __inline uint64_t rdr1(void) { uint64_t data; __asm __volatile("movq %%dr1,%0" : "=r" (data)); return (data); } static __inline void load_dr1(uint64_t dr1) { __asm __volatile("movq %0,%%dr1" : : "r" (dr1)); } static __inline uint64_t rdr2(void) { uint64_t data; __asm __volatile("movq %%dr2,%0" : "=r" (data)); return (data); } static __inline void load_dr2(uint64_t dr2) { __asm __volatile("movq %0,%%dr2" : : "r" (dr2)); } static __inline uint64_t rdr3(void) { uint64_t data; __asm __volatile("movq %%dr3,%0" : "=r" (data)); return (data); } static __inline void load_dr3(uint64_t dr3) { __asm __volatile("movq %0,%%dr3" : : "r" (dr3)); } static __inline uint64_t rdr6(void) { uint64_t data; __asm __volatile("movq %%dr6,%0" : "=r" (data)); return (data); } static __inline void load_dr6(uint64_t dr6) { __asm __volatile("movq %0,%%dr6" : : "r" (dr6)); } static __inline uint64_t rdr7(void) { uint64_t data; __asm __volatile("movq %%dr7,%0" : "=r" (data)); return (data); } static __inline void load_dr7(uint64_t dr7) { __asm __volatile("movq %0,%%dr7" : : "r" (dr7)); } static __inline register_t intr_disable(void) { register_t rflags; rflags = read_rflags(); disable_intr(); return (rflags); } static __inline void intr_restore(register_t rflags) { write_rflags(rflags); } static __inline void stac(void) { __asm __volatile("stac" : : : "cc"); } static __inline void clac(void) { __asm __volatile("clac" : : : "cc"); } enum { SGX_ECREATE = 0x0, SGX_EADD = 0x1, SGX_EINIT = 0x2, SGX_EREMOVE = 0x3, SGX_EDGBRD = 0x4, SGX_EDGBWR = 0x5, SGX_EEXTEND = 0x6, SGX_ELDU = 0x8, SGX_EBLOCK = 0x9, SGX_EPA = 0xA, SGX_EWB = 0xB, SGX_ETRACK = 0xC, }; enum { SGX_PT_SECS = 0x00, 
SGX_PT_TCS = 0x01, SGX_PT_REG = 0x02, SGX_PT_VA = 0x03, SGX_PT_TRIM = 0x04, }; int sgx_encls(uint32_t eax, uint64_t rbx, uint64_t rcx, uint64_t rdx); static __inline int sgx_ecreate(void *pginfo, void *secs) { return (sgx_encls(SGX_ECREATE, (uint64_t)pginfo, (uint64_t)secs, 0)); } static __inline int sgx_eadd(void *pginfo, void *epc) { return (sgx_encls(SGX_EADD, (uint64_t)pginfo, (uint64_t)epc, 0)); } static __inline int sgx_einit(void *sigstruct, void *secs, void *einittoken) { return (sgx_encls(SGX_EINIT, (uint64_t)sigstruct, (uint64_t)secs, (uint64_t)einittoken)); } static __inline int sgx_eextend(void *secs, void *epc) { return (sgx_encls(SGX_EEXTEND, (uint64_t)secs, (uint64_t)epc, 0)); } static __inline int sgx_epa(void *epc) { return (sgx_encls(SGX_EPA, SGX_PT_VA, (uint64_t)epc, 0)); } static __inline int sgx_eldu(uint64_t rbx, uint64_t rcx, uint64_t rdx) { return (sgx_encls(SGX_ELDU, rbx, rcx, rdx)); } static __inline int sgx_eremove(void *epc) { return (sgx_encls(SGX_EREMOVE, 0, (uint64_t)epc, 0)); } void reset_dbregs(void); #ifdef _KERNEL int rdmsr_safe(u_int msr, uint64_t *val); int wrmsr_safe(u_int msr, uint64_t newval); #endif #endif /* !_MACHINE_CPUFUNC_H_ */ #endif /* __i386__ */ diff --git a/sys/arm64/iommu/smmu.c b/sys/arm64/iommu/smmu.c index 1d1996a69027..76a7d29dc2db 100644 --- a/sys/arm64/iommu/smmu.c +++ b/sys/arm64/iommu/smmu.c @@ -1,2022 +1,2013 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019-2020 Ruslan Bukin * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory (Department of Computer Science and * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the * DARPA SSITH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Hardware overview. * * An incoming transaction from a peripheral device has an address, size, * attributes and StreamID. * * In case of PCI-based devices, StreamID is a PCI rid. * * The StreamID is used to select a Stream Table Entry (STE) in a Stream table, * which contains per-device configuration. * * Stream table is a linear or 2-level walk table (this driver supports both). * Note that a linear table could occupy 1GB or more of memory depending on * sid_bits value. 
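 * (Worked example of the sizing claim above: with STRTAB_STE_DWORDS == 8
 * each STE occupies 64 bytes, so a linear table for sid_bits == 24 needs
 * 2^24 * 64 bytes == 1 GiB, while sid_bits == 16 needs only 4 MiB; the
 * 2-level format avoids committing that much memory up front.)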
* * STE is used to locate a Context Descriptor, which is a struct in memory * that describes stages of translation, translation table type, pointer to * level 0 of page tables, ASID, etc. * * Hardware supports two stages of translation: Stage1 (S1) and Stage2 (S2): * o S1 is used for the host machine traffic translation * o S2 is for a hypervisor * * This driver enables S1 stage with standard AArch64 page tables. * * Note that SMMU does not share TLB with a main CPU. * Command queue is used by this driver to Invalidate SMMU TLB, STE cache. * * An arm64 SoC could have more than one SMMU instance. * ACPI IORT table describes which SMMU unit is assigned for a particular * peripheral device. * * Queues. * * Register interface and Memory-based circular buffer queues are used * to interface SMMU. * * These are a Command queue for commands to send to the SMMU and an Event * queue for event/fault reports from the SMMU. Optionally PRI queue is * designed for PCIe page requests reception. * * Note that not every hardware supports PRI services. For instance they were * not found in Neoverse N1 SDP machine. * (This drivers does not implement PRI queue.) * * All SMMU queues are arranged as circular buffers in memory. They are used * in a producer-consumer fashion so that an output queue contains data * produced by the SMMU and consumed by software. * An input queue contains data produced by software, consumed by the SMMU. * * Interrupts. * * Interrupts are not required by this driver for normal operation. * The standard wired interrupt is only triggered when an event comes from * the SMMU, which is only in a case of errors (e.g. translation fault). */ #include "opt_platform.h" #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ACPI #include #include #endif #include #include #include #include #include #ifdef FDT #include #include #include #endif #include "iommu.h" #include "iommu_if.h" #include "smmureg.h" #include "smmuvar.h" #define STRTAB_L1_SZ_SHIFT 20 #define STRTAB_SPLIT 8 #define STRTAB_L1_DESC_L2PTR_M (0x3fffffffffff << 6) #define STRTAB_L1_DESC_DWORDS 1 #define STRTAB_STE_DWORDS 8 #define CMDQ_ENTRY_DWORDS 2 #define EVTQ_ENTRY_DWORDS 4 #define PRIQ_ENTRY_DWORDS 2 #define CD_DWORDS 8 #define Q_WRP(q, p) ((p) & (1 << (q)->size_log2)) #define Q_IDX(q, p) ((p) & ((1 << (q)->size_log2) - 1)) #define Q_OVF(p) ((p) & (1 << 31)) /* Event queue overflowed */ #define SMMU_Q_ALIGN (64 * 1024) #define MAXADDR_48BIT 0xFFFFFFFFFFFFUL #define MAXADDR_52BIT 0xFFFFFFFFFFFFFUL static struct resource_spec smmu_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE }, { SYS_RES_IRQ, 1, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 2, RF_ACTIVE }, { SYS_RES_IRQ, 3, RF_ACTIVE }, RESOURCE_SPEC_END }; MALLOC_DEFINE(M_SMMU, "SMMU", SMMU_DEVSTR); #define dprintf(fmt, ...) 
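/*
 * Standalone userland sketch (not part of this driver) of the queue
 * index convention behind the Q_WRP()/Q_IDX()/Q_OVF() macros above: an
 * SMMUv3 producer or consumer index carries log2(queue size) index
 * bits plus one extra wrap bit, so "empty" (indices and wrap bits
 * equal) can be told apart from "full" (indices equal, wrap bits
 * different).  The 4-entry toy queue below is an assumed example, not
 * taken from the hardware.
 */
#include <assert.h>
#include <stdint.h>

#define	TOY_SIZE_LOG2	2			/* 4-entry toy queue */
#define	TOY_IDX(p)	((p) & ((1u << TOY_SIZE_LOG2) - 1))
#define	TOY_WRP(p)	((p) & (1u << TOY_SIZE_LOG2))

static int
toy_q_empty(uint32_t prod, uint32_t cons)
{
	return (TOY_IDX(prod) == TOY_IDX(cons) &&
	    TOY_WRP(prod) == TOY_WRP(cons));
}

static int
toy_q_full(uint32_t prod, uint32_t cons)
{
	return (TOY_IDX(prod) == TOY_IDX(cons) &&
	    TOY_WRP(prod) != TOY_WRP(cons));
}

int
main(void)
{
	assert(toy_q_empty(0, 0));		/* nothing produced yet */
	assert(toy_q_full(4, 0));		/* producer a full lap ahead */
	assert(!toy_q_empty(3, 0) && !toy_q_full(3, 0));
	return (0);
}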
struct smmu_event { int ident; char *str; char *msg; }; static struct smmu_event events[] = { { 0x01, "F_UUT", "Unsupported Upstream Transaction."}, { 0x02, "C_BAD_STREAMID", "Transaction StreamID out of range."}, { 0x03, "F_STE_FETCH", "Fetch of STE caused external abort."}, { 0x04, "C_BAD_STE", "Used STE invalid."}, { 0x05, "F_BAD_ATS_TREQ", "Address Translation Request disallowed for a StreamID " "and a PCIe ATS Translation Request received."}, { 0x06, "F_STREAM_DISABLED", "The STE of a transaction marks non-substream transactions " "disabled."}, { 0x07, "F_TRANSL_FORBIDDEN", "An incoming PCIe transaction is marked Translated but " "SMMU bypass is disallowed for this StreamID."}, { 0x08, "C_BAD_SUBSTREAMID", "Incoming SubstreamID present, but configuration is invalid."}, { 0x09, "F_CD_FETCH", "Fetch of CD caused external abort."}, { 0x0a, "C_BAD_CD", "Fetched CD invalid."}, { 0x0b, "F_WALK_EABT", "An external abort occurred fetching (or updating) " "a translation table descriptor."}, { 0x10, "F_TRANSLATION", "Translation fault."}, { 0x11, "F_ADDR_SIZE", "Address Size fault."}, { 0x12, "F_ACCESS", "Access flag fault due to AF == 0 in a page or block TTD."}, { 0x13, "F_PERMISSION", "Permission fault occurred on page access."}, { 0x20, "F_TLB_CONFLICT", "A TLB conflict occurred because of the transaction."}, { 0x21, "F_CFG_CONFLICT", "A configuration cache conflict occurred due to " "the transaction."}, { 0x24, "E_PAGE_REQUEST", "Speculative page request hint."}, { 0x25, "F_VMS_FETCH", "Fetch of VMS caused external abort."}, { 0, NULL, NULL }, }; static int smmu_q_has_space(struct smmu_queue *q) { /* * See 6.3.27 SMMU_CMDQ_PROD * * There is space in the queue for additional commands if: * SMMU_CMDQ_CONS.RD != SMMU_CMDQ_PROD.WR || * SMMU_CMDQ_CONS.RD_WRAP == SMMU_CMDQ_PROD.WR_WRAP */ if (Q_IDX(q, q->lc.cons) != Q_IDX(q, q->lc.prod) || Q_WRP(q, q->lc.cons) == Q_WRP(q, q->lc.prod)) return (1); return (0); } static int smmu_q_empty(struct smmu_queue *q) { if (Q_IDX(q, q->lc.cons) == Q_IDX(q, q->lc.prod) && Q_WRP(q, q->lc.cons) == Q_WRP(q, q->lc.prod)) return (1); return (0); } static int __unused smmu_q_consumed(struct smmu_queue *q, uint32_t prod) { if ((Q_WRP(q, q->lc.cons) == Q_WRP(q, prod)) && (Q_IDX(q, q->lc.cons) >= Q_IDX(q, prod))) return (1); if ((Q_WRP(q, q->lc.cons) != Q_WRP(q, prod)) && (Q_IDX(q, q->lc.cons) <= Q_IDX(q, prod))) return (1); return (0); } static uint32_t smmu_q_inc_cons(struct smmu_queue *q) { uint32_t cons; uint32_t val; cons = (Q_WRP(q, q->lc.cons) | Q_IDX(q, q->lc.cons)) + 1; val = (Q_OVF(q->lc.cons) | Q_WRP(q, cons) | Q_IDX(q, cons)); return (val); } static uint32_t smmu_q_inc_prod(struct smmu_queue *q) { uint32_t prod; uint32_t val; prod = (Q_WRP(q, q->lc.prod) | Q_IDX(q, q->lc.prod)) + 1; val = (Q_OVF(q->lc.prod) | Q_WRP(q, prod) | Q_IDX(q, prod)); return (val); } static int smmu_write_ack(struct smmu_softc *sc, uint32_t reg, uint32_t reg_ack, uint32_t val) { uint32_t v; int timeout; timeout = 100000; bus_write_4(sc->res[0], reg, val); do { v = bus_read_4(sc->res[0], reg_ack); if (v == val) break; } while (timeout--); if (timeout <= 0) { device_printf(sc->dev, "Failed to write reg.\n"); return (-1); } return (0); } -static inline int -ilog2(long x) -{ - - KASSERT(x > 0 && powerof2(x), ("%s: invalid arg %ld", __func__, x)); - - return (flsl(x) - 1); -} - static int smmu_init_queue(struct smmu_softc *sc, struct smmu_queue *q, uint32_t prod_off, uint32_t cons_off, uint32_t dwords) { int sz; sz = (1 << q->size_log2) * dwords * 8; /* Set up the command circular 
buffer */ q->vaddr = contigmalloc(sz, M_SMMU, M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, SMMU_Q_ALIGN, 0); if (q->vaddr == NULL) { device_printf(sc->dev, "failed to allocate %d bytes\n", sz); return (-1); } q->prod_off = prod_off; q->cons_off = cons_off; q->paddr = vtophys(q->vaddr); q->base = CMDQ_BASE_RA | EVENTQ_BASE_WA | PRIQ_BASE_WA; q->base |= q->paddr & Q_BASE_ADDR_M; q->base |= q->size_log2 << Q_LOG2SIZE_S; return (0); } static int smmu_init_queues(struct smmu_softc *sc) { int err; /* Command queue. */ err = smmu_init_queue(sc, &sc->cmdq, SMMU_CMDQ_PROD, SMMU_CMDQ_CONS, CMDQ_ENTRY_DWORDS); if (err) return (ENXIO); /* Event queue. */ err = smmu_init_queue(sc, &sc->evtq, SMMU_EVENTQ_PROD, SMMU_EVENTQ_CONS, EVTQ_ENTRY_DWORDS); if (err) return (ENXIO); if (!(sc->features & SMMU_FEATURE_PRI)) return (0); /* PRI queue. */ err = smmu_init_queue(sc, &sc->priq, SMMU_PRIQ_PROD, SMMU_PRIQ_CONS, PRIQ_ENTRY_DWORDS); if (err) return (ENXIO); return (0); } /* * Dump 2LVL or linear STE. */ static void smmu_dump_ste(struct smmu_softc *sc, int sid) { struct smmu_strtab *strtab; struct l1_desc *l1_desc; uint64_t *ste, *l1; int i; strtab = &sc->strtab; if (sc->features & SMMU_FEATURE_2_LVL_STREAM_TABLE) { i = sid >> STRTAB_SPLIT; l1 = (void *)((uint64_t)strtab->vaddr + STRTAB_L1_DESC_DWORDS * 8 * i); device_printf(sc->dev, "L1 ste == %lx\n", l1[0]); l1_desc = &strtab->l1[i]; ste = l1_desc->va; if (ste == NULL) /* L2 is not initialized */ return; } else { ste = (void *)((uint64_t)strtab->vaddr + sid * (STRTAB_STE_DWORDS << 3)); } /* Dump L2 or linear STE. */ for (i = 0; i < STRTAB_STE_DWORDS; i++) device_printf(sc->dev, "ste[%d] == %lx\n", i, ste[i]); } static void __unused smmu_dump_cd(struct smmu_softc *sc, struct smmu_cd *cd) { uint64_t *vaddr; int i; device_printf(sc->dev, "%s\n", __func__); vaddr = cd->vaddr; for (i = 0; i < CD_DWORDS; i++) device_printf(sc->dev, "cd[%d] == %lx\n", i, vaddr[i]); } static void smmu_evtq_dequeue(struct smmu_softc *sc, uint32_t *evt) { struct smmu_queue *evtq; void *entry_addr; evtq = &sc->evtq; evtq->lc.val = bus_read_8(sc->res[0], evtq->prod_off); entry_addr = (void *)((uint64_t)evtq->vaddr + evtq->lc.cons * EVTQ_ENTRY_DWORDS * 8); memcpy(evt, entry_addr, EVTQ_ENTRY_DWORDS * 8); evtq->lc.cons = smmu_q_inc_cons(evtq); bus_write_4(sc->res[0], evtq->cons_off, evtq->lc.cons); } static void smmu_print_event(struct smmu_softc *sc, uint32_t *evt) { struct smmu_event *ev; uintptr_t input_addr; uint8_t event_id; device_t dev; int sid; int i; dev = sc->dev; ev = NULL; event_id = evt[0] & 0xff; for (i = 0; events[i].ident != 0; i++) { if (events[i].ident == event_id) { ev = &events[i]; break; } } sid = evt[1]; input_addr = evt[5]; input_addr <<= 32; input_addr |= evt[4]; if (smmu_quirks_check(dev, sid, event_id, input_addr)) { /* The event is known. Don't print anything. 
*/ return; } if (ev) { device_printf(sc->dev, "Event %s (%s) received.\n", ev->str, ev->msg); } else device_printf(sc->dev, "Event 0x%x received\n", event_id); device_printf(sc->dev, "SID %x, Input Address: %jx\n", sid, input_addr); for (i = 0; i < 8; i++) device_printf(sc->dev, "evt[%d] %x\n", i, evt[i]); smmu_dump_ste(sc, sid); } static void make_cmd(struct smmu_softc *sc, uint64_t *cmd, struct smmu_cmdq_entry *entry) { memset(cmd, 0, CMDQ_ENTRY_DWORDS * 8); cmd[0] = entry->opcode << CMD_QUEUE_OPCODE_S; switch (entry->opcode) { case CMD_TLBI_NH_VA: cmd[0] |= (uint64_t)entry->tlbi.asid << TLBI_0_ASID_S; cmd[1] = entry->tlbi.addr & TLBI_1_ADDR_M; if (entry->tlbi.leaf) { /* * Leaf flag means that only cached entries * for the last level of translation table walk * are required to be invalidated. */ cmd[1] |= TLBI_1_LEAF; } break; case CMD_TLBI_NH_ASID: cmd[0] |= (uint64_t)entry->tlbi.asid << TLBI_0_ASID_S; break; case CMD_TLBI_NSNH_ALL: case CMD_TLBI_NH_ALL: case CMD_TLBI_EL2_ALL: break; case CMD_CFGI_CD: cmd[0] |= ((uint64_t)entry->cfgi.ssid << CFGI_0_SSID_S); /* FALLTROUGH */ case CMD_CFGI_STE: cmd[0] |= ((uint64_t)entry->cfgi.sid << CFGI_0_STE_SID_S); cmd[1] |= ((uint64_t)entry->cfgi.leaf << CFGI_1_LEAF_S); break; case CMD_CFGI_STE_RANGE: cmd[1] = (31 << CFGI_1_STE_RANGE_S); break; case CMD_SYNC: cmd[0] |= SYNC_0_MSH_IS | SYNC_0_MSIATTR_OIWB; if (entry->sync.msiaddr) { cmd[0] |= SYNC_0_CS_SIG_IRQ; cmd[1] |= (entry->sync.msiaddr & SYNC_1_MSIADDRESS_M); } else cmd[0] |= SYNC_0_CS_SIG_SEV; break; case CMD_PREFETCH_CONFIG: cmd[0] |= ((uint64_t)entry->prefetch.sid << PREFETCH_0_SID_S); break; }; } static void smmu_cmdq_enqueue_cmd(struct smmu_softc *sc, struct smmu_cmdq_entry *entry) { uint64_t cmd[CMDQ_ENTRY_DWORDS]; struct smmu_queue *cmdq; void *entry_addr; cmdq = &sc->cmdq; make_cmd(sc, cmd, entry); SMMU_LOCK(sc); /* Ensure that a space is available. */ do { cmdq->lc.cons = bus_read_4(sc->res[0], cmdq->cons_off); } while (smmu_q_has_space(cmdq) == 0); /* Write the command to the current prod entry. */ entry_addr = (void *)((uint64_t)cmdq->vaddr + Q_IDX(cmdq, cmdq->lc.prod) * CMDQ_ENTRY_DWORDS * 8); memcpy(entry_addr, cmd, CMDQ_ENTRY_DWORDS * 8); /* Increment prod index. */ cmdq->lc.prod = smmu_q_inc_prod(cmdq); bus_write_4(sc->res[0], cmdq->prod_off, cmdq->lc.prod); SMMU_UNLOCK(sc); } static void __unused smmu_poll_until_consumed(struct smmu_softc *sc, struct smmu_queue *q) { while (1) { q->lc.val = bus_read_8(sc->res[0], q->prod_off); if (smmu_q_empty(q)) break; cpu_spinwait(); } } static int smmu_sync(struct smmu_softc *sc) { struct smmu_cmdq_entry cmd; struct smmu_queue *q; uint32_t *base; int timeout; int prod; q = &sc->cmdq; prod = q->lc.prod; /* Enqueue sync command. */ cmd.opcode = CMD_SYNC; cmd.sync.msiaddr = q->paddr + Q_IDX(q, prod) * CMDQ_ENTRY_DWORDS * 8; smmu_cmdq_enqueue_cmd(sc, &cmd); /* Wait for the sync completion. */ base = (void *)((uint64_t)q->vaddr + Q_IDX(q, prod) * CMDQ_ENTRY_DWORDS * 8); /* * It takes around 200 loops (6 instructions each) * on Neoverse N1 to complete the sync. */ timeout = 10000; do { if (*base == 0) { /* MSI write completed. 
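 * The CMD_SYNC enqueued above aims its MSI completion write at the
 * physical address of the sync command's own queue slot, and the
 * command's MSIData field was left at zero, so the SMMU's completion
 * write clears the first 32-bit word of that slot and this poll on
 * *base sees zero once the sync has finished.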
*/ break; } cpu_spinwait(); } while (timeout--); if (timeout < 0) device_printf(sc->dev, "Failed to sync\n"); return (0); } static int smmu_sync_cd(struct smmu_softc *sc, int sid, int ssid, bool leaf) { struct smmu_cmdq_entry cmd; cmd.opcode = CMD_CFGI_CD; cmd.cfgi.sid = sid; cmd.cfgi.ssid = ssid; cmd.cfgi.leaf = leaf; smmu_cmdq_enqueue_cmd(sc, &cmd); return (0); } static void smmu_invalidate_all_sid(struct smmu_softc *sc) { struct smmu_cmdq_entry cmd; /* Invalidate cached config */ cmd.opcode = CMD_CFGI_STE_RANGE; smmu_cmdq_enqueue_cmd(sc, &cmd); smmu_sync(sc); } static void smmu_tlbi_all(struct smmu_softc *sc) { struct smmu_cmdq_entry cmd; /* Invalidate entire TLB */ cmd.opcode = CMD_TLBI_NSNH_ALL; smmu_cmdq_enqueue_cmd(sc, &cmd); smmu_sync(sc); } static void smmu_tlbi_asid(struct smmu_softc *sc, uint16_t asid) { struct smmu_cmdq_entry cmd; /* Invalidate TLB for an ASID. */ cmd.opcode = CMD_TLBI_NH_ASID; cmd.tlbi.asid = asid; smmu_cmdq_enqueue_cmd(sc, &cmd); smmu_sync(sc); } static void smmu_tlbi_va(struct smmu_softc *sc, vm_offset_t va, uint16_t asid) { struct smmu_cmdq_entry cmd; /* Invalidate specific range */ cmd.opcode = CMD_TLBI_NH_VA; cmd.tlbi.asid = asid; cmd.tlbi.vmid = 0; cmd.tlbi.leaf = true; /* We change only L3. */ cmd.tlbi.addr = va; smmu_cmdq_enqueue_cmd(sc, &cmd); } static void smmu_invalidate_sid(struct smmu_softc *sc, uint32_t sid) { struct smmu_cmdq_entry cmd; /* Invalidate cached config */ cmd.opcode = CMD_CFGI_STE; cmd.cfgi.sid = sid; smmu_cmdq_enqueue_cmd(sc, &cmd); smmu_sync(sc); } static void smmu_prefetch_sid(struct smmu_softc *sc, uint32_t sid) { struct smmu_cmdq_entry cmd; cmd.opcode = CMD_PREFETCH_CONFIG; cmd.prefetch.sid = sid; smmu_cmdq_enqueue_cmd(sc, &cmd); smmu_sync(sc); } /* * Init STE in bypass mode. Traffic is not translated for the sid. */ static void smmu_init_ste_bypass(struct smmu_softc *sc, uint32_t sid, uint64_t *ste) { uint64_t val; val = STE0_VALID | STE0_CONFIG_BYPASS; ste[1] = STE1_SHCFG_INCOMING | STE1_EATS_FULLATS; ste[2] = 0; ste[3] = 0; ste[4] = 0; ste[5] = 0; ste[6] = 0; ste[7] = 0; smmu_invalidate_sid(sc, sid); ste[0] = val; dsb(sy); smmu_invalidate_sid(sc, sid); smmu_prefetch_sid(sc, sid); } /* * Enable Stage1 (S1) translation for the sid. */ static int smmu_init_ste_s1(struct smmu_softc *sc, struct smmu_cd *cd, uint32_t sid, uint64_t *ste) { uint64_t val; val = STE0_VALID; /* S1 */ ste[1] = STE1_EATS_FULLATS | STE1_S1CSH_IS | STE1_S1CIR_WBRA | STE1_S1COR_WBRA | STE1_STRW_NS_EL1; ste[2] = 0; ste[3] = 0; ste[4] = 0; ste[5] = 0; ste[6] = 0; ste[7] = 0; if (sc->features & SMMU_FEATURE_STALL && ((sc->features & SMMU_FEATURE_STALL_FORCE) == 0)) ste[1] |= STE1_S1STALLD; /* Configure STE */ val |= (cd->paddr & STE0_S1CONTEXTPTR_M); val |= STE0_CONFIG_S1_TRANS; smmu_invalidate_sid(sc, sid); /* The STE[0] has to be written in a single blast, last of all. */ ste[0] = val; dsb(sy); smmu_invalidate_sid(sc, sid); smmu_sync_cd(sc, sid, 0, true); smmu_invalidate_sid(sc, sid); /* The sid will be used soon most likely. 
*/ smmu_prefetch_sid(sc, sid); return (0); } static uint64_t * smmu_get_ste_addr(struct smmu_softc *sc, int sid) { struct smmu_strtab *strtab; struct l1_desc *l1_desc; uint64_t *addr; strtab = &sc->strtab; if (sc->features & SMMU_FEATURE_2_LVL_STREAM_TABLE) { l1_desc = &strtab->l1[sid >> STRTAB_SPLIT]; addr = l1_desc->va; addr += (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS; } else { addr = (void *)((uint64_t)strtab->vaddr + STRTAB_STE_DWORDS * 8 * sid); }; return (addr); } static int smmu_init_ste(struct smmu_softc *sc, struct smmu_cd *cd, int sid, bool bypass) { uint64_t *addr; addr = smmu_get_ste_addr(sc, sid); if (bypass) smmu_init_ste_bypass(sc, sid, addr); else smmu_init_ste_s1(sc, cd, sid, addr); smmu_sync(sc); return (0); } static void smmu_deinit_ste(struct smmu_softc *sc, int sid) { uint64_t *ste; ste = smmu_get_ste_addr(sc, sid); ste[0] = 0; smmu_invalidate_sid(sc, sid); smmu_sync_cd(sc, sid, 0, true); smmu_invalidate_sid(sc, sid); smmu_sync(sc); } static int smmu_init_cd(struct smmu_softc *sc, struct smmu_domain *domain) { vm_paddr_t paddr; uint64_t *ptr; uint64_t val; vm_size_t size; struct smmu_cd *cd; struct smmu_pmap *p; size = 1 * (CD_DWORDS << 3); p = &domain->p; cd = domain->cd = malloc(sizeof(struct smmu_cd), M_SMMU, M_WAITOK | M_ZERO); cd->vaddr = contigmalloc(size, M_SMMU, M_WAITOK | M_ZERO, /* flags */ 0, /* low */ (1ul << 40) - 1, /* high */ size, /* alignment */ 0); /* boundary */ if (cd->vaddr == NULL) { device_printf(sc->dev, "Failed to allocate CD\n"); return (ENXIO); } cd->size = size; cd->paddr = vtophys(cd->vaddr); ptr = cd->vaddr; val = CD0_VALID; val |= CD0_AA64; val |= CD0_R; val |= CD0_A; val |= CD0_ASET; val |= (uint64_t)domain->asid << CD0_ASID_S; val |= CD0_TG0_4KB; val |= CD0_EPD1; /* Disable TT1 */ val |= ((64 - sc->ias) << CD0_T0SZ_S); val |= CD0_IPS_48BITS; paddr = p->sp_l0_paddr & CD1_TTB0_M; KASSERT(paddr == p->sp_l0_paddr, ("bad allocation 1")); ptr[1] = paddr; ptr[2] = 0; ptr[3] = MAIR_ATTR(MAIR_DEVICE_nGnRnE, VM_MEMATTR_DEVICE) | MAIR_ATTR(MAIR_NORMAL_NC, VM_MEMATTR_UNCACHEABLE) | MAIR_ATTR(MAIR_NORMAL_WB, VM_MEMATTR_WRITE_BACK) | MAIR_ATTR(MAIR_NORMAL_WT, VM_MEMATTR_WRITE_THROUGH); /* Install the CD. 
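 * As with the STE install in smmu_init_ste_s1(), the word carrying the
 * Valid bit is written last, so the CD only becomes valid once TTB0
 * and the MAIR attributes are already in place.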
*/ ptr[0] = val; return (0); } static int smmu_init_strtab_linear(struct smmu_softc *sc) { struct smmu_strtab *strtab; vm_paddr_t base; uint32_t size; uint64_t reg; strtab = &sc->strtab; strtab->num_l1_entries = (1 << sc->sid_bits); size = strtab->num_l1_entries * (STRTAB_STE_DWORDS << 3); if (bootverbose) device_printf(sc->dev, "%s: linear strtab size %d, num_l1_entries %d\n", __func__, size, strtab->num_l1_entries); strtab->vaddr = contigmalloc(size, M_SMMU, M_WAITOK | M_ZERO, /* flags */ 0, /* low */ (1ul << 48) - 1, /* high */ size, /* alignment */ 0); /* boundary */ if (strtab->vaddr == NULL) { device_printf(sc->dev, "failed to allocate strtab\n"); return (ENXIO); } reg = STRTAB_BASE_CFG_FMT_LINEAR; reg |= sc->sid_bits << STRTAB_BASE_CFG_LOG2SIZE_S; strtab->base_cfg = (uint32_t)reg; base = vtophys(strtab->vaddr); reg = base & STRTAB_BASE_ADDR_M; KASSERT(reg == base, ("bad allocation 2")); reg |= STRTAB_BASE_RA; strtab->base = reg; return (0); } static int smmu_init_strtab_2lvl(struct smmu_softc *sc) { struct smmu_strtab *strtab; vm_paddr_t base; uint64_t reg_base; uint32_t l1size; uint32_t size; uint32_t reg; int sz; strtab = &sc->strtab; size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); size = min(size, sc->sid_bits - STRTAB_SPLIT); strtab->num_l1_entries = (1 << size); size += STRTAB_SPLIT; l1size = strtab->num_l1_entries * (STRTAB_L1_DESC_DWORDS << 3); if (bootverbose) device_printf(sc->dev, "%s: size %d, l1 entries %d, l1size %d\n", __func__, size, strtab->num_l1_entries, l1size); strtab->vaddr = contigmalloc(l1size, M_SMMU, M_WAITOK | M_ZERO, /* flags */ 0, /* low */ (1ul << 48) - 1, /* high */ l1size, /* alignment */ 0); /* boundary */ if (strtab->vaddr == NULL) { device_printf(sc->dev, "Failed to allocate 2lvl strtab.\n"); return (ENOMEM); } sz = strtab->num_l1_entries * sizeof(struct l1_desc); strtab->l1 = malloc(sz, M_SMMU, M_WAITOK | M_ZERO); if (strtab->l1 == NULL) { contigfree(strtab->vaddr, l1size, M_SMMU); return (ENOMEM); } reg = STRTAB_BASE_CFG_FMT_2LVL; reg |= size << STRTAB_BASE_CFG_LOG2SIZE_S; reg |= STRTAB_SPLIT << STRTAB_BASE_CFG_SPLIT_S; strtab->base_cfg = (uint32_t)reg; base = vtophys(strtab->vaddr); reg_base = base & STRTAB_BASE_ADDR_M; KASSERT(reg_base == base, ("bad allocation 3")); reg_base |= STRTAB_BASE_RA; strtab->base = reg_base; return (0); } static int smmu_init_strtab(struct smmu_softc *sc) { int error; if (sc->features & SMMU_FEATURE_2_LVL_STREAM_TABLE) error = smmu_init_strtab_2lvl(sc); else error = smmu_init_strtab_linear(sc); return (error); } static int smmu_init_l1_entry(struct smmu_softc *sc, int sid) { struct smmu_strtab *strtab; struct l1_desc *l1_desc; uint64_t *addr; uint64_t val; size_t size; int i; strtab = &sc->strtab; l1_desc = &strtab->l1[sid >> STRTAB_SPLIT]; if (l1_desc->va) { /* Already allocated. */ return (0); } size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); l1_desc->span = STRTAB_SPLIT + 1; l1_desc->size = size; l1_desc->va = contigmalloc(size, M_SMMU, M_WAITOK | M_ZERO, /* flags */ 0, /* low */ (1ul << 48) - 1, /* high */ size, /* alignment */ 0); /* boundary */ if (l1_desc->va == NULL) { device_printf(sc->dev, "failed to allocate l2 entry\n"); return (ENXIO); } l1_desc->pa = vtophys(l1_desc->va); i = sid >> STRTAB_SPLIT; addr = (void *)((uint64_t)strtab->vaddr + STRTAB_L1_DESC_DWORDS * 8 * i); /* Install the L1 entry. 
*/ val = l1_desc->pa & STRTAB_L1_DESC_L2PTR_M; KASSERT(val == l1_desc->pa, ("bad allocation 4")); val |= l1_desc->span; *addr = val; return (0); } static void __unused smmu_deinit_l1_entry(struct smmu_softc *sc, int sid) { struct smmu_strtab *strtab; struct l1_desc *l1_desc; uint64_t *addr; int i; strtab = &sc->strtab; i = sid >> STRTAB_SPLIT; addr = (void *)((uint64_t)strtab->vaddr + STRTAB_L1_DESC_DWORDS * 8 * i); *addr = 0; l1_desc = &strtab->l1[sid >> STRTAB_SPLIT]; contigfree(l1_desc->va, l1_desc->size, M_SMMU); } static int smmu_disable(struct smmu_softc *sc) { uint32_t reg; int error; /* Disable SMMU */ reg = bus_read_4(sc->res[0], SMMU_CR0); reg &= ~CR0_SMMUEN; error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); if (error) device_printf(sc->dev, "Could not disable SMMU.\n"); return (0); } static int smmu_event_intr(void *arg) { uint32_t evt[EVTQ_ENTRY_DWORDS * 2]; struct smmu_softc *sc; sc = arg; do { smmu_evtq_dequeue(sc, evt); smmu_print_event(sc, evt); } while (!smmu_q_empty(&sc->evtq)); return (FILTER_HANDLED); } static int __unused smmu_sync_intr(void *arg) { struct smmu_softc *sc; sc = arg; device_printf(sc->dev, "%s\n", __func__); return (FILTER_HANDLED); } static int smmu_gerr_intr(void *arg) { struct smmu_softc *sc; sc = arg; device_printf(sc->dev, "SMMU Global Error\n"); return (FILTER_HANDLED); } static int smmu_enable_interrupts(struct smmu_softc *sc) { uint32_t reg; int error; /* Disable MSI. */ bus_write_8(sc->res[0], SMMU_GERROR_IRQ_CFG0, 0); bus_write_4(sc->res[0], SMMU_GERROR_IRQ_CFG1, 0); bus_write_4(sc->res[0], SMMU_GERROR_IRQ_CFG2, 0); bus_write_8(sc->res[0], SMMU_EVENTQ_IRQ_CFG0, 0); bus_write_4(sc->res[0], SMMU_EVENTQ_IRQ_CFG1, 0); bus_write_4(sc->res[0], SMMU_EVENTQ_IRQ_CFG2, 0); if (sc->features & CR0_PRIQEN) { bus_write_8(sc->res[0], SMMU_PRIQ_IRQ_CFG0, 0); bus_write_4(sc->res[0], SMMU_PRIQ_IRQ_CFG1, 0); bus_write_4(sc->res[0], SMMU_PRIQ_IRQ_CFG2, 0); } /* Disable any interrupts. */ error = smmu_write_ack(sc, SMMU_IRQ_CTRL, SMMU_IRQ_CTRLACK, 0); if (error) { device_printf(sc->dev, "Could not disable interrupts.\n"); return (ENXIO); } /* Enable interrupts. */ reg = IRQ_CTRL_EVENTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; if (sc->features & SMMU_FEATURE_PRI) reg |= IRQ_CTRL_PRIQ_IRQEN; error = smmu_write_ack(sc, SMMU_IRQ_CTRL, SMMU_IRQ_CTRLACK, reg); if (error) { device_printf(sc->dev, "Could not enable interrupts.\n"); return (ENXIO); } return (0); } #ifdef DEV_ACPI static void smmu_configure_intr(struct smmu_softc *sc, struct resource *res) { struct intr_map_data_acpi *ad; struct intr_map_data *data; data = rman_get_virtual(res); KASSERT(data != NULL, ("data is NULL")); if (data->type == INTR_MAP_DATA_ACPI) { ad = (struct intr_map_data_acpi *)data; ad->trig = INTR_TRIGGER_EDGE; ad->pol = INTR_POLARITY_HIGH; } } #endif static int smmu_setup_interrupts(struct smmu_softc *sc) { device_t dev; int error; dev = sc->dev; #ifdef DEV_ACPI /* * Configure SMMU interrupts as EDGE triggered manually * as ACPI tables carries no information for that. */ smmu_configure_intr(sc, sc->res[1]); /* PRIQ is not in use. 
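 * sc->res[2] (the optional interrupt in smmu_spec, presumably the PRI
 * queue interrupt) is skipped here because this driver does not
 * implement the PRI queue; see the overview at the top of the file.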
*/ smmu_configure_intr(sc, sc->res[3]); smmu_configure_intr(sc, sc->res[4]); #endif error = bus_setup_intr(dev, sc->res[1], INTR_TYPE_MISC, smmu_event_intr, NULL, sc, &sc->intr_cookie[0]); if (error) { device_printf(dev, "Couldn't setup Event interrupt handler\n"); return (ENXIO); } error = bus_setup_intr(dev, sc->res[4], INTR_TYPE_MISC, smmu_gerr_intr, NULL, sc, &sc->intr_cookie[2]); if (error) { device_printf(dev, "Couldn't setup Gerr interrupt handler\n"); return (ENXIO); } return (0); } static int smmu_reset(struct smmu_softc *sc) { struct smmu_cmdq_entry cmd; struct smmu_strtab *strtab; int error; int reg; reg = bus_read_4(sc->res[0], SMMU_CR0); if (reg & CR0_SMMUEN) device_printf(sc->dev, "%s: Warning: SMMU is enabled\n", __func__); error = smmu_disable(sc); if (error) device_printf(sc->dev, "%s: Could not disable SMMU.\n", __func__); if (smmu_enable_interrupts(sc) != 0) { device_printf(sc->dev, "Could not enable interrupts.\n"); return (ENXIO); } reg = CR1_TABLE_SH_IS | CR1_TABLE_OC_WBC | CR1_TABLE_IC_WBC | CR1_QUEUE_SH_IS | CR1_QUEUE_OC_WBC | CR1_QUEUE_IC_WBC; bus_write_4(sc->res[0], SMMU_CR1, reg); reg = CR2_PTM | CR2_RECINVSID | CR2_E2H; bus_write_4(sc->res[0], SMMU_CR2, reg); /* Stream table. */ strtab = &sc->strtab; bus_write_8(sc->res[0], SMMU_STRTAB_BASE, strtab->base); bus_write_4(sc->res[0], SMMU_STRTAB_BASE_CFG, strtab->base_cfg); /* Command queue. */ bus_write_8(sc->res[0], SMMU_CMDQ_BASE, sc->cmdq.base); bus_write_4(sc->res[0], SMMU_CMDQ_PROD, sc->cmdq.lc.prod); bus_write_4(sc->res[0], SMMU_CMDQ_CONS, sc->cmdq.lc.cons); reg = CR0_CMDQEN; error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); if (error) { device_printf(sc->dev, "Could not enable command queue\n"); return (ENXIO); } /* Invalidate cached configuration. */ smmu_invalidate_all_sid(sc); if (sc->features & SMMU_FEATURE_HYP) { cmd.opcode = CMD_TLBI_EL2_ALL; smmu_cmdq_enqueue_cmd(sc, &cmd); }; /* Invalidate TLB. 
*/ smmu_tlbi_all(sc); /* Event queue */ bus_write_8(sc->res[0], SMMU_EVENTQ_BASE, sc->evtq.base); bus_write_4(sc->res[0], SMMU_EVENTQ_PROD, sc->evtq.lc.prod); bus_write_4(sc->res[0], SMMU_EVENTQ_CONS, sc->evtq.lc.cons); reg |= CR0_EVENTQEN; error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); if (error) { device_printf(sc->dev, "Could not enable event queue\n"); return (ENXIO); } if (sc->features & SMMU_FEATURE_PRI) { /* PRI queue */ bus_write_8(sc->res[0], SMMU_PRIQ_BASE, sc->priq.base); bus_write_4(sc->res[0], SMMU_PRIQ_PROD, sc->priq.lc.prod); bus_write_4(sc->res[0], SMMU_PRIQ_CONS, sc->priq.lc.cons); reg |= CR0_PRIQEN; error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); if (error) { device_printf(sc->dev, "Could not enable PRI queue\n"); return (ENXIO); } } if (sc->features & SMMU_FEATURE_ATS) { reg |= CR0_ATSCHK; error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); if (error) { device_printf(sc->dev, "Could not enable ATS check.\n"); return (ENXIO); } } reg |= CR0_SMMUEN; error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); if (error) { device_printf(sc->dev, "Could not enable SMMU.\n"); return (ENXIO); } return (0); } static int smmu_check_features(struct smmu_softc *sc) { uint32_t reg; uint32_t val; sc->features = 0; reg = bus_read_4(sc->res[0], SMMU_IDR0); if (reg & IDR0_ST_LVL_2) { if (bootverbose) device_printf(sc->dev, "2-level stream table supported.\n"); sc->features |= SMMU_FEATURE_2_LVL_STREAM_TABLE; } if (reg & IDR0_CD2L) { if (bootverbose) device_printf(sc->dev, "2-level CD table supported.\n"); sc->features |= SMMU_FEATURE_2_LVL_CD; } switch (reg & IDR0_TTENDIAN_M) { case IDR0_TTENDIAN_MIXED: if (bootverbose) device_printf(sc->dev, "Mixed endianness supported.\n"); sc->features |= SMMU_FEATURE_TT_LE; sc->features |= SMMU_FEATURE_TT_BE; break; case IDR0_TTENDIAN_LITTLE: if (bootverbose) device_printf(sc->dev, "Little endian supported only.\n"); sc->features |= SMMU_FEATURE_TT_LE; break; case IDR0_TTENDIAN_BIG: if (bootverbose) device_printf(sc->dev, "Big endian supported only.\n"); sc->features |= SMMU_FEATURE_TT_BE; break; default: device_printf(sc->dev, "Unsupported endianness.\n"); return (ENXIO); } if (reg & IDR0_SEV) sc->features |= SMMU_FEATURE_SEV; if (reg & IDR0_MSI) { if (bootverbose) device_printf(sc->dev, "MSI feature present.\n"); sc->features |= SMMU_FEATURE_MSI; } if (reg & IDR0_HYP) { if (bootverbose) device_printf(sc->dev, "HYP feature present.\n"); sc->features |= SMMU_FEATURE_HYP; } if (reg & IDR0_ATS) sc->features |= SMMU_FEATURE_ATS; if (reg & IDR0_PRI) sc->features |= SMMU_FEATURE_PRI; switch (reg & IDR0_STALL_MODEL_M) { case IDR0_STALL_MODEL_FORCE: /* Stall is forced. */ sc->features |= SMMU_FEATURE_STALL_FORCE; /* FALLTHROUGH */ case IDR0_STALL_MODEL_STALL: sc->features |= SMMU_FEATURE_STALL; break; } /* Grab translation stages supported. 
*/ if (reg & IDR0_S1P) { if (bootverbose) device_printf(sc->dev, "Stage 1 translation supported.\n"); sc->features |= SMMU_FEATURE_S1P; } if (reg & IDR0_S2P) { if (bootverbose) device_printf(sc->dev, "Stage 2 translation supported.\n"); sc->features |= SMMU_FEATURE_S2P; } switch (reg & IDR0_TTF_M) { case IDR0_TTF_ALL: case IDR0_TTF_AA64: sc->ias = 40; break; default: device_printf(sc->dev, "No AArch64 table format support.\n"); return (ENXIO); } if (reg & IDR0_ASID16) sc->asid_bits = 16; else sc->asid_bits = 8; if (bootverbose) device_printf(sc->dev, "ASID bits %d\n", sc->asid_bits); if (reg & IDR0_VMID16) sc->vmid_bits = 16; else sc->vmid_bits = 8; reg = bus_read_4(sc->res[0], SMMU_IDR1); if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) { device_printf(sc->dev, "Embedded implementations not supported by this driver.\n"); return (ENXIO); } val = (reg & IDR1_CMDQS_M) >> IDR1_CMDQS_S; sc->cmdq.size_log2 = val; if (bootverbose) device_printf(sc->dev, "CMD queue bits %d\n", val); val = (reg & IDR1_EVENTQS_M) >> IDR1_EVENTQS_S; sc->evtq.size_log2 = val; if (bootverbose) device_printf(sc->dev, "EVENT queue bits %d\n", val); if (sc->features & SMMU_FEATURE_PRI) { val = (reg & IDR1_PRIQS_M) >> IDR1_PRIQS_S; sc->priq.size_log2 = val; if (bootverbose) device_printf(sc->dev, "PRI queue bits %d\n", val); } sc->ssid_bits = (reg & IDR1_SSIDSIZE_M) >> IDR1_SSIDSIZE_S; sc->sid_bits = (reg & IDR1_SIDSIZE_M) >> IDR1_SIDSIZE_S; if (sc->sid_bits <= STRTAB_SPLIT) sc->features &= ~SMMU_FEATURE_2_LVL_STREAM_TABLE; if (bootverbose) { device_printf(sc->dev, "SSID bits %d\n", sc->ssid_bits); device_printf(sc->dev, "SID bits %d\n", sc->sid_bits); } /* IDR3 */ reg = bus_read_4(sc->res[0], SMMU_IDR3); if (reg & IDR3_RIL) sc->features |= SMMU_FEATURE_RANGE_INV; /* IDR5 */ reg = bus_read_4(sc->res[0], SMMU_IDR5); switch (reg & IDR5_OAS_M) { case IDR5_OAS_32: sc->oas = 32; break; case IDR5_OAS_36: sc->oas = 36; break; case IDR5_OAS_40: sc->oas = 40; break; case IDR5_OAS_42: sc->oas = 42; break; case IDR5_OAS_44: sc->oas = 44; break; case IDR5_OAS_48: sc->oas = 48; break; case IDR5_OAS_52: sc->oas = 52; break; } sc->pgsizes = 0; if (reg & IDR5_GRAN64K) sc->pgsizes |= 64 * 1024; if (reg & IDR5_GRAN16K) sc->pgsizes |= 16 * 1024; if (reg & IDR5_GRAN4K) sc->pgsizes |= 4 * 1024; if ((reg & IDR5_VAX_M) == IDR5_VAX_52) sc->features |= SMMU_FEATURE_VAX; return (0); } static void smmu_init_asids(struct smmu_softc *sc) { sc->asid_set_size = (1 << sc->asid_bits); sc->asid_set = bit_alloc(sc->asid_set_size, M_SMMU, M_WAITOK); mtx_init(&sc->asid_set_mutex, "asid set", NULL, MTX_SPIN); } static int smmu_asid_alloc(struct smmu_softc *sc, int *new_asid) { mtx_lock_spin(&sc->asid_set_mutex); bit_ffc(sc->asid_set, sc->asid_set_size, new_asid); if (*new_asid == -1) { mtx_unlock_spin(&sc->asid_set_mutex); return (ENOMEM); } bit_set(sc->asid_set, *new_asid); mtx_unlock_spin(&sc->asid_set_mutex); return (0); } static void smmu_asid_free(struct smmu_softc *sc, int asid) { mtx_lock_spin(&sc->asid_set_mutex); bit_clear(sc->asid_set, asid); mtx_unlock_spin(&sc->asid_set_mutex); } /* * Device interface. 
*/ int smmu_attach(device_t dev) { struct smmu_softc *sc; int error; sc = device_get_softc(dev); sc->dev = dev; mtx_init(&sc->sc_mtx, device_get_nameunit(sc->dev), "smmu", MTX_DEF); error = smmu_setup_interrupts(sc); if (error) { bus_release_resources(dev, smmu_spec, sc->res); return (ENXIO); } error = smmu_check_features(sc); if (error) { device_printf(dev, "Some features are required " "but not supported by hardware.\n"); return (ENXIO); } smmu_init_asids(sc); error = smmu_init_queues(sc); if (error) { device_printf(dev, "Couldn't allocate queues.\n"); return (ENXIO); } error = smmu_init_strtab(sc); if (error) { device_printf(dev, "Couldn't allocate strtab.\n"); return (ENXIO); } error = smmu_reset(sc); if (error) { device_printf(dev, "Couldn't reset SMMU.\n"); return (ENXIO); } return (0); } int smmu_detach(device_t dev) { struct smmu_softc *sc; sc = device_get_softc(dev); bus_release_resources(dev, smmu_spec, sc->res); return (0); } static int smmu_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct smmu_softc *sc; sc = device_get_softc(dev); device_printf(sc->dev, "%s\n", __func__); return (ENOENT); } static int smmu_unmap(device_t dev, struct iommu_domain *iodom, vm_offset_t va, bus_size_t size) { struct smmu_domain *domain; struct smmu_softc *sc; int err; int i; sc = device_get_softc(dev); domain = (struct smmu_domain *)iodom; err = 0; dprintf("%s: %lx, %ld, domain %d\n", __func__, va, size, domain->asid); for (i = 0; i < size; i += PAGE_SIZE) { if (smmu_pmap_remove(&domain->p, va) == 0) { /* pmap entry removed, invalidate TLB. */ smmu_tlbi_va(sc, va, domain->asid); } else { err = ENOENT; break; } va += PAGE_SIZE; } smmu_sync(sc); return (err); } static int smmu_map(device_t dev, struct iommu_domain *iodom, vm_offset_t va, vm_page_t *ma, vm_size_t size, vm_prot_t prot) { struct smmu_domain *domain; struct smmu_softc *sc; vm_paddr_t pa; int error; int i; sc = device_get_softc(dev); domain = (struct smmu_domain *)iodom; dprintf("%s: %lx -> %lx, %ld, domain %d\n", __func__, va, pa, size, domain->asid); for (i = 0; size > 0; size -= PAGE_SIZE) { pa = VM_PAGE_TO_PHYS(ma[i++]); error = smmu_pmap_enter(&domain->p, va, pa, prot, 0); if (error) return (error); smmu_tlbi_va(sc, va, domain->asid); va += PAGE_SIZE; } smmu_sync(sc); return (0); } static struct iommu_domain * smmu_domain_alloc(device_t dev, struct iommu_unit *iommu) { struct iommu_domain *iodom; struct smmu_domain *domain; struct smmu_unit *unit; struct smmu_softc *sc; int error; int new_asid; sc = device_get_softc(dev); unit = (struct smmu_unit *)iommu; domain = malloc(sizeof(*domain), M_SMMU, M_WAITOK | M_ZERO); error = smmu_asid_alloc(sc, &new_asid); if (error) { free(domain, M_SMMU); device_printf(sc->dev, "Could not allocate ASID for a new domain.\n"); return (NULL); } domain->asid = (uint16_t)new_asid; smmu_pmap_pinit(&domain->p); error = smmu_init_cd(sc, domain); if (error) { free(domain, M_SMMU); device_printf(sc->dev, "Could not initialize CD\n"); return (NULL); } smmu_tlbi_asid(sc, domain->asid); LIST_INIT(&domain->ctx_list); IOMMU_LOCK(iommu); LIST_INSERT_HEAD(&unit->domain_list, domain, next); IOMMU_UNLOCK(iommu); iodom = &domain->iodom; /* * Use 48-bit address space regardless of VAX bit * as we need 64k IOMMU_PAGE_SIZE for 52-bit space. 
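 * 52-bit virtual addresses are only available with the 64 KiB
 * translation granule, and smmu_init_cd() programs the CD with
 * CD0_TG0_4KB, so the domain stays capped at MAXADDR_48BIT even when
 * SMMU_FEATURE_VAX is reported.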
*/ iodom->end = MAXADDR_48BIT; return (iodom); } static void smmu_domain_free(device_t dev, struct iommu_domain *iodom) { struct smmu_domain *domain; struct smmu_softc *sc; struct smmu_cd *cd; sc = device_get_softc(dev); domain = (struct smmu_domain *)iodom; LIST_REMOVE(domain, next); cd = domain->cd; smmu_pmap_remove_pages(&domain->p); smmu_pmap_release(&domain->p); smmu_tlbi_asid(sc, domain->asid); smmu_asid_free(sc, domain->asid); contigfree(cd->vaddr, cd->size, M_SMMU); free(cd, M_SMMU); free(domain, M_SMMU); } static int smmu_set_buswide(device_t dev, struct smmu_domain *domain, struct smmu_ctx *ctx) { struct smmu_softc *sc; int i; sc = device_get_softc(dev); for (i = 0; i < PCI_SLOTMAX; i++) smmu_init_ste(sc, domain->cd, (ctx->sid | i), ctx->bypass); return (0); } static int smmu_pci_get_sid(device_t child, u_int *xref0, u_int *sid0) { struct pci_id_ofw_iommu pi; int err; err = pci_get_id(child, PCI_ID_OFW_IOMMU, (uintptr_t *)&pi); if (err == 0) { if (sid0) *sid0 = pi.id; if (xref0) *xref0 = pi.xref; } return (err); } static struct iommu_ctx * smmu_ctx_alloc(device_t dev, struct iommu_domain *iodom, device_t child, bool disabled) { struct smmu_domain *domain; struct smmu_ctx *ctx; domain = (struct smmu_domain *)iodom; ctx = malloc(sizeof(struct smmu_ctx), M_SMMU, M_WAITOK | M_ZERO); ctx->dev = child; ctx->domain = domain; if (disabled) ctx->bypass = true; IOMMU_DOMAIN_LOCK(iodom); LIST_INSERT_HEAD(&domain->ctx_list, ctx, next); IOMMU_DOMAIN_UNLOCK(iodom); return (&ctx->ioctx); } static int smmu_ctx_init(device_t dev, struct iommu_ctx *ioctx) { struct smmu_domain *domain; struct iommu_domain *iodom; struct smmu_softc *sc; struct smmu_ctx *ctx; devclass_t pci_class; u_int sid; int err; ctx = (struct smmu_ctx *)ioctx; sc = device_get_softc(dev); domain = ctx->domain; iodom = (struct iommu_domain *)domain; pci_class = devclass_find("pci"); if (device_get_devclass(device_get_parent(ctx->dev)) == pci_class) { err = smmu_pci_get_sid(ctx->dev, NULL, &sid); if (err) return (err); ioctx->rid = pci_get_rid(dev); ctx->sid = sid; ctx->vendor = pci_get_vendor(ctx->dev); ctx->device = pci_get_device(ctx->dev); } if (sc->features & SMMU_FEATURE_2_LVL_STREAM_TABLE) { err = smmu_init_l1_entry(sc, ctx->sid); if (err) return (err); } /* * Neoverse N1 SDP: * 0x800 xhci * 0x700 re * 0x600 sata */ smmu_init_ste(sc, domain->cd, ctx->sid, ctx->bypass); if (device_get_devclass(device_get_parent(ctx->dev)) == pci_class) if (iommu_is_buswide_ctx(iodom->iommu, pci_get_bus(ctx->dev))) smmu_set_buswide(dev, domain, ctx); return (0); } static void smmu_ctx_free(device_t dev, struct iommu_ctx *ioctx) { struct smmu_softc *sc; struct smmu_ctx *ctx; IOMMU_ASSERT_LOCKED(ioctx->domain->iommu); sc = device_get_softc(dev); ctx = (struct smmu_ctx *)ioctx; smmu_deinit_ste(sc, ctx->sid); LIST_REMOVE(ctx, next); free(ctx, M_SMMU); } struct smmu_ctx * smmu_ctx_lookup_by_sid(device_t dev, u_int sid) { struct smmu_softc *sc; struct smmu_domain *domain; struct smmu_unit *unit; struct smmu_ctx *ctx; sc = device_get_softc(dev); unit = &sc->unit; LIST_FOREACH(domain, &unit->domain_list, next) { LIST_FOREACH(ctx, &domain->ctx_list, next) { if (ctx->sid == sid) return (ctx); } } return (NULL); } static struct iommu_ctx * smmu_ctx_lookup(device_t dev, device_t child) { struct iommu_unit *iommu __diagused; struct smmu_softc *sc; struct smmu_domain *domain; struct smmu_unit *unit; struct smmu_ctx *ctx; sc = device_get_softc(dev); unit = &sc->unit; iommu = &unit->iommu; IOMMU_ASSERT_LOCKED(iommu); LIST_FOREACH(domain, &unit->domain_list, 
next) { IOMMU_DOMAIN_LOCK(&domain->iodom); LIST_FOREACH(ctx, &domain->ctx_list, next) { if (ctx->dev == child) { IOMMU_DOMAIN_UNLOCK(&domain->iodom); return (&ctx->ioctx); } } IOMMU_DOMAIN_UNLOCK(&domain->iodom); } return (NULL); } static int smmu_find(device_t dev, device_t child) { struct smmu_softc *sc; u_int xref; int err; sc = device_get_softc(dev); err = smmu_pci_get_sid(child, &xref, NULL); if (err) return (ENOENT); /* Check if xref is ours. */ if (xref != sc->xref) return (EFAULT); return (0); } #ifdef FDT static int smmu_ofw_md_data(device_t dev, struct iommu_ctx *ioctx, pcell_t *cells, int ncells) { struct smmu_ctx *ctx; ctx = (struct smmu_ctx *)ioctx; if (ncells != 1) return (-1); ctx->sid = cells[0]; return (0); } #endif static device_method_t smmu_methods[] = { /* Device interface */ DEVMETHOD(device_detach, smmu_detach), /* SMMU interface */ DEVMETHOD(iommu_find, smmu_find), DEVMETHOD(iommu_map, smmu_map), DEVMETHOD(iommu_unmap, smmu_unmap), DEVMETHOD(iommu_domain_alloc, smmu_domain_alloc), DEVMETHOD(iommu_domain_free, smmu_domain_free), DEVMETHOD(iommu_ctx_alloc, smmu_ctx_alloc), DEVMETHOD(iommu_ctx_init, smmu_ctx_init), DEVMETHOD(iommu_ctx_free, smmu_ctx_free), DEVMETHOD(iommu_ctx_lookup, smmu_ctx_lookup), #ifdef FDT DEVMETHOD(iommu_ofw_md_data, smmu_ofw_md_data), #endif /* Bus interface */ DEVMETHOD(bus_read_ivar, smmu_read_ivar), /* End */ DEVMETHOD_END }; DEFINE_CLASS_0(smmu, smmu_driver, smmu_methods, sizeof(struct smmu_softc)); diff --git a/sys/compat/linuxkpi/common/include/linux/log2.h b/sys/compat/linuxkpi/common/include/linux/log2.h index 27e91a8bdbe0..2d54c75c7c23 100644 --- a/sys/compat/linuxkpi/common/include/linux/log2.h +++ b/sys/compat/linuxkpi/common/include/linux/log2.h @@ -1,129 +1,56 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2015 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef _LINUXKPI_LINUX_LOG2_H_ #define _LINUXKPI_LINUX_LOG2_H_ #include #include static inline unsigned long roundup_pow_of_two(unsigned long x) { return (1UL << flsl(x - 1)); } static inline int is_power_of_2(unsigned long n) { return (n == roundup_pow_of_two(n)); } static inline unsigned long rounddown_pow_of_two(unsigned long x) { return (1UL << (flsl(x) - 1)); } -#define ilog2(n) \ -( \ - __builtin_constant_p(n) ? ( \ - (n) < 1 ? -1 : \ - (n) & (1ULL << 63) ? 63 : \ - (n) & (1ULL << 62) ? 62 : \ - (n) & (1ULL << 61) ? 61 : \ - (n) & (1ULL << 60) ? 60 : \ - (n) & (1ULL << 59) ? 59 : \ - (n) & (1ULL << 58) ? 58 : \ - (n) & (1ULL << 57) ? 57 : \ - (n) & (1ULL << 56) ? 56 : \ - (n) & (1ULL << 55) ? 55 : \ - (n) & (1ULL << 54) ? 54 : \ - (n) & (1ULL << 53) ? 53 : \ - (n) & (1ULL << 52) ? 52 : \ - (n) & (1ULL << 51) ? 51 : \ - (n) & (1ULL << 50) ? 50 : \ - (n) & (1ULL << 49) ? 49 : \ - (n) & (1ULL << 48) ? 48 : \ - (n) & (1ULL << 47) ? 47 : \ - (n) & (1ULL << 46) ? 46 : \ - (n) & (1ULL << 45) ? 45 : \ - (n) & (1ULL << 44) ? 44 : \ - (n) & (1ULL << 43) ? 43 : \ - (n) & (1ULL << 42) ? 42 : \ - (n) & (1ULL << 41) ? 41 : \ - (n) & (1ULL << 40) ? 40 : \ - (n) & (1ULL << 39) ? 39 : \ - (n) & (1ULL << 38) ? 38 : \ - (n) & (1ULL << 37) ? 37 : \ - (n) & (1ULL << 36) ? 36 : \ - (n) & (1ULL << 35) ? 35 : \ - (n) & (1ULL << 34) ? 34 : \ - (n) & (1ULL << 33) ? 33 : \ - (n) & (1ULL << 32) ? 32 : \ - (n) & (1ULL << 31) ? 31 : \ - (n) & (1ULL << 30) ? 30 : \ - (n) & (1ULL << 29) ? 29 : \ - (n) & (1ULL << 28) ? 28 : \ - (n) & (1ULL << 27) ? 27 : \ - (n) & (1ULL << 26) ? 26 : \ - (n) & (1ULL << 25) ? 25 : \ - (n) & (1ULL << 24) ? 24 : \ - (n) & (1ULL << 23) ? 23 : \ - (n) & (1ULL << 22) ? 22 : \ - (n) & (1ULL << 21) ? 21 : \ - (n) & (1ULL << 20) ? 20 : \ - (n) & (1ULL << 19) ? 19 : \ - (n) & (1ULL << 18) ? 18 : \ - (n) & (1ULL << 17) ? 17 : \ - (n) & (1ULL << 16) ? 16 : \ - (n) & (1ULL << 15) ? 15 : \ - (n) & (1ULL << 14) ? 14 : \ - (n) & (1ULL << 13) ? 13 : \ - (n) & (1ULL << 12) ? 12 : \ - (n) & (1ULL << 11) ? 11 : \ - (n) & (1ULL << 10) ? 10 : \ - (n) & (1ULL << 9) ? 9 : \ - (n) & (1ULL << 8) ? 8 : \ - (n) & (1ULL << 7) ? 7 : \ - (n) & (1ULL << 6) ? 6 : \ - (n) & (1ULL << 5) ? 5 : \ - (n) & (1ULL << 4) ? 4 : \ - (n) & (1ULL << 3) ? 3 : \ - (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - -1) : \ - (sizeof(n) <= 4) ? \ - fls((u32)(n)) - 1 : flsll((u64)(n)) - 1 \ -) - #define order_base_2(x) ilog2(roundup_pow_of_two(x)) #endif /* _LINUXKPI_LINUX_LOG2_H_ */ diff --git a/sys/dev/bxe/bxe.h b/sys/dev/bxe/bxe.h index 0c7e6232dbdb..79d2792f7d6f 100644 --- a/sys/dev/bxe/bxe.h +++ b/sys/dev/bxe/bxe.h @@ -1,2443 +1,2433 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007-2014 QLogic Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
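/*
 * [Editor's sketch: not part of the patch.]  The ilog2() macro deleted from
 * log2.h above computed floor(log2(n)) for n >= 1 and returned -1 for 0.
 * For reference, an equivalent written with flsll(), matching the macro's
 * non-constant fallback; example_ilog2 is a hypothetical name.
 */
#include <strings.h>

static inline int
example_ilog2(unsigned long long n)
{
	/* flsll() returns the 1-based index of the MSB, or 0 when n == 0. */
	return (flsll(n) - 1);
}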
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __BXE_H__ #define __BXE_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "device_if.h" #include "bus_if.h" #include "pci_if.h" #if _BYTE_ORDER == _LITTLE_ENDIAN #ifndef LITTLE_ENDIAN #define LITTLE_ENDIAN #endif #ifndef __LITTLE_ENDIAN #define __LITTLE_ENDIAN #endif #undef BIG_ENDIAN #undef __BIG_ENDIAN #else /* _BIG_ENDIAN */ #ifndef BIG_ENDIAN #define BIG_ENDIAN #endif #ifndef __BIG_ENDIAN #define __BIG_ENDIAN #endif #undef LITTLE_ENDIAN #undef __LITTLE_ENDIAN #endif #include "ecore_mfw_req.h" #include "ecore_fw_defs.h" #include "ecore_hsi.h" #include "ecore_reg.h" #include "bxe_dcb.h" #include "bxe_stats.h" #include "bxe_elink.h" #define VF_MAC_CREDIT_CNT 0 #define VF_VLAN_CREDIT_CNT (0) #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif #ifndef ARRSIZE #define ARRSIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif #ifndef DIV_ROUND_UP #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) #endif #ifndef roundup #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) #endif -#ifndef ilog2 -static inline -int bxe_ilog2(int x) -{ - int log = 0; - while (x >>= 1) log++; - return (log); -} -#define ilog2(x) bxe_ilog2(x) -#endif #include "ecore_sp.h" #define BRCM_VENDORID 0x14e4 #define QLOGIC_VENDORID 0x1077 #define PCI_ANY_ID (uint16_t)(~0U) struct bxe_device_type { uint16_t bxe_vid; uint16_t bxe_did; uint16_t bxe_svid; uint16_t bxe_sdid; char *bxe_name; }; #define BCM_PAGE_SHIFT 12 #define BCM_PAGE_SIZE (1 << BCM_PAGE_SHIFT) #define BCM_PAGE_MASK (~(BCM_PAGE_SIZE - 1)) #define BCM_PAGE_ALIGN(addr) ((addr + BCM_PAGE_SIZE - 1) & BCM_PAGE_MASK) #if BCM_PAGE_SIZE != 4096 #error Page sizes other than 4KB are unsupported! #endif #if (BUS_SPACE_MAXADDR > 0xFFFFFFFF) #define U64_LO(addr) ((uint32_t)(((uint64_t)(addr)) & 0xFFFFFFFF)) #define U64_HI(addr) ((uint32_t)(((uint64_t)(addr)) >> 32)) #else #define U64_LO(addr) ((uint32_t)(addr)) #define U64_HI(addr) (0) #endif #define HILO_U64(hi, lo) ((((uint64_t)(hi)) << 32) + (lo)) #define SET_FLAG(value, mask, flag) \ do { \ (value) &= ~(mask); \ (value) |= ((flag) << (mask##_SHIFT)); \ } while (0) #define GET_FLAG(value, mask) \ (((value) & (mask)) >> (mask##_SHIFT)) #define GET_FIELD(value, fname) \ (((value) & (fname##_MASK)) >> (fname##_SHIFT)) #define BXE_MAX_SEGMENTS 12 /* 13-1 for parsing buffer */ #define BXE_TSO_MAX_SEGMENTS 32 #define BXE_TSO_MAX_SIZE (65535 + sizeof(struct ether_vlan_header)) #define BXE_TSO_MAX_SEG_SIZE 4096 /* dropless fc FW/HW related params */ #define BRB_SIZE(sc) (CHIP_IS_E3(sc) ? 1024 : 512) #define MAX_AGG_QS(sc) (CHIP_IS_E1(sc) ? 
\ ETH_MAX_AGGREGATION_QUEUES_E1 : \ ETH_MAX_AGGREGATION_QUEUES_E1H_E2) #define FW_DROP_LEVEL(sc) (3 + MAX_SPQ_PENDING + MAX_AGG_QS(sc)) #define FW_PREFETCH_CNT 16 #define DROPLESS_FC_HEADROOM 100 /******************/ /* RX SGE defines */ /******************/ #define RX_SGE_NUM_PAGES 2 /* must be a power of 2 */ #define RX_SGE_TOTAL_PER_PAGE (BCM_PAGE_SIZE / sizeof(struct eth_rx_sge)) #define RX_SGE_NEXT_PAGE_DESC_CNT 2 #define RX_SGE_USABLE_PER_PAGE (RX_SGE_TOTAL_PER_PAGE - RX_SGE_NEXT_PAGE_DESC_CNT) #define RX_SGE_PER_PAGE_MASK (RX_SGE_TOTAL_PER_PAGE - 1) #define RX_SGE_TOTAL (RX_SGE_TOTAL_PER_PAGE * RX_SGE_NUM_PAGES) #define RX_SGE_USABLE (RX_SGE_USABLE_PER_PAGE * RX_SGE_NUM_PAGES) #define RX_SGE_MAX (RX_SGE_TOTAL - 1) #define RX_SGE(x) ((x) & RX_SGE_MAX) #define RX_SGE_NEXT(x) \ ((((x) & RX_SGE_PER_PAGE_MASK) == (RX_SGE_USABLE_PER_PAGE - 1)) \ ? (x) + 1 + RX_SGE_NEXT_PAGE_DESC_CNT : (x) + 1) #define RX_SGE_MASK_ELEM_SZ 64 #define RX_SGE_MASK_ELEM_SHIFT 6 #define RX_SGE_MASK_ELEM_MASK ((uint64_t)RX_SGE_MASK_ELEM_SZ - 1) /* * Creates a bitmask of all ones in less significant bits. * idx - index of the most significant bit in the created mask. */ #define RX_SGE_ONES_MASK(idx) \ (((uint64_t)0x1 << (((idx) & RX_SGE_MASK_ELEM_MASK) + 1)) - 1) #define RX_SGE_MASK_ELEM_ONE_MASK ((uint64_t)(~0)) /* Number of uint64_t elements in SGE mask array. */ #define RX_SGE_MASK_LEN \ ((RX_SGE_NUM_PAGES * RX_SGE_TOTAL_PER_PAGE) / RX_SGE_MASK_ELEM_SZ) #define RX_SGE_MASK_LEN_MASK (RX_SGE_MASK_LEN - 1) #define RX_SGE_NEXT_MASK_ELEM(el) (((el) + 1) & RX_SGE_MASK_LEN_MASK) /* * dropless fc calculations for SGEs * Number of required SGEs is the sum of two: * 1. Number of possible opened aggregations (next packet for * these aggregations will probably consume SGE immidiatelly) * 2. Rest of BRB blocks divided by 2 (block will consume new SGE only * after placement on BD for new TPA aggregation) * Takes into account RX_SGE_NEXT_PAGE_DESC_CNT "next" elements on each page */ #define NUM_SGE_REQ(sc) \ (MAX_AGG_QS(sc) + (BRB_SIZE(sc) - MAX_AGG_QS(sc)) / 2) #define NUM_SGE_PG_REQ(sc) \ ((NUM_SGE_REQ(sc) + RX_SGE_USABLE_PER_PAGE - 1) / RX_SGE_USABLE_PER_PAGE) #define SGE_TH_LO(sc) \ (NUM_SGE_REQ(sc) + NUM_SGE_PG_REQ(sc) * RX_SGE_NEXT_PAGE_DESC_CNT) #define SGE_TH_HI(sc) \ (SGE_TH_LO(sc) + DROPLESS_FC_HEADROOM) #define PAGES_PER_SGE_SHIFT 0 #define PAGES_PER_SGE (1 << PAGES_PER_SGE_SHIFT) #define SGE_PAGE_SIZE BCM_PAGE_SIZE #define SGE_PAGE_SHIFT BCM_PAGE_SHIFT #define SGE_PAGE_ALIGN(addr) BCM_PAGE_ALIGN(addr) #define SGE_PAGES (SGE_PAGE_SIZE * PAGES_PER_SGE) #define TPA_AGG_SIZE min((8 * SGE_PAGES), 0xffff) /*****************/ /* TX BD defines */ /*****************/ #define TX_BD_NUM_PAGES 16 /* must be a power of 2 */ #define TX_BD_TOTAL_PER_PAGE (BCM_PAGE_SIZE / sizeof(union eth_tx_bd_types)) #define TX_BD_USABLE_PER_PAGE (TX_BD_TOTAL_PER_PAGE - 1) #define TX_BD_TOTAL (TX_BD_TOTAL_PER_PAGE * TX_BD_NUM_PAGES) #define TX_BD_USABLE (TX_BD_USABLE_PER_PAGE * TX_BD_NUM_PAGES) #define TX_BD_MAX (TX_BD_TOTAL - 1) #define TX_BD_NEXT(x) \ ((((x) & TX_BD_USABLE_PER_PAGE) == (TX_BD_USABLE_PER_PAGE - 1)) ? \ ((x) + 2) : ((x) + 1)) #define TX_BD(x) ((x) & TX_BD_MAX) #define TX_BD_PAGE(x) (((x) & ~TX_BD_USABLE_PER_PAGE) >> 8) #define TX_BD_IDX(x) ((x) & TX_BD_USABLE_PER_PAGE) /* * Trigger pending transmits when the number of available BDs is greater * than 1/8 of the total number of usable BDs. 
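/*
 * [Editor's sketch: not part of the patch.]  How the 1/8 cleanup threshold
 * defined just below falls out of the TX ring geometry.  Assumes a 16-byte
 * TX BD (sizeof(union eth_tx_bd_types)); with 4 KiB pages that gives
 * 256 BDs per page, 255 usable (one next-page link), 4080 usable BDs in
 * the 16-page ring and a threshold of 510.
 */
#include <stdio.h>

int
main(void)
{
	const int page_size = 4096;			/* BCM_PAGE_SIZE */
	const int bd_size = 16;				/* assumed BD size */
	const int num_pages = 16;			/* TX_BD_NUM_PAGES */
	const int per_page = page_size / bd_size;	/* TX_BD_TOTAL_PER_PAGE */
	const int usable_per_page = per_page - 1;	/* TX_BD_USABLE_PER_PAGE */
	const int usable = usable_per_page * num_pages;	/* TX_BD_USABLE */

	printf("usable=%d cleanup_threshold=%d\n", usable, usable / 8);
	return (0);
}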
*/ #define BXE_TX_CLEANUP_THRESHOLD (TX_BD_USABLE / 8) #define BXE_TX_TIMEOUT 5 /*****************/ /* RX BD defines */ /*****************/ #define RX_BD_NUM_PAGES 8 /* power of 2 */ #define RX_BD_TOTAL_PER_PAGE (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd)) #define RX_BD_NEXT_PAGE_DESC_CNT 2 #define RX_BD_USABLE_PER_PAGE (RX_BD_TOTAL_PER_PAGE - RX_BD_NEXT_PAGE_DESC_CNT) #define RX_BD_PER_PAGE_MASK (RX_BD_TOTAL_PER_PAGE - 1) #define RX_BD_TOTAL (RX_BD_TOTAL_PER_PAGE * RX_BD_NUM_PAGES) #define RX_BD_USABLE (RX_BD_USABLE_PER_PAGE * RX_BD_NUM_PAGES) #define RX_BD_MAX (RX_BD_TOTAL - 1) #define RX_BD_NEXT(x) \ ((((x) & RX_BD_PER_PAGE_MASK) == (RX_BD_USABLE_PER_PAGE - 1)) ? \ ((x) + 3) : ((x) + 1)) #define RX_BD(x) ((x) & RX_BD_MAX) #define RX_BD_PAGE(x) (((x) & ~RX_BD_PER_PAGE_MASK) >> 9) #define RX_BD_IDX(x) ((x) & RX_BD_PER_PAGE_MASK) /* * dropless fc calculations for BDs * Number of BDs should be as number of buffers in BRB: * Low threshold takes into account RX_BD_NEXT_PAGE_DESC_CNT * "next" elements on each page */ #define NUM_BD_REQ(sc) \ BRB_SIZE(sc) #define NUM_BD_PG_REQ(sc) \ ((NUM_BD_REQ(sc) + RX_BD_USABLE_PER_PAGE - 1) / RX_BD_USABLE_PER_PAGE) #define BD_TH_LO(sc) \ (NUM_BD_REQ(sc) + \ NUM_BD_PG_REQ(sc) * RX_BD_NEXT_PAGE_DESC_CNT + \ FW_DROP_LEVEL(sc)) #define BD_TH_HI(sc) \ (BD_TH_LO(sc) + DROPLESS_FC_HEADROOM) #define MIN_RX_AVAIL(sc) \ ((sc)->dropless_fc ? BD_TH_HI(sc) + 128 : 128) #define MIN_RX_SIZE_TPA_HW(sc) \ (CHIP_IS_E1(sc) ? ETH_MIN_RX_CQES_WITH_TPA_E1 : \ ETH_MIN_RX_CQES_WITH_TPA_E1H_E2) #define MIN_RX_SIZE_NONTPA_HW ETH_MIN_RX_CQES_WITHOUT_TPA #define MIN_RX_SIZE_TPA(sc) \ (max(MIN_RX_SIZE_TPA_HW(sc), MIN_RX_AVAIL(sc))) #define MIN_RX_SIZE_NONTPA(sc) \ (max(MIN_RX_SIZE_NONTPA_HW, MIN_RX_AVAIL(sc))) /***************/ /* RCQ defines */ /***************/ /* * As long as CQE is X times bigger than BD entry we have to allocate X times * more pages for CQ ring in order to keep it balanced with BD ring */ #define CQE_BD_REL (sizeof(union eth_rx_cqe) / \ sizeof(struct eth_rx_bd)) #define RCQ_NUM_PAGES (RX_BD_NUM_PAGES * CQE_BD_REL) /* power of 2 */ #define RCQ_TOTAL_PER_PAGE (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe)) #define RCQ_NEXT_PAGE_DESC_CNT 1 #define RCQ_USABLE_PER_PAGE (RCQ_TOTAL_PER_PAGE - RCQ_NEXT_PAGE_DESC_CNT) #define RCQ_TOTAL (RCQ_TOTAL_PER_PAGE * RCQ_NUM_PAGES) #define RCQ_USABLE (RCQ_USABLE_PER_PAGE * RCQ_NUM_PAGES) #define RCQ_MAX (RCQ_TOTAL - 1) #define RCQ_NEXT(x) \ ((((x) & RCQ_USABLE_PER_PAGE) == (RCQ_USABLE_PER_PAGE - 1)) ? 
\ ((x) + 1 + RCQ_NEXT_PAGE_DESC_CNT) : ((x) + 1)) #define RCQ(x) ((x) & RCQ_MAX) #define RCQ_PAGE(x) (((x) & ~RCQ_USABLE_PER_PAGE) >> 7) #define RCQ_IDX(x) ((x) & RCQ_USABLE_PER_PAGE) /* * dropless fc calculations for RCQs * Number of RCQs should be as number of buffers in BRB: * Low threshold takes into account RCQ_NEXT_PAGE_DESC_CNT * "next" elements on each page */ #define NUM_RCQ_REQ(sc) \ BRB_SIZE(sc) #define NUM_RCQ_PG_REQ(sc) \ ((NUM_RCQ_REQ(sc) + RCQ_USABLE_PER_PAGE - 1) / RCQ_USABLE_PER_PAGE) #define RCQ_TH_LO(sc) \ (NUM_RCQ_REQ(sc) + \ NUM_RCQ_PG_REQ(sc) * RCQ_NEXT_PAGE_DESC_CNT + \ FW_DROP_LEVEL(sc)) #define RCQ_TH_HI(sc) \ (RCQ_TH_LO(sc) + DROPLESS_FC_HEADROOM) /* This is needed for determening of last_max */ #define SUB_S16(a, b) (int16_t)((int16_t)(a) - (int16_t)(b)) #define __SGE_MASK_SET_BIT(el, bit) \ do { \ (el) = ((el) | ((uint64_t)0x1 << (bit))); \ } while (0) #define __SGE_MASK_CLEAR_BIT(el, bit) \ do { \ (el) = ((el) & (~((uint64_t)0x1 << (bit)))); \ } while (0) #define SGE_MASK_SET_BIT(fp, idx) \ __SGE_MASK_SET_BIT((fp)->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \ ((idx) & RX_SGE_MASK_ELEM_MASK)) #define SGE_MASK_CLEAR_BIT(fp, idx) \ __SGE_MASK_CLEAR_BIT((fp)->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \ ((idx) & RX_SGE_MASK_ELEM_MASK)) /* Load / Unload modes */ #define LOAD_NORMAL 0 #define LOAD_OPEN 1 #define LOAD_DIAG 2 #define LOAD_LOOPBACK_EXT 3 #define UNLOAD_NORMAL 0 #define UNLOAD_CLOSE 1 #define UNLOAD_RECOVERY 2 /* Some constants... */ //#define MAX_PATH_NUM 2 //#define E2_MAX_NUM_OF_VFS 64 //#define E1H_FUNC_MAX 8 //#define E2_FUNC_MAX 4 /* per path */ #define MAX_VNIC_NUM 4 #define MAX_FUNC_NUM 8 /* common to all chips */ //#define MAX_NDSB HC_SB_MAX_SB_E2 /* max non-default status block */ #define MAX_RSS_CHAINS 16 /* a constant for HW limit */ #define MAX_MSI_VECTOR 8 /* a constant for HW limit */ #define ILT_NUM_PAGE_ENTRIES 3072 /* * 57710/11 we use whole table since we have 8 functions. * 57712 we have only 4 functions, but use same size per func, so only half * of the table is used. */ #define ILT_PER_FUNC (ILT_NUM_PAGE_ENTRIES / 8) #define FUNC_ILT_BASE(func) (func * ILT_PER_FUNC) /* * the phys address is shifted right 12 bits and has an added * 1=valid bit added to the 53rd bit * then since this is a wide register(TM) * we split it into two 32 bit writes */ #define ONCHIP_ADDR1(x) ((uint32_t)(((uint64_t)x >> 12) & 0xFFFFFFFF)) #define ONCHIP_ADDR2(x) ((uint32_t)((1 << 20) | ((uint64_t)x >> 44))) /* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */ #define ETH_HLEN 14 #define ETH_OVERHEAD (ETH_HLEN + 8 + 8) #define ETH_MIN_PACKET_SIZE 60 #define ETH_MAX_PACKET_SIZE ETHERMTU /* 1500 */ #define ETH_MAX_JUMBO_PACKET_SIZE 9600 /* TCP with Timestamp Option (32) + IPv6 (40) */ #define ETH_MAX_TPA_HEADER_SIZE 72 /* max supported alignment is 256 (8 shift) */ //#define BXE_RX_ALIGN_SHIFT ((CACHE_LINE_SHIFT < 8) ? CACHE_LINE_SHIFT : 8) #define BXE_RX_ALIGN_SHIFT 8 /* FW uses 2 cache lines alignment for start packet and size */ #define BXE_FW_RX_ALIGN_START (1 << BXE_RX_ALIGN_SHIFT) #define BXE_FW_RX_ALIGN_END (1 << BXE_RX_ALIGN_SHIFT) #define BXE_PXP_DRAM_ALIGN (BXE_RX_ALIGN_SHIFT - 5) /* XXX ??? */ #define BXE_SET_ERROR_BIT(sc, error) \ { \ (sc)->error_status |= (error); \ } struct bxe_bar { struct resource *resource; int rid; bus_space_tag_t tag; bus_space_handle_t handle; vm_offset_t kva; }; struct bxe_intr { struct resource *resource; int rid; void *tag; }; /* Used to manage DMA allocations. 
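/*
 * [Editor's sketch: not part of the patch.]  Typical busdma(9) sequence
 * behind a descriptor like the bxe_dma structure defined just below: create
 * a tag, allocate coherent memory, and load the map to learn the bus
 * address.  Error unwinding is trimmed and the example_* names are
 * hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <machine/bus.h>

struct example_dma {
	bus_dma_tag_t	tag;
	bus_dmamap_t	map;
	void		*vaddr;
	bus_addr_t	paddr;
};

static void
example_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error == 0)
		*(bus_addr_t *)arg = segs[0].ds_addr;
}

static int
example_dma_alloc(bus_dma_tag_t parent, struct example_dma *d, bus_size_t size)
{
	int error;

	error = bus_dma_tag_create(parent, PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    size, 1, size, 0, NULL, NULL, &d->tag);
	if (error != 0)
		return (error);
	error = bus_dmamem_alloc(d->tag, &d->vaddr,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &d->map);
	if (error != 0)
		return (error);
	return (bus_dmamap_load(d->tag, d->map, d->vaddr, size,
	    example_dma_cb, &d->paddr, BUS_DMA_NOWAIT));
}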
*/ struct bxe_dma { struct bxe_softc *sc; bus_addr_t paddr; void *vaddr; bus_dma_tag_t tag; bus_dmamap_t map; bus_dma_segment_t seg; bus_size_t size; int nseg; char msg[32]; }; /* attn group wiring */ #define MAX_DYNAMIC_ATTN_GRPS 8 struct attn_route { uint32_t sig[5]; }; struct iro { uint32_t base; uint16_t m1; uint16_t m2; uint16_t m3; uint16_t size; }; union bxe_host_hc_status_block { /* pointer to fp status block e2 */ struct host_hc_status_block_e2 *e2_sb; /* pointer to fp status block e1x */ struct host_hc_status_block_e1x *e1x_sb; }; union bxe_db_prod { struct doorbell_set_prod data; uint32_t raw; }; struct bxe_sw_tx_bd { struct mbuf *m; bus_dmamap_t m_map; uint16_t first_bd; uint8_t flags; /* set on the first BD descriptor when there is a split BD */ #define BXE_TSO_SPLIT_BD (1 << 0) }; struct bxe_sw_rx_bd { struct mbuf *m; bus_dmamap_t m_map; }; struct bxe_sw_tpa_info { struct bxe_sw_rx_bd bd; bus_dma_segment_t seg; uint8_t state; #define BXE_TPA_STATE_START 1 #define BXE_TPA_STATE_STOP 2 uint8_t placement_offset; uint16_t parsing_flags; uint16_t vlan_tag; uint16_t len_on_bd; }; /* * This is the HSI fastpath data structure. There can be up to MAX_RSS_CHAIN * instances of the fastpath structure when using multiple queues. */ struct bxe_fastpath { /* pointer back to parent structure */ struct bxe_softc *sc; struct mtx tx_mtx; char tx_mtx_name[32]; struct mtx rx_mtx; char rx_mtx_name[32]; #define BXE_FP_TX_LOCK(fp) mtx_lock(&fp->tx_mtx) #define BXE_FP_TX_UNLOCK(fp) mtx_unlock(&fp->tx_mtx) #define BXE_FP_TX_LOCK_ASSERT(fp) mtx_assert(&fp->tx_mtx, MA_OWNED) #define BXE_FP_TX_TRYLOCK(fp) mtx_trylock(&fp->tx_mtx) #define BXE_FP_RX_LOCK(fp) mtx_lock(&fp->rx_mtx) #define BXE_FP_RX_UNLOCK(fp) mtx_unlock(&fp->rx_mtx) #define BXE_FP_RX_LOCK_ASSERT(fp) mtx_assert(&fp->rx_mtx, MA_OWNED) /* status block */ struct bxe_dma sb_dma; union bxe_host_hc_status_block status_block; /* transmit chain (tx bds) */ struct bxe_dma tx_dma; union eth_tx_bd_types *tx_chain; /* receive chain (rx bds) */ struct bxe_dma rx_dma; struct eth_rx_bd *rx_chain; /* receive completion queue chain (rcq bds) */ struct bxe_dma rcq_dma; union eth_rx_cqe *rcq_chain; /* receive scatter/gather entry chain (for TPA) */ struct bxe_dma rx_sge_dma; struct eth_rx_sge *rx_sge_chain; /* tx mbufs */ bus_dma_tag_t tx_mbuf_tag; struct bxe_sw_tx_bd tx_mbuf_chain[TX_BD_TOTAL]; /* rx mbufs */ bus_dma_tag_t rx_mbuf_tag; struct bxe_sw_rx_bd rx_mbuf_chain[RX_BD_TOTAL]; bus_dmamap_t rx_mbuf_spare_map; /* rx sge mbufs */ bus_dma_tag_t rx_sge_mbuf_tag; struct bxe_sw_rx_bd rx_sge_mbuf_chain[RX_SGE_TOTAL]; bus_dmamap_t rx_sge_mbuf_spare_map; /* rx tpa mbufs (use the larger size for TPA queue length) */ int tpa_enable; /* disabled per fastpath upon error */ struct bxe_sw_tpa_info rx_tpa_info[ETH_MAX_AGGREGATION_QUEUES_E1H_E2]; bus_dmamap_t rx_tpa_info_mbuf_spare_map; uint64_t rx_tpa_queue_used; uint16_t *sb_index_values; uint16_t *sb_running_index; uint32_t ustorm_rx_prods_offset; uint8_t igu_sb_id; /* status block number in HW */ uint8_t fw_sb_id; /* status block number in FW */ uint32_t rx_buf_size; int mbuf_alloc_size; int state; #define BXE_FP_STATE_CLOSED 0x01 #define BXE_FP_STATE_IRQ 0x02 #define BXE_FP_STATE_OPENING 0x04 #define BXE_FP_STATE_OPEN 0x08 #define BXE_FP_STATE_HALTING 0x10 #define BXE_FP_STATE_HALTED 0x20 /* reference back to this fastpath queue number */ uint8_t index; /* this is also the 'cid' */ #define FP_IDX(fp) (fp->index) /* interrupt taskqueue (fast) */ struct task tq_task; struct taskqueue *tq; char tq_name[32]; struct task 
tx_task; struct timeout_task tx_timeout_task; /* ethernet client ID (each fastpath set of RX/TX/CQE is a client) */ uint8_t cl_id; #define FP_CL_ID(fp) (fp->cl_id) uint8_t cl_qzone_id; uint16_t fp_hc_idx; /* driver copy of the receive buffer descriptor prod/cons indices */ uint16_t rx_bd_prod; uint16_t rx_bd_cons; /* driver copy of the receive completion queue prod/cons indices */ uint16_t rx_cq_prod; uint16_t rx_cq_cons; union bxe_db_prod tx_db; /* Transmit packet producer index (used in eth_tx_bd). */ uint16_t tx_pkt_prod; uint16_t tx_pkt_cons; /* Transmit buffer descriptor producer index. */ uint16_t tx_bd_prod; uint16_t tx_bd_cons; uint64_t sge_mask[RX_SGE_MASK_LEN]; uint16_t rx_sge_prod; struct tstorm_per_queue_stats old_tclient; struct ustorm_per_queue_stats old_uclient; struct xstorm_per_queue_stats old_xclient; struct bxe_eth_q_stats eth_q_stats; struct bxe_eth_q_stats_old eth_q_stats_old; /* Pointer to the receive consumer in the status block */ uint16_t *rx_cq_cons_sb; /* Pointer to the transmit consumer in the status block */ uint16_t *tx_cons_sb; /* transmit timeout until chip reset */ int watchdog_timer; /* Free/used buffer descriptor counters. */ //uint16_t used_tx_bd; /* Last maximal completed SGE */ uint16_t last_max_sge; //uint16_t rx_sge_free_idx; //uint8_t segs; #define BXE_BR_SIZE 4096 struct buf_ring *tx_br; }; /* struct bxe_fastpath */ /* sriov XXX */ #define BXE_MAX_NUM_OF_VFS 64 #define BXE_VF_CID_WND 0 #define BXE_CIDS_PER_VF (1 << BXE_VF_CID_WND) #define BXE_CLIENTS_PER_VF 1 #define BXE_FIRST_VF_CID 256 #define BXE_VF_CIDS (BXE_MAX_NUM_OF_VFS * BXE_CIDS_PER_VF) #define BXE_VF_ID_INVALID 0xFF #define IS_SRIOV(sc) 0 #define GET_NUM_VFS_PER_PATH(sc) 0 #define GET_NUM_VFS_PER_PF(sc) 0 /* maximum number of fast-path interrupt contexts */ #define FP_SB_MAX_E1x 16 #define FP_SB_MAX_E2 HC_SB_MAX_SB_E2 union cdu_context { struct eth_context eth; char pad[1024]; }; /* CDU host DB constants */ #define CDU_ILT_PAGE_SZ_HW 2 #define CDU_ILT_PAGE_SZ (8192 << CDU_ILT_PAGE_SZ_HW) /* 32K */ #define ILT_PAGE_CIDS (CDU_ILT_PAGE_SZ / sizeof(union cdu_context)) #define CNIC_ISCSI_CID_MAX 256 #define CNIC_FCOE_CID_MAX 2048 #define CNIC_CID_MAX (CNIC_ISCSI_CID_MAX + CNIC_FCOE_CID_MAX) #define CNIC_ILT_LINES DIV_ROUND_UP(CNIC_CID_MAX, ILT_PAGE_CIDS) #define QM_ILT_PAGE_SZ_HW 0 #define QM_ILT_PAGE_SZ (4096 << QM_ILT_PAGE_SZ_HW) /* 4K */ #define QM_CID_ROUND 1024 /* TM (timers) host DB constants */ #define TM_ILT_PAGE_SZ_HW 0 #define TM_ILT_PAGE_SZ (4096 << TM_ILT_PAGE_SZ_HW) /* 4K */ /*#define TM_CONN_NUM (CNIC_STARTING_CID+CNIC_ISCSI_CXT_MAX) */ #define TM_CONN_NUM 1024 #define TM_ILT_SZ (8 * TM_CONN_NUM) #define TM_ILT_LINES DIV_ROUND_UP(TM_ILT_SZ, TM_ILT_PAGE_SZ) /* SRC (Searcher) host DB constants */ #define SRC_ILT_PAGE_SZ_HW 0 #define SRC_ILT_PAGE_SZ (4096 << SRC_ILT_PAGE_SZ_HW) /* 4K */ #define SRC_HASH_BITS 10 #define SRC_CONN_NUM (1 << SRC_HASH_BITS) /* 1024 */ #define SRC_ILT_SZ (sizeof(struct src_ent) * SRC_CONN_NUM) #define SRC_T2_SZ SRC_ILT_SZ #define SRC_ILT_LINES DIV_ROUND_UP(SRC_ILT_SZ, SRC_ILT_PAGE_SZ) struct hw_context { struct bxe_dma vcxt_dma; union cdu_context *vcxt; //bus_addr_t cxt_mapping; size_t size; }; #define SM_RX_ID 0 #define SM_TX_ID 1 /* defines for multiple tx priority indices */ #define FIRST_TX_ONLY_COS_INDEX 1 #define FIRST_TX_COS_INDEX 0 #define CID_TO_FP(cid, sc) ((cid) % BXE_NUM_NON_CNIC_QUEUES(sc)) #define HC_INDEX_ETH_RX_CQ_CONS 1 #define HC_INDEX_OOO_TX_CQ_CONS 4 #define HC_INDEX_ETH_TX_CQ_CONS_COS0 5 #define HC_INDEX_ETH_TX_CQ_CONS_COS1 6 #define 
HC_INDEX_ETH_TX_CQ_CONS_COS2 7 #define HC_INDEX_ETH_FIRST_TX_CQ_CONS HC_INDEX_ETH_TX_CQ_CONS_COS0 /* congestion management fairness mode */ #define CMNG_FNS_NONE 0 #define CMNG_FNS_MINMAX 1 /* CMNG constants, as derived from system spec calculations */ /* default MIN rate in case VNIC min rate is configured to zero - 100Mbps */ #define DEF_MIN_RATE 100 /* resolution of the rate shaping timer - 400 usec */ #define RS_PERIODIC_TIMEOUT_USEC 400 /* number of bytes in single QM arbitration cycle - * coefficient for calculating the fairness timer */ #define QM_ARB_BYTES 160000 /* resolution of Min algorithm 1:100 */ #define MIN_RES 100 /* how many bytes above threshold for the minimal credit of Min algorithm*/ #define MIN_ABOVE_THRESH 32768 /* fairness algorithm integration time coefficient - * for calculating the actual Tfair */ #define T_FAIR_COEF ((MIN_ABOVE_THRESH + QM_ARB_BYTES) * 8 * MIN_RES) /* memory of fairness algorithm - 2 cycles */ #define FAIR_MEM 2 #define HC_SEG_ACCESS_DEF 0 /* Driver decision 0-3 */ #define HC_SEG_ACCESS_ATTN 4 #define HC_SEG_ACCESS_NORM 0 /* Driver decision 0-1 */ /* * The total number of L2 queues, MSIX vectors and HW contexts (CIDs) is * control by the number of fast-path status blocks supported by the * device (HW/FW). Each fast-path status block (FP-SB) aka non-default * status block represents an independent interrupts context that can * serve a regular L2 networking queue. However special L2 queues such * as the FCoE queue do not require a FP-SB and other components like * the CNIC may consume FP-SB reducing the number of possible L2 queues * * If the maximum number of FP-SB available is X then: * a. If CNIC is supported it consumes 1 FP-SB thus the max number of * regular L2 queues is Y=X-1 * b. in MF mode the actual number of L2 queues is Y= (X-1/MF_factor) * c. If the FCoE L2 queue is supported the actual number of L2 queues * is Y+1 * d. The number of irqs (MSIX vectors) is either Y+1 (one extra for * slow-path interrupts) or Y+2 if CNIC is supported (one additional * FP interrupt context for the CNIC). * e. The number of HW context (CID count) is always X or X+1 if FCoE * L2 queue is supported. the cid for the FCoE L2 queue is always X. * * So this is quite simple for now as no ULPs are supported yet. 
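/*
 * [Editor's sketch: not part of the patch.]  The FP-SB accounting described
 * above in plainer terms: given X fast-path status blocks, cases (a) and
 * (d) of the comment reduce to the two helpers below.  Simplified (no
 * multi-function divisor, no FCoE queue); the example_* names are
 * hypothetical.
 */
static inline int
example_num_l2_queues(int num_fp_sb, int cnic_supported)
{
	/* CNIC, when supported, consumes one FP-SB of its own. */
	return (cnic_supported ? num_fp_sb - 1 : num_fp_sb);
}

static inline int
example_num_msix_vectors(int num_fp_sb, int cnic_supported)
{
	/* one vector per L2 queue, +1 slow path, +1 more for the CNIC SB */
	return (example_num_l2_queues(num_fp_sb, cnic_supported) + 1 +
	    (cnic_supported ? 1 : 0));
}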
:-) */ #define BXE_NUM_QUEUES(sc) ((sc)->num_queues) #define BXE_NUM_ETH_QUEUES(sc) BXE_NUM_QUEUES(sc) #define BXE_NUM_NON_CNIC_QUEUES(sc) BXE_NUM_QUEUES(sc) #define BXE_NUM_RX_QUEUES(sc) BXE_NUM_QUEUES(sc) #define FOR_EACH_QUEUE(sc, var) \ for ((var) = 0; (var) < BXE_NUM_QUEUES(sc); (var)++) #define FOR_EACH_NONDEFAULT_QUEUE(sc, var) \ for ((var) = 1; (var) < BXE_NUM_QUEUES(sc); (var)++) #define FOR_EACH_ETH_QUEUE(sc, var) \ for ((var) = 0; (var) < BXE_NUM_ETH_QUEUES(sc); (var)++) #define FOR_EACH_NONDEFAULT_ETH_QUEUE(sc, var) \ for ((var) = 1; (var) < BXE_NUM_ETH_QUEUES(sc); (var)++) #define FOR_EACH_COS_IN_TX_QUEUE(sc, var) \ for ((var) = 0; (var) < (sc)->max_cos; (var)++) #define FOR_EACH_CNIC_QUEUE(sc, var) \ for ((var) = BXE_NUM_ETH_QUEUES(sc); \ (var) < BXE_NUM_QUEUES(sc); \ (var)++) enum { OOO_IDX_OFFSET, FCOE_IDX_OFFSET, FWD_IDX_OFFSET, }; #define FCOE_IDX(sc) (BXE_NUM_NON_CNIC_QUEUES(sc) + FCOE_IDX_OFFSET) #define bxe_fcoe_fp(sc) (&sc->fp[FCOE_IDX(sc)]) #define bxe_fcoe(sc, var) (bxe_fcoe_fp(sc)->var) #define bxe_fcoe_inner_sp_obj(sc) (&sc->sp_objs[FCOE_IDX(sc)]) #define bxe_fcoe_sp_obj(sc, var) (bxe_fcoe_inner_sp_obj(sc)->var) #define bxe_fcoe_tx(sc, var) (bxe_fcoe_fp(sc)->txdata_ptr[FIRST_TX_COS_INDEX]->var) #define OOO_IDX(sc) (BXE_NUM_NON_CNIC_QUEUES(sc) + OOO_IDX_OFFSET) #define bxe_ooo_fp(sc) (&sc->fp[OOO_IDX(sc)]) #define bxe_ooo(sc, var) (bxe_ooo_fp(sc)->var) #define bxe_ooo_inner_sp_obj(sc) (&sc->sp_objs[OOO_IDX(sc)]) #define bxe_ooo_sp_obj(sc, var) (bxe_ooo_inner_sp_obj(sc)->var) #define FWD_IDX(sc) (BXE_NUM_NON_CNIC_QUEUES(sc) + FWD_IDX_OFFSET) #define bxe_fwd_fp(sc) (&sc->fp[FWD_IDX(sc)]) #define bxe_fwd(sc, var) (bxe_fwd_fp(sc)->var) #define bxe_fwd_inner_sp_obj(sc) (&sc->sp_objs[FWD_IDX(sc)]) #define bxe_fwd_sp_obj(sc, var) (bxe_fwd_inner_sp_obj(sc)->var) #define bxe_fwd_txdata(fp) (fp->txdata_ptr[FIRST_TX_COS_INDEX]) #define IS_ETH_FP(fp) ((fp)->index < BXE_NUM_ETH_QUEUES((fp)->sc)) #define IS_FCOE_FP(fp) ((fp)->index == FCOE_IDX((fp)->sc)) #define IS_FCOE_IDX(idx) ((idx) == FCOE_IDX(sc)) #define IS_FWD_FP(fp) ((fp)->index == FWD_IDX((fp)->sc)) #define IS_FWD_IDX(idx) ((idx) == FWD_IDX(sc)) #define IS_OOO_FP(fp) ((fp)->index == OOO_IDX((fp)->sc)) #define IS_OOO_IDX(idx) ((idx) == OOO_IDX(sc)) enum { BXE_PORT_QUERY_IDX, BXE_PF_QUERY_IDX, BXE_FCOE_QUERY_IDX, BXE_FIRST_QUEUE_QUERY_IDX, }; struct bxe_fw_stats_req { struct stats_query_header hdr; struct stats_query_entry query[FP_SB_MAX_E1x + BXE_FIRST_QUEUE_QUERY_IDX]; }; struct bxe_fw_stats_data { struct stats_counter storm_counters; struct per_port_stats port; struct per_pf_stats pf; //struct fcoe_statistics_params fcoe; struct per_queue_stats queue_stats[1]; }; /* IGU MSIX STATISTICS on 57712: 64 for VFs; 4 for PFs; 4 for Attentions */ #define BXE_IGU_STAS_MSG_VF_CNT 64 #define BXE_IGU_STAS_MSG_PF_CNT 4 #define MAX_DMAE_C 8 /* * For the main interface up/down code paths, a not-so-fine-grained CORE * mutex lock is used. Inside this code are various calls to kernel routines * that can cause a sleep to occur. Namely memory allocations and taskqueue * handling. If using an MTX lock we are *not* allowed to sleep but we can * with an SX lock. This define forces the CORE lock to use and SX lock. * Undefine this and an MTX lock will be used instead. Note that the IOCTL * path can cause problems since it's called by a non-sleepable thread. To * alleviate a potential sleep, any IOCTL processing that results in the * chip/interface being started/stopped/reinitialized, the actual work is * offloaded to a taskqueue. 
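/*
 * [Editor's sketch: not part of the patch.]  Why the CORE lock defined just
 * below defaults to an sx(9) lock: paths holding it may perform M_WAITOK
 * allocations and taskqueue drains, which can sleep; that is legal under an
 * sx lock but not under a regular mutex.  Minimal self-contained example
 * with hypothetical example_* names.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sx.h>

static struct sx example_core_sx;
MALLOC_DEFINE(M_EXAMPLE, "example", "example core-path buffers");

static void
example_core_init(void)
{
	sx_init(&example_core_sx, "example core lock");
}

static void *
example_core_alloc(size_t len)
{
	void *p;

	sx_xlock(&example_core_sx);
	/* M_WAITOK may sleep; allowed while holding an sx lock. */
	p = malloc(len, M_EXAMPLE, M_WAITOK | M_ZERO);
	sx_xunlock(&example_core_sx);

	return (p);
}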
*/ #define BXE_CORE_LOCK_SX /* * This is the slowpath data structure. It is mapped into non-paged memory * so that the hardware can access it's contents directly and must be page * aligned. */ struct bxe_slowpath { /* used by the DMAE command executer */ struct dmae_cmd dmae[MAX_DMAE_C]; /* statistics completion */ uint32_t stats_comp; /* firmware defined statistics blocks */ union mac_stats mac_stats; struct nig_stats nig_stats; struct host_port_stats port_stats; struct host_func_stats func_stats; //struct host_func_stats func_stats_base; /* DMAE completion value and data source/sink */ uint32_t wb_comp; uint32_t wb_data[4]; union { struct mac_configuration_cmd e1x; struct eth_classify_rules_ramrod_data e2; } mac_rdata; union { struct tstorm_eth_mac_filter_config e1x; struct eth_filter_rules_ramrod_data e2; } rx_mode_rdata; struct eth_rss_update_ramrod_data rss_rdata; union { struct mac_configuration_cmd e1; struct eth_multicast_rules_ramrod_data e2; } mcast_rdata; union { struct function_start_data func_start; struct flow_control_configuration pfc_config; /* for DCBX ramrod */ } func_rdata; /* Queue State related ramrods */ union { struct client_init_ramrod_data init_data; struct client_update_ramrod_data update_data; } q_rdata; /* * AFEX ramrod can not be a part of func_rdata union because these * events might arrive in parallel to other events from func_rdata. * If they were defined in the same union the data can get corrupted. */ struct afex_vif_list_ramrod_data func_afex_rdata; union drv_info_to_mcp drv_info_to_mcp; }; /* struct bxe_slowpath */ /* * Port specifc data structure. */ struct bxe_port { /* * Port Management Function (for 57711E only). * When this field is set the driver instance is * responsible for managing port specifc * configurations such as handling link attentions. */ uint32_t pmf; /* Ethernet maximum transmission unit. */ uint16_t ether_mtu; uint32_t link_config[ELINK_LINK_CONFIG_SIZE]; uint32_t ext_phy_config; /* Port feature config.*/ uint32_t config; /* Defines the features supported by the PHY. */ uint32_t supported[ELINK_LINK_CONFIG_SIZE]; /* Defines the features advertised by the PHY. */ uint32_t advertising[ELINK_LINK_CONFIG_SIZE]; #define ADVERTISED_10baseT_Half (1 << 1) #define ADVERTISED_10baseT_Full (1 << 2) #define ADVERTISED_100baseT_Half (1 << 3) #define ADVERTISED_100baseT_Full (1 << 4) #define ADVERTISED_1000baseT_Half (1 << 5) #define ADVERTISED_1000baseT_Full (1 << 6) #define ADVERTISED_TP (1 << 7) #define ADVERTISED_FIBRE (1 << 8) #define ADVERTISED_Autoneg (1 << 9) #define ADVERTISED_Asym_Pause (1 << 10) #define ADVERTISED_Pause (1 << 11) #define ADVERTISED_2500baseX_Full (1 << 15) #define ADVERTISED_10000baseT_Full (1 << 16) uint32_t phy_addr; /* Used to synchronize phy accesses. */ struct mtx phy_mtx; char phy_mtx_name[32]; #define BXE_PHY_LOCK(sc) mtx_lock(&sc->port.phy_mtx) #define BXE_PHY_UNLOCK(sc) mtx_unlock(&sc->port.phy_mtx) #define BXE_PHY_LOCK_ASSERT(sc) mtx_assert(&sc->port.phy_mtx, MA_OWNED) /* * MCP scratchpad address for port specific statistics. * The device is responsible for writing statistcss * back to the MCP for use with management firmware such * as UMP/NC-SI. 
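/*
 * [Editor's sketch: not part of the patch.]  The port_stx scratchpad just
 * below is one of the shared-memory addresses the driver forms as a base
 * plus a structure offset (compare the SHMEM_ADDR() macro further down).
 * Minimal stand-alone illustration; the example_shmem_region layout is
 * hypothetical.
 */
#include <stddef.h>
#include <stdint.h>

struct example_shmem_region {
	uint32_t	validity;
	uint32_t	port_stats_addr;	/* scratchpad pointer */
};

static inline uint32_t
example_shmem_addr(uint32_t shmem_base, size_t field_offset)
{
	/* mirrors: base + offsetof(struct shmem_region, field) */
	return (shmem_base + (uint32_t)field_offset);
}

/* usage: example_shmem_addr(base,
 *     offsetof(struct example_shmem_region, port_stats_addr)); */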
*/ uint32_t port_stx; struct nig_stats old_nig_stats; }; /* struct bxe_port */ struct bxe_mf_info { uint32_t mf_config[E1HVN_MAX]; uint32_t vnics_per_port; /* 1, 2 or 4 */ uint32_t multi_vnics_mode; /* can be set even if vnics_per_port = 1 */ uint32_t path_has_ovlan; /* MF mode in the path (can be different than the MF mode of the function */ #define IS_MULTI_VNIC(sc) ((sc)->devinfo.mf_info.multi_vnics_mode) #define VNICS_PER_PORT(sc) ((sc)->devinfo.mf_info.vnics_per_port) #define VNICS_PER_PATH(sc) \ ((sc)->devinfo.mf_info.vnics_per_port * \ ((CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 1 )) uint8_t min_bw[MAX_VNIC_NUM]; uint8_t max_bw[MAX_VNIC_NUM]; uint16_t ext_id; /* vnic outer vlan or VIF ID */ #define VALID_OVLAN(ovlan) ((ovlan) <= 4096) #define INVALID_VIF_ID 0xFFFF #define OVLAN(sc) ((sc)->devinfo.mf_info.ext_id) #define VIF_ID(sc) ((sc)->devinfo.mf_info.ext_id) uint16_t default_vlan; #define NIV_DEFAULT_VLAN(sc) ((sc)->devinfo.mf_info.default_vlan) uint8_t niv_allowed_priorities; #define NIV_ALLOWED_PRIORITIES(sc) ((sc)->devinfo.mf_info.niv_allowed_priorities) uint8_t niv_default_cos; #define NIV_DEFAULT_COS(sc) ((sc)->devinfo.mf_info.niv_default_cos) uint8_t niv_mba_enabled; enum mf_cfg_afex_vlan_mode afex_vlan_mode; #define AFEX_VLAN_MODE(sc) ((sc)->devinfo.mf_info.afex_vlan_mode) int afex_def_vlan_tag; uint32_t pending_max; uint16_t flags; #define MF_INFO_VALID_MAC 0x0001 uint8_t mf_mode; /* Switch-Dependent or Switch-Independent */ #define IS_MF(sc) \ (IS_MULTI_VNIC(sc) && \ ((sc)->devinfo.mf_info.mf_mode != 0)) #define IS_MF_SD(sc) \ (IS_MULTI_VNIC(sc) && \ ((sc)->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SD)) #define IS_MF_SI(sc) \ (IS_MULTI_VNIC(sc) && \ ((sc)->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SI)) #define IS_MF_AFEX(sc) \ (IS_MULTI_VNIC(sc) && \ ((sc)->devinfo.mf_info.mf_mode == MULTI_FUNCTION_AFEX)) #define IS_MF_SD_MODE(sc) IS_MF_SD(sc) #define IS_MF_SI_MODE(sc) IS_MF_SI(sc) #define IS_MF_AFEX_MODE(sc) IS_MF_AFEX(sc) uint32_t mf_protos_supported; #define MF_PROTO_SUPPORT_ETHERNET 0x1 #define MF_PROTO_SUPPORT_ISCSI 0x2 #define MF_PROTO_SUPPORT_FCOE 0x4 }; /* struct bxe_mf_info */ /* Device information data structure. 
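/*
 * [Editor's sketch: not part of the patch.]  A stand-alone decoder for the
 * chip_id layout documented just below (number in bits 16-31, revision in
 * 12-15, metal in 4-11, bond ID in 0-3).  The driver's CHIP_REV()/
 * CHIP_METAL()/CHIP_BOND_ID() macros keep the fields masked in place; here
 * they are shifted down for printing.  The sample value is a hypothetical
 * 57810 (0x168e) at revision Bx.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t chip_id = 0x168e1000;

	printf("num=0x%x rev=0x%x metal=0x%x bond=0x%x\n",
	    chip_id >> 16,		/* chip number */
	    (chip_id >> 12) & 0xf,	/* revision (Ax=0, Bx=1, Cx=2) */
	    (chip_id >> 4) & 0xff,	/* metal */
	    chip_id & 0xf);		/* bond ID */
	return (0);
}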
*/ struct bxe_devinfo { /* PCIe info */ uint16_t vendor_id; uint16_t device_id; uint16_t subvendor_id; uint16_t subdevice_id; /* * chip_id = 0b'CCCCCCCCCCCCCCCCRRRRMMMMMMMMBBBB' * C = Chip Number (bits 16-31) * R = Chip Revision (bits 12-15) * M = Chip Metal (bits 4-11) * B = Chip Bond ID (bits 0-3) */ uint32_t chip_id; #define CHIP_ID(sc) ((sc)->devinfo.chip_id & 0xffff0000) #define CHIP_NUM(sc) ((sc)->devinfo.chip_id >> 16) /* device ids */ #define CHIP_NUM_57710 0x164e #define CHIP_NUM_57711 0x164f #define CHIP_NUM_57711E 0x1650 #define CHIP_NUM_57712 0x1662 #define CHIP_NUM_57712_MF 0x1663 #define CHIP_NUM_57712_VF 0x166f #define CHIP_NUM_57800 0x168a #define CHIP_NUM_57800_MF 0x16a5 #define CHIP_NUM_57800_VF 0x16a9 #define CHIP_NUM_57810 0x168e #define CHIP_NUM_57810_MF 0x16ae #define CHIP_NUM_57810_VF 0x16af #define CHIP_NUM_57811 0x163d #define CHIP_NUM_57811_MF 0x163e #define CHIP_NUM_57811_VF 0x163f #define CHIP_NUM_57840_OBS 0x168d #define CHIP_NUM_57840_OBS_MF 0x16ab #define CHIP_NUM_57840_4_10 0x16a1 #define CHIP_NUM_57840_2_20 0x16a2 #define CHIP_NUM_57840_MF 0x16a4 #define CHIP_NUM_57840_VF 0x16ad #define CHIP_REV_SHIFT 12 #define CHIP_REV_MASK (0xF << CHIP_REV_SHIFT) #define CHIP_REV(sc) ((sc)->devinfo.chip_id & CHIP_REV_MASK) #define CHIP_REV_Ax (0x0 << CHIP_REV_SHIFT) #define CHIP_REV_Bx (0x1 << CHIP_REV_SHIFT) #define CHIP_REV_Cx (0x2 << CHIP_REV_SHIFT) #define CHIP_REV_IS_SLOW(sc) \ (CHIP_REV(sc) > 0x00005000) #define CHIP_REV_IS_FPGA(sc) \ (CHIP_REV_IS_SLOW(sc) && (CHIP_REV(sc) & 0x00001000)) #define CHIP_REV_IS_EMUL(sc) \ (CHIP_REV_IS_SLOW(sc) && !(CHIP_REV(sc) & 0x00001000)) #define CHIP_REV_IS_ASIC(sc) \ (!CHIP_REV_IS_SLOW(sc)) #define CHIP_METAL(sc) ((sc->devinfo.chip_id) & 0x00000ff0) #define CHIP_BOND_ID(sc) ((sc->devinfo.chip_id) & 0x0000000f) #define CHIP_IS_E1(sc) (CHIP_NUM(sc) == CHIP_NUM_57710) #define CHIP_IS_57710(sc) (CHIP_NUM(sc) == CHIP_NUM_57710) #define CHIP_IS_57711(sc) (CHIP_NUM(sc) == CHIP_NUM_57711) #define CHIP_IS_57711E(sc) (CHIP_NUM(sc) == CHIP_NUM_57711E) #define CHIP_IS_E1H(sc) ((CHIP_IS_57711(sc)) || \ (CHIP_IS_57711E(sc))) #define CHIP_IS_E1x(sc) (CHIP_IS_E1((sc)) || \ CHIP_IS_E1H((sc))) #define CHIP_IS_57712(sc) (CHIP_NUM(sc) == CHIP_NUM_57712) #define CHIP_IS_57712_MF(sc) (CHIP_NUM(sc) == CHIP_NUM_57712_MF) #define CHIP_IS_57712_VF(sc) (CHIP_NUM(sc) == CHIP_NUM_57712_VF) #define CHIP_IS_E2(sc) (CHIP_IS_57712(sc) || \ CHIP_IS_57712_MF(sc)) #define CHIP_IS_57800(sc) (CHIP_NUM(sc) == CHIP_NUM_57800) #define CHIP_IS_57800_MF(sc) (CHIP_NUM(sc) == CHIP_NUM_57800_MF) #define CHIP_IS_57800_VF(sc) (CHIP_NUM(sc) == CHIP_NUM_57800_VF) #define CHIP_IS_57810(sc) (CHIP_NUM(sc) == CHIP_NUM_57810) #define CHIP_IS_57810_MF(sc) (CHIP_NUM(sc) == CHIP_NUM_57810_MF) #define CHIP_IS_57810_VF(sc) (CHIP_NUM(sc) == CHIP_NUM_57810_VF) #define CHIP_IS_57811(sc) (CHIP_NUM(sc) == CHIP_NUM_57811) #define CHIP_IS_57811_MF(sc) (CHIP_NUM(sc) == CHIP_NUM_57811_MF) #define CHIP_IS_57811_VF(sc) (CHIP_NUM(sc) == CHIP_NUM_57811_VF) #define CHIP_IS_57840(sc) ((CHIP_NUM(sc) == CHIP_NUM_57840_OBS) || \ (CHIP_NUM(sc) == CHIP_NUM_57840_4_10) || \ (CHIP_NUM(sc) == CHIP_NUM_57840_2_20)) #define CHIP_IS_57840_MF(sc) ((CHIP_NUM(sc) == CHIP_NUM_57840_OBS_MF) || \ (CHIP_NUM(sc) == CHIP_NUM_57840_MF)) #define CHIP_IS_57840_VF(sc) (CHIP_NUM(sc) == CHIP_NUM_57840_VF) #define CHIP_IS_E3(sc) (CHIP_IS_57800(sc) || \ CHIP_IS_57800_MF(sc) || \ CHIP_IS_57800_VF(sc) || \ CHIP_IS_57810(sc) || \ CHIP_IS_57810_MF(sc) || \ CHIP_IS_57810_VF(sc) || \ CHIP_IS_57811(sc) || \ CHIP_IS_57811_MF(sc) || \ 
CHIP_IS_57811_VF(sc) || \ CHIP_IS_57840(sc) || \ CHIP_IS_57840_MF(sc) || \ CHIP_IS_57840_VF(sc)) #define CHIP_IS_E3A0(sc) (CHIP_IS_E3(sc) && \ (CHIP_REV(sc) == CHIP_REV_Ax)) #define CHIP_IS_E3B0(sc) (CHIP_IS_E3(sc) && \ (CHIP_REV(sc) == CHIP_REV_Bx)) #define USES_WARPCORE(sc) (CHIP_IS_E3(sc)) #define CHIP_IS_E2E3(sc) (CHIP_IS_E2(sc) || \ CHIP_IS_E3(sc)) #define CHIP_IS_MF_CAP(sc) (CHIP_IS_57711E(sc) || \ CHIP_IS_57712_MF(sc) || \ CHIP_IS_E3(sc)) #define IS_VF(sc) (CHIP_IS_57712_VF(sc) || \ CHIP_IS_57800_VF(sc) || \ CHIP_IS_57810_VF(sc) || \ CHIP_IS_57840_VF(sc)) #define IS_PF(sc) (!IS_VF(sc)) /* * This define is used in two main places: * 1. In the early stages of nic_load, to know if to configure Parser/Searcher * to nic-only mode or to offload mode. Offload mode is configured if either * the chip is E1x (where NIC_MODE register is not applicable), or if cnic * already registered for this port (which means that the user wants storage * services). * 2. During cnic-related load, to know if offload mode is already configured * in the HW or needs to be configrued. Since the transition from nic-mode to * offload-mode in HW causes traffic coruption, nic-mode is configured only * in ports on which storage services where never requested. */ #define CONFIGURE_NIC_MODE(sc) (!CHIP_IS_E1x(sc) && !CNIC_ENABLED(sc)) uint8_t chip_port_mode; #define CHIP_4_PORT_MODE 0x0 #define CHIP_2_PORT_MODE 0x1 #define CHIP_PORT_MODE_NONE 0x2 #define CHIP_PORT_MODE(sc) ((sc)->devinfo.chip_port_mode) #define CHIP_IS_MODE_4_PORT(sc) (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) uint8_t int_block; #define INT_BLOCK_HC 0 #define INT_BLOCK_IGU 1 #define INT_BLOCK_MODE_NORMAL 0 #define INT_BLOCK_MODE_BW_COMP 2 #define CHIP_INT_MODE_IS_NBC(sc) \ (!CHIP_IS_E1x(sc) && \ !((sc)->devinfo.int_block & INT_BLOCK_MODE_BW_COMP)) #define CHIP_INT_MODE_IS_BC(sc) (!CHIP_INT_MODE_IS_NBC(sc)) uint32_t shmem_base; uint32_t shmem2_base; uint32_t bc_ver; char bc_ver_str[32]; uint32_t mf_cfg_base; /* bootcode shmem address in BAR memory */ struct bxe_mf_info mf_info; int flash_size; #define NVRAM_1MB_SIZE 0x20000 #define NVRAM_TIMEOUT_COUNT 30000 #define NVRAM_PAGE_SIZE 256 /* PCIe capability information */ uint32_t pcie_cap_flags; #define BXE_PM_CAPABLE_FLAG 0x00000001 #define BXE_PCIE_CAPABLE_FLAG 0x00000002 #define BXE_MSI_CAPABLE_FLAG 0x00000004 #define BXE_MSIX_CAPABLE_FLAG 0x00000008 uint16_t pcie_pm_cap_reg; uint16_t pcie_pcie_cap_reg; //uint16_t pcie_devctl; uint16_t pcie_link_width; uint16_t pcie_link_speed; uint16_t pcie_msi_cap_reg; uint16_t pcie_msix_cap_reg; /* device configuration read from bootcode shared memory */ uint32_t hw_config; uint32_t hw_config2; }; /* struct bxe_devinfo */ struct bxe_sp_objs { struct ecore_vlan_mac_obj mac_obj; /* MACs object */ struct ecore_queue_sp_obj q_obj; /* Queue State object */ }; /* struct bxe_sp_objs */ /* * Data that will be used to create a link report message. We will keep the * data used for the last link report in order to prevent reporting the same * link parameters twice. */ struct bxe_link_report_data { uint16_t line_speed; /* Effective line speed */ unsigned long link_report_flags; /* BXE_LINK_REPORT_XXX flags */ }; enum { BXE_LINK_REPORT_FULL_DUPLEX, BXE_LINK_REPORT_LINK_DOWN, BXE_LINK_REPORT_RX_FC_ON, BXE_LINK_REPORT_TX_FC_ON }; /* Top level device private data structure. */ struct bxe_softc { /* * First entry must be a pointer to the BSD ifnet struct which * has a first element of 'void *if_softc' (which is us). 
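/*
 * [Editor's sketch: not part of the patch.]  The ifp member just below ties
 * the softc to its network interface; in current FreeBSD the linkage is set
 * and recovered through accessors rather than by relying on member layout.
 * Minimal illustration with a hypothetical example_softc.
 */
#include <sys/param.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

struct example_softc {
	if_t	ifp;
};

static void
example_ifnet_wire(struct example_softc *sc, if_t ifp)
{
	sc->ifp = ifp;
	if_setsoftc(ifp, sc);		/* interface -> driver state */
}

static struct example_softc *
example_softc_of(if_t ifp)
{
	return (if_getsoftc(ifp));	/* driver state back from interface */
}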
XXX */ if_t ifp; struct ifmedia ifmedia; /* network interface media structure */ int media; volatile int state; /* device state */ #define BXE_STATE_CLOSED 0x0000 #define BXE_STATE_OPENING_WAITING_LOAD 0x1000 #define BXE_STATE_OPENING_WAITING_PORT 0x2000 #define BXE_STATE_OPEN 0x3000 #define BXE_STATE_CLOSING_WAITING_HALT 0x4000 #define BXE_STATE_CLOSING_WAITING_DELETE 0x5000 #define BXE_STATE_CLOSING_WAITING_UNLOAD 0x6000 #define BXE_STATE_DISABLED 0xD000 #define BXE_STATE_DIAG 0xE000 #define BXE_STATE_ERROR 0xF000 int flags; #define BXE_ONE_PORT_FLAG 0x00000001 #define BXE_NO_ISCSI 0x00000002 #define BXE_NO_FCOE 0x00000004 #define BXE_ONE_PORT(sc) (sc->flags & BXE_ONE_PORT_FLAG) //#define BXE_NO_WOL_FLAG 0x00000008 //#define BXE_USING_DAC_FLAG 0x00000010 //#define BXE_USING_MSIX_FLAG 0x00000020 //#define BXE_USING_MSI_FLAG 0x00000040 //#define BXE_DISABLE_MSI_FLAG 0x00000080 #define BXE_NO_MCP_FLAG 0x00000200 #define BXE_NOMCP(sc) (sc->flags & BXE_NO_MCP_FLAG) //#define BXE_SAFC_TX_FLAG 0x00000400 #define BXE_MF_FUNC_DIS 0x00000800 #define BXE_TX_SWITCHING 0x00001000 #define BXE_NO_PULSE 0x00002000 unsigned long debug; /* per-instance debug logging config */ #define MAX_BARS 5 struct bxe_bar bar[MAX_BARS]; /* map BARs 0, 2, 4 */ uint16_t doorbell_size; /* periodic timer callout */ #define PERIODIC_STOP 0 #define PERIODIC_GO 1 volatile unsigned long periodic_flags; struct callout periodic_callout; /* chip start/stop/reset taskqueue */ #define CHIP_TQ_NONE 0 #define CHIP_TQ_START 1 #define CHIP_TQ_STOP 2 #define CHIP_TQ_REINIT 3 volatile unsigned long chip_tq_flags; struct task chip_tq_task; struct taskqueue *chip_tq; char chip_tq_name[32]; struct timeout_task sp_err_timeout_task; /* slowpath interrupt taskqueue */ struct task sp_tq_task; struct taskqueue *sp_tq; char sp_tq_name[32]; struct bxe_fastpath fp[MAX_RSS_CHAINS]; struct bxe_sp_objs sp_objs[MAX_RSS_CHAINS]; device_t dev; /* parent device handle */ uint8_t unit; /* driver instance number */ int pcie_bus; /* PCIe bus number */ int pcie_device; /* PCIe device/slot number */ int pcie_func; /* PCIe function number */ uint8_t pfunc_rel; /* function relative */ uint8_t pfunc_abs; /* function absolute */ uint8_t path_id; /* function absolute */ #define SC_PATH(sc) (sc->path_id) #define SC_PORT(sc) (sc->pfunc_rel & 1) #define SC_FUNC(sc) (sc->pfunc_rel) #define SC_ABS_FUNC(sc) (sc->pfunc_abs) #define SC_VN(sc) (sc->pfunc_rel >> 1) #define SC_L_ID(sc) (SC_VN(sc) << 2) #define PORT_ID(sc) SC_PORT(sc) #define PATH_ID(sc) SC_PATH(sc) #define VNIC_ID(sc) SC_VN(sc) #define FUNC_ID(sc) SC_FUNC(sc) #define ABS_FUNC_ID(sc) SC_ABS_FUNC(sc) #define SC_FW_MB_IDX_VN(sc, vn) \ (SC_PORT(sc) + (vn) * \ ((CHIP_IS_E1x(sc) || (CHIP_IS_MODE_4_PORT(sc))) ? 
2 : 1)) #define SC_FW_MB_IDX(sc) SC_FW_MB_IDX_VN(sc, SC_VN(sc)) int if_capen; /* enabled interface capabilities */ struct bxe_devinfo devinfo; char fw_ver_str[32]; char mf_mode_str[32]; char pci_link_str[32]; const struct iro *iro_array; #ifdef BXE_CORE_LOCK_SX struct sx core_sx; char core_sx_name[32]; #else struct mtx core_mtx; char core_mtx_name[32]; #endif struct mtx sp_mtx; char sp_mtx_name[32]; struct mtx dmae_mtx; char dmae_mtx_name[32]; struct mtx fwmb_mtx; char fwmb_mtx_name[32]; struct mtx print_mtx; char print_mtx_name[32]; struct mtx stats_mtx; char stats_mtx_name[32]; struct mtx mcast_mtx; char mcast_mtx_name[32]; #ifdef BXE_CORE_LOCK_SX #define BXE_CORE_TRYLOCK(sc) sx_try_xlock(&sc->core_sx) #define BXE_CORE_LOCK(sc) sx_xlock(&sc->core_sx) #define BXE_CORE_UNLOCK(sc) sx_xunlock(&sc->core_sx) #define BXE_CORE_LOCK_ASSERT(sc) sx_assert(&sc->core_sx, SA_XLOCKED) #else #define BXE_CORE_TRYLOCK(sc) mtx_trylock(&sc->core_mtx) #define BXE_CORE_LOCK(sc) mtx_lock(&sc->core_mtx) #define BXE_CORE_UNLOCK(sc) mtx_unlock(&sc->core_mtx) #define BXE_CORE_LOCK_ASSERT(sc) mtx_assert(&sc->core_mtx, MA_OWNED) #endif #define BXE_SP_LOCK(sc) mtx_lock(&sc->sp_mtx) #define BXE_SP_UNLOCK(sc) mtx_unlock(&sc->sp_mtx) #define BXE_SP_LOCK_ASSERT(sc) mtx_assert(&sc->sp_mtx, MA_OWNED) #define BXE_DMAE_LOCK(sc) mtx_lock(&sc->dmae_mtx) #define BXE_DMAE_UNLOCK(sc) mtx_unlock(&sc->dmae_mtx) #define BXE_DMAE_LOCK_ASSERT(sc) mtx_assert(&sc->dmae_mtx, MA_OWNED) #define BXE_FWMB_LOCK(sc) mtx_lock(&sc->fwmb_mtx) #define BXE_FWMB_UNLOCK(sc) mtx_unlock(&sc->fwmb_mtx) #define BXE_FWMB_LOCK_ASSERT(sc) mtx_assert(&sc->fwmb_mtx, MA_OWNED) #define BXE_PRINT_LOCK(sc) mtx_lock(&sc->print_mtx) #define BXE_PRINT_UNLOCK(sc) mtx_unlock(&sc->print_mtx) #define BXE_PRINT_LOCK_ASSERT(sc) mtx_assert(&sc->print_mtx, MA_OWNED) #define BXE_STATS_LOCK(sc) mtx_lock(&sc->stats_mtx) #define BXE_STATS_UNLOCK(sc) mtx_unlock(&sc->stats_mtx) #define BXE_STATS_LOCK_ASSERT(sc) mtx_assert(&sc->stats_mtx, MA_OWNED) #define BXE_MCAST_LOCK(sc) mtx_lock(&sc->mcast_mtx); #define BXE_MCAST_UNLOCK(sc) mtx_unlock(&sc->mcast_mtx); #define BXE_MCAST_LOCK_ASSERT(sc) mtx_assert(&sc->mcast_mtx, MA_OWNED) int dmae_ready; #define DMAE_READY(sc) (sc->dmae_ready) struct ecore_credit_pool_obj vlans_pool; struct ecore_credit_pool_obj macs_pool; struct ecore_rx_mode_obj rx_mode_obj; struct ecore_mcast_obj mcast_obj; struct ecore_rss_config_obj rss_conf_obj; struct ecore_func_sp_obj func_obj; uint16_t fw_seq; uint16_t fw_drv_pulse_wr_seq; uint32_t func_stx; struct elink_params link_params; struct elink_vars link_vars; uint32_t link_cnt; struct bxe_link_report_data last_reported_link; char mac_addr_str[32]; int last_reported_link_state; int tx_ring_size; int rx_ring_size; int wol; int is_leader; int recovery_state; #define BXE_RECOVERY_DONE 1 #define BXE_RECOVERY_INIT 2 #define BXE_RECOVERY_WAIT 3 #define BXE_RECOVERY_FAILED 4 #define BXE_RECOVERY_NIC_LOADING 5 #define BXE_ERR_TXQ_STUCK 0x1 /* Tx queue stuck detected by driver. */ #define BXE_ERR_MISC 0x2 /* MISC ERR */ #define BXE_ERR_PARITY 0x4 /* Parity error detected. */ #define BXE_ERR_STATS_TO 0x8 /* Statistics timeout detected. */ #define BXE_ERR_MC_ASSERT 0x10 /* MC assert attention received. */ #define BXE_ERR_PANIC 0x20 /* Driver asserted. */ #define BXE_ERR_MCP_ASSERT 0x40 /* MCP assert attention received. 
No Recovery*/ #define BXE_ERR_GLOBAL 0x80 /* PCIe/PXP/IGU/MISC/NIG device blocks error- needs PCIe/Fundamental reset */ uint32_t error_status; uint32_t rx_mode; #define BXE_RX_MODE_NONE 0 #define BXE_RX_MODE_NORMAL 1 #define BXE_RX_MODE_ALLMULTI 2 #define BXE_RX_MODE_PROMISC 3 #define BXE_MAX_MULTICAST 64 struct bxe_port port; struct cmng_init cmng; /* user configs */ int num_queues; int max_rx_bufs; int hc_rx_ticks; int hc_tx_ticks; int rx_budget; int max_aggregation_size; int mrrs; int autogreeen; #define AUTO_GREEN_HW_DEFAULT 0 #define AUTO_GREEN_FORCE_ON 1 #define AUTO_GREEN_FORCE_OFF 2 int interrupt_mode; #define INTR_MODE_INTX 0 #define INTR_MODE_MSI 1 #define INTR_MODE_MSIX 2 int udp_rss; /* interrupt allocations */ struct bxe_intr intr[MAX_RSS_CHAINS+1]; int intr_count; uint8_t igu_dsb_id; uint8_t igu_base_sb; uint8_t igu_sb_cnt; //uint8_t min_msix_vec_cnt; uint32_t igu_base_addr; //bus_addr_t def_status_blk_mapping; uint8_t base_fw_ndsb; #define DEF_SB_IGU_ID 16 #define DEF_SB_ID HC_SP_SB_ID /* parent bus DMA tag */ bus_dma_tag_t parent_dma_tag; /* default status block */ struct bxe_dma def_sb_dma; struct host_sp_status_block *def_sb; uint16_t def_idx; uint16_t def_att_idx; uint32_t attn_state; struct attn_route attn_group[MAX_DYNAMIC_ATTN_GRPS]; /* general SP events - stats query, cfc delete, etc */ #define HC_SP_INDEX_ETH_DEF_CONS 3 /* EQ completions */ #define HC_SP_INDEX_EQ_CONS 7 /* FCoE L2 connection completions */ #define HC_SP_INDEX_ETH_FCOE_TX_CQ_CONS 6 #define HC_SP_INDEX_ETH_FCOE_RX_CQ_CONS 4 /* iSCSI L2 */ #define HC_SP_INDEX_ETH_ISCSI_CQ_CONS 5 #define HC_SP_INDEX_ETH_ISCSI_RX_CQ_CONS 1 /* event queue */ struct bxe_dma eq_dma; union event_ring_elem *eq; uint16_t eq_prod; uint16_t eq_cons; uint16_t *eq_cons_sb; #define NUM_EQ_PAGES 1 /* must be a power of 2 */ #define EQ_DESC_CNT_PAGE (BCM_PAGE_SIZE / sizeof(union event_ring_elem)) #define EQ_DESC_MAX_PAGE (EQ_DESC_CNT_PAGE - 1) #define NUM_EQ_DESC (EQ_DESC_CNT_PAGE * NUM_EQ_PAGES) #define EQ_DESC_MASK (NUM_EQ_DESC - 1) #define MAX_EQ_AVAIL (EQ_DESC_MAX_PAGE * NUM_EQ_PAGES - 2) /* depends on EQ_DESC_CNT_PAGE being a power of 2 */ #define NEXT_EQ_IDX(x) \ ((((x) & EQ_DESC_MAX_PAGE) == (EQ_DESC_MAX_PAGE - 1)) ? 
\ ((x) + 2) : ((x) + 1)) /* depends on the above and on NUM_EQ_PAGES being a power of 2 */ #define EQ_DESC(x) ((x) & EQ_DESC_MASK) /* slow path */ struct bxe_dma sp_dma; struct bxe_slowpath *sp; unsigned long sp_state; /* slow path queue */ struct bxe_dma spq_dma; struct eth_spe *spq; #define SP_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_spe)) #define MAX_SP_DESC_CNT (SP_DESC_CNT - 1) #define MAX_SPQ_PENDING 8 uint16_t spq_prod_idx; struct eth_spe *spq_prod_bd; struct eth_spe *spq_last_bd; uint16_t *dsb_sp_prod; //uint16_t *spq_hw_con; //uint16_t spq_left; volatile unsigned long eq_spq_left; /* COMMON_xxx ramrod credit */ volatile unsigned long cq_spq_left; /* ETH_xxx ramrod credit */ /* fw decompression buffer */ struct bxe_dma gz_buf_dma; void *gz_buf; z_streamp gz_strm; uint32_t gz_outlen; #define GUNZIP_BUF(sc) (sc->gz_buf) #define GUNZIP_OUTLEN(sc) (sc->gz_outlen) #define GUNZIP_PHYS(sc) (sc->gz_buf_dma.paddr) #define FW_BUF_SIZE 0x40000 const struct raw_op *init_ops; const uint16_t *init_ops_offsets; /* init block offsets inside init_ops */ const uint32_t *init_data; /* data blob, 32 bit granularity */ uint32_t init_mode_flags; #define INIT_MODE_FLAGS(sc) (sc->init_mode_flags) /* PRAM blobs - raw data */ const uint8_t *tsem_int_table_data; const uint8_t *tsem_pram_data; const uint8_t *usem_int_table_data; const uint8_t *usem_pram_data; const uint8_t *xsem_int_table_data; const uint8_t *xsem_pram_data; const uint8_t *csem_int_table_data; const uint8_t *csem_pram_data; #define INIT_OPS(sc) (sc->init_ops) #define INIT_OPS_OFFSETS(sc) (sc->init_ops_offsets) #define INIT_DATA(sc) (sc->init_data) #define INIT_TSEM_INT_TABLE_DATA(sc) (sc->tsem_int_table_data) #define INIT_TSEM_PRAM_DATA(sc) (sc->tsem_pram_data) #define INIT_USEM_INT_TABLE_DATA(sc) (sc->usem_int_table_data) #define INIT_USEM_PRAM_DATA(sc) (sc->usem_pram_data) #define INIT_XSEM_INT_TABLE_DATA(sc) (sc->xsem_int_table_data) #define INIT_XSEM_PRAM_DATA(sc) (sc->xsem_pram_data) #define INIT_CSEM_INT_TABLE_DATA(sc) (sc->csem_int_table_data) #define INIT_CSEM_PRAM_DATA(sc) (sc->csem_pram_data) /* ILT * For max 196 cids (64*3 + non-eth), 32KB ILT page size and 1KB * context size we need 8 ILT entries. */ #define ILT_MAX_L2_LINES 8 struct hw_context context[ILT_MAX_L2_LINES]; struct ecore_ilt *ilt; #define ILT_MAX_LINES 256 /* max supported number of RSS queues: IGU SBs minus one for CNIC */ #define BXE_MAX_RSS_COUNT(sc) ((sc)->igu_sb_cnt - CNIC_SUPPORT(sc)) /* max CID count: Max RSS * Max_Tx_Multi_Cos + FCoE + iSCSI */ #if 1 #define BXE_L2_MAX_CID(sc) \ (BXE_MAX_RSS_COUNT(sc) * ECORE_MULTI_TX_COS + 2 * CNIC_SUPPORT(sc)) #else #define BXE_L2_MAX_CID(sc) /* OOO + FWD */ \ (BXE_MAX_RSS_COUNT(sc) * ECORE_MULTI_TX_COS + 4 * CNIC_SUPPORT(sc)) #endif #if 1 #define BXE_L2_CID_COUNT(sc) \ (BXE_NUM_ETH_QUEUES(sc) * ECORE_MULTI_TX_COS + 2 * CNIC_SUPPORT(sc)) #else #define BXE_L2_CID_COUNT(sc) /* OOO + FWD */ \ (BXE_NUM_ETH_QUEUES(sc) * ECORE_MULTI_TX_COS + 4 * CNIC_SUPPORT(sc)) #endif #define L2_ILT_LINES(sc) \ (DIV_ROUND_UP(BXE_L2_CID_COUNT(sc), ILT_PAGE_CIDS)) int qm_cid_count; uint8_t dropless_fc; /* total number of FW statistics requests */ uint8_t fw_stats_num; /* * This is a memory buffer that will contain both statistics ramrod * request and data. */ struct bxe_dma fw_stats_dma; /* * FW statistics request shortcut (points at the beginning of fw_stats * buffer). 
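/*
 * [Editor's sketch: not part of the patch.]  The request/data "shortcut"
 * pointers kept around here are offsets into one DMA buffer: the statistics
 * request sits at the start and the data block follows fw_stats_req_size
 * bytes later.  Minimal layout sketch with hypothetical example_* names.
 */
#include <stddef.h>
#include <stdint.h>

struct example_fw_stats_layout {
	void		*vaddr;		/* start of the fw_stats DMA buffer */
	uint64_t	 paddr;		/* its bus address */
	size_t		 req_size;	/* size of the request portion */
};

static inline void *
example_stats_data_vaddr(const struct example_fw_stats_layout *l)
{
	/* the data block starts right after the request block */
	return ((char *)l->vaddr + l->req_size);
}

static inline uint64_t
example_stats_data_paddr(const struct example_fw_stats_layout *l)
{
	return (l->paddr + l->req_size);
}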
*/ int fw_stats_req_size; struct bxe_fw_stats_req *fw_stats_req; bus_addr_t fw_stats_req_mapping; /* * FW statistics data shortcut (points at the beginning of fw_stats * buffer + fw_stats_req_size). */ int fw_stats_data_size; struct bxe_fw_stats_data *fw_stats_data; bus_addr_t fw_stats_data_mapping; /* tracking a pending STAT_QUERY ramrod */ uint16_t stats_pending; /* number of completed statistics ramrods */ uint16_t stats_comp; uint16_t stats_counter; uint8_t stats_init; int stats_state; struct bxe_eth_stats eth_stats; struct host_func_stats func_stats; struct bxe_eth_stats_old eth_stats_old; struct bxe_net_stats_old net_stats_old; struct bxe_fw_port_stats_old fw_stats_old; struct dmae_cmd stats_dmae; /* used by dmae command loader */ int executer_idx; int mtu; /* LLDP params */ struct bxe_config_lldp_params lldp_config_params; /* DCB support on/off */ int dcb_state; #define BXE_DCB_STATE_OFF 0 #define BXE_DCB_STATE_ON 1 /* DCBX engine mode */ int dcbx_enabled; #define BXE_DCBX_ENABLED_OFF 0 #define BXE_DCBX_ENABLED_ON_NEG_OFF 1 #define BXE_DCBX_ENABLED_ON_NEG_ON 2 #define BXE_DCBX_ENABLED_INVALID -1 uint8_t dcbx_mode_uset; struct bxe_config_dcbx_params dcbx_config_params; struct bxe_dcbx_port_params dcbx_port_params; int dcb_version; uint8_t cnic_support; uint8_t cnic_enabled; uint8_t cnic_loaded; #define CNIC_SUPPORT(sc) 0 /* ((sc)->cnic_support) */ #define CNIC_ENABLED(sc) 0 /* ((sc)->cnic_enabled) */ #define CNIC_LOADED(sc) 0 /* ((sc)->cnic_loaded) */ /* multiple tx classes of service */ uint8_t max_cos; #define BXE_MAX_PRIORITY 8 /* priority to cos mapping */ uint8_t prio_to_cos[BXE_MAX_PRIORITY]; int panic; struct cdev *ioctl_dev; void *grc_dump; unsigned int trigger_grcdump; unsigned int grcdump_done; unsigned int grcdump_started; int bxe_pause_param; void *eeprom; }; /* struct bxe_softc */ /* IOCTL sub-commands for edebug and firmware upgrade */ #define BXE_IOC_RD_NVRAM 1 #define BXE_IOC_WR_NVRAM 2 #define BXE_IOC_STATS_SHOW_NUM 3 #define BXE_IOC_STATS_SHOW_STR 4 #define BXE_IOC_STATS_SHOW_CNT 5 struct bxe_nvram_data { uint32_t op; /* ioctl sub-command */ uint32_t offset; uint32_t len; uint32_t value[1]; /* variable */ }; union bxe_stats_show_data { uint32_t op; /* ioctl sub-command */ struct { uint32_t num; /* return number of stats */ uint32_t len; /* length of each string item */ } desc; /* variable length... */ char str[1]; /* holds names of desc.num stats, each desc.len in length */ /* variable length... 
*/ uint64_t stats[1]; /* holds all stats */ }; /* function init flags */ #define FUNC_FLG_RSS 0x0001 #define FUNC_FLG_STATS 0x0002 /* FUNC_FLG_UNMATCHED 0x0004 */ #define FUNC_FLG_TPA 0x0008 #define FUNC_FLG_SPQ 0x0010 #define FUNC_FLG_LEADING 0x0020 /* PF only */ struct bxe_func_init_params { bus_addr_t fw_stat_map; /* (dma) valid if FUNC_FLG_STATS */ bus_addr_t spq_map; /* (dma) valid if FUNC_FLG_SPQ */ uint16_t func_flgs; uint16_t func_id; /* abs function id */ uint16_t pf_id; uint16_t spq_prod; /* valid if FUNC_FLG_SPQ */ }; /* memory resources reside at BARs 0, 2, 4 */ /* Run `pciconf -lb` to see mappings */ #define BAR0 0 #define BAR1 2 #define BAR2 4 #ifdef BXE_REG_NO_INLINE uint8_t bxe_reg_read8(struct bxe_softc *sc, bus_size_t offset); uint16_t bxe_reg_read16(struct bxe_softc *sc, bus_size_t offset); uint32_t bxe_reg_read32(struct bxe_softc *sc, bus_size_t offset); void bxe_reg_write8(struct bxe_softc *sc, bus_size_t offset, uint8_t val); void bxe_reg_write16(struct bxe_softc *sc, bus_size_t offset, uint16_t val); void bxe_reg_write32(struct bxe_softc *sc, bus_size_t offset, uint32_t val); #define REG_RD8(sc, offset) bxe_reg_read8(sc, offset) #define REG_RD16(sc, offset) bxe_reg_read16(sc, offset) #define REG_RD32(sc, offset) bxe_reg_read32(sc, offset) #define REG_WR8(sc, offset, val) bxe_reg_write8(sc, offset, val) #define REG_WR16(sc, offset, val) bxe_reg_write16(sc, offset, val) #define REG_WR32(sc, offset, val) bxe_reg_write32(sc, offset, val) #else /* not BXE_REG_NO_INLINE */ #define REG_WR8(sc, offset, val) \ bus_space_write_1(sc->bar[BAR0].tag, \ sc->bar[BAR0].handle, \ offset, val) #define REG_WR16(sc, offset, val) \ bus_space_write_2(sc->bar[BAR0].tag, \ sc->bar[BAR0].handle, \ offset, val) #define REG_WR32(sc, offset, val) \ bus_space_write_4(sc->bar[BAR0].tag, \ sc->bar[BAR0].handle, \ offset, val) #define REG_RD8(sc, offset) \ bus_space_read_1(sc->bar[BAR0].tag, \ sc->bar[BAR0].handle, \ offset) #define REG_RD16(sc, offset) \ bus_space_read_2(sc->bar[BAR0].tag, \ sc->bar[BAR0].handle, \ offset) #define REG_RD32(sc, offset) \ bus_space_read_4(sc->bar[BAR0].tag, \ sc->bar[BAR0].handle, \ offset) #endif /* BXE_REG_NO_INLINE */ #define REG_RD(sc, offset) REG_RD32(sc, offset) #define REG_WR(sc, offset, val) REG_WR32(sc, offset, val) #define REG_RD_IND(sc, offset) bxe_reg_rd_ind(sc, offset) #define REG_WR_IND(sc, offset, val) bxe_reg_wr_ind(sc, offset, val) #define BXE_SP(sc, var) (&(sc)->sp->var) #define BXE_SP_MAPPING(sc, var) \ (sc->sp_dma.paddr + offsetof(struct bxe_slowpath, var)) #define BXE_FP(sc, nr, var) ((sc)->fp[(nr)].var) #define BXE_SP_OBJ(sc, fp) ((sc)->sp_objs[(fp)->index]) #define REG_RD_DMAE(sc, offset, valp, len32) \ do { \ bxe_read_dmae(sc, offset, len32); \ memcpy(valp, BXE_SP(sc, wb_data[0]), (len32) * 4); \ } while (0) #define REG_WR_DMAE(sc, offset, valp, len32) \ do { \ memcpy(BXE_SP(sc, wb_data[0]), valp, (len32) * 4); \ bxe_write_dmae(sc, BXE_SP_MAPPING(sc, wb_data), offset, len32); \ } while (0) #define REG_WR_DMAE_LEN(sc, offset, valp, len32) \ REG_WR_DMAE(sc, offset, valp, len32) #define REG_RD_DMAE_LEN(sc, offset, valp, len32) \ REG_RD_DMAE(sc, offset, valp, len32) #define VIRT_WR_DMAE_LEN(sc, data, addr, len32, le32_swap) \ do { \ /* if (le32_swap) { */ \ /* BLOGW(sc, "VIRT_WR_DMAE_LEN with le32_swap=1\n"); */ \ /* } */ \ memcpy(GUNZIP_BUF(sc), data, len32 * 4); \ ecore_write_big_buf_wb(sc, addr, len32); \ } while (0) #define BXE_DB_MIN_SHIFT 3 /* 8 bytes */ #define BXE_DB_SHIFT 7 /* 128 bytes */ #if (BXE_DB_SHIFT < BXE_DB_MIN_SHIFT) #error 
"Minimum DB doorbell stride is 8" #endif #define DPM_TRIGGER_TYPE 0x40 #define DOORBELL(sc, cid, val) \ do { \ bus_space_write_4(sc->bar[BAR1].tag, sc->bar[BAR1].handle, \ ((sc->doorbell_size * (cid)) + DPM_TRIGGER_TYPE), \ (uint32_t)val); \ } while(0) #define SHMEM_ADDR(sc, field) \ (sc->devinfo.shmem_base + offsetof(struct shmem_region, field)) #define SHMEM_RD(sc, field) REG_RD(sc, SHMEM_ADDR(sc, field)) #define SHMEM_RD16(sc, field) REG_RD16(sc, SHMEM_ADDR(sc, field)) #define SHMEM_WR(sc, field, val) REG_WR(sc, SHMEM_ADDR(sc, field), val) #define SHMEM2_ADDR(sc, field) \ (sc->devinfo.shmem2_base + offsetof(struct shmem2_region, field)) #define SHMEM2_HAS(sc, field) \ (sc->devinfo.shmem2_base && (REG_RD(sc, SHMEM2_ADDR(sc, size)) > \ offsetof(struct shmem2_region, field))) #define SHMEM2_RD(sc, field) REG_RD(sc, SHMEM2_ADDR(sc, field)) #define SHMEM2_WR(sc, field, val) REG_WR(sc, SHMEM2_ADDR(sc, field), val) #define MFCFG_ADDR(sc, field) \ (sc->devinfo.mf_cfg_base + offsetof(struct mf_cfg, field)) #define MFCFG_RD(sc, field) REG_RD(sc, MFCFG_ADDR(sc, field)) #define MFCFG_RD16(sc, field) REG_RD16(sc, MFCFG_ADDR(sc, field)) #define MFCFG_WR(sc, field, val) REG_WR(sc, MFCFG_ADDR(sc, field), val) /* DMAE command defines */ #define DMAE_TIMEOUT -1 #define DMAE_PCI_ERROR -2 /* E2 and onward */ #define DMAE_NOT_RDY -3 #define DMAE_PCI_ERR_FLAG 0x80000000 #define DMAE_SRC_PCI 0 #define DMAE_SRC_GRC 1 #define DMAE_DST_NONE 0 #define DMAE_DST_PCI 1 #define DMAE_DST_GRC 2 #define DMAE_COMP_PCI 0 #define DMAE_COMP_GRC 1 #define DMAE_COMP_REGULAR 0 #define DMAE_COM_SET_ERR 1 #define DMAE_CMD_SRC_PCI (DMAE_SRC_PCI << DMAE_CMD_SRC_SHIFT) #define DMAE_CMD_SRC_GRC (DMAE_SRC_GRC << DMAE_CMD_SRC_SHIFT) #define DMAE_CMD_DST_PCI (DMAE_DST_PCI << DMAE_CMD_DST_SHIFT) #define DMAE_CMD_DST_GRC (DMAE_DST_GRC << DMAE_CMD_DST_SHIFT) #define DMAE_CMD_C_DST_PCI (DMAE_COMP_PCI << DMAE_CMD_C_DST_SHIFT) #define DMAE_CMD_C_DST_GRC (DMAE_COMP_GRC << DMAE_CMD_C_DST_SHIFT) #define DMAE_CMD_ENDIANITY_NO_SWAP (0 << DMAE_CMD_ENDIANITY_SHIFT) #define DMAE_CMD_ENDIANITY_B_SWAP (1 << DMAE_CMD_ENDIANITY_SHIFT) #define DMAE_CMD_ENDIANITY_DW_SWAP (2 << DMAE_CMD_ENDIANITY_SHIFT) #define DMAE_CMD_ENDIANITY_B_DW_SWAP (3 << DMAE_CMD_ENDIANITY_SHIFT) #define DMAE_CMD_PORT_0 0 #define DMAE_CMD_PORT_1 DMAE_CMD_PORT #define DMAE_SRC_PF 0 #define DMAE_SRC_VF 1 #define DMAE_DST_PF 0 #define DMAE_DST_VF 1 #define DMAE_C_SRC 0 #define DMAE_C_DST 1 #define DMAE_LEN32_RD_MAX 0x80 #define DMAE_LEN32_WR_MAX(sc) (CHIP_IS_E1(sc) ? 
0x400 : 0x2000) #define DMAE_COMP_VAL 0x60d0d0ae /* E2 and beyond, upper bit indicates error */ #define MAX_DMAE_C_PER_PORT 8 #define INIT_DMAE_C(sc) ((SC_PORT(sc) * MAX_DMAE_C_PER_PORT) + SC_VN(sc)) #define PMF_DMAE_C(sc) ((SC_PORT(sc) * MAX_DMAE_C_PER_PORT) + E1HVN_MAX) static const uint32_t dmae_reg_go_c[] = { DMAE_REG_GO_C0, DMAE_REG_GO_C1, DMAE_REG_GO_C2, DMAE_REG_GO_C3, DMAE_REG_GO_C4, DMAE_REG_GO_C5, DMAE_REG_GO_C6, DMAE_REG_GO_C7, DMAE_REG_GO_C8, DMAE_REG_GO_C9, DMAE_REG_GO_C10, DMAE_REG_GO_C11, DMAE_REG_GO_C12, DMAE_REG_GO_C13, DMAE_REG_GO_C14, DMAE_REG_GO_C15 }; #define ATTN_NIG_FOR_FUNC (1L << 8) #define ATTN_SW_TIMER_4_FUNC (1L << 9) #define GPIO_2_FUNC (1L << 10) #define GPIO_3_FUNC (1L << 11) #define GPIO_4_FUNC (1L << 12) #define ATTN_GENERAL_ATTN_1 (1L << 13) #define ATTN_GENERAL_ATTN_2 (1L << 14) #define ATTN_GENERAL_ATTN_3 (1L << 15) #define ATTN_GENERAL_ATTN_4 (1L << 13) #define ATTN_GENERAL_ATTN_5 (1L << 14) #define ATTN_GENERAL_ATTN_6 (1L << 15) #define ATTN_HARD_WIRED_MASK 0xff00 #define ATTENTION_ID 4 #define AEU_IN_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR \ AEU_INPUTS_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR #define MAX_IGU_ATTN_ACK_TO 100 #define STORM_ASSERT_ARRAY_SIZE 50 #define BXE_PMF_LINK_ASSERT(sc) \ GENERAL_ATTEN_OFFSET(LINK_SYNC_ATTENTION_BIT_FUNC_0 + SC_FUNC(sc)) #define BXE_MC_ASSERT_BITS \ (GENERAL_ATTEN_OFFSET(TSTORM_FATAL_ASSERT_ATTENTION_BIT) | \ GENERAL_ATTEN_OFFSET(USTORM_FATAL_ASSERT_ATTENTION_BIT) | \ GENERAL_ATTEN_OFFSET(CSTORM_FATAL_ASSERT_ATTENTION_BIT) | \ GENERAL_ATTEN_OFFSET(XSTORM_FATAL_ASSERT_ATTENTION_BIT)) #define BXE_MCP_ASSERT \ GENERAL_ATTEN_OFFSET(MCP_FATAL_ASSERT_ATTENTION_BIT) #define BXE_GRC_TIMEOUT GENERAL_ATTEN_OFFSET(LATCHED_ATTN_TIMEOUT_GRC) #define BXE_GRC_RSV (GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCR) | \ GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCT) | \ GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCN) | \ GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCU) | \ GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCP) | \ GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RSVD_GRC)) #define MULTI_MASK 0x7f #define PFS_PER_PORT(sc) \ ((CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4) #define SC_MAX_VN_NUM(sc) PFS_PER_PORT(sc) #define FIRST_ABS_FUNC_IN_PORT(sc) \ ((CHIP_PORT_MODE(sc) == CHIP_PORT_MODE_NONE) ? \ PORT_ID(sc) : (PATH_ID(sc) + (2 * PORT_ID(sc)))) #define FOREACH_ABS_FUNC_IN_PORT(sc, i) \ for ((i) = FIRST_ABS_FUNC_IN_PORT(sc); \ (i) < MAX_FUNC_NUM; \ (i) += (MAX_FUNC_NUM / PFS_PER_PORT(sc))) #define BXE_SWCID_SHIFT 17 #define BXE_SWCID_MASK ((0x1 << BXE_SWCID_SHIFT) - 1) #define SW_CID(x) (le32toh(x) & BXE_SWCID_MASK) #define CQE_CMD(x) (le32toh(x) >> COMMON_RAMROD_ETH_RX_CQE_CMD_ID_SHIFT) #define CQE_TYPE(cqe_fp_flags) ((cqe_fp_flags) & ETH_FAST_PATH_RX_CQE_TYPE) #define CQE_TYPE_START(cqe_type) ((cqe_type) == RX_ETH_CQE_TYPE_ETH_START_AGG) #define CQE_TYPE_STOP(cqe_type) ((cqe_type) == RX_ETH_CQE_TYPE_ETH_STOP_AGG) #define CQE_TYPE_SLOW(cqe_type) ((cqe_type) == RX_ETH_CQE_TYPE_ETH_RAMROD) #define CQE_TYPE_FAST(cqe_type) ((cqe_type) == RX_ETH_CQE_TYPE_ETH_FASTPATH) /* must be used on a CID before placing it on a HW ring */ #define HW_CID(sc, x) \ ((SC_PORT(sc) << 23) | (SC_VN(sc) << BXE_SWCID_SHIFT) | (x)) #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 #define SPEED_2500 2500 #define SPEED_10000 10000 #define PCI_PM_D0 1 #define PCI_PM_D3hot 2 #ifndef DUPLEX_UNKNOWN #define DUPLEX_UNKNOWN (0xff) #endif #ifndef SPEED_UNKNOWN #define SPEED_UNKNOWN (-1) #endif /* Enable or disable autonegotiation. 
*/ #define AUTONEG_DISABLE 0x00 #define AUTONEG_ENABLE 0x01 /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 #define PORT_MII 0x02 #define PORT_FIBRE 0x03 #define PORT_BNC 0x04 #define PORT_DA 0x05 #define PORT_NONE 0xef #define PORT_OTHER 0xff int bxe_test_bit(int nr, volatile unsigned long * addr); void bxe_set_bit(unsigned int nr, volatile unsigned long * addr); void bxe_clear_bit(int nr, volatile unsigned long * addr); int bxe_test_and_set_bit(int nr, volatile unsigned long * addr); int bxe_test_and_clear_bit(int nr, volatile unsigned long * addr); int bxe_cmpxchg(volatile int *addr, int old, int new); void bxe_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val); uint32_t bxe_reg_rd_ind(struct bxe_softc *sc, uint32_t addr); int bxe_dma_alloc(struct bxe_softc *sc, bus_size_t size, struct bxe_dma *dma, const char *msg); void bxe_dma_free(struct bxe_softc *sc, struct bxe_dma *dma); uint32_t bxe_dmae_opcode_add_comp(uint32_t opcode, uint8_t comp_type); uint32_t bxe_dmae_opcode_clr_src_reset(uint32_t opcode); uint32_t bxe_dmae_opcode(struct bxe_softc *sc, uint8_t src_type, uint8_t dst_type, uint8_t with_comp, uint8_t comp_type); void bxe_post_dmae(struct bxe_softc *sc, struct dmae_cmd *dmae, int idx); void bxe_read_dmae(struct bxe_softc *sc, uint32_t src_addr, uint32_t len32); void bxe_write_dmae(struct bxe_softc *sc, bus_addr_t dma_addr, uint32_t dst_addr, uint32_t len32); void bxe_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len); void bxe_set_ctx_validation(struct bxe_softc *sc, struct eth_context *cxt, uint32_t cid); void bxe_update_coalesce_sb_index(struct bxe_softc *sc, uint8_t fw_sb_id, uint8_t sb_index, uint8_t disable, uint16_t usec); int bxe_sp_post(struct bxe_softc *sc, int command, int cid, uint32_t data_hi, uint32_t data_lo, int cmd_type); void bxe_igu_ack_sb(struct bxe_softc *sc, uint8_t igu_sb_id, uint8_t segment, uint16_t index, uint8_t op, uint8_t update); void ecore_init_e1_firmware(struct bxe_softc *sc); void ecore_init_e1h_firmware(struct bxe_softc *sc); void ecore_init_e2_firmware(struct bxe_softc *sc); void ecore_storm_memset_struct(struct bxe_softc *sc, uint32_t addr, size_t size, uint32_t *data); /*********************/ /* LOGGING AND DEBUG */ /*********************/ /* debug logging codepaths */ #define DBG_LOAD 0x00000001 /* load and unload */ #define DBG_INTR 0x00000002 /* interrupt handling */ #define DBG_SP 0x00000004 /* slowpath handling */ #define DBG_STATS 0x00000008 /* stats updates */ #define DBG_TX 0x00000010 /* packet transmit */ #define DBG_RX 0x00000020 /* packet receive */ #define DBG_PHY 0x00000040 /* phy/link handling */ #define DBG_IOCTL 0x00000080 /* ioctl handling */ #define DBG_MBUF 0x00000100 /* dumping mbuf info */ #define DBG_REGS 0x00000200 /* register access */ #define DBG_LRO 0x00000400 /* lro processing */ #define DBG_ASSERT 0x80000000 /* debug assert */ #define DBG_ALL 0xFFFFFFFF /* flying monkeys */ #define DBASSERT(sc, exp, msg) \ do { \ if (__predict_false(sc->debug & DBG_ASSERT)) { \ if (__predict_false(!(exp))) { \ panic msg; \ } \ } \ } while (0) /* log a debug message */ #define BLOGD(sc, codepath, format, args...) \ do { \ if (__predict_false(sc->debug & (codepath))) { \ device_printf((sc)->dev, \ "%s(%s:%d) " format, \ __FUNCTION__, \ __FILE__, \ __LINE__, \ ## args); \ } \ } while(0) /* log a info message */ #define BLOGI(sc, format, args...) 
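/*
 * Illustration only -- a hypothetical trace point, not part of the driver:
 * BLOGD() above is always compiled in and is gated at run time by the
 * sc->debug bitmask (typically seeded from a tunable/sysctl), so a codepath
 * is logged only while its DBG_* bit is set; DBASSERT() likewise panics only
 * when DBG_ASSERT is set and the expression is false.
 */
static inline void
example_trace_intr(struct bxe_softc *sc, int qidx)
{
        /* printed only when DBG_INTR is set in sc->debug */
        BLOGD(sc, DBG_INTR, "servicing queue %d\n", qidx);

        /* fires only when DBG_ASSERT is set and the check fails */
        DBASSERT(sc, (qidx >= 0), ("negative queue index %d", qidx));
}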
\ do { \ if (__predict_false(sc->debug)) { \ device_printf((sc)->dev, \ "%s(%s:%d) " format, \ __FUNCTION__, \ __FILE__, \ __LINE__, \ ## args); \ } else { \ device_printf((sc)->dev, \ format, \ ## args); \ } \ } while(0) /* log a warning message */ #define BLOGW(sc, format, args...) \ do { \ if (__predict_false(sc->debug)) { \ device_printf((sc)->dev, \ "%s(%s:%d) WARNING: " format, \ __FUNCTION__, \ __FILE__, \ __LINE__, \ ## args); \ } else { \ device_printf((sc)->dev, \ "WARNING: " format, \ ## args); \ } \ } while(0) /* log a error message */ #define BLOGE(sc, format, args...) \ do { \ if (__predict_false(sc->debug)) { \ device_printf((sc)->dev, \ "%s(%s:%d) ERROR: " format, \ __FUNCTION__, \ __FILE__, \ __LINE__, \ ## args); \ } else { \ device_printf((sc)->dev, \ "ERROR: " format, \ ## args); \ } \ } while(0) #ifdef ECORE_STOP_ON_ERROR #define bxe_panic(sc, msg) \ do { \ panic msg; \ } while (0) #else #define bxe_panic(sc, msg) \ device_printf((sc)->dev, "%s (%s,%d)\n", __FUNCTION__, __FILE__, __LINE__); #endif #define CATC_TRIGGER(sc, data) REG_WR((sc), 0x2000, (data)); #define CATC_TRIGGER_START(sc) CATC_TRIGGER((sc), 0xcafecafe) void bxe_dump_mem(struct bxe_softc *sc, char *tag, uint8_t *mem, uint32_t len); void bxe_dump_mbuf_data(struct bxe_softc *sc, char *pTag, struct mbuf *m, uint8_t contents); #define BXE_SET_FLOWID(m) M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE) #define BXE_VALID_FLOWID(m) (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) /***********/ /* INLINES */ /***********/ static inline uint32_t reg_poll(struct bxe_softc *sc, uint32_t reg, uint32_t expected, int ms, int wait) { uint32_t val; do { val = REG_RD(sc, reg); if (val == expected) { break; } ms -= wait; DELAY(wait * 1000); } while (ms > 0); return (val); } static inline void bxe_update_fp_sb_idx(struct bxe_fastpath *fp) { mb(); /* status block is written to by the chip */ fp->fp_hc_idx = fp->sb_running_index[SM_RX_ID]; } static inline void bxe_igu_ack_sb_gen(struct bxe_softc *sc, uint8_t igu_sb_id, uint8_t segment, uint16_t index, uint8_t op, uint8_t update, uint32_t igu_addr) { struct igu_regular cmd_data = {0}; cmd_data.sb_id_and_flags = ((index << IGU_REGULAR_SB_INDEX_SHIFT) | (segment << IGU_REGULAR_SEGMENT_ACCESS_SHIFT) | (update << IGU_REGULAR_BUPDATE_SHIFT) | (op << IGU_REGULAR_ENABLE_INT_SHIFT)); BLOGD(sc, DBG_INTR, "write 0x%08x to IGU addr 0x%x\n", cmd_data.sb_id_and_flags, igu_addr); REG_WR(sc, igu_addr, cmd_data.sb_id_and_flags); /* Make sure that ACK is written */ bus_space_barrier(sc->bar[0].tag, sc->bar[0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); } static inline void bxe_hc_ack_sb(struct bxe_softc *sc, uint8_t sb_id, uint8_t storm, uint16_t index, uint8_t op, uint8_t update) { uint32_t hc_addr = (HC_REG_COMMAND_REG + SC_PORT(sc)*32 + COMMAND_REG_INT_ACK); struct igu_ack_register igu_ack; igu_ack.status_block_index = index; igu_ack.sb_id_and_flags = ((sb_id << IGU_ACK_REGISTER_STATUS_BLOCK_ID_SHIFT) | (storm << IGU_ACK_REGISTER_STORM_ID_SHIFT) | (update << IGU_ACK_REGISTER_UPDATE_INDEX_SHIFT) | (op << IGU_ACK_REGISTER_INTERRUPT_MODE_SHIFT)); REG_WR(sc, hc_addr, (*(uint32_t *)&igu_ack)); /* Make sure that ACK is written */ bus_space_barrier(sc->bar[0].tag, sc->bar[0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); } static inline void bxe_ack_sb(struct bxe_softc *sc, uint8_t igu_sb_id, uint8_t storm, uint16_t index, uint8_t op, uint8_t update) { if (sc->devinfo.int_block == INT_BLOCK_HC) bxe_hc_ack_sb(sc, igu_sb_id, storm, index, op, update); else { uint8_t segment; if (CHIP_INT_MODE_IS_BC(sc)) { segment = 
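/*
 * Illustration only -- a hypothetical caller, not part of the driver:
 * reg_poll() above re-reads a register every `wait' milliseconds for up to
 * `ms' milliseconds and returns the last value read, so a timeout is
 * detected by comparing the result against the expected value.  The
 * register offset below is made up for the example.
 */
static inline int
example_wait_done(struct bxe_softc *sc, uint32_t done_reg)
{
        uint32_t val;

        /* poll for up to 200 ms in 10 ms steps, expecting the value 1 */
        val = reg_poll(sc, done_reg, 1, 200, 10);
        if (val != 1) {
                BLOGE(sc, "timeout polling 0x%08x (last read 0x%08x)\n",
                    done_reg, val);
                return (-1);
        }
        return (0);
}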
storm; } else if (igu_sb_id != sc->igu_dsb_id) { segment = IGU_SEG_ACCESS_DEF; } else if (storm == ATTENTION_ID) { segment = IGU_SEG_ACCESS_ATTN; } else { segment = IGU_SEG_ACCESS_DEF; } bxe_igu_ack_sb(sc, igu_sb_id, segment, index, op, update); } } static inline uint16_t bxe_hc_ack_int(struct bxe_softc *sc) { uint32_t hc_addr = (HC_REG_COMMAND_REG + SC_PORT(sc)*32 + COMMAND_REG_SIMD_MASK); uint32_t result = REG_RD(sc, hc_addr); mb(); return (result); } static inline uint16_t bxe_igu_ack_int(struct bxe_softc *sc) { uint32_t igu_addr = (BAR_IGU_INTMEM + IGU_REG_SISR_MDPC_WMASK_LSB_UPPER*8); uint32_t result = REG_RD(sc, igu_addr); BLOGD(sc, DBG_INTR, "read 0x%08x from IGU addr 0x%x\n", result, igu_addr); mb(); return (result); } static inline uint16_t bxe_ack_int(struct bxe_softc *sc) { mb(); if (sc->devinfo.int_block == INT_BLOCK_HC) { return (bxe_hc_ack_int(sc)); } else { return (bxe_igu_ack_int(sc)); } } static inline int func_by_vn(struct bxe_softc *sc, int vn) { return (2 * vn + SC_PORT(sc)); } /* * Statistics ID are global per chip/path, while Client IDs for E1x * are per port. */ static inline uint8_t bxe_stats_id(struct bxe_fastpath *fp) { struct bxe_softc *sc = fp->sc; if (!CHIP_IS_E1x(sc)) { return (fp->cl_id); } return (fp->cl_id + SC_PORT(sc) * FP_SB_MAX_E1x); } #endif /* __BXE_H__ */ diff --git a/sys/dev/bxe/ecore_sp.h b/sys/dev/bxe/ecore_sp.h index f39c908f7530..917f27549c1b 100644 --- a/sys/dev/bxe/ecore_sp.h +++ b/sys/dev/bxe/ecore_sp.h @@ -1,2003 +1,2003 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007-2017 QLogic Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #ifndef ECORE_SP_H #define ECORE_SP_H #include #include #include #include #include #include #include #include #include #if _BYTE_ORDER == _LITTLE_ENDIAN #ifndef LITTLE_ENDIAN #define LITTLE_ENDIAN #endif #ifndef __LITTLE_ENDIAN #define __LITTLE_ENDIAN #endif #undef BIG_ENDIAN #undef __BIG_ENDIAN #else /* _BIG_ENDIAN */ #ifndef BIG_ENDIAN #define BIG_ENDIAN #endif #ifndef __BIG_ENDIAN #define __BIG_ENDIAN #endif #undef LITTLE_ENDIAN #undef __LITTLE_ENDIAN #endif #include "ecore_mfw_req.h" #include "ecore_fw_defs.h" #include "ecore_hsi.h" #include "ecore_reg.h" struct bxe_softc; typedef bus_addr_t ecore_dma_addr_t; /* expected to be 64 bit wide */ typedef volatile int ecore_atomic_t; #ifndef __bool_true_false_are_defined #ifndef __cplusplus #define bool _Bool #endif /* !__cplusplus */ #endif /* !__bool_true_false_are_defined$ */ #define ETH_ALEN ETHER_ADDR_LEN /* 6 */ #define ECORE_SWCID_SHIFT 17 #define ECORE_SWCID_MASK ((0x1 << ECORE_SWCID_SHIFT) - 1) #define ECORE_MC_HASH_SIZE 8 #define ECORE_MC_HASH_OFFSET(sc, i) \ (BAR_TSTRORM_INTMEM + \ TSTORM_APPROXIMATE_MATCH_MULTICAST_FILTERING_OFFSET(FUNC_ID(sc)) + i*4) #define ECORE_MAX_MULTICAST 64 #define ECORE_MAX_EMUL_MULTI 1 #define IRO sc->iro_array typedef struct mtx ECORE_MUTEX; #define ECORE_MUTEX_INIT(_mutex) \ mtx_init(_mutex, "ecore_lock", "ECORE Lock", MTX_DEF) #define ECORE_MUTEX_LOCK(_mutex) mtx_lock(_mutex) #define ECORE_MUTEX_UNLOCK(_mutex) mtx_unlock(_mutex) typedef struct mtx ECORE_MUTEX_SPIN; #define ECORE_SPIN_LOCK_INIT(_spin, _sc) \ mtx_init(_spin, "ecore_lock", "ECORE Lock", MTX_DEF) #define ECORE_SPIN_LOCK_BH(_spin) mtx_lock(_spin) /* bh = bottom-half */ #define ECORE_SPIN_UNLOCK_BH(_spin) mtx_unlock(_spin) /* bh = bottom-half */ #define ECORE_SMP_MB_AFTER_CLEAR_BIT() mb() #define ECORE_SMP_MB_BEFORE_CLEAR_BIT() mb() #define ECORE_SMP_MB() mb() #define ECORE_SMP_RMB() rmb() #define ECORE_SMP_WMB() wmb() #define ECORE_MMIOWB() wmb() #define ECORE_SET_BIT_NA(bit, var) bit_set(var, bit) /* non-atomic */ #define ECORE_CLEAR_BIT_NA(bit, var) bit_clear(var, bit) /* non-atomic */ #define ECORE_TEST_BIT(bit, var) bxe_test_bit(bit, var) #define ECORE_SET_BIT(bit, var) bxe_set_bit(bit, var) #define ECORE_CLEAR_BIT(bit, var) bxe_clear_bit(bit, var) #define ECORE_TEST_AND_CLEAR_BIT(bit, var) bxe_test_and_clear_bit(bit, var) #define ECORE_ATOMIC_READ(a) atomic_load_acq_int((volatile int *)a) #define ECORE_ATOMIC_SET(a, v) atomic_store_rel_int((volatile int *)a, v) #define ECORE_ATOMIC_CMPXCHG(a, o, n) bxe_cmpxchg((volatile int *)a, o, n) #define ECORE_RET_PENDING(pending_bit, pending) \ (ECORE_TEST_BIT(pending_bit, pending) ? 
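/*
 * Illustration only -- a hypothetical helper, not part of the driver: the
 * ECORE_* wrappers above let the shared ecore slowpath code run on FreeBSD
 * primitives; note that both ECORE_MUTEX and the "spin" flavor are
 * initialized as sleepable MTX_DEF mutexes, and the bit/atomic helpers map
 * to the bxe_*_bit() routines and atomic(9).  Clearing a pending bit the
 * ecore way is therefore just:
 */
static inline void
example_clear_pending(unsigned long *pstate, int bit)
{
        /* bxe_clear_bit(bit, pstate) followed by a full memory barrier */
        ECORE_CLEAR_BIT(bit, pstate);
        ECORE_SMP_MB_AFTER_CLEAR_BIT();
}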
ECORE_PENDING : ECORE_SUCCESS) #define ECORE_SET_FLAG(value, mask, flag) \ do { \ (value) &= ~(mask); \ (value) |= ((flag) << (mask##_SHIFT)); \ } while (0) #define ECORE_GET_FLAG(value, mask) \ (((value) &= (mask)) >> (mask##_SHIFT)) #define ECORE_MIGHT_SLEEP() #define ECORE_FCOE_CID(sc) ((sc)->fp[FCOE_IDX(sc)].cl_id) #define ECORE_MEMCMP(_a, _b, _s) memcmp(_a, _b, _s) #define ECORE_MEMCPY(_a, _b, _s) memcpy(_a, _b, _s) #define ECORE_MEMSET(_a, _c, _s) memset(_a, _c, _s) #define ECORE_CPU_TO_LE16(x) htole16(x) #define ECORE_CPU_TO_LE32(x) htole32(x) #define ECORE_WAIT(_s, _t) DELAY(1000) #define ECORE_MSLEEP(_t) DELAY((_t) * 1000) #define ECORE_LIKELY(x) __predict_true(x) #define ECORE_UNLIKELY(x) __predict_false(x) #define ECORE_ZALLOC(_size, _flags, _sc) \ malloc(_size, M_TEMP, (M_NOWAIT | M_ZERO)) #define ECORE_CALLOC(_len, _size, _flags, _sc) \ mallocarray(_len, _size, M_TEMP, (M_NOWAIT | M_ZERO)) #define ECORE_FREE(_s, _buf, _size) free(_buf, M_TEMP) #define SC_ILT(sc) ((sc)->ilt) -#define ILOG2(x) bxe_ilog2(x) +#define ILOG2(x) ilog2(x) #define ECORE_ILT_ZALLOC(x, y, size) \ do { \ x = malloc(sizeof(struct bxe_dma), M_DEVBUF, (M_NOWAIT | M_ZERO)); \ if (x) { \ if (bxe_dma_alloc((struct bxe_softc *)sc, \ size, (struct bxe_dma *)x, \ "ECORE_ILT") != 0) { \ free(x, M_DEVBUF); \ x = NULL; \ *y = 0; \ } else { \ *y = ((struct bxe_dma *)x)->paddr; \ } \ } \ } while (0) #define ECORE_ILT_FREE(x, y, size) \ do { \ if (x) { \ bxe_dma_free((struct bxe_softc *)sc, x); \ free(x, M_DEVBUF); \ x = NULL; \ y = 0; \ } \ } while (0) #define ECORE_IS_VALID_ETHER_ADDR(_mac) TRUE #define ECORE_IS_MF_SD_MODE IS_MF_SD_MODE #define ECORE_IS_MF_SI_MODE IS_MF_SI_MODE #define ECORE_IS_MF_AFEX_MODE IS_MF_AFEX_MODE #define ECORE_SET_CTX_VALIDATION bxe_set_ctx_validation #define ECORE_UPDATE_COALESCE_SB_INDEX bxe_update_coalesce_sb_index #define ECORE_ALIGN(x, a) ((((x) + (a) - 1) / (a)) * (a)) #define ECORE_REG_WR_DMAE_LEN REG_WR_DMAE_LEN #define ECORE_PATH_ID SC_PATH #define ECORE_PORT_ID SC_PORT #define ECORE_FUNC_ID SC_FUNC #define ECORE_ABS_FUNC_ID SC_ABS_FUNC uint32_t calc_crc32(uint8_t *crc32_packet, uint32_t crc32_length, uint32_t crc32_seed, uint8_t complement); static inline uint32_t ECORE_CRC32_LE(uint32_t seed, uint8_t *mac, uint32_t len) { uint32_t packet_buf[2] = {0}; memcpy(((uint8_t *)(&packet_buf[0]))+2, &mac[0], 2); memcpy(&packet_buf[1], &mac[2], 4); return bswap32(calc_crc32((uint8_t *)packet_buf, 8, seed, 0)); } #define ecore_sp_post(_sc, _a, _b, _c, _d) \ bxe_sp_post(_sc, _a, _b, U64_HI(_c), U64_LO(_c), _d) #ifdef ECORE_STOP_ON_ERROR #define ECORE_DBG_BREAK_IF(exp) \ do { \ if (__predict_false(exp)) { \ panic("ECORE"); \ } \ } while (0) #define ECORE_BUG() \ do { \ panic("BUG (%s:%d)", __FILE__, __LINE__); \ } while(0); #define ECORE_BUG_ON(exp) \ do { \ if (__predict_true(exp)) { \ panic("BUG_ON (%s:%d)", __FILE__, __LINE__); \ } \ } while (0) #else extern unsigned long bxe_debug; #define BXE_DEBUG_ECORE_DBG_BREAK_IF 0x01 #define BXE_DEBUG_ECORE_BUG 0x02 #define BXE_DEBUG_ECORE_BUG_ON 0x04 #define ECORE_DBG_BREAK_IF(exp) \ if (bxe_debug & BXE_DEBUG_ECORE_DBG_BREAK_IF) \ printf("%s (%s,%d)\n", __FUNCTION__, __FILE__, __LINE__); #define ECORE_BUG(exp) \ if (bxe_debug & BXE_DEBUG_ECORE_BUG) \ printf("%s (%s,%d)\n", __FUNCTION__, __FILE__, __LINE__); #define ECORE_BUG_ON(exp) \ if (bxe_debug & BXE_DEBUG_ECORE_BUG_ON) \ printf("%s (%s,%d)\n", __FUNCTION__, __FILE__, __LINE__); #endif /* #ifdef ECORE_STOP_ON_ERROR */ #define ECORE_ERR(str, ...) 
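/*
 * Illustration only -- a hypothetical caller, not part of the driver:
 * ECORE_ILT_ZALLOC()/ECORE_ILT_FREE() above wrap a bus-dma allocation in a
 * malloc'ed struct bxe_dma and hand back only the bus address.  Both macros
 * reference a variable named `sc' that is not a parameter, so it must be in
 * scope at the call site; ZALLOC takes the address of the bus-address
 * variable while FREE takes it by name.
 */
static inline void
example_ilt_line(struct bxe_softc *sc)
{
        void *line = NULL;
        ecore_dma_addr_t phys = 0;

        ECORE_ILT_ZALLOC(line, &phys, BCM_PAGE_SIZE);  /* size is illustrative */
        if (line != NULL) {
                /* ... program `phys' into the ILT here ... */
                ECORE_ILT_FREE(line, phys, BCM_PAGE_SIZE);
        }
}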
\ BLOGE(sc, "ECORE: " str, ##__VA_ARGS__) #define DBG_SP 0x00000004 /* defined in bxe.h */ #define ECORE_MSG(sc, m, ...) \ BLOGD(sc, DBG_SP, "ECORE: " m, ##__VA_ARGS__) typedef struct _ecore_list_entry_t { struct _ecore_list_entry_t *next, *prev; } ecore_list_entry_t; typedef struct ecore_list_t { ecore_list_entry_t *head, *tail; unsigned long cnt; } ecore_list_t; /* initialize the list */ #define ECORE_LIST_INIT(_list) \ do { \ (_list)->head = NULL; \ (_list)->tail = NULL; \ (_list)->cnt = 0; \ } while (0) /* return TRUE if the element is the last on the list */ #define ECORE_LIST_IS_LAST(_elem, _list) \ (_elem == (_list)->tail) /* return TRUE if the list is empty */ #define ECORE_LIST_IS_EMPTY(_list) \ ((_list)->cnt == 0) /* return the first element */ #define ECORE_LIST_FIRST_ENTRY(_list, cast, _link) \ (cast *)((_list)->head) /* return the next element */ #define ECORE_LIST_NEXT(_elem, _link, cast) \ (cast *)((&((_elem)->_link))->next) /* push an element on the head of the list */ #define ECORE_LIST_PUSH_HEAD(_elem, _list) \ do { \ (_elem)->prev = (ecore_list_entry_t *)0; \ (_elem)->next = (_list)->head; \ if ((_list)->tail == (ecore_list_entry_t *)0) { \ (_list)->tail = (_elem); \ } else { \ (_list)->head->prev = (_elem); \ } \ (_list)->head = (_elem); \ (_list)->cnt++; \ } while (0) /* push an element on the tail of the list */ #define ECORE_LIST_PUSH_TAIL(_elem, _list) \ do { \ (_elem)->next = (ecore_list_entry_t *)0; \ (_elem)->prev = (_list)->tail; \ if ((_list)->tail) { \ (_list)->tail->next = (_elem); \ } else { \ (_list)->head = (_elem); \ } \ (_list)->tail = (_elem); \ (_list)->cnt++; \ } while (0) /* push list1 on the head of list2 and return with list1 as empty */ #define ECORE_LIST_SPLICE_INIT(_list1, _list2) \ do { \ (_list1)->tail->next = (_list2)->head; \ if ((_list2)->head) { \ (_list2)->head->prev = (_list1)->tail; \ } else { \ (_list2)->tail = (_list1)->tail; \ } \ (_list2)->head = (_list1)->head; \ (_list2)->cnt += (_list1)->cnt; \ (_list1)->head = NULL; \ (_list1)->tail = NULL; \ (_list1)->cnt = 0; \ } while (0) /* remove an element from the list */ #define ECORE_LIST_REMOVE_ENTRY(_elem, _list) \ do { \ if ((_list)->head == (_elem)) { \ if ((_list)->head) { \ (_list)->head = (_list)->head->next; \ if ((_list)->head) { \ (_list)->head->prev = (ecore_list_entry_t *)0; \ } else { \ (_list)->tail = (ecore_list_entry_t *)0; \ } \ (_list)->cnt--; \ } \ } else if ((_list)->tail == (_elem)) { \ if ((_list)->tail) { \ (_list)->tail = (_list)->tail->prev; \ if ((_list)->tail) { \ (_list)->tail->next = (ecore_list_entry_t *)0; \ } else { \ (_list)->head = (ecore_list_entry_t *)0; \ } \ (_list)->cnt--; \ } \ } else { \ (_elem)->prev->next = (_elem)->next; \ (_elem)->next->prev = (_elem)->prev; \ (_list)->cnt--; \ } \ } while (0) /* walk the list */ #define ECORE_LIST_FOR_EACH_ENTRY(pos, _list, _link, cast) \ for (pos = ECORE_LIST_FIRST_ENTRY(_list, cast, _link); \ pos; \ pos = ECORE_LIST_NEXT(pos, _link, cast)) /* walk the list (safely) */ #define ECORE_LIST_FOR_EACH_ENTRY_SAFE(pos, n, _list, _link, cast) \ for (pos = ECORE_LIST_FIRST_ENTRY(_list, cast, _lint), \ n = (pos) ? ECORE_LIST_NEXT(pos, _link, cast) : NULL; \ pos != NULL; \ pos = (cast *)n, \ n = (pos) ? 
ECORE_LIST_NEXT(pos, _link, cast) : NULL) /* Manipulate a bit vector defined as an array of uint64_t */ /* Number of bits in one sge_mask array element */ #define BIT_VEC64_ELEM_SZ 64 #define BIT_VEC64_ELEM_SHIFT 6 #define BIT_VEC64_ELEM_MASK ((uint64_t)BIT_VEC64_ELEM_SZ - 1) #define __BIT_VEC64_SET_BIT(el, bit) \ do { \ el = ((el) | ((uint64_t)0x1 << (bit))); \ } while (0) #define __BIT_VEC64_CLEAR_BIT(el, bit) \ do { \ el = ((el) & (~((uint64_t)0x1 << (bit)))); \ } while (0) #define BIT_VEC64_SET_BIT(vec64, idx) \ __BIT_VEC64_SET_BIT((vec64)[(idx) >> BIT_VEC64_ELEM_SHIFT], \ (idx) & BIT_VEC64_ELEM_MASK) #define BIT_VEC64_CLEAR_BIT(vec64, idx) \ __BIT_VEC64_CLEAR_BIT((vec64)[(idx) >> BIT_VEC64_ELEM_SHIFT], \ (idx) & BIT_VEC64_ELEM_MASK) #define BIT_VEC64_TEST_BIT(vec64, idx) \ (((vec64)[(idx) >> BIT_VEC64_ELEM_SHIFT] >> \ ((idx) & BIT_VEC64_ELEM_MASK)) & 0x1) /* * Creates a bitmask of all ones in less significant bits. * idx - index of the most significant bit in the created mask */ #define BIT_VEC64_ONES_MASK(idx) \ (((uint64_t)0x1 << (((idx) & BIT_VEC64_ELEM_MASK) + 1)) - 1) #define BIT_VEC64_ELEM_ONE_MASK ((uint64_t)(~0)) /* fill in a MAC address the way the FW likes it */ static inline void ecore_set_fw_mac_addr(uint16_t *fw_hi, uint16_t *fw_mid, uint16_t *fw_lo, uint8_t *mac) { ((uint8_t *)fw_hi)[0] = mac[1]; ((uint8_t *)fw_hi)[1] = mac[0]; ((uint8_t *)fw_mid)[0] = mac[3]; ((uint8_t *)fw_mid)[1] = mac[2]; ((uint8_t *)fw_lo)[0] = mac[5]; ((uint8_t *)fw_lo)[1] = mac[4]; } enum ecore_status_t { ECORE_EXISTS = -6, ECORE_IO = -5, ECORE_TIMEOUT = -4, ECORE_INVAL = -3, ECORE_BUSY = -2, ECORE_NOMEM = -1, ECORE_SUCCESS = 0, /* PENDING is not an error and should be positive */ ECORE_PENDING = 1, }; enum { SWITCH_UPDATE, AFEX_UPDATE, }; struct bxe_softc; struct eth_context; /* Bits representing general command's configuration */ enum { RAMROD_TX, RAMROD_RX, /* Wait until all pending commands complete */ RAMROD_COMP_WAIT, /* Don't send a ramrod, only update a registry */ RAMROD_DRV_CLR_ONLY, /* Configure HW according to the current object state */ RAMROD_RESTORE, /* Execute the next command now */ RAMROD_EXEC, /* Don't add a new command and continue execution of posponed * commands. If not set a new command will be added to the * pending commands list. */ RAMROD_CONT, /* If there is another pending ramrod, wait until it finishes and * re-try to submit this one. This flag can be set only in sleepable * context, and should not be set from the context that completes the * ramrods as deadlock will occur. 
*/ RAMROD_RETRY, }; typedef enum { ECORE_OBJ_TYPE_RX, ECORE_OBJ_TYPE_TX, ECORE_OBJ_TYPE_RX_TX, } ecore_obj_type; /* Public slow path states */ enum { ECORE_FILTER_MAC_PENDING, ECORE_FILTER_VLAN_PENDING, ECORE_FILTER_VLAN_MAC_PENDING, ECORE_FILTER_RX_MODE_PENDING, ECORE_FILTER_RX_MODE_SCHED, ECORE_FILTER_ISCSI_ETH_START_SCHED, ECORE_FILTER_ISCSI_ETH_STOP_SCHED, ECORE_FILTER_FCOE_ETH_START_SCHED, ECORE_FILTER_FCOE_ETH_STOP_SCHED, ECORE_FILTER_BYPASS_RX_MODE_PENDING, ECORE_FILTER_BYPASS_MAC_PENDING, ECORE_FILTER_BYPASS_RSS_CONF_PENDING, ECORE_FILTER_MCAST_PENDING, ECORE_FILTER_MCAST_SCHED, ECORE_FILTER_RSS_CONF_PENDING, ECORE_AFEX_FCOE_Q_UPDATE_PENDING, ECORE_AFEX_PENDING_VIFSET_MCP_ACK, ECORE_FILTER_VXLAN_PENDING }; struct ecore_raw_obj { uint8_t func_id; /* Queue params */ uint8_t cl_id; uint32_t cid; /* Ramrod data buffer params */ void *rdata; ecore_dma_addr_t rdata_mapping; /* Ramrod state params */ int state; /* "ramrod is pending" state bit */ unsigned long *pstate; /* pointer to state buffer */ ecore_obj_type obj_type; int (*wait_comp)(struct bxe_softc *sc, struct ecore_raw_obj *o); bool (*check_pending)(struct ecore_raw_obj *o); void (*clear_pending)(struct ecore_raw_obj *o); void (*set_pending)(struct ecore_raw_obj *o); }; /************************* VLAN-MAC commands related parameters ***************/ struct ecore_mac_ramrod_data { uint8_t mac[ETH_ALEN]; uint8_t is_inner_mac; }; struct ecore_vlan_ramrod_data { uint16_t vlan; }; struct ecore_vlan_mac_ramrod_data { uint8_t mac[ETH_ALEN]; uint8_t is_inner_mac; uint16_t vlan; }; struct ecore_vxlan_fltr_ramrod_data { uint8_t innermac[ETH_ALEN]; uint32_t vni; }; union ecore_classification_ramrod_data { struct ecore_mac_ramrod_data mac; struct ecore_vlan_ramrod_data vlan; struct ecore_vlan_mac_ramrod_data vlan_mac; struct ecore_vxlan_fltr_ramrod_data vxlan_fltr; }; /* VLAN_MAC commands */ enum ecore_vlan_mac_cmd { ECORE_VLAN_MAC_ADD, ECORE_VLAN_MAC_DEL, ECORE_VLAN_MAC_MOVE, }; struct ecore_vlan_mac_data { /* Requested command: ECORE_VLAN_MAC_XX */ enum ecore_vlan_mac_cmd cmd; /* used to contain the data related vlan_mac_flags bits from * ramrod parameters. */ unsigned long vlan_mac_flags; /* Needed for MOVE command */ struct ecore_vlan_mac_obj *target_obj; union ecore_classification_ramrod_data u; }; /*************************** Exe Queue obj ************************************/ union ecore_exe_queue_cmd_data { struct ecore_vlan_mac_data vlan_mac; struct { /* TODO */ } mcast; }; struct ecore_exeq_elem { ecore_list_entry_t link; /* Length of this element in the exe_chunk. */ int cmd_len; union ecore_exe_queue_cmd_data cmd_data; }; union ecore_qable_obj; union ecore_exeq_comp_elem { union event_ring_elem *elem; }; struct ecore_exe_queue_obj; typedef int (*exe_q_validate)(struct bxe_softc *sc, union ecore_qable_obj *o, struct ecore_exeq_elem *elem); typedef int (*exe_q_remove)(struct bxe_softc *sc, union ecore_qable_obj *o, struct ecore_exeq_elem *elem); /* Return positive if entry was optimized, 0 - if not, negative * in case of an error. */ typedef int (*exe_q_optimize)(struct bxe_softc *sc, union ecore_qable_obj *o, struct ecore_exeq_elem *elem); typedef int (*exe_q_execute)(struct bxe_softc *sc, union ecore_qable_obj *o, ecore_list_t *exe_chunk, unsigned long *ramrod_flags); typedef struct ecore_exeq_elem * (*exe_q_get)(struct ecore_exe_queue_obj *o, struct ecore_exeq_elem *elem); struct ecore_exe_queue_obj { /* Commands pending for an execution. */ ecore_list_t exe_queue; /* Commands pending for an completion. 
*/ ecore_list_t pending_comp; ECORE_MUTEX_SPIN lock; /* Maximum length of commands' list for one execution */ int exe_chunk_len; union ecore_qable_obj *owner; /****** Virtual functions ******/ /** * Called before commands execution for commands that are really * going to be executed (after 'optimize'). * * Must run under exe_queue->lock */ exe_q_validate validate; /** * Called before removing pending commands, cleaning allocated * resources (e.g., credits from validate) */ exe_q_remove remove; /** * This will try to cancel the current pending commands list * considering the new command. * * Returns the number of optimized commands or a negative error code * * Must run under exe_queue->lock */ exe_q_optimize optimize; /** * Run the next commands chunk (owner specific). */ exe_q_execute execute; /** * Return the exe_queue element containing the specific command * if any. Otherwise return NULL. */ exe_q_get get; }; /***************** Classification verbs: Set/Del MAC/VLAN/VLAN-MAC ************/ /* * Element in the VLAN_MAC registry list having all current configured * rules. */ struct ecore_vlan_mac_registry_elem { ecore_list_entry_t link; /* Used to store the cam offset used for the mac/vlan/vlan-mac. * Relevant for 57710 and 57711 only. VLANs and MACs share the * same CAM for these chips. */ int cam_offset; /* Needed for DEL and RESTORE flows */ unsigned long vlan_mac_flags; union ecore_classification_ramrod_data u; }; /* Bits representing VLAN_MAC commands specific flags */ enum { ECORE_UC_LIST_MAC, ECORE_ETH_MAC, ECORE_ISCSI_ETH_MAC, ECORE_NETQ_ETH_MAC, ECORE_DONT_CONSUME_CAM_CREDIT, ECORE_DONT_CONSUME_CAM_CREDIT_DEST, }; /* When looking for matching filters, some flags are not interesting */ #define ECORE_VLAN_MAC_CMP_MASK (1 << ECORE_UC_LIST_MAC | \ 1 << ECORE_ETH_MAC | \ 1 << ECORE_ISCSI_ETH_MAC | \ 1 << ECORE_NETQ_ETH_MAC) #define ECORE_VLAN_MAC_CMP_FLAGS(flags) \ ((flags) & ECORE_VLAN_MAC_CMP_MASK) struct ecore_vlan_mac_ramrod_params { /* Object to run the command from */ struct ecore_vlan_mac_obj *vlan_mac_obj; /* General command flags: COMP_WAIT, etc. */ unsigned long ramrod_flags; /* Command specific configuration request */ struct ecore_vlan_mac_data user_req; }; struct ecore_vlan_mac_obj { struct ecore_raw_obj raw; /* Bookkeeping list: will prevent the addition of already existing * entries. */ ecore_list_t head; /* Implement a simple reader/writer lock on the head list. * all these fields should only be accessed under the exe_queue lock */ uint8_t head_reader; /* Num. of readers accessing head list */ bool head_exe_request; /* Pending execution request. */ unsigned long saved_ramrod_flags; /* Ramrods of pending execution */ /* Execution queue interface instance */ struct ecore_exe_queue_obj exe_queue; /* MACs credit pool */ struct ecore_credit_pool_obj *macs_pool; /* VLANs credit pool */ struct ecore_credit_pool_obj *vlans_pool; /* RAMROD command to be used */ int ramrod_cmd; /* copy first n elements onto preallocated buffer * * @param n number of elements to get * @param buf buffer preallocated by caller into which elements * will be copied. Note elements are 4-byte aligned * so buffer size must be able to accommodate the * aligned elements. * * @return number of copied bytes */ int (*get_n_elements)(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o, int n, uint8_t *base, uint8_t stride, uint8_t size); /** * Checks if ADD-ramrod with the given params may be performed. 
* * @return zero if the element may be added */ int (*check_add)(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o, union ecore_classification_ramrod_data *data); /** * Checks if DEL-ramrod with the given params may be performed. * * @return TRUE if the element may be deleted */ struct ecore_vlan_mac_registry_elem * (*check_del)(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o, union ecore_classification_ramrod_data *data); /** * Checks if DEL-ramrod with the given params may be performed. * * @return TRUE if the element may be deleted */ bool (*check_move)(struct bxe_softc *sc, struct ecore_vlan_mac_obj *src_o, struct ecore_vlan_mac_obj *dst_o, union ecore_classification_ramrod_data *data); /** * Update the relevant credit object(s) (consume/return * correspondingly). */ bool (*get_credit)(struct ecore_vlan_mac_obj *o); bool (*put_credit)(struct ecore_vlan_mac_obj *o); bool (*get_cam_offset)(struct ecore_vlan_mac_obj *o, int *offset); bool (*put_cam_offset)(struct ecore_vlan_mac_obj *o, int offset); /** * Configures one rule in the ramrod data buffer. */ void (*set_one_rule)(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o, struct ecore_exeq_elem *elem, int rule_idx, int cam_offset); /** * Delete all configured elements having the given * vlan_mac_flags specification. Assumes no pending for * execution commands. Will schedule all all currently * configured MACs/VLANs/VLAN-MACs matching the vlan_mac_flags * specification for deletion and will use the given * ramrod_flags for the last DEL operation. * * @param sc * @param o * @param ramrod_flags RAMROD_XX flags * * @return 0 if the last operation has completed successfully * and there are no more elements left, positive value * if there are pending for completion commands, * negative value in case of failure. */ int (*delete_all)(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o, unsigned long *vlan_mac_flags, unsigned long *ramrod_flags); /** * Reconfigures the next MAC/VLAN/VLAN-MAC element from the previously * configured elements list. * * @param sc * @param p Command parameters (RAMROD_COMP_WAIT bit in * ramrod_flags is only taken into an account) * @param ppos a pointer to the cookie that should be given back in the * next call to make function handle the next element. If * *ppos is set to NULL it will restart the iterator. * If returned *ppos == NULL this means that the last * element has been handled. * * @return int */ int (*restore)(struct bxe_softc *sc, struct ecore_vlan_mac_ramrod_params *p, struct ecore_vlan_mac_registry_elem **ppos); /** * Should be called on a completion arrival. * * @param sc * @param o * @param cqe Completion element we are handling * @param ramrod_flags if RAMROD_CONT is set the next bulk of * pending commands will be executed. * RAMROD_DRV_CLR_ONLY and RAMROD_RESTORE * may also be set if needed. * * @return 0 if there are neither pending nor waiting for * completion commands. Positive value if there are * pending for execution or for completion commands. * Negative value in case of an error (including an * error in the cqe). */ int (*complete)(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o, union event_ring_elem *cqe, unsigned long *ramrod_flags); /** * Wait for completion of all commands. Don't schedule new ones, * just wait. It assumes that the completion code will schedule * for new commands. 
*/ int (*wait)(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o); }; enum { ECORE_LLH_CAM_ISCSI_ETH_LINE = 0, ECORE_LLH_CAM_ETH_LINE, ECORE_LLH_CAM_MAX_PF_LINE = NIG_REG_LLH1_FUNC_MEM_SIZE / 2 }; void ecore_set_mac_in_nig(struct bxe_softc *sc, bool add, unsigned char *dev_addr, int index); /** RX_MODE verbs:DROP_ALL/ACCEPT_ALL/ACCEPT_ALL_MULTI/ACCEPT_ALL_VLAN/NORMAL */ /* RX_MODE ramrod special flags: set in rx_mode_flags field in * a ecore_rx_mode_ramrod_params. */ enum { ECORE_RX_MODE_FCOE_ETH, ECORE_RX_MODE_ISCSI_ETH, }; enum { ECORE_ACCEPT_UNICAST, ECORE_ACCEPT_MULTICAST, ECORE_ACCEPT_ALL_UNICAST, ECORE_ACCEPT_ALL_MULTICAST, ECORE_ACCEPT_BROADCAST, ECORE_ACCEPT_UNMATCHED, ECORE_ACCEPT_ANY_VLAN }; struct ecore_rx_mode_ramrod_params { struct ecore_rx_mode_obj *rx_mode_obj; unsigned long *pstate; int state; uint8_t cl_id; uint32_t cid; uint8_t func_id; unsigned long ramrod_flags; unsigned long rx_mode_flags; /* rdata is either a pointer to eth_filter_rules_ramrod_data(e2) or to * a tstorm_eth_mac_filter_config (e1x). */ void *rdata; ecore_dma_addr_t rdata_mapping; /* Rx mode settings */ unsigned long rx_accept_flags; /* internal switching settings */ unsigned long tx_accept_flags; }; struct ecore_rx_mode_obj { int (*config_rx_mode)(struct bxe_softc *sc, struct ecore_rx_mode_ramrod_params *p); int (*wait_comp)(struct bxe_softc *sc, struct ecore_rx_mode_ramrod_params *p); }; /********************** Set multicast group ***********************************/ struct ecore_mcast_list_elem { ecore_list_entry_t link; uint8_t *mac; }; union ecore_mcast_config_data { uint8_t *mac; uint8_t bin; /* used in a RESTORE flow */ }; struct ecore_mcast_ramrod_params { struct ecore_mcast_obj *mcast_obj; /* Relevant options are RAMROD_COMP_WAIT and RAMROD_DRV_CLR_ONLY */ unsigned long ramrod_flags; ecore_list_t mcast_list; /* list of struct ecore_mcast_list_elem */ /** TODO: * - rename it to macs_num. * - Add a new command type for handling pending commands * (remove "zero semantics"). * * Length of mcast_list. If zero and ADD_CONT command - post * pending commands. */ int mcast_list_len; }; enum ecore_mcast_cmd { ECORE_MCAST_CMD_ADD, ECORE_MCAST_CMD_CONT, ECORE_MCAST_CMD_DEL, ECORE_MCAST_CMD_RESTORE, }; struct ecore_mcast_obj { struct ecore_raw_obj raw; union { struct { #define ECORE_MCAST_BINS_NUM 256 #define ECORE_MCAST_VEC_SZ (ECORE_MCAST_BINS_NUM / 64) uint64_t vec[ECORE_MCAST_VEC_SZ]; /** Number of BINs to clear. Should be updated * immediately when a command arrives in order to * properly create DEL commands. */ int num_bins_set; } aprox_match; struct { ecore_list_t macs; int num_macs_set; } exact_match; } registry; /* Pending commands */ ecore_list_t pending_cmds_head; /* A state that is set in raw.pstate, when there are pending commands */ int sched_state; /* Maximal number of mcast MACs configured in one command */ int max_cmd_len; /* Total number of currently pending MACs to configure: both * in the pending commands list and in the current command. */ int total_pending_num; uint8_t engine_id; /** * @param cmd command to execute (ECORE_MCAST_CMD_X, see above) */ int (*config_mcast)(struct bxe_softc *sc, struct ecore_mcast_ramrod_params *p, enum ecore_mcast_cmd cmd); /** * Fills the ramrod data during the RESTORE flow. * * @param sc * @param o * @param start_idx Registry index to start from * @param rdata_idx Index in the ramrod data to start from * * @return -1 if we handled the whole registry or index of the last * handled registry element. 
*/ int (*hdl_restore)(struct bxe_softc *sc, struct ecore_mcast_obj *o, int start_bin, int *rdata_idx); int (*enqueue_cmd)(struct bxe_softc *sc, struct ecore_mcast_obj *o, struct ecore_mcast_ramrod_params *p, enum ecore_mcast_cmd cmd); void (*set_one_rule)(struct bxe_softc *sc, struct ecore_mcast_obj *o, int idx, union ecore_mcast_config_data *cfg_data, enum ecore_mcast_cmd cmd); /** Checks if there are more mcast MACs to be set or a previous * command is still pending. */ bool (*check_pending)(struct ecore_mcast_obj *o); /** * Set/Clear/Check SCHEDULED state of the object */ void (*set_sched)(struct ecore_mcast_obj *o); void (*clear_sched)(struct ecore_mcast_obj *o); bool (*check_sched)(struct ecore_mcast_obj *o); /* Wait until all pending commands complete */ int (*wait_comp)(struct bxe_softc *sc, struct ecore_mcast_obj *o); /** * Handle the internal object counters needed for proper * commands handling. Checks that the provided parameters are * feasible. */ int (*validate)(struct bxe_softc *sc, struct ecore_mcast_ramrod_params *p, enum ecore_mcast_cmd cmd); /** * Restore the values of internal counters in case of a failure. */ void (*revert)(struct bxe_softc *sc, struct ecore_mcast_ramrod_params *p, int old_num_bins); int (*get_registry_size)(struct ecore_mcast_obj *o); void (*set_registry_size)(struct ecore_mcast_obj *o, int n); }; /*************************** Credit handling **********************************/ struct ecore_credit_pool_obj { /* Current amount of credit in the pool */ ecore_atomic_t credit; /* Maximum allowed credit. put() will check against it. */ int pool_sz; /* Allocate a pool table statically. * * Currently the maximum allowed size is MAX_MAC_CREDIT_E2(272) * * The set bit in the table will mean that the entry is available. */ #define ECORE_POOL_VEC_SIZE (MAX_MAC_CREDIT_E2 / 64) uint64_t pool_mirror[ECORE_POOL_VEC_SIZE]; /* Base pool offset (initialized differently */ int base_pool_offset; /** * Get the next free pool entry. * * @return TRUE if there was a free entry in the pool */ bool (*get_entry)(struct ecore_credit_pool_obj *o, int *entry); /** * Return the entry back to the pool. * * @return TRUE if entry is legal and has been successfully * returned to the pool. */ bool (*put_entry)(struct ecore_credit_pool_obj *o, int entry); /** * Get the requested amount of credit from the pool. * * @param cnt Amount of requested credit * @return TRUE if the operation is successful */ bool (*get)(struct ecore_credit_pool_obj *o, int cnt); /** * Returns the credit to the pool. * * @param cnt Amount of credit to return * @return TRUE if the operation is successful */ bool (*put)(struct ecore_credit_pool_obj *o, int cnt); /** * Reads the current amount of credit. 
*/ int (*check)(struct ecore_credit_pool_obj *o); }; /*************************** RSS configuration ********************************/ enum { /* RSS_MODE bits are mutually exclusive */ ECORE_RSS_MODE_DISABLED, ECORE_RSS_MODE_REGULAR, ECORE_RSS_SET_SRCH, /* Setup searcher, E1x specific flag */ ECORE_RSS_IPV4, ECORE_RSS_IPV4_TCP, ECORE_RSS_IPV4_UDP, ECORE_RSS_IPV6, ECORE_RSS_IPV6_TCP, ECORE_RSS_IPV6_UDP, ECORE_RSS_IPV4_VXLAN, ECORE_RSS_IPV6_VXLAN, ECORE_RSS_TUNN_INNER_HDRS, }; struct ecore_config_rss_params { struct ecore_rss_config_obj *rss_obj; /* may have RAMROD_COMP_WAIT set only */ unsigned long ramrod_flags; /* ECORE_RSS_X bits */ unsigned long rss_flags; /* Number hash bits to take into an account */ uint8_t rss_result_mask; /* Indirection table */ uint8_t ind_table[T_ETH_INDIRECTION_TABLE_SIZE]; /* RSS hash values */ uint32_t rss_key[10]; /* valid only iff ECORE_RSS_UPDATE_TOE is set */ uint16_t toe_rss_bitmap; }; struct ecore_rss_config_obj { struct ecore_raw_obj raw; /* RSS engine to use */ uint8_t engine_id; /* Last configured indirection table */ uint8_t ind_table[T_ETH_INDIRECTION_TABLE_SIZE]; /* flags for enabling 4-tupple hash on UDP */ uint8_t udp_rss_v4; uint8_t udp_rss_v6; int (*config_rss)(struct bxe_softc *sc, struct ecore_config_rss_params *p); }; /*********************** Queue state update ***********************************/ /* UPDATE command options */ enum { ECORE_Q_UPDATE_IN_VLAN_REM, ECORE_Q_UPDATE_IN_VLAN_REM_CHNG, ECORE_Q_UPDATE_OUT_VLAN_REM, ECORE_Q_UPDATE_OUT_VLAN_REM_CHNG, ECORE_Q_UPDATE_ANTI_SPOOF, ECORE_Q_UPDATE_ANTI_SPOOF_CHNG, ECORE_Q_UPDATE_ACTIVATE, ECORE_Q_UPDATE_ACTIVATE_CHNG, ECORE_Q_UPDATE_DEF_VLAN_EN, ECORE_Q_UPDATE_DEF_VLAN_EN_CHNG, ECORE_Q_UPDATE_SILENT_VLAN_REM_CHNG, ECORE_Q_UPDATE_SILENT_VLAN_REM, ECORE_Q_UPDATE_TX_SWITCHING_CHNG, ECORE_Q_UPDATE_TX_SWITCHING, ECORE_Q_UPDATE_PTP_PKTS_CHNG, ECORE_Q_UPDATE_PTP_PKTS, }; /* Allowed Queue states */ enum ecore_q_state { ECORE_Q_STATE_RESET, ECORE_Q_STATE_INITIALIZED, ECORE_Q_STATE_ACTIVE, ECORE_Q_STATE_MULTI_COS, ECORE_Q_STATE_MCOS_TERMINATED, ECORE_Q_STATE_INACTIVE, ECORE_Q_STATE_STOPPED, ECORE_Q_STATE_TERMINATED, ECORE_Q_STATE_FLRED, ECORE_Q_STATE_MAX, }; /* Allowed Queue states */ enum ecore_q_logical_state { ECORE_Q_LOGICAL_STATE_ACTIVE, ECORE_Q_LOGICAL_STATE_STOPPED, }; /* Allowed commands */ enum ecore_queue_cmd { ECORE_Q_CMD_INIT, ECORE_Q_CMD_SETUP, ECORE_Q_CMD_SETUP_TX_ONLY, ECORE_Q_CMD_DEACTIVATE, ECORE_Q_CMD_ACTIVATE, ECORE_Q_CMD_UPDATE, ECORE_Q_CMD_UPDATE_TPA, ECORE_Q_CMD_HALT, ECORE_Q_CMD_CFC_DEL, ECORE_Q_CMD_TERMINATE, ECORE_Q_CMD_EMPTY, ECORE_Q_CMD_MAX, }; /* queue SETUP + INIT flags */ enum { ECORE_Q_FLG_TPA, ECORE_Q_FLG_TPA_IPV6, ECORE_Q_FLG_TPA_GRO, ECORE_Q_FLG_STATS, ECORE_Q_FLG_ZERO_STATS, ECORE_Q_FLG_ACTIVE, ECORE_Q_FLG_OV, ECORE_Q_FLG_VLAN, ECORE_Q_FLG_COS, ECORE_Q_FLG_HC, ECORE_Q_FLG_HC_EN, ECORE_Q_FLG_DHC, ECORE_Q_FLG_OOO, ECORE_Q_FLG_FCOE, ECORE_Q_FLG_LEADING_RSS, ECORE_Q_FLG_MCAST, ECORE_Q_FLG_DEF_VLAN, ECORE_Q_FLG_TX_SWITCH, ECORE_Q_FLG_TX_SEC, ECORE_Q_FLG_ANTI_SPOOF, ECORE_Q_FLG_SILENT_VLAN_REM, ECORE_Q_FLG_FORCE_DEFAULT_PRI, ECORE_Q_FLG_REFUSE_OUTBAND_VLAN, ECORE_Q_FLG_PCSUM_ON_PKT, ECORE_Q_FLG_TUN_INC_INNER_IP_ID }; /* Queue type options: queue type may be a combination of below. */ enum ecore_q_type { ECORE_Q_TYPE_FWD, /** TODO: Consider moving both these flags into the init() * ramrod params. 
*/ ECORE_Q_TYPE_HAS_RX, ECORE_Q_TYPE_HAS_TX, }; #define ECORE_PRIMARY_CID_INDEX 0 #define ECORE_MULTI_TX_COS_E1X 3 /* QM only */ #define ECORE_MULTI_TX_COS_E2_E3A0 2 #define ECORE_MULTI_TX_COS_E3B0 3 #define ECORE_MULTI_TX_COS 3 /* Maximum possible */ #define MAC_PAD (ECORE_ALIGN(ETH_ALEN, sizeof(uint32_t)) - ETH_ALEN) /* DMAE channel to be used by FW for timesync workaroun. A driver that sends * timesync-related ramrods must not use this DMAE command ID. */ #define FW_DMAE_CMD_ID 6 struct ecore_queue_init_params { struct { unsigned long flags; uint16_t hc_rate; uint8_t fw_sb_id; uint8_t sb_cq_index; } tx; struct { unsigned long flags; uint16_t hc_rate; uint8_t fw_sb_id; uint8_t sb_cq_index; } rx; /* CID context in the host memory */ struct eth_context *cxts[ECORE_MULTI_TX_COS]; /* maximum number of cos supported by hardware */ uint8_t max_cos; }; struct ecore_queue_terminate_params { /* index within the tx_only cids of this queue object */ uint8_t cid_index; }; struct ecore_queue_cfc_del_params { /* index within the tx_only cids of this queue object */ uint8_t cid_index; }; struct ecore_queue_update_params { unsigned long update_flags; /* ECORE_Q_UPDATE_XX bits */ uint16_t def_vlan; uint16_t silent_removal_value; uint16_t silent_removal_mask; /* index within the tx_only cids of this queue object */ uint8_t cid_index; }; struct ecore_queue_update_tpa_params { ecore_dma_addr_t sge_map; uint8_t update_ipv4; uint8_t update_ipv6; uint8_t max_tpa_queues; uint8_t max_sges_pkt; uint8_t complete_on_both_clients; uint8_t dont_verify_thr; uint8_t tpa_mode; uint8_t _pad; uint16_t sge_buff_sz; uint16_t max_agg_sz; uint16_t sge_pause_thr_low; uint16_t sge_pause_thr_high; }; struct rxq_pause_params { uint16_t bd_th_lo; uint16_t bd_th_hi; uint16_t rcq_th_lo; uint16_t rcq_th_hi; uint16_t sge_th_lo; /* valid iff ECORE_Q_FLG_TPA */ uint16_t sge_th_hi; /* valid iff ECORE_Q_FLG_TPA */ uint16_t pri_map; }; /* general */ struct ecore_general_setup_params { /* valid iff ECORE_Q_FLG_STATS */ uint8_t stat_id; uint8_t spcl_id; uint16_t mtu; uint8_t cos; uint8_t fp_hsi; }; struct ecore_rxq_setup_params { /* dma */ ecore_dma_addr_t dscr_map; ecore_dma_addr_t sge_map; ecore_dma_addr_t rcq_map; ecore_dma_addr_t rcq_np_map; uint16_t drop_flags; uint16_t buf_sz; uint8_t fw_sb_id; uint8_t cl_qzone_id; /* valid iff ECORE_Q_FLG_TPA */ uint16_t tpa_agg_sz; uint16_t sge_buf_sz; uint8_t max_sges_pkt; uint8_t max_tpa_queues; uint8_t rss_engine_id; /* valid iff ECORE_Q_FLG_MCAST */ uint8_t mcast_engine_id; uint8_t cache_line_log; uint8_t sb_cq_index; /* valid iff BXN2X_Q_FLG_SILENT_VLAN_REM */ uint16_t silent_removal_value; uint16_t silent_removal_mask; }; struct ecore_txq_setup_params { /* dma */ ecore_dma_addr_t dscr_map; uint8_t fw_sb_id; uint8_t sb_cq_index; uint8_t cos; /* valid iff ECORE_Q_FLG_COS */ uint16_t traffic_type; /* equals to the leading rss client id, used for TX classification*/ uint8_t tss_leading_cl_id; /* valid iff ECORE_Q_FLG_DEF_VLAN */ uint16_t default_vlan; }; struct ecore_queue_setup_params { struct ecore_general_setup_params gen_params; struct ecore_txq_setup_params txq_params; struct ecore_rxq_setup_params rxq_params; struct rxq_pause_params pause_params; unsigned long flags; }; struct ecore_queue_setup_tx_only_params { struct ecore_general_setup_params gen_params; struct ecore_txq_setup_params txq_params; unsigned long flags; /* index within the tx_only cids of this queue object */ uint8_t cid_index; }; struct ecore_queue_state_params { struct ecore_queue_sp_obj *q_obj; /* Current command */ enum 
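/*
 * Illustration only -- a hypothetical wrapper, not part of the driver: a
 * queue state transition is requested by filling ecore_queue_state_params
 * and passing it to ecore_queue_state_change() (declared further below);
 * setting RAMROD_COMP_WAIT in ramrod_flags makes the call wait for the
 * ramrod to complete.  How the q_obj pointer is obtained is up to the
 * caller.
 */
static inline int
example_empty_queue(struct bxe_softc *sc, struct ecore_queue_sp_obj *q_obj)
{
        struct ecore_queue_state_params q_params;

        memset(&q_params, 0, sizeof(q_params));
        q_params.q_obj = q_obj;
        q_params.cmd = ECORE_Q_CMD_EMPTY;
        bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);

        return (ecore_queue_state_change(sc, &q_params));
}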
ecore_queue_cmd cmd; /* may have RAMROD_COMP_WAIT set only */ unsigned long ramrod_flags; /* Params according to the current command */ union { struct ecore_queue_update_params update; struct ecore_queue_update_tpa_params update_tpa; struct ecore_queue_setup_params setup; struct ecore_queue_init_params init; struct ecore_queue_setup_tx_only_params tx_only; struct ecore_queue_terminate_params terminate; struct ecore_queue_cfc_del_params cfc_del; } params; }; struct ecore_viflist_params { uint8_t echo_res; uint8_t func_bit_map_res; }; struct ecore_queue_sp_obj { uint32_t cids[ECORE_MULTI_TX_COS]; uint8_t cl_id; uint8_t func_id; /* number of traffic classes supported by queue. * The primary connection of the queue supports the first traffic * class. Any further traffic class is supported by a tx-only * connection. * * Therefore max_cos is also a number of valid entries in the cids * array. */ uint8_t max_cos; uint8_t num_tx_only, next_tx_only; enum ecore_q_state state, next_state; /* bits from enum ecore_q_type */ unsigned long type; /* ECORE_Q_CMD_XX bits. This object implements "one * pending" paradigm but for debug and tracing purposes it's * more convenient to have different bits for different * commands. */ unsigned long pending; /* Buffer to use as a ramrod data and its mapping */ void *rdata; ecore_dma_addr_t rdata_mapping; /** * Performs one state change according to the given parameters. * * @return 0 in case of success and negative value otherwise. */ int (*send_cmd)(struct bxe_softc *sc, struct ecore_queue_state_params *params); /** * Sets the pending bit according to the requested transition. */ int (*set_pending)(struct ecore_queue_sp_obj *o, struct ecore_queue_state_params *params); /** * Checks that the requested state transition is legal. */ int (*check_transition)(struct bxe_softc *sc, struct ecore_queue_sp_obj *o, struct ecore_queue_state_params *params); /** * Completes the pending command. */ int (*complete_cmd)(struct bxe_softc *sc, struct ecore_queue_sp_obj *o, enum ecore_queue_cmd); int (*wait_comp)(struct bxe_softc *sc, struct ecore_queue_sp_obj *o, enum ecore_queue_cmd cmd); }; /********************** Function state update *********************************/ /* UPDATE command options */ enum { ECORE_F_UPDATE_TX_SWITCH_SUSPEND_CHNG, ECORE_F_UPDATE_TX_SWITCH_SUSPEND, ECORE_F_UPDATE_SD_VLAN_TAG_CHNG, ECORE_F_UPDATE_SD_VLAN_ETH_TYPE_CHNG, ECORE_F_UPDATE_VLAN_FORCE_PRIO_CHNG, ECORE_F_UPDATE_VLAN_FORCE_PRIO_FLAG, ECORE_F_UPDATE_TUNNEL_CFG_CHNG, ECORE_F_UPDATE_TUNNEL_INNER_CLSS_L2GRE, ECORE_F_UPDATE_TUNNEL_INNER_CLSS_VXLAN, ECORE_F_UPDATE_TUNNEL_INNER_CLSS_L2GENEVE, ECORE_F_UPDATE_TUNNEL_INNER_RSS, }; /* Allowed Function states */ enum ecore_func_state { ECORE_F_STATE_RESET, ECORE_F_STATE_INITIALIZED, ECORE_F_STATE_STARTED, ECORE_F_STATE_TX_STOPPED, ECORE_F_STATE_MAX, }; /* Allowed Function commands */ enum ecore_func_cmd { ECORE_F_CMD_HW_INIT, ECORE_F_CMD_START, ECORE_F_CMD_STOP, ECORE_F_CMD_HW_RESET, ECORE_F_CMD_AFEX_UPDATE, ECORE_F_CMD_AFEX_VIFLISTS, ECORE_F_CMD_TX_STOP, ECORE_F_CMD_TX_START, ECORE_F_CMD_SWITCH_UPDATE, ECORE_F_CMD_SET_TIMESYNC, ECORE_F_CMD_MAX, }; struct ecore_func_hw_init_params { /* A load phase returned by MCP. * * May be: * FW_MSG_CODE_DRV_LOAD_COMMON_CHIP * FW_MSG_CODE_DRV_LOAD_COMMON * FW_MSG_CODE_DRV_LOAD_PORT * FW_MSG_CODE_DRV_LOAD_FUNCTION */ uint32_t load_phase; }; struct ecore_func_hw_reset_params { /* A load phase returned by MCP. 
* * May be: * FW_MSG_CODE_DRV_LOAD_COMMON_CHIP * FW_MSG_CODE_DRV_LOAD_COMMON * FW_MSG_CODE_DRV_LOAD_PORT * FW_MSG_CODE_DRV_LOAD_FUNCTION */ uint32_t reset_phase; }; struct ecore_func_start_params { /* Multi Function mode: * - Single Function * - Switch Dependent * - Switch Independent */ uint16_t mf_mode; /* Switch Dependent mode outer VLAN tag */ uint16_t sd_vlan_tag; /* Function cos mode */ uint8_t network_cos_mode; /* UDP dest port for VXLAN */ uint16_t vxlan_dst_port; /* UDP dest port for Geneve */ uint16_t geneve_dst_port; /* Enable inner Rx classifications for L2GRE packets */ uint8_t inner_clss_l2gre; /* Enable inner Rx classifications for L2-Geneve packets */ uint8_t inner_clss_l2geneve; /* Enable inner Rx classification for vxlan packets */ uint8_t inner_clss_vxlan; /* Enable RSS according to inner header */ uint8_t inner_rss; /** Allows accepting of packets failing MF classification, possibly * only matching a given ethertype */ uint8_t class_fail; uint16_t class_fail_ethtype; /* Override priority of output packets */ uint8_t sd_vlan_force_pri; uint8_t sd_vlan_force_pri_val; /* Replace vlan's ethertype */ uint16_t sd_vlan_eth_type; /* Prevent inner vlans from being added by FW */ uint8_t no_added_tags; /* Inner-to-Outer vlan priority mapping */ uint8_t c2s_pri[MAX_VLAN_PRIORITIES]; uint8_t c2s_pri_default; uint8_t c2s_pri_valid; }; struct ecore_func_switch_update_params { unsigned long changes; /* ECORE_F_UPDATE_XX bits */ uint16_t vlan; uint16_t vlan_eth_type; uint8_t vlan_force_prio; uint16_t vxlan_dst_port; uint16_t geneve_dst_port; }; struct ecore_func_afex_update_params { uint16_t vif_id; uint16_t afex_default_vlan; uint8_t allowed_priorities; }; struct ecore_func_afex_viflists_params { uint16_t vif_list_index; uint8_t func_bit_map; uint8_t afex_vif_list_command; uint8_t func_to_clear; }; struct ecore_func_tx_start_params { struct priority_cos traffic_type_to_priority_cos[MAX_TRAFFIC_TYPES]; uint8_t dcb_enabled; uint8_t dcb_version; uint8_t dont_add_pri_0; uint8_t dcb_outer_pri[MAX_TRAFFIC_TYPES]; }; struct ecore_func_set_timesync_params { /* Reset, set or keep the current drift value */ uint8_t drift_adjust_cmd; /* Dec, inc or keep the current offset */ uint8_t offset_cmd; /* Drift value direction */ uint8_t add_sub_drift_adjust_value; /* Drift, period and offset values to be used according to the commands * above. */ uint8_t drift_adjust_value; uint32_t drift_adjust_period; uint64_t offset_delta; }; struct ecore_func_state_params { struct ecore_func_sp_obj *f_obj; /* Current command */ enum ecore_func_cmd cmd; /* may have RAMROD_COMP_WAIT set only */ unsigned long ramrod_flags; /* Params according to the current command */ union { struct ecore_func_hw_init_params hw_init; struct ecore_func_hw_reset_params hw_reset; struct ecore_func_start_params start; struct ecore_func_switch_update_params switch_update; struct ecore_func_afex_update_params afex_update; struct ecore_func_afex_viflists_params afex_viflists; struct ecore_func_tx_start_params tx_start; struct ecore_func_set_timesync_params set_timesync; } params; }; struct ecore_func_sp_drv_ops { /* Init tool + runtime initialization: * - Common Chip * - Common (per Path) * - Port * - Function phases */ int (*init_hw_cmn_chip)(struct bxe_softc *sc); int (*init_hw_cmn)(struct bxe_softc *sc); int (*init_hw_port)(struct bxe_softc *sc); int (*init_hw_func)(struct bxe_softc *sc); /* Reset Function HW: Common, Port, Function phases. 
*/ void (*reset_hw_cmn)(struct bxe_softc *sc); void (*reset_hw_port)(struct bxe_softc *sc); void (*reset_hw_func)(struct bxe_softc *sc); /* Init/Free GUNZIP resources */ int (*gunzip_init)(struct bxe_softc *sc); void (*gunzip_end)(struct bxe_softc *sc); /* Prepare/Release FW resources */ int (*init_fw)(struct bxe_softc *sc); void (*release_fw)(struct bxe_softc *sc); }; struct ecore_func_sp_obj { enum ecore_func_state state, next_state; /* ECORE_FUNC_CMD_XX bits. This object implements "one * pending" paradigm but for debug and tracing purposes it's * more convenient to have different bits for different * commands. */ unsigned long pending; /* Buffer to use as a ramrod data and its mapping */ void *rdata; ecore_dma_addr_t rdata_mapping; /* Buffer to use as a afex ramrod data and its mapping. * This can't be same rdata as above because afex ramrod requests * can arrive to the object in parallel to other ramrod requests. */ void *afex_rdata; ecore_dma_addr_t afex_rdata_mapping; /* this mutex validates that when pending flag is taken, the next * ramrod to be sent will be the one set the pending bit */ ECORE_MUTEX one_pending_mutex; /* Driver interface */ struct ecore_func_sp_drv_ops *drv; /** * Performs one state change according to the given parameters. * * @return 0 in case of success and negative value otherwise. */ int (*send_cmd)(struct bxe_softc *sc, struct ecore_func_state_params *params); /** * Checks that the requested state transition is legal. */ int (*check_transition)(struct bxe_softc *sc, struct ecore_func_sp_obj *o, struct ecore_func_state_params *params); /** * Completes the pending command. */ int (*complete_cmd)(struct bxe_softc *sc, struct ecore_func_sp_obj *o, enum ecore_func_cmd cmd); int (*wait_comp)(struct bxe_softc *sc, struct ecore_func_sp_obj *o, enum ecore_func_cmd cmd); }; /********************** Interfaces ********************************************/ /* Queueable objects set */ union ecore_qable_obj { struct ecore_vlan_mac_obj vlan_mac; }; /************** Function state update *********/ void ecore_init_func_obj(struct bxe_softc *sc, struct ecore_func_sp_obj *obj, void *rdata, ecore_dma_addr_t rdata_mapping, void *afex_rdata, ecore_dma_addr_t afex_rdata_mapping, struct ecore_func_sp_drv_ops *drv_iface); int ecore_func_state_change(struct bxe_softc *sc, struct ecore_func_state_params *params); enum ecore_func_state ecore_func_get_state(struct bxe_softc *sc, struct ecore_func_sp_obj *o); /******************* Queue State **************/ void ecore_init_queue_obj(struct bxe_softc *sc, struct ecore_queue_sp_obj *obj, uint8_t cl_id, uint32_t *cids, uint8_t cid_cnt, uint8_t func_id, void *rdata, ecore_dma_addr_t rdata_mapping, unsigned long type); int ecore_queue_state_change(struct bxe_softc *sc, struct ecore_queue_state_params *params); int ecore_get_q_logical_state(struct bxe_softc *sc, struct ecore_queue_sp_obj *obj); /********************* VLAN-MAC ****************/ void ecore_init_mac_obj(struct bxe_softc *sc, struct ecore_vlan_mac_obj *mac_obj, uint8_t cl_id, uint32_t cid, uint8_t func_id, void *rdata, ecore_dma_addr_t rdata_mapping, int state, unsigned long *pstate, ecore_obj_type type, struct ecore_credit_pool_obj *macs_pool); void ecore_init_vlan_obj(struct bxe_softc *sc, struct ecore_vlan_mac_obj *vlan_obj, uint8_t cl_id, uint32_t cid, uint8_t func_id, void *rdata, ecore_dma_addr_t rdata_mapping, int state, unsigned long *pstate, ecore_obj_type type, struct ecore_credit_pool_obj *vlans_pool); void ecore_init_vlan_mac_obj(struct bxe_softc *sc, struct 
ecore_vlan_mac_obj *vlan_mac_obj, uint8_t cl_id, uint32_t cid, uint8_t func_id, void *rdata, ecore_dma_addr_t rdata_mapping, int state, unsigned long *pstate, ecore_obj_type type, struct ecore_credit_pool_obj *macs_pool, struct ecore_credit_pool_obj *vlans_pool); void ecore_init_vxlan_fltr_obj(struct bxe_softc *sc, struct ecore_vlan_mac_obj *vlan_mac_obj, uint8_t cl_id, uint32_t cid, uint8_t func_id, void *rdata, ecore_dma_addr_t rdata_mapping, int state, unsigned long *pstate, ecore_obj_type type, struct ecore_credit_pool_obj *macs_pool, struct ecore_credit_pool_obj *vlans_pool); int ecore_vlan_mac_h_read_lock(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o); void ecore_vlan_mac_h_read_unlock(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o); int ecore_vlan_mac_h_write_lock(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o); void ecore_vlan_mac_h_write_unlock(struct bxe_softc *sc, struct ecore_vlan_mac_obj *o); int ecore_config_vlan_mac(struct bxe_softc *sc, struct ecore_vlan_mac_ramrod_params *p); int ecore_vlan_mac_move(struct bxe_softc *sc, struct ecore_vlan_mac_ramrod_params *p, struct ecore_vlan_mac_obj *dest_o); /********************* RX MODE ****************/ void ecore_init_rx_mode_obj(struct bxe_softc *sc, struct ecore_rx_mode_obj *o); /** * ecore_config_rx_mode - Send and RX_MODE ramrod according to the provided parameters. * * @p: Command parameters * * Return: 0 - if operation was successful and there is no pending completions, * positive number - if there are pending completions, * negative - if there were errors */ int ecore_config_rx_mode(struct bxe_softc *sc, struct ecore_rx_mode_ramrod_params *p); /****************** MULTICASTS ****************/ void ecore_init_mcast_obj(struct bxe_softc *sc, struct ecore_mcast_obj *mcast_obj, uint8_t mcast_cl_id, uint32_t mcast_cid, uint8_t func_id, uint8_t engine_id, void *rdata, ecore_dma_addr_t rdata_mapping, int state, unsigned long *pstate, ecore_obj_type type); /** * ecore_config_mcast - Configure multicast MACs list. * * @cmd: command to execute: BNX2X_MCAST_CMD_X * * May configure a new list * provided in p->mcast_list (ECORE_MCAST_CMD_ADD), clean up * (ECORE_MCAST_CMD_DEL) or restore (ECORE_MCAST_CMD_RESTORE) a current * configuration, continue to execute the pending commands * (ECORE_MCAST_CMD_CONT). * * If previous command is still pending or if number of MACs to * configure is more that maximum number of MACs in one command, * the current command will be enqueued to the tail of the * pending commands list. * * Return: 0 is operation was successful and there are no pending completions, * negative if there were errors, positive if there are pending * completions. 
*/ int ecore_config_mcast(struct bxe_softc *sc, struct ecore_mcast_ramrod_params *p, enum ecore_mcast_cmd cmd); /****************** CREDIT POOL ****************/ void ecore_init_mac_credit_pool(struct bxe_softc *sc, struct ecore_credit_pool_obj *p, uint8_t func_id, uint8_t func_num); void ecore_init_vlan_credit_pool(struct bxe_softc *sc, struct ecore_credit_pool_obj *p, uint8_t func_id, uint8_t func_num); void ecore_init_credit_pool(struct ecore_credit_pool_obj *p, int base, int credit); /****************** RSS CONFIGURATION ****************/ void ecore_init_rss_config_obj(struct bxe_softc *sc, struct ecore_rss_config_obj *rss_obj, uint8_t cl_id, uint32_t cid, uint8_t func_id, uint8_t engine_id, void *rdata, ecore_dma_addr_t rdata_mapping, int state, unsigned long *pstate, ecore_obj_type type); /** * ecore_config_rss - Updates RSS configuration according to provided parameters * * Return: 0 in case of success */ int ecore_config_rss(struct bxe_softc *sc, struct ecore_config_rss_params *p); /** * ecore_get_rss_ind_table - Return the current ind_table configuration. * * @ind_table: buffer to fill with the current indirection * table content. Should be at least * T_ETH_INDIRECTION_TABLE_SIZE bytes long. */ void ecore_get_rss_ind_table(struct ecore_rss_config_obj *rss_obj, uint8_t *ind_table); #define PF_MAC_CREDIT_E2(sc, func_num) \ ((MAX_MAC_CREDIT_E2 - GET_NUM_VFS_PER_PATH(sc) * VF_MAC_CREDIT_CNT) / \ func_num + GET_NUM_VFS_PER_PF(sc) * VF_MAC_CREDIT_CNT) #define PF_VLAN_CREDIT_E2(sc, func_num) \ ((MAX_MAC_CREDIT_E2 - GET_NUM_VFS_PER_PATH(sc) * VF_VLAN_CREDIT_CNT) / \ func_num + GET_NUM_VFS_PER_PF(sc) * VF_VLAN_CREDIT_CNT) #endif /* ECORE_SP_H */ diff --git a/sys/dev/cxgbe/osdep.h b/sys/dev/cxgbe/osdep.h index 39675339dd2c..b8692692fd43 100644 --- a/sys/dev/cxgbe/osdep.h +++ b/sys/dev/cxgbe/osdep.h @@ -1,160 +1,152 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2010 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #ifndef __CXGBE_OSDEP_H_ #define __CXGBE_OSDEP_H_ #include #include #include #include #include #include #include #include #define CH_ERR(adap, fmt, ...) log(LOG_ERR, "%s: " fmt, \ device_get_nameunit((adap)->dev), ##__VA_ARGS__) #define CH_WARN(adap, fmt, ...) 
log(LOG_WARNING, "%s: " fmt, \ device_get_nameunit((adap)->dev), ##__VA_ARGS__) #define CH_ALERT(adap, fmt, ...) log(LOG_ALERT, "%s: " fmt, \ device_get_nameunit((adap)->dev), ##__VA_ARGS__) #define CH_WARN_RATELIMIT(adap, fmt, ...) log(LOG_WARNING, "%s: " fmt, \ device_get_nameunit((adap)->dev), ##__VA_ARGS__) #ifndef LINUX_TYPES_DEFINED typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef uint64_t u64; typedef uint8_t __u8; typedef uint16_t __u16; typedef uint32_t __u32; typedef uint64_t __u64; typedef uint8_t __be8; typedef uint16_t __be16; typedef uint32_t __be32; typedef uint64_t __be64; #if BYTE_ORDER == BIG_ENDIAN #define __BIG_ENDIAN_BITFIELD #elif BYTE_ORDER == LITTLE_ENDIAN #define __LITTLE_ENDIAN_BITFIELD #else #error "Must set BYTE_ORDER" #endif #ifndef __bool_true_false_are_defined typedef boolean_t bool; #define false FALSE #define true TRUE #endif #define __force #define mdelay(x) DELAY((x) * 1000) #define udelay(x) DELAY(x) #define simple_strtoul strtoul #define DIV_ROUND_UP(x, y) howmany(x, y) #define ARRAY_SIZE(x) nitems(x) #define container_of(p, s, f) ((s *)(((uint8_t *)(p)) - offsetof(s, f))) #define swab16(x) bswap16(x) #define swab32(x) bswap32(x) #define swab64(x) bswap64(x) #define le16_to_cpu(x) le16toh(x) #define le32_to_cpu(x) le32toh(x) #define le64_to_cpu(x) le64toh(x) #define cpu_to_le16(x) htole16(x) #define cpu_to_le32(x) htole32(x) #define cpu_to_le64(x) htole64(x) #define be16_to_cpu(x) be16toh(x) #define be32_to_cpu(x) be32toh(x) #define be64_to_cpu(x) be64toh(x) #define cpu_to_be16(x) htobe16(x) #define cpu_to_be32(x) htobe32(x) #define cpu_to_be64(x) htobe64(x) #define DUPLEX_HALF 0 #define DUPLEX_FULL 1 #define AUTONEG_AUTO (-1) #define AUTONEG_DISABLE 0 #define AUTONEG_ENABLE 1 #define PCI_DEVICE_ID PCIR_DEVICE #define PCI_CAP_ID_VPD PCIY_VPD #define PCI_VPD_ADDR PCIR_VPD_ADDR #define PCI_VPD_ADDR_F 0x8000 #define PCI_VPD_DATA PCIR_VPD_DATA #define PCI_CAP_ID_EXP PCIY_EXPRESS #define PCI_EXP_DEVCTL PCIER_DEVICE_CTL #define PCI_EXP_DEVCTL_PAYLOAD PCIEM_CTL_MAX_PAYLOAD #define PCI_EXP_DEVCTL_READRQ PCIEM_CTL_MAX_READ_REQUEST #define PCI_EXP_LNKCTL PCIER_LINK_CTL #define PCI_EXP_LNKSTA PCIER_LINK_STA #define PCI_EXP_LNKSTA_CLS PCIEM_LINK_STA_SPEED #define PCI_EXP_LNKSTA_NLW PCIEM_LINK_STA_WIDTH #define PCI_EXP_DEVCTL2 PCIER_DEVICE_CTL2 -static inline int -ilog2(long x) -{ - KASSERT(x > 0 && powerof2(x), ("%s: invalid arg %ld", __func__, x)); - - return (flsl(x) - 1); -} - static inline char * strstrip(char *s) { char c, *r, *trim_at; while (isspace(*s)) s++; r = trim_at = s; while ((c = *s++) != 0) { if (!isspace(c)) trim_at = s; } *trim_at = 0; return (r); } #endif /* LINUX_TYPES_DEFINED */ #endif diff --git a/sys/dev/enetc/enetc_hw.h b/sys/dev/enetc/enetc_hw.h index 507c4657453d..323d5529f50a 100644 --- a/sys/dev/enetc/enetc_hw.h +++ b/sys/dev/enetc/enetc_hw.h @@ -1,755 +1,754 @@ /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ /* Copyright 2017-2019 NXP */ #ifndef _ENETC_HW_H_ #define _ENETC_HW_H_ #include #include #define BIT(x) (1UL << (x)) #define GENMASK(h, l) (((~0U) - (1U << (l)) + 1) & (~0U >> (32 - 1 - (h)))) -#define ilog2(x) (flsl(x) - 1) #define PCI_VENDOR_FREESCALE 0x1957 /* ENETC device IDs */ #define ENETC_DEV_ID_PF 0xe100 #define ENETC_DEV_ID_VF 0xef00 #define ENETC_DEV_ID_PTP 0xee02 /* ENETC register block BAR */ #define ENETC_BAR_REGS 0 /** SI regs, offset: 0h */ #define ENETC_SIMR 0 #define ENETC_SIMR_EN BIT(31) #define 
ENETC_SIMR_DRXG BIT(16) #define ENETC_SIMR_RSSE BIT(0) #define ENETC_SICTR0 0x18 #define ENETC_SICTR1 0x1c #define ENETC_SIPCAPR0 0x20 #define ENETC_SIPCAPR0_QBV BIT(4) #define ENETC_SIPCAPR0_PSFP BIT(9) #define ENETC_SIPCAPR0_RSS BIT(8) #define ENETC_SIPCAPR1 0x24 #define ENETC_SITGTGR 0x30 #define ENETC_SIRBGCR 0x38 /* cache attribute registers for transactions initiated by ENETC */ #define ENETC_SICAR0 0x40 #define ENETC_SICAR1 0x44 #define ENETC_SICAR2 0x48 /* rd snoop, no alloc * wr snoop, no alloc, partial cache line update for BDs and full cache line * update for data */ #define ENETC_SICAR_RD_COHERENT 0x2b2b0000 #define ENETC_SICAR_WR_COHERENT 0x00006727 #define ENETC_SICAR_MSI 0x00300030 /* rd/wr device, no snoop, no alloc */ #define ENETC_SIPMAR0 0x80 #define ENETC_SIPMAR1 0x84 /* VF-PF Message passing */ #define ENETC_DEFAULT_MSG_SIZE 1024 /* and max size */ #define ENETC_PSIMSGRR 0x204 #define ENETC_PSIMSGRR_MR_MASK GENMASK(2, 1) #define ENETC_PSIMSGRR_MR(n) BIT((n) + 1) /* n = VSI index */ #define ENETC_PSIVMSGRCVAR0(n) (0x210 + (n) * 0x8) /* n = VSI index */ #define ENETC_PSIVMSGRCVAR1(n) (0x214 + (n) * 0x8) #define ENETC_VSIMSGSR 0x204 /* RO */ #define ENETC_VSIMSGSR_MB BIT(0) #define ENETC_VSIMSGSR_MS BIT(1) #define ENETC_VSIMSGSNDAR0 0x210 #define ENETC_VSIMSGSNDAR1 0x214 #define ENETC_SIMSGSR_SET_MC(val) ((val) << 16) #define ENETC_SIMSGSR_GET_MC(val) ((val) >> 16) /* SI statistics */ #define ENETC_SIROCT 0x300 #define ENETC_SIRFRM 0x308 #define ENETC_SIRUCA 0x310 #define ENETC_SIRMCA 0x318 #define ENETC_SITOCT 0x320 #define ENETC_SITFRM 0x328 #define ENETC_SITUCA 0x330 #define ENETC_SITMCA 0x338 #define ENETC_RBDCR(n) (0x8180 + (n) * 0x200) /* Control BDR regs */ #define ENETC_SICBDRMR 0x800 #define ENETC_SICBDRMR_EN BIT(31) #define ENETC_SICBDRSR 0x804 /* RO */ #define ENETC_SICBDRBAR0 0x810 #define ENETC_SICBDRBAR1 0x814 #define ENETC_SICBDRPIR 0x818 #define ENETC_SICBDRCIR 0x81c #define ENETC_SICBDRLENR 0x820 #define ENETC_SICAPR0 0x900 #define ENETC_SICAPR1 0x904 #define ENETC_PSIIER 0xa00 #define ENETC_PSIIER_MR_MASK GENMASK(2, 1) #define ENETC_PSIIDR 0xa08 #define ENETC_SITXIDR 0xa18 #define ENETC_SIRXIDR 0xa28 #define ENETC_SIMSIVR 0xa30 #define ENETC_SIMSITRV(n) (0xB00 + (n) * 0x4) #define ENETC_SIMSIRRV(n) (0xB80 + (n) * 0x4) #define ENETC_SIUEFDCR 0xe28 #define ENETC_SIRFSCAPR 0x1200 #define ENETC_SIRFSCAPR_GET_NUM_RFS(val) ((val) & 0x7f) #define ENETC_SIRSSCAPR 0x1600 #define ENETC_SIRSSCAPR_GET_NUM_RSS(val) (BIT((val) & 0xf) * 32) /** SI BDR sub-blocks, n = 0..7 */ enum enetc_bdr_type {TX, RX}; #define ENETC_BDR_OFF(i) ((i) * 0x200) #define ENETC_BDR(t, i, r) (0x8000 + (t) * 0x100 + ENETC_BDR_OFF(i) + (r)) /* RX BDR reg offsets */ #define ENETC_RBMR 0 #define ENETC_RBMR_AL BIT(0) #define ENETC_RBMR_BDS BIT(2) #define ENETC_RBMR_VTE BIT(5) #define ENETC_RBMR_EN BIT(31) #define ENETC_RBSR 0x4 #define ENETC_RBBSR 0x8 #define ENETC_RBCIR 0xc #define ENETC_RBBAR0 0x10 #define ENETC_RBBAR1 0x14 #define ENETC_RBPIR 0x18 #define ENETC_RBLENR 0x20 #define ENETC_RBIER 0xa0 #define ENETC_RBIER_RXTIE BIT(0) #define ENETC_RBIDR 0xa4 #define ENETC_RBICR0 0xa8 #define ENETC_RBICR0_ICEN BIT(31) #define ENETC_RBICR0_ICPT_MASK 0x1ff #define ENETC_RBICR0_SET_ICPT(n) ((n) & ENETC_RBICR0_ICPT_MASK) #define ENETC_RBICR1 0xac /* TX BDR reg offsets */ #define ENETC_TBMR 0 #define ENETC_TBSR_BUSY BIT(0) #define ENETC_TBMR_VIH BIT(9) #define ENETC_TBMR_PRIO_MASK GENMASK(2, 0) #define ENETC_TBMR_SET_PRIO(val) ((val) & ENETC_TBMR_PRIO_MASK) #define ENETC_TBMR_EN BIT(31) #define 
ENETC_TBSR 0x4 #define ENETC_TBBAR0 0x10 #define ENETC_TBBAR1 0x14 #define ENETC_TBPIR 0x18 #define ENETC_TBCIR 0x1c #define ENETC_TBCIR_IDX_MASK 0xffff #define ENETC_TBLENR 0x20 #define ENETC_TBIER 0xa0 #define ENETC_TBIER_TXT BIT(0) #define ENETC_TBIER_TXF BIT(1) #define ENETC_TBIDR 0xa4 #define ENETC_TBICR0 0xa8 #define ENETC_TBICR0_ICEN BIT(31) #define ENETC_TBICR0_ICPT_MASK 0xf #define ENETC_TBICR0_SET_ICPT(n) ((ilog2(n) + 1) & ENETC_TBICR0_ICPT_MASK) #define ENETC_TBICR1 0xac #define ENETC_RTBLENR_LEN(n) ((n) & ~0x7) /* Port regs, offset: 1_0000h */ #define ENETC_PORT_BASE 0x10000 #define ENETC_PMR 0x0000 #define ENETC_PMR_SI0EN BIT(16) #define ENETC_PMR_EN GENMASK(18, 16) #define ENETC_PMR_PSPEED_MASK GENMASK(11, 8) #define ENETC_PMR_PSPEED_10M 0 #define ENETC_PMR_PSPEED_100M BIT(8) #define ENETC_PMR_PSPEED_1000M BIT(9) #define ENETC_PMR_PSPEED_2500M BIT(10) #define ENETC_PSR 0x0004 /* RO */ #define ENETC_PSIPMR 0x0018 #define ENETC_PSIPMR_SET_UP(n) BIT(n) /* n = SI index */ #define ENETC_PSIPMR_SET_MP(n) BIT((n) + 16) #define ENETC_PSIPVMR 0x001c #define ENETC_VLAN_PROMISC_MAP_ALL 0x7 #define ENETC_PSIPVMR_SET_VP(simap) ((simap) & 0x7) #define ENETC_PSIPVMR_SET_VUTA(simap) (((simap) & 0x7) << 16) #define ENETC_PSIPMAR0(n) (0x0100 + (n) * 0x8) /* n = SI index */ #define ENETC_PSIPMAR1(n) (0x0104 + (n) * 0x8) #define ENETC_PVCLCTR 0x0208 #define ENETC_PCVLANR1 0x0210 #define ENETC_PCVLANR2 0x0214 #define ENETC_VLAN_TYPE_C BIT(0) #define ENETC_VLAN_TYPE_S BIT(1) #define ENETC_PVCLCTR_OVTPIDL(bmp) ((bmp) & 0xff) /* VLAN_TYPE */ #define ENETC_PSIVLANR(n) (0x0240 + (n) * 4) /* n = SI index */ #define ENETC_PSIVLAN_EN BIT(31) #define ENETC_PSIVLAN_SET_QOS(val) ((uint32_t)(val) << 12) #define ENETC_PTXMBAR 0x0608 #define ENETC_PCAPR0 0x0900 #define ENETC_PCAPR0_RXBDR(val) ((val) >> 24) #define ENETC_PCAPR0_TXBDR(val) (((val) >> 16) & 0xff) #define ENETC_PCAPR1 0x0904 #define ENETC_PSICFGR0(n) (0x0940 + (n) * 0xc) /* n = SI index */ #define ENETC_PSICFGR0_SET_TXBDR(val) ((val) & 0xff) #define ENETC_PSICFGR0_SET_RXBDR(val) (((val) & 0xff) << 16) #define ENETC_PSICFGR0_VTE BIT(12) #define ENETC_PSICFGR0_SIVIE BIT(14) #define ENETC_PSICFGR0_ASE BIT(15) #define ENETC_PSICFGR0_SIVC(bmp) (((bmp) & 0xff) << 24) /* VLAN_TYPE */ #define ENETC_PTCCBSR0(n) (0x1110 + (n) * 8) /* n = 0 to 7*/ #define ENETC_CBSE BIT(31) #define ENETC_CBS_BW_MASK GENMASK(6, 0) #define ENETC_PTCCBSR1(n) (0x1114 + (n) * 8) /* n = 0 to 7*/ #define ENETC_RSSHASH_KEY_SIZE 40 #define ENETC_PRSSCAPR 0x1404 #define ENETC_PRSSCAPR_GET_NUM_RSS(val) (BIT((val) & 0xf) * 32) #define ENETC_PRSSK(n) (0x1410 + (n) * 4) /* n = [0..9] */ #define ENETC_PSIVLANFMR 0x1700 #define ENETC_PSIVLANFMR_VS BIT(0) #define ENETC_PRFSMR 0x1800 #define ENETC_PRFSMR_RFSE BIT(31) #define ENETC_PRFSCAPR 0x1804 #define ENETC_PRFSCAPR_GET_NUM_RFS(val) ((((val) & 0xf) + 1) * 16) #define ENETC_PSIRFSCFGR(n) (0x1814 + (n) * 4) /* n = SI index */ #define ENETC_PFPMR 0x1900 #define ENETC_PFPMR_PMACE BIT(1) #define ENETC_PFPMR_MWLM BIT(0) #define ENETC_EMDIO_BASE 0x1c00 #define ENETC_PSIUMHFR0(n, err) (((err) ? 0x1d08 : 0x1d00) + (n) * 0x10) #define ENETC_PSIUMHFR1(n) (0x1d04 + (n) * 0x10) #define ENETC_PSIMMHFR0(n, err) (((err) ? 
0x1d00 : 0x1d08) + (n) * 0x10) #define ENETC_PSIMMHFR1(n) (0x1d0c + (n) * 0x10) #define ENETC_PSIVHFR0(n) (0x1e00 + (n) * 8) /* n = SI index */ #define ENETC_PSIVHFR1(n) (0x1e04 + (n) * 8) /* n = SI index */ #define ENETC_MMCSR 0x1f00 #define ENETC_MMCSR_ME BIT(16) #define ENETC_PTCMSDUR(n) (0x2020 + (n) * 4) /* n = TC index [0..7] */ #define ENETC_PAR_PORT_CFG 0x3050 #define ENETC_PAR_PORT_L4CD BIT(0) #define ENETC_PAR_PORT_L3CD BIT(1) #define ENETC_PM0_CMD_CFG 0x8008 #define ENETC_PM1_CMD_CFG 0x9008 #define ENETC_PM0_TX_EN BIT(0) #define ENETC_PM0_RX_EN BIT(1) #define ENETC_PM0_PROMISC BIT(4) #define ENETC_PM0_CMD_XGLP BIT(10) #define ENETC_PM0_CMD_TXP BIT(11) #define ENETC_PM0_CMD_PHY_TX_EN BIT(15) #define ENETC_PM0_CMD_SFD BIT(21) #define ENETC_PM0_MAXFRM 0x8014 #define ENETC_SET_TX_MTU(val) ((val) << 16) #define ENETC_SET_MAXFRM(val) ((val) & 0xffff) #define ENETC_PM0_RX_FIFO 0x801c #define ENETC_PM0_RX_FIFO_VAL 1 #define ENETC_PM_IMDIO_BASE 0x8030 #define ENETC_PM0_IF_MODE 0x8300 #define ENETC_PM0_IFM_RG BIT(2) #define ENETC_PM0_IFM_RLP (BIT(5) | BIT(11)) #define ENETC_PM0_IFM_EN_AUTO BIT(15) #define ENETC_PM0_IFM_SSP_MASK GENMASK(14, 13) #define ENETC_PM0_IFM_SSP_1000 (2 << 13) #define ENETC_PM0_IFM_SSP_100 (0 << 13) #define ENETC_PM0_IFM_SSP_10 (1 << 13) #define ENETC_PM0_IFM_FULL_DPX BIT(12) #define ENETC_PM0_IFM_IFMODE_MASK GENMASK(1, 0) #define ENETC_PM0_IFM_IFMODE_XGMII 0 #define ENETC_PM0_IFM_IFMODE_GMII 2 #define ENETC_PSIDCAPR 0x1b08 #define ENETC_PSIDCAPR_MSK GENMASK(15, 0) #define ENETC_PSFCAPR 0x1b18 #define ENETC_PSFCAPR_MSK GENMASK(15, 0) #define ENETC_PSGCAPR 0x1b28 #define ENETC_PSGCAPR_GCL_MSK GENMASK(18, 16) #define ENETC_PSGCAPR_SGIT_MSK GENMASK(15, 0) #define ENETC_PFMCAPR 0x1b38 #define ENETC_PFMCAPR_MSK GENMASK(15, 0) /* MAC counters */ #define ENETC_PM0_REOCT 0x8100 #define ENETC_PM0_RALN 0x8110 #define ENETC_PM0_RXPF 0x8118 #define ENETC_PM0_RFRM 0x8120 #define ENETC_PM0_RFCS 0x8128 #define ENETC_PM0_RVLAN 0x8130 #define ENETC_PM0_RERR 0x8138 #define ENETC_PM0_RUCA 0x8140 #define ENETC_PM0_RMCA 0x8148 #define ENETC_PM0_RBCA 0x8150 #define ENETC_PM0_RDRP 0x8158 #define ENETC_PM0_RPKT 0x8160 #define ENETC_PM0_RUND 0x8168 #define ENETC_PM0_R64 0x8170 #define ENETC_PM0_R127 0x8178 #define ENETC_PM0_R255 0x8180 #define ENETC_PM0_R511 0x8188 #define ENETC_PM0_R1023 0x8190 #define ENETC_PM0_R1522 0x8198 #define ENETC_PM0_R1523X 0x81A0 #define ENETC_PM0_ROVR 0x81A8 #define ENETC_PM0_RJBR 0x81B0 #define ENETC_PM0_RFRG 0x81B8 #define ENETC_PM0_RCNP 0x81C0 #define ENETC_PM0_RDRNTP 0x81C8 #define ENETC_PM0_TEOCT 0x8200 #define ENETC_PM0_TOCT 0x8208 #define ENETC_PM0_TCRSE 0x8210 #define ENETC_PM0_TXPF 0x8218 #define ENETC_PM0_TFRM 0x8220 #define ENETC_PM0_TFCS 0x8228 #define ENETC_PM0_TVLAN 0x8230 #define ENETC_PM0_TERR 0x8238 #define ENETC_PM0_TUCA 0x8240 #define ENETC_PM0_TMCA 0x8248 #define ENETC_PM0_TBCA 0x8250 #define ENETC_PM0_TPKT 0x8260 #define ENETC_PM0_TUND 0x8268 #define ENETC_PM0_T64 0x8270 #define ENETC_PM0_T127 0x8278 #define ENETC_PM0_T255 0x8280 #define ENETC_PM0_T511 0x8288 #define ENETC_PM0_T1023 0x8290 #define ENETC_PM0_T1522 0x8298 #define ENETC_PM0_T1523X 0x82A0 #define ENETC_PM0_TCNP 0x82C0 #define ENETC_PM0_TDFR 0x82D0 #define ENETC_PM0_TMCOL 0x82D8 #define ENETC_PM0_TSCOL 0x82E0 #define ENETC_PM0_TLCOL 0x82E8 #define ENETC_PM0_TECOL 0x82F0 /* Port counters */ #define ENETC_PICDR(n) (0x0700 + (n) * 8) /* n = [0..3] */ #define ENETC_PBFDSIR 0x0810 #define ENETC_PFDMSAPR 0x0814 #define ENETC_UFDMF 0x1680 #define ENETC_MFDMF 0x1684 #define 
ENETC_PUFDVFR 0x1780 #define ENETC_PMFDVFR 0x1784 #define ENETC_PBFDVFR 0x1788 /** Global regs, offset: 2_0000h */ #define ENETC_GLOBAL_BASE 0x20000 #define ENETC_G_EIPBRR0 0x0bf8 #define ENETC_G_EIPBRR1 0x0bfc #define ENETC_G_EPFBLPR(n) (0xd00 + 4 * (n)) #define ENETC_G_EPFBLPR1_XGMII 0x80000000 /* Buffer Descriptors (BD) */ union enetc_tx_bd { struct { uint64_t addr; uint16_t buf_len; uint16_t frm_len; union { struct { uint8_t reserved[3]; uint8_t flags; }; /* default layout */ uint32_t txstart; uint32_t lstatus; }; }; struct { uint32_t tstamp; uint16_t tpid; uint16_t vid; uint8_t reserved[6]; uint8_t e_flags; uint8_t flags; } ext; /* Tx BD extension */ struct { uint32_t tstamp; uint8_t reserved[10]; uint8_t status; uint8_t flags; } wb; /* writeback descriptor */ }; enum enetc_txbd_flags { ENETC_TXBD_FLAGS_RES0 = BIT(0), /* reserved */ ENETC_TXBD_FLAGS_TSE = BIT(1), ENETC_TXBD_FLAGS_W = BIT(2), ENETC_TXBD_FLAGS_RES3 = BIT(3), /* reserved */ ENETC_TXBD_FLAGS_TXSTART = BIT(4), ENETC_TXBD_FLAGS_FI = BIT(5), ENETC_TXBD_FLAGS_EX = BIT(6), ENETC_TXBD_FLAGS_F = BIT(7) }; #define ENETC_TXBD_TXSTART_MASK GENMASK(24, 0) #define ENETC_TXBD_FLAGS_OFFSET 24 static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd) { memset(txbd, 0, sizeof(*txbd)); } /* Extension flags */ #define ENETC_TXBD_E_FLAGS_VLAN_INS BIT(0) #define ENETC_TXBD_E_FLAGS_TWO_STEP_PTP BIT(2) union enetc_rx_bd { struct { uint64_t addr; uint8_t reserved[8]; } w; struct { uint16_t inet_csum; uint16_t parse_summary; uint32_t rss_hash; uint16_t buf_len; uint16_t vlan_opt; union { struct { uint16_t flags; uint16_t error; }; uint32_t lstatus; }; } r; struct { uint32_t tstamp; uint8_t reserved[12]; } ext; }; #define ENETC_RXBD_PARSER_ERROR BIT(15) #define ENETC_RXBD_LSTATUS_R BIT(30) #define ENETC_RXBD_LSTATUS_F BIT(31) #define ENETC_RXBD_ERR_MASK 0xff #define ENETC_RXBD_LSTATUS(flags) ((flags) << 16) #define ENETC_RXBD_FLAG_RSSV BIT(8) #define ENETC_RXBD_FLAG_VLAN BIT(9) #define ENETC_RXBD_FLAG_TSTMP BIT(10) #define ENETC_RXBD_FLAG_TPID GENMASK(1, 0) #define ENETC_MAC_ADDR_FILT_CNT 8 /* # of supported entries per port */ #define EMETC_MAC_ADDR_FILT_RES 3 /* # of reserved entries at the beginning */ #define ENETC_MAX_NUM_VFS 2 #define ENETC_CBD_FLAGS_SF BIT(7) /* short format */ #define ENETC_CBD_STATUS_MASK 0xf struct enetc_cmd_rfse { uint8_t smac_h[6]; uint8_t smac_m[6]; uint8_t dmac_h[6]; uint8_t dmac_m[6]; uint32_t sip_h[4]; /* Big-endian */ uint32_t sip_m[4]; /* Big-endian */ uint32_t dip_h[4]; /* Big-endian */ uint32_t dip_m[4]; /* Big-endian */ uint16_t ethtype_h; uint16_t ethtype_m; uint16_t ethtype4_h; uint16_t ethtype4_m; uint16_t sport_h; uint16_t sport_m; uint16_t dport_h; uint16_t dport_m; uint16_t vlan_h; uint16_t vlan_m; uint8_t proto_h; uint8_t proto_m; uint16_t flags; uint16_t result; uint16_t mode; }; #define ENETC_RFSE_EN BIT(15) #define ENETC_RFSE_MODE_BD 2 #define ENETC_SI_INT_IDX 0 /* base index for Rx/Tx interrupts */ #define ENETC_BDR_INT_BASE_IDX 1 /* Messaging */ /* Command completion status */ enum enetc_msg_cmd_status { ENETC_MSG_CMD_STATUS_OK, ENETC_MSG_CMD_STATUS_FAIL }; /* VSI-PSI command message types */ enum enetc_msg_cmd_type { ENETC_MSG_CMD_MNG_MAC = 1, /* manage MAC address */ ENETC_MSG_CMD_MNG_RX_MAC_FILTER,/* manage RX MAC table */ ENETC_MSG_CMD_MNG_RX_VLAN_FILTER /* manage RX VLAN table */ }; /* VSI-PSI command action types */ enum enetc_msg_cmd_action_type { ENETC_MSG_CMD_MNG_ADD = 1, ENETC_MSG_CMD_MNG_REMOVE }; /* PSI-VSI command header format */ struct enetc_msg_cmd_header { uint16_t type; /* 
command class type */ uint16_t id; /* denotes the specific required action */ }; enum bdcr_cmd_class { BDCR_CMD_UNSPEC = 0, BDCR_CMD_MAC_FILTER, BDCR_CMD_VLAN_FILTER, BDCR_CMD_RSS, BDCR_CMD_RFS, BDCR_CMD_PORT_GCL, BDCR_CMD_RECV_CLASSIFIER, BDCR_CMD_STREAM_IDENTIFY, BDCR_CMD_STREAM_FILTER, BDCR_CMD_STREAM_GCL, BDCR_CMD_FLOW_METER, __BDCR_CMD_MAX_LEN, BDCR_CMD_MAX_LEN = __BDCR_CMD_MAX_LEN - 1, }; enum bdcr_cmd_rss { BDCR_CMD_RSS_WRITE = 1, BDCR_CMD_RSS_READ = 2, }; /* class 5, command 0 */ struct tgs_gcl_conf { uint8_t atc; /* init gate value */ uint8_t res[7]; struct { uint8_t res1[4]; uint16_t acl_len; uint8_t res2[2]; }; }; /* gate control list entry */ struct gce { uint32_t period; uint8_t gate; uint8_t res[3]; }; /* tgs_gcl_conf address point to this data space */ struct tgs_gcl_data { uint32_t btl; uint32_t bth; uint32_t ct; uint32_t cte; struct gce entry[]; }; /* class 7, command 0, Stream Identity Entry Configuration */ struct streamid_conf { uint32_t stream_handle; /* init gate value */ uint32_t iports; uint8_t id_type; uint8_t oui[3]; uint8_t res[3]; uint8_t en; }; #define ENETC_CBDR_SID_VID_MASK 0xfff #define ENETC_CBDR_SID_VIDM BIT(12) #define ENETC_CBDR_SID_TG_MASK 0xc000 /* streamid_conf address point to this data space */ struct streamid_data { union { uint8_t dmac[6]; uint8_t smac[6]; }; uint16_t vid_vidm_tg; }; #define ENETC_CBDR_SFI_PRI_MASK 0x7 #define ENETC_CBDR_SFI_PRIM BIT(3) #define ENETC_CBDR_SFI_BLOV BIT(4) #define ENETC_CBDR_SFI_BLEN BIT(5) #define ENETC_CBDR_SFI_MSDUEN BIT(6) #define ENETC_CBDR_SFI_FMITEN BIT(7) #define ENETC_CBDR_SFI_ENABLE BIT(7) /* class 8, command 0, Stream Filter Instance, Short Format */ struct sfi_conf { uint32_t stream_handle; uint8_t multi; uint8_t res[2]; uint8_t sthm; /* Max Service Data Unit or Flow Meter Instance Table index. * Depending on the value of FLT this represents either Max * Service Data Unit (max frame size) allowed by the filter * entry or is an index into the Flow Meter Instance table * index identifying the policer which will be used to police * it. 
*/ uint16_t fm_inst_table_index; uint16_t msdu; uint16_t sg_inst_table_index; uint8_t res1[2]; uint32_t input_ports; uint8_t res2[3]; uint8_t en; }; /* class 8, command 2 stream Filter Instance status query short format * command no need structure define * Stream Filter Instance Query Statistics Response data */ struct sfi_counter_data { uint32_t matchl; uint32_t matchh; uint32_t msdu_dropl; uint32_t msdu_droph; uint32_t stream_gate_dropl; uint32_t stream_gate_droph; uint32_t flow_meter_dropl; uint32_t flow_meter_droph; }; #define ENETC_CBDR_SGI_OIPV_MASK 0x7 #define ENETC_CBDR_SGI_OIPV_EN BIT(3) #define ENETC_CBDR_SGI_CGTST BIT(6) #define ENETC_CBDR_SGI_OGTST BIT(7) #define ENETC_CBDR_SGI_CFG_CHG BIT(1) #define ENETC_CBDR_SGI_CFG_PND BIT(2) #define ENETC_CBDR_SGI_OEX BIT(4) #define ENETC_CBDR_SGI_OEXEN BIT(5) #define ENETC_CBDR_SGI_IRX BIT(6) #define ENETC_CBDR_SGI_IRXEN BIT(7) #define ENETC_CBDR_SGI_ACLLEN_MASK 0x3 #define ENETC_CBDR_SGI_OCLLEN_MASK 0xc #define ENETC_CBDR_SGI_EN BIT(7) /* class 9, command 0, Stream Gate Instance Table, Short Format * class 9, command 2, Stream Gate Instance Table entry query write back * Short Format */ struct sgi_table { uint8_t res[8]; uint8_t oipv; uint8_t res0[2]; uint8_t ocgtst; uint8_t res1[7]; uint8_t gset; uint8_t oacl_len; uint8_t res2[2]; uint8_t en; }; #define ENETC_CBDR_SGI_AIPV_MASK 0x7 #define ENETC_CBDR_SGI_AIPV_EN BIT(3) #define ENETC_CBDR_SGI_AGTST BIT(7) /* class 9, command 1, Stream Gate Control List, Long Format */ struct sgcl_conf { uint8_t aipv; uint8_t res[2]; uint8_t agtst; uint8_t res1[4]; union { struct { uint8_t res2[4]; uint8_t acl_len; uint8_t res3[3]; }; uint8_t cct[8]; /* Config change time */ }; }; #define ENETC_CBDR_SGL_IOMEN BIT(0) #define ENETC_CBDR_SGL_IPVEN BIT(3) #define ENETC_CBDR_SGL_GTST BIT(4) #define ENETC_CBDR_SGL_IPV_MASK 0xe /* Stream Gate Control List Entry */ struct sgce { uint32_t interval; uint8_t msdu[3]; uint8_t multi; }; /* stream control list class 9 , cmd 1 data buffer */ struct sgcl_data { uint32_t btl; uint32_t bth; uint32_t ct; uint32_t cte; struct sgce sgcl[0]; }; #define ENETC_CBDR_FMI_MR BIT(0) #define ENETC_CBDR_FMI_MREN BIT(1) #define ENETC_CBDR_FMI_DOY BIT(2) #define ENETC_CBDR_FMI_CM BIT(3) #define ENETC_CBDR_FMI_CF BIT(4) #define ENETC_CBDR_FMI_NDOR BIT(5) #define ENETC_CBDR_FMI_OALEN BIT(6) #define ENETC_CBDR_FMI_IRFPP_MASK GENMASK(4, 0) /* class 10: command 0/1, Flow Meter Instance Set, short Format */ struct fmi_conf { uint32_t cir; uint32_t cbs; uint32_t eir; uint32_t ebs; uint8_t conf; uint8_t res1; uint8_t ir_fpp; uint8_t res2[4]; uint8_t en; }; struct enetc_cbd { union{ struct sfi_conf sfi_conf; struct sgi_table sgi_table; struct fmi_conf fmi_conf; struct { uint32_t addr[2]; union { uint32_t opt[4]; struct tgs_gcl_conf gcl_conf; struct streamid_conf sid_set; struct sgcl_conf sgcl_conf; }; }; /* Long format */ uint32_t data[6]; }; uint16_t index; uint16_t length; uint8_t cmd; uint8_t cls; uint8_t _res; uint8_t status_flags; }; #define ENETC_CLK 400000000ULL /* port time gating control register */ #define ENETC_QBV_PTGCR_OFFSET 0x11a00 #define ENETC_QBV_TGE BIT(31) #define ENETC_QBV_TGPE BIT(30) /* Port time gating capability register */ #define ENETC_QBV_PTGCAPR_OFFSET 0x11a08 #define ENETC_QBV_MAX_GCL_LEN_MASK GENMASK(15, 0) /* Port time specific departure */ #define ENETC_PTCTSDR(n) (0x1210 + 4 * (n)) #define ENETC_TSDE BIT(31) /* PSFP setting */ #define ENETC_PPSFPMR 0x11b00 #define ENETC_PPSFPMR_PSFPEN BIT(0) #define ENETC_PPSFPMR_VS BIT(1) #define ENETC_PPSFPMR_PVC BIT(2) 
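/*
 * Illustration only, not part of this patch: a standalone userland sketch
 * of the BIT()/GENMASK() helpers this header builds its register field
 * definitions on, checked against a few of the masks defined above.  The
 * SKETCH_* names and the main() harness are assumptions made for the
 * example; the driver itself uses the macros defined in this file.
 */
#include <assert.h>

#define SKETCH_BIT(x)		(1UL << (x))
#define SKETCH_GENMASK(h, l) \
	(((~0U) - (1U << (l)) + 1) & (~0U >> (32 - 1 - (h))))

int
main(void)
{
	assert(SKETCH_BIT(31) == 0x80000000UL);		/* ENETC_SIMR_EN */
	assert(SKETCH_GENMASK(2, 1) == 0x6);		/* ENETC_PSIMSGRR_MR_MASK */
	assert(SKETCH_GENMASK(18, 16) == 0x70000);	/* ENETC_PMR_EN */
	assert(SKETCH_GENMASK(11, 8) == 0xf00);		/* ENETC_PMR_PSPEED_MASK */
	return (0);
}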
#define ENETC_PPSFPMR_PVZC BIT(3) #endif diff --git a/sys/dev/mana/gdma_util.h b/sys/dev/mana/gdma_util.h index 822c831b9d70..37c2653d5ec9 100644 --- a/sys/dev/mana/gdma_util.h +++ b/sys/dev/mana/gdma_util.h @@ -1,204 +1,195 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #ifndef _GDMA_UTIL_H_ #define _GDMA_UTIL_H_ #include #include /* Log Levels */ #define MANA_ALERT (1 << 0) /* Alerts are providing more error info. */ #define MANA_WARNING (1 << 1) /* Driver output is more error sensitive. */ #define MANA_INFO (1 << 2) /* Provides additional driver info. */ #define MANA_DBG (1 << 3) /* Driver output for debugging. */ extern int mana_log_level; #define mana_trace_raw(ctx, level, fmt, args...) \ do { \ ((void)(ctx)); \ if (((level) & mana_log_level) != (level)) \ break; \ printf(fmt, ##args); \ } while (0) #define mana_trace(ctx, level, fmt, args...) \ mana_trace_raw(ctx, level, "%s() [TID:%d]: " \ fmt, __func__, curthread->td_tid, ##args) #define mana_dbg(ctx, format, arg...) \ mana_trace(ctx, MANA_DBG, format, ##arg) #define mana_info(ctx, format, arg...) \ mana_trace(ctx, MANA_INFO, format, ##arg) #define mana_warn(ctx, format, arg...) \ mana_trace(ctx, MANA_WARNING, format, ##arg) #define mana_err(ctx, format, arg...) \ mana_trace(ctx, MANA_ALERT, format, ##arg) #define unlikely(x) __predict_false(!!(x)) #define likely(x) __predict_true(!!(x)) #define BITS_PER_LONG (sizeof(long) * NBBY) #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) #define BITMAP_LAST_WORD_MASK(n) (~0UL >> (BITS_PER_LONG - (n))) #define BITS_TO_LONGS(n) howmany((n), BITS_PER_LONG) #define BIT_MASK(nr) (1UL << ((nr) & (BITS_PER_LONG - 1))) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #undef ALIGN #define ALIGN(x, y) roundup2((x), (y)) #define IS_ALIGNED(x, a) (((x) & ((__typeof(x))(a) - 1)) == 0) #define BIT(n) (1ULL << (n)) #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) #define offset_in_page(x) ((x) & PAGE_MASK) #define min_t(type, _x, _y) \ ((type)(_x) < (type)(_y) ? 
(type)(_x) : (type)(_y)) #define test_bit(i, a) \ ((((volatile const unsigned long *)(a))[BIT_WORD(i)]) & BIT_MASK(i)) typedef volatile uint32_t atomic_t; #define atomic_add_return(v, p) (atomic_fetchadd_int(p, v) + (v)) #define atomic_sub_return(v, p) (atomic_fetchadd_int(p, -(v)) - (v)) #define atomic_inc_return(p) atomic_add_return(1, p) #define atomic_dec_return(p) atomic_sub_return(1, p) #define atomic_read(p) atomic_add_return(0, p) #define usleep_range(_1, _2) \ pause_sbt("gdma-usleep-range", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE) static inline void gdma_msleep(unsigned int ms) { if (ms == 0) ms = 1; pause_sbt("gdma-msleep", mstosbt(ms), 0, C_HARDCLOCK); } static inline void bitmap_set(unsigned long *map, unsigned int start, int nr) { const unsigned int size = start + nr; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); map += BIT_WORD(start); while (nr - bits_to_set >= 0) { *map |= mask_to_set; nr -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = ~0UL; map++; } if (nr) { mask_to_set &= BITMAP_LAST_WORD_MASK(size); *map |= mask_to_set; } } static inline void bitmap_clear(unsigned long *map, unsigned int start, int nr) { const unsigned int size = start + nr; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); map += BIT_WORD(start); while (nr - bits_to_clear >= 0) { *map &= ~mask_to_clear; nr -= bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = ~0UL; map++; } if (nr) { mask_to_clear &= BITMAP_LAST_WORD_MASK(size); *map &= ~mask_to_clear; } } static inline unsigned long find_first_zero_bit(const unsigned long *p, unsigned long max) { unsigned long i, n; for (i = 0; i < max / BITS_PER_LONG + 1; i++) { n = ~p[i]; if (n != 0) return (i * BITS_PER_LONG + ffsl(n) - 1); } return (max); } -static inline unsigned long -ilog2(unsigned long x) -{ - unsigned long log = x; - while (x >>= 1) - log++; - return (log); -} - static inline unsigned long roundup_pow_of_two(unsigned long x) { return (1UL << flsl(x - 1)); } static inline int is_power_of_2(unsigned long n) { return (n == roundup_pow_of_two(n)); } struct completion { unsigned int done; struct mtx lock; }; void init_completion(struct completion *c); void free_completion(struct completion *c); void complete(struct completion *c); void wait_for_completion(struct completion *c); int wait_for_completion_timeout(struct completion *c, int timeout); #endif /* _GDMA_UTIL_H_ */ diff --git a/sys/dev/qat/qat/qat_ocf.c b/sys/dev/qat/qat/qat_ocf.c index 8958c7b82e49..b25135b6a678 100644 --- a/sys/dev/qat/qat/qat_ocf.c +++ b/sys/dev/qat/qat/qat_ocf.c @@ -1,1302 +1,1302 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright(c) 2007-2022 Intel Corporation */ /* System headers */ #include #include #include #include #include #include #include #include #include /* Cryptodev headers */ #include #include "cryptodev_if.h" /* QAT specific headers */ #include "cpa.h" #include "cpa_cy_im.h" #include "cpa_cy_sym_dp.h" #include "adf_accel_devices.h" #include "adf_common_drv.h" #include "lac_sym_hash_defs.h" #include "lac_sym_qat_hash_defs_lookup.h" /* To get only IRQ instances */ #include "icp_accel_devices.h" #include "icp_adf_accel_mgr.h" #include "lac_sal_types.h" /* To disable AEAD HW MAC verification */ #include "icp_sal_user.h" /* QAT OCF specific headers */ #include "qat_ocf_mem_pool.h" #include "qat_ocf_utils.h" #define QAT_OCF_MAX_INSTANCES (256) #define QAT_OCF_SESSION_WAIT_TIMEOUT_MS (1000) 
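/*
 * Illustration only, not part of this patch: a small userland exercise of
 * the find_first_zero_bit() helper defined in gdma_util.h above, to make
 * its contract explicit -- it scans whole longs and returns the bit index
 * of the first clear bit, or max when no clear bit is found in the words
 * it examined.  The *_sketch copy and the main() harness are assumptions
 * made for this example; the driver uses the inline function above.
 */
#include <assert.h>
#include <strings.h>			/* ffsl() */

#define SKETCH_BITS_PER_LONG	(sizeof(long) * 8)

static unsigned long
find_first_zero_bit_sketch(const unsigned long *p, unsigned long max)
{
	unsigned long i, n;

	for (i = 0; i < max / SKETCH_BITS_PER_LONG + 1; i++) {
		n = ~p[i];
		if (n != 0)
			return (i * SKETCH_BITS_PER_LONG + ffsl(n) - 1);
	}
	return (max);
}

int
main(void)
{
	unsigned long map[2] = { 0x7UL, 0x0UL };

	/* bits 0..2 set, so the first free index is 3 */
	assert(find_first_zero_bit_sketch(map, 100) == 3);

	/* first word exhausted, the search continues into the second word */
	map[0] = ~0UL;
	assert(find_first_zero_bit_sketch(map, 100) == SKETCH_BITS_PER_LONG);
	return (0);
}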
MALLOC_DEFINE(M_QAT_OCF, "qat_ocf", "qat_ocf(4) memory allocations"); /* QAT OCF internal structures */ struct qat_ocf_softc { device_t sc_dev; struct sysctl_oid *rc; uint32_t enabled; int32_t cryptodev_id; struct qat_ocf_instance cyInstHandles[QAT_OCF_MAX_INSTANCES]; int32_t numCyInstances; }; /* Function definitions */ static void qat_ocf_freesession(device_t dev, crypto_session_t cses); static int qat_ocf_probesession(device_t dev, const struct crypto_session_params *csp); static int qat_ocf_newsession(device_t dev, crypto_session_t cses, const struct crypto_session_params *csp); static int qat_ocf_attach(device_t dev); static int qat_ocf_detach(device_t dev); static void symDpCallback(CpaCySymDpOpData *pOpData, CpaStatus result, CpaBoolean verifyResult) { struct qat_ocf_cookie *qat_cookie; struct cryptop *crp; struct qat_ocf_dsession *qat_dsession = NULL; struct qat_ocf_session *qat_session = NULL; struct qat_ocf_instance *qat_instance = NULL; CpaStatus status; int rc = 0; qat_cookie = (struct qat_ocf_cookie *)pOpData->pCallbackTag; if (!qat_cookie) return; crp = qat_cookie->crp_op; qat_dsession = crypto_get_driver_session(crp->crp_session); qat_instance = qat_dsession->qatInstance; status = qat_ocf_cookie_dma_post_sync(crp, pOpData); if (CPA_STATUS_SUCCESS != status) { rc = EIO; goto exit; } status = qat_ocf_cookie_dma_unload(crp, pOpData); if (CPA_STATUS_SUCCESS != status) { rc = EIO; goto exit; } /* Verify result */ if (CPA_STATUS_SUCCESS != result) { rc = EBADMSG; goto exit; } /* Verify digest by FW (GCM and CCM only) */ if (CPA_TRUE != verifyResult) { rc = EBADMSG; goto exit; } if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) qat_session = &qat_dsession->encSession; else qat_session = &qat_dsession->decSession; /* Copy back digest result if it's stored in separated buffer */ if (pOpData->digestResult && qat_session->authLen > 0) { if ((crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) != 0) { char icv[QAT_OCF_MAX_DIGEST] = { 0 }; crypto_copydata(crp, crp->crp_digest_start, qat_session->authLen, icv); if (timingsafe_bcmp(icv, qat_cookie->qat_ocf_digest, qat_session->authLen) != 0) { rc = EBADMSG; goto exit; } } else { crypto_copyback(crp, crp->crp_digest_start, qat_session->authLen, qat_cookie->qat_ocf_digest); } } exit: qat_ocf_cookie_free(qat_instance, qat_cookie); crp->crp_etype = rc; crypto_done(crp); return; } static inline CpaPhysicalAddr qatVirtToPhys(void *virtAddr) { return (CpaPhysicalAddr)vtophys(virtAddr); } static int qat_ocf_probesession(device_t dev, const struct crypto_session_params *csp) { if ((csp->csp_flags & ~(CSP_F_SEPARATE_OUTPUT | CSP_F_SEPARATE_AAD)) != 0) { return EINVAL; } switch (csp->csp_mode) { case CSP_MODE_CIPHER: switch (csp->csp_cipher_alg) { case CRYPTO_AES_CBC: case CRYPTO_AES_ICM: if (csp->csp_ivlen != AES_BLOCK_LEN) return EINVAL; break; case CRYPTO_AES_XTS: if (csp->csp_ivlen != AES_XTS_IV_LEN) return EINVAL; break; default: return EINVAL; } break; case CSP_MODE_DIGEST: switch (csp->csp_auth_alg) { case CRYPTO_SHA1: case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512: case CRYPTO_SHA2_512_HMAC: break; case CRYPTO_AES_NIST_GMAC: if (csp->csp_ivlen != AES_GCM_IV_LEN) return EINVAL; break; default: return EINVAL; } break; case CSP_MODE_AEAD: switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: if (csp->csp_ivlen != AES_GCM_IV_LEN) return EINVAL; break; default: return EINVAL; } break; case CSP_MODE_ETA: switch (csp->csp_auth_alg) { case CRYPTO_SHA1_HMAC: case 
CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: switch (csp->csp_cipher_alg) { case CRYPTO_AES_CBC: case CRYPTO_AES_ICM: if (csp->csp_ivlen != AES_BLOCK_LEN) return EINVAL; break; case CRYPTO_AES_XTS: if (csp->csp_ivlen != AES_XTS_IV_LEN) return EINVAL; break; default: return EINVAL; } break; default: return EINVAL; } break; default: return EINVAL; } return CRYPTODEV_PROBE_HARDWARE; } static CpaStatus qat_ocf_session_init(device_t dev, struct cryptop *crp, struct qat_ocf_instance *qat_instance, struct qat_ocf_session *qat_ssession) { CpaStatus status = CPA_STATUS_SUCCESS; /* Crytpodev structures */ crypto_session_t cses; const struct crypto_session_params *csp; /* DP API Session configuration */ CpaCySymSessionSetupData sessionSetupData = { 0 }; CpaCySymSessionCtx sessionCtx = NULL; Cpa32U sessionCtxSize = 0; cses = crp->crp_session; if (NULL == cses) { device_printf(dev, "no crypto session in cryptodev request\n"); return CPA_STATUS_FAIL; } csp = crypto_get_params(cses); if (NULL == csp) { device_printf(dev, "no session in cryptodev session\n"); return CPA_STATUS_FAIL; } /* Common fields */ sessionSetupData.sessionPriority = CPA_CY_PRIORITY_HIGH; /* Cipher key */ if (crp->crp_cipher_key) sessionSetupData.cipherSetupData.pCipherKey = crp->crp_cipher_key; else sessionSetupData.cipherSetupData.pCipherKey = csp->csp_cipher_key; sessionSetupData.cipherSetupData.cipherKeyLenInBytes = csp->csp_cipher_klen; /* Auth key */ if (crp->crp_auth_key) sessionSetupData.hashSetupData.authModeSetupData.authKey = crp->crp_auth_key; else sessionSetupData.hashSetupData.authModeSetupData.authKey = csp->csp_auth_key; sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes = csp->csp_auth_klen; qat_ssession->aadLen = crp->crp_aad_length; if (CPA_TRUE == is_sep_aad_supported(csp)) sessionSetupData.hashSetupData.authModeSetupData.aadLenInBytes = crp->crp_aad_length; else sessionSetupData.hashSetupData.authModeSetupData.aadLenInBytes = 0; /* Just setup algorithm - regardless of mode */ if (csp->csp_cipher_alg) { sessionSetupData.symOperation = CPA_CY_SYM_OP_CIPHER; switch (csp->csp_cipher_alg) { case CRYPTO_AES_CBC: sessionSetupData.cipherSetupData.cipherAlgorithm = CPA_CY_SYM_CIPHER_AES_CBC; break; case CRYPTO_AES_ICM: sessionSetupData.cipherSetupData.cipherAlgorithm = CPA_CY_SYM_CIPHER_AES_CTR; break; case CRYPTO_AES_XTS: sessionSetupData.cipherSetupData.cipherAlgorithm = CPA_CY_SYM_CIPHER_AES_XTS; break; case CRYPTO_AES_NIST_GCM_16: sessionSetupData.cipherSetupData.cipherAlgorithm = CPA_CY_SYM_CIPHER_AES_GCM; sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_GCM; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; break; default: device_printf(dev, "cipher_alg: %d not supported\n", csp->csp_cipher_alg); status = CPA_STATUS_UNSUPPORTED; goto fail; } } if (csp->csp_auth_alg) { switch (csp->csp_auth_alg) { case CRYPTO_SHA1_HMAC: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA1; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; break; case CRYPTO_SHA1: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA1; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN; break; case CRYPTO_SHA2_256_HMAC: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA256; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; break; case CRYPTO_SHA2_256: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA256; sessionSetupData.hashSetupData.hashMode = 
CPA_CY_SYM_HASH_MODE_PLAIN; break; case CRYPTO_SHA2_224_HMAC: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA224; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; break; case CRYPTO_SHA2_224: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA224; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN; break; case CRYPTO_SHA2_384_HMAC: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA384; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; break; case CRYPTO_SHA2_384: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA384; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN; break; case CRYPTO_SHA2_512_HMAC: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA512; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; break; case CRYPTO_SHA2_512: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA512; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN; break; case CRYPTO_AES_NIST_GMAC: sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_GMAC; break; default: status = CPA_STATUS_UNSUPPORTED; goto fail; } } /* csp->csp_auth_alg */ /* Setting digest-length if no cipher-only mode is set */ if (csp->csp_mode != CSP_MODE_CIPHER) { lac_sym_qat_hash_defs_t *pHashDefsInfo = NULL; if (csp->csp_auth_mlen) { sessionSetupData.hashSetupData.digestResultLenInBytes = csp->csp_auth_mlen; qat_ssession->authLen = csp->csp_auth_mlen; } else { LacSymQat_HashDefsLookupGet( qat_instance->cyInstHandle, sessionSetupData.hashSetupData.hashAlgorithm, &pHashDefsInfo); if (NULL == pHashDefsInfo) { device_printf( dev, "unable to find corresponding hash data\n"); status = CPA_STATUS_UNSUPPORTED; goto fail; } sessionSetupData.hashSetupData.digestResultLenInBytes = pHashDefsInfo->algInfo->digestLength; qat_ssession->authLen = pHashDefsInfo->algInfo->digestLength; } sessionSetupData.verifyDigest = CPA_FALSE; } switch (csp->csp_mode) { case CSP_MODE_AEAD: case CSP_MODE_ETA: sessionSetupData.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING; /* Place the digest result in a buffer unrelated to srcBuffer */ sessionSetupData.digestIsAppended = CPA_FALSE; /* Due to FW limitation to verify only appended MACs */ sessionSetupData.verifyDigest = CPA_FALSE; if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { sessionSetupData.cipherSetupData.cipherDirection = CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT; sessionSetupData.algChainOrder = CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH; } else { sessionSetupData.cipherSetupData.cipherDirection = CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT; sessionSetupData.algChainOrder = CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER; } break; case CSP_MODE_CIPHER: if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { sessionSetupData.cipherSetupData.cipherDirection = CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT; } else { sessionSetupData.cipherSetupData.cipherDirection = CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT; } sessionSetupData.symOperation = CPA_CY_SYM_OP_CIPHER; break; case CSP_MODE_DIGEST: sessionSetupData.symOperation = CPA_CY_SYM_OP_HASH; if (csp->csp_auth_alg == CRYPTO_AES_NIST_GMAC) { sessionSetupData.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING; /* GMAC is always encrypt */ sessionSetupData.cipherSetupData.cipherDirection = CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT; sessionSetupData.algChainOrder = CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH; sessionSetupData.cipherSetupData.cipherAlgorithm = CPA_CY_SYM_CIPHER_AES_GCM; 
sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_GMAC; sessionSetupData.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; /* Same key for cipher and auth */ sessionSetupData.cipherSetupData.pCipherKey = csp->csp_auth_key; sessionSetupData.cipherSetupData.cipherKeyLenInBytes = csp->csp_auth_klen; /* Generated GMAC stored in separated buffer */ sessionSetupData.digestIsAppended = CPA_FALSE; /* Digest verification not allowed in GMAC case */ sessionSetupData.verifyDigest = CPA_FALSE; /* No AAD allowed */ sessionSetupData.hashSetupData.authModeSetupData .aadLenInBytes = 0; } else { sessionSetupData.cipherSetupData.cipherDirection = CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT; sessionSetupData.symOperation = CPA_CY_SYM_OP_HASH; sessionSetupData.digestIsAppended = CPA_FALSE; } break; default: device_printf(dev, "%s: unhandled crypto algorithm %d, %d\n", __func__, csp->csp_cipher_alg, csp->csp_auth_alg); status = CPA_STATUS_FAIL; goto fail; } /* Extracting session size */ status = cpaCySymSessionCtxGetSize(qat_instance->cyInstHandle, &sessionSetupData, &sessionCtxSize); if (CPA_STATUS_SUCCESS != status) { device_printf(dev, "unable to get session size\n"); goto fail; } /* Allocating contiguous memory for session */ sessionCtx = contigmalloc(sessionCtxSize, M_QAT_OCF, M_NOWAIT, 0, ~1UL, - 1 << (bsrl(sessionCtxSize - 1) + 1), + 1 << (ilog2(sessionCtxSize - 1) + 1), 0); if (NULL == sessionCtx) { device_printf(dev, "unable to allocate memory for session\n"); status = CPA_STATUS_RESOURCE; goto fail; } status = cpaCySymDpInitSession(qat_instance->cyInstHandle, &sessionSetupData, sessionCtx); if (CPA_STATUS_SUCCESS != status) { device_printf(dev, "session initialization failed\n"); goto fail; } /* NOTE: lets keep double session (both directions) approach to overcome * lack of direction update in FBSD QAT. */ qat_ssession->sessionCtx = sessionCtx; qat_ssession->sessionCtxSize = sessionCtxSize; return CPA_STATUS_SUCCESS; fail: /* Release resources if any */ if (sessionCtx) contigfree(sessionCtx, sessionCtxSize, M_QAT_OCF); return status; } static int qat_ocf_newsession(device_t dev, crypto_session_t cses, const struct crypto_session_params *csp) { /* Cryptodev QAT structures */ struct qat_ocf_softc *qat_softc; struct qat_ocf_dsession *qat_dsession; struct qat_ocf_instance *qat_instance; u_int cpu_id = PCPU_GET(cpuid); /* Create cryptodev session */ qat_softc = device_get_softc(dev); if (qat_softc->numCyInstances > 0) { qat_instance = &qat_softc ->cyInstHandles[cpu_id % qat_softc->numCyInstances]; qat_dsession = crypto_get_driver_session(cses); if (NULL == qat_dsession) { device_printf(dev, "Unable to create new session\n"); return (EINVAL); } /* Add only instance at this point remaining operations moved to * lazy session init */ qat_dsession->qatInstance = qat_instance; } else { return ENXIO; } return 0; } static CpaStatus qat_ocf_remove_session(device_t dev, CpaInstanceHandle cyInstHandle, struct qat_ocf_session *qat_session) { CpaStatus status = CPA_STATUS_SUCCESS; if (NULL == qat_session->sessionCtx) return CPA_STATUS_SUCCESS; /* User callback is executed right before decrementing pending * callback atomic counter. To avoid removing session rejection * we have to wait a very short while for counter update * after call back execution. 
*/ status = qat_ocf_wait_for_session(qat_session->sessionCtx, QAT_OCF_SESSION_WAIT_TIMEOUT_MS); if (CPA_STATUS_SUCCESS != status) { device_printf(dev, "waiting for session un-busy failed\n"); return CPA_STATUS_FAIL; } status = cpaCySymDpRemoveSession(cyInstHandle, qat_session->sessionCtx); if (CPA_STATUS_SUCCESS != status) { device_printf(dev, "error while removing session\n"); return CPA_STATUS_FAIL; } explicit_bzero(qat_session->sessionCtx, qat_session->sessionCtxSize); contigfree(qat_session->sessionCtx, qat_session->sessionCtxSize, M_QAT_OCF); qat_session->sessionCtx = NULL; qat_session->sessionCtxSize = 0; return CPA_STATUS_SUCCESS; } static void qat_ocf_freesession(device_t dev, crypto_session_t cses) { CpaStatus status = CPA_STATUS_SUCCESS; struct qat_ocf_dsession *qat_dsession = NULL; struct qat_ocf_instance *qat_instance = NULL; qat_dsession = crypto_get_driver_session(cses); qat_instance = qat_dsession->qatInstance; mtx_lock(&qat_instance->cyInstMtx); status = qat_ocf_remove_session(dev, qat_dsession->qatInstance->cyInstHandle, &qat_dsession->encSession); if (CPA_STATUS_SUCCESS != status) device_printf(dev, "unable to remove encrypt session\n"); status = qat_ocf_remove_session(dev, qat_dsession->qatInstance->cyInstHandle, &qat_dsession->decSession); if (CPA_STATUS_SUCCESS != status) device_printf(dev, "unable to remove decrypt session\n"); mtx_unlock(&qat_instance->cyInstMtx); } /* QAT GCM/CCM FW API are only algorithms which support separated AAD. */ static CpaStatus qat_ocf_load_aad_gcm(struct cryptop *crp, struct qat_ocf_cookie *qat_cookie) { CpaCySymDpOpData *pOpData; pOpData = &qat_cookie->pOpdata; if (NULL != crp->crp_aad) memcpy(qat_cookie->qat_ocf_gcm_aad, crp->crp_aad, crp->crp_aad_length); else crypto_copydata(crp, crp->crp_aad_start, crp->crp_aad_length, qat_cookie->qat_ocf_gcm_aad); pOpData->pAdditionalAuthData = qat_cookie->qat_ocf_gcm_aad; pOpData->additionalAuthData = qat_cookie->qat_ocf_gcm_aad_paddr; return CPA_STATUS_SUCCESS; } static CpaStatus qat_ocf_load_aad(struct cryptop *crp, struct qat_ocf_cookie *qat_cookie) { CpaStatus status = CPA_STATUS_SUCCESS; const struct crypto_session_params *csp; CpaCySymDpOpData *pOpData; struct qat_ocf_load_cb_arg args; pOpData = &qat_cookie->pOpdata; pOpData->pAdditionalAuthData = NULL; pOpData->additionalAuthData = 0UL; if (crp->crp_aad_length == 0) return CPA_STATUS_SUCCESS; if (crp->crp_aad_length > ICP_QAT_FW_CCM_GCM_AAD_SZ_MAX) return CPA_STATUS_FAIL; csp = crypto_get_params(crp->crp_session); /* Handle GCM/CCM case */ if (CPA_TRUE == is_sep_aad_supported(csp)) return qat_ocf_load_aad_gcm(crp, qat_cookie); if (NULL == crp->crp_aad) { /* AAD already embedded in source buffer */ pOpData->messageLenToCipherInBytes = crp->crp_payload_length; pOpData->cryptoStartSrcOffsetInBytes = crp->crp_payload_start; pOpData->messageLenToHashInBytes = crp->crp_aad_length + crp->crp_payload_length; pOpData->hashStartSrcOffsetInBytes = crp->crp_aad_start; return CPA_STATUS_SUCCESS; } /* Separated AAD not supported by QAT - lets place the content * of ADD buffer at the very beginning of source SGL */ args.crp_op = crp; args.qat_cookie = qat_cookie; args.pOpData = pOpData; args.error = 0; status = bus_dmamap_load(qat_cookie->gcm_aad_dma_mem.dma_tag, qat_cookie->gcm_aad_dma_mem.dma_map, crp->crp_aad, crp->crp_aad_length, qat_ocf_crypto_load_aadbuf_cb, &args, BUS_DMA_NOWAIT); qat_cookie->is_sep_aad_used = CPA_TRUE; /* Right after this step we have AAD placed in the first flat buffer * in source SGL */ pOpData->messageLenToCipherInBytes = 
crp->crp_payload_length; pOpData->cryptoStartSrcOffsetInBytes = crp->crp_aad_length + crp->crp_aad_start + crp->crp_payload_start; pOpData->messageLenToHashInBytes = crp->crp_aad_length + crp->crp_payload_length; pOpData->hashStartSrcOffsetInBytes = crp->crp_aad_start; return status; } static CpaStatus qat_ocf_load(struct cryptop *crp, struct qat_ocf_cookie *qat_cookie) { CpaStatus status = CPA_STATUS_SUCCESS; CpaCySymDpOpData *pOpData; struct qat_ocf_load_cb_arg args; /* cryptodev internals */ const struct crypto_session_params *csp; pOpData = &qat_cookie->pOpdata; csp = crypto_get_params(crp->crp_session); /* Load IV buffer if present */ if (csp->csp_ivlen > 0) { memset(qat_cookie->qat_ocf_iv_buf, 0, sizeof(qat_cookie->qat_ocf_iv_buf)); crypto_read_iv(crp, qat_cookie->qat_ocf_iv_buf); pOpData->iv = qat_cookie->qat_ocf_iv_buf_paddr; pOpData->pIv = qat_cookie->qat_ocf_iv_buf; pOpData->ivLenInBytes = csp->csp_ivlen; } /* GCM/CCM - load AAD to separated buffer * AES+SHA - load AAD to first flat in SGL */ status = qat_ocf_load_aad(crp, qat_cookie); if (CPA_STATUS_SUCCESS != status) goto fail; /* Load source buffer */ args.crp_op = crp; args.qat_cookie = qat_cookie; args.pOpData = pOpData; args.error = 0; status = bus_dmamap_load_crp_buffer(qat_cookie->src_dma_mem.dma_tag, qat_cookie->src_dma_mem.dma_map, &crp->crp_buf, qat_ocf_crypto_load_buf_cb, &args, BUS_DMA_NOWAIT); if (CPA_STATUS_SUCCESS != status) goto fail; pOpData->srcBuffer = qat_cookie->src_buffer_list_paddr; pOpData->srcBufferLen = CPA_DP_BUFLIST; /* Load destination buffer */ if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) { status = bus_dmamap_load_crp_buffer(qat_cookie->dst_dma_mem.dma_tag, qat_cookie->dst_dma_mem.dma_map, &crp->crp_obuf, qat_ocf_crypto_load_obuf_cb, &args, BUS_DMA_NOWAIT); if (CPA_STATUS_SUCCESS != status) goto fail; pOpData->dstBuffer = qat_cookie->dst_buffer_list_paddr; pOpData->dstBufferLen = CPA_DP_BUFLIST; } else { pOpData->dstBuffer = pOpData->srcBuffer; pOpData->dstBufferLen = pOpData->srcBufferLen; } if (CPA_TRUE == is_use_sep_digest(csp)) pOpData->digestResult = qat_cookie->qat_ocf_digest_paddr; else pOpData->digestResult = 0UL; /* GMAC - aka zero length buffer */ if (CPA_TRUE == is_gmac_exception(csp)) pOpData->messageLenToCipherInBytes = 0; fail: return status; } static int qat_ocf_check_input(device_t dev, struct cryptop *crp) { const struct crypto_session_params *csp; csp = crypto_get_params(crp->crp_session); if (crypto_buffer_len(&crp->crp_buf) > QAT_OCF_MAX_LEN) return E2BIG; if (CPA_TRUE == is_sep_aad_supported(csp) && (crp->crp_aad_length > ICP_QAT_FW_CCM_GCM_AAD_SZ_MAX)) return EBADMSG; return 0; } static int qat_ocf_process(device_t dev, struct cryptop *crp, int hint) { CpaStatus status = CPA_STATUS_SUCCESS; int rc = 0; struct qat_ocf_dsession *qat_dsession = NULL; struct qat_ocf_session *qat_session = NULL; struct qat_ocf_instance *qat_instance = NULL; CpaCySymDpOpData *pOpData = NULL; struct qat_ocf_cookie *qat_cookie = NULL; CpaBoolean memLoaded = CPA_FALSE; rc = qat_ocf_check_input(dev, crp); if (rc) goto fail; qat_dsession = crypto_get_driver_session(crp->crp_session); if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) qat_session = &qat_dsession->encSession; else qat_session = &qat_dsession->decSession; qat_instance = qat_dsession->qatInstance; status = qat_ocf_cookie_alloc(qat_instance, &qat_cookie); if (CPA_STATUS_SUCCESS != status) { rc = EAGAIN; goto fail; } qat_cookie->crp_op = crp; /* Common request fields */ pOpData = &qat_cookie->pOpdata; pOpData->instanceHandle = qat_instance->cyInstHandle; 
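	/*
	 * Illustrative note with hypothetical numbers (not part of the
	 * patch): for an ETA request whose AAD is already embedded in the
	 * source buffer -- say 16 bytes of AAD at crp_aad_start 0 followed
	 * by a 64-byte payload at crp_payload_start 16 -- qat_ocf_load_aad()
	 * leaves the cipher covering only the payload
	 * (cryptoStartSrcOffsetInBytes = 16, messageLenToCipherInBytes = 64)
	 * while the hash covers AAD plus payload
	 * (hashStartSrcOffsetInBytes = 0, messageLenToHashInBytes = 80).
	 * The defaults set below cover the payload only and are adjusted by
	 * qat_ocf_load() as needed.
	 */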
pOpData->sessionCtx = NULL; /* Cipher fields */ pOpData->cryptoStartSrcOffsetInBytes = crp->crp_payload_start; pOpData->messageLenToCipherInBytes = crp->crp_payload_length; /* Digest fields - any exceptions from this basic rules are covered * in qat_ocf_load */ pOpData->hashStartSrcOffsetInBytes = crp->crp_payload_start; pOpData->messageLenToHashInBytes = crp->crp_payload_length; status = qat_ocf_load(crp, qat_cookie); if (CPA_STATUS_SUCCESS != status) { device_printf(dev, "unable to load OCF buffers to QAT DMA " "transaction\n"); rc = EIO; goto fail; } memLoaded = CPA_TRUE; status = qat_ocf_cookie_dma_pre_sync(crp, pOpData); if (CPA_STATUS_SUCCESS != status) { device_printf(dev, "unable to sync DMA buffers\n"); rc = EIO; goto fail; } mtx_lock(&qat_instance->cyInstMtx); /* Session initialization at the first request. It's done * in such way to overcome missing QAT specific session data * such like AAD length and limited possibility to update * QAT session while handling traffic. */ if (NULL == qat_session->sessionCtx) { status = qat_ocf_session_init(dev, crp, qat_instance, qat_session); if (CPA_STATUS_SUCCESS != status) { mtx_unlock(&qat_instance->cyInstMtx); device_printf(dev, "unable to init session\n"); rc = EIO; goto fail; } } else { status = qat_ocf_handle_session_update(qat_dsession, crp); if (CPA_STATUS_RESOURCE == status) { mtx_unlock(&qat_instance->cyInstMtx); rc = EAGAIN; goto fail; } else if (CPA_STATUS_SUCCESS != status) { mtx_unlock(&qat_instance->cyInstMtx); rc = EIO; goto fail; } } pOpData->sessionCtx = qat_session->sessionCtx; status = cpaCySymDpEnqueueOp(pOpData, CPA_TRUE); mtx_unlock(&qat_instance->cyInstMtx); if (CPA_STATUS_SUCCESS != status) { if (CPA_STATUS_RETRY == status) { rc = EAGAIN; goto fail; } device_printf(dev, "unable to send request. 
Status: %d\n", status); rc = EIO; goto fail; } return 0; fail: if (qat_cookie) { if (memLoaded) qat_ocf_cookie_dma_unload(crp, pOpData); qat_ocf_cookie_free(qat_instance, qat_cookie); } crp->crp_etype = rc; crypto_done(crp); return 0; } static void qat_ocf_identify(driver_t *drv, device_t parent) { if (device_find_child(parent, "qat_ocf", -1) == NULL && BUS_ADD_CHILD(parent, 200, "qat_ocf", -1) == 0) device_printf(parent, "qat_ocf: could not attach!"); } static int qat_ocf_probe(device_t dev) { device_set_desc(dev, "QAT engine"); return (BUS_PROBE_NOWILDCARD); } static CpaStatus qat_ocf_get_irq_instances(CpaInstanceHandle *cyInstHandles, Cpa16U cyInstHandlesSize, Cpa16U *foundInstances) { CpaStatus status = CPA_STATUS_SUCCESS; icp_accel_dev_t **pAdfInsts = NULL; icp_accel_dev_t *dev_addr = NULL; sal_t *baseAddr = NULL; sal_list_t *listTemp = NULL; CpaInstanceHandle cyInstHandle; CpaInstanceInfo2 info; Cpa16U numDevices; Cpa32U instCtr = 0; Cpa32U i; /* Get the number of devices */ status = icp_amgr_getNumInstances(&numDevices); if (CPA_STATUS_SUCCESS != status) return status; /* Allocate memory to store addr of accel_devs */ pAdfInsts = malloc(numDevices * sizeof(icp_accel_dev_t *), M_QAT_OCF, M_WAITOK); /* Get ADF to return all accel_devs that support either * symmetric or asymmetric crypto */ status = icp_amgr_getAllAccelDevByCapabilities( (ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC), pAdfInsts, &numDevices); if (CPA_STATUS_SUCCESS != status) { free(pAdfInsts, M_QAT_OCF); return status; } for (i = 0; i < numDevices; i++) { dev_addr = (icp_accel_dev_t *)pAdfInsts[i]; baseAddr = dev_addr->pSalHandle; if (NULL == baseAddr) continue; listTemp = baseAddr->sym_services; if (NULL == listTemp) { listTemp = baseAddr->crypto_services; } while (NULL != listTemp) { cyInstHandle = SalList_getObject(listTemp); status = cpaCyInstanceGetInfo2(cyInstHandle, &info); if (CPA_STATUS_SUCCESS != status) continue; listTemp = SalList_next(listTemp); if (CPA_TRUE == info.isPolled) continue; if (instCtr >= cyInstHandlesSize) break; cyInstHandles[instCtr++] = cyInstHandle; } } free(pAdfInsts, M_QAT_OCF); *foundInstances = instCtr; return CPA_STATUS_SUCCESS; } static CpaStatus qat_ocf_start_instances(struct qat_ocf_softc *qat_softc, device_t dev) { CpaStatus status = CPA_STATUS_SUCCESS; Cpa16U numInstances = 0; CpaInstanceHandle cyInstHandles[QAT_OCF_MAX_INSTANCES] = { 0 }; CpaInstanceHandle cyInstHandle = NULL; Cpa32U startedInstances = 0; Cpa32U i; qat_softc->numCyInstances = 0; status = qat_ocf_get_irq_instances(cyInstHandles, QAT_OCF_MAX_INSTANCES, &numInstances); if (CPA_STATUS_SUCCESS != status) return status; for (i = 0; i < numInstances; i++) { struct qat_ocf_instance *qat_ocf_instance; cyInstHandle = cyInstHandles[i]; if (!cyInstHandle) continue; /* Starting instance */ status = cpaCyStartInstance(cyInstHandle); if (CPA_STATUS_SUCCESS != status) { device_printf(qat_softc->sc_dev, "unable to get start instance\n"); continue; } qat_ocf_instance = &qat_softc->cyInstHandles[startedInstances]; qat_ocf_instance->cyInstHandle = cyInstHandle; mtx_init(&qat_ocf_instance->cyInstMtx, "Instance MTX", NULL, MTX_DEF); status = cpaCySetAddressTranslation(cyInstHandle, qatVirtToPhys); if (CPA_STATUS_SUCCESS != status) { device_printf(qat_softc->sc_dev, "unable to add virt to phys callback\n"); goto fail; } status = cpaCySymDpRegCbFunc(cyInstHandle, symDpCallback); if (CPA_STATUS_SUCCESS != status) { device_printf(qat_softc->sc_dev, "unable to add user callback\n"); goto fail; } /* Initialize cookie pool */ status = 
qat_ocf_cookie_pool_init(qat_ocf_instance, dev); if (CPA_STATUS_SUCCESS != status) { device_printf(qat_softc->sc_dev, "unable to create cookie pool\n"); goto fail; } /* Disable forcing HW MAC validation for AEAD */ status = icp_sal_setForceAEADMACVerify(cyInstHandle, CPA_FALSE); if (CPA_STATUS_SUCCESS != status) { device_printf( qat_softc->sc_dev, "unable to disable AEAD HW MAC verification\n"); goto fail; } qat_ocf_instance->driver_id = qat_softc->cryptodev_id; startedInstances++; continue; fail: mtx_destroy(&qat_ocf_instance->cyInstMtx); /* Stop instance */ status = cpaCyStopInstance(cyInstHandle); if (CPA_STATUS_SUCCESS != status) device_printf(qat_softc->sc_dev, "unable to stop the instance\n"); } qat_softc->numCyInstances = startedInstances; return CPA_STATUS_SUCCESS; } static CpaStatus qat_ocf_stop_instances(struct qat_ocf_softc *qat_softc) { CpaStatus status = CPA_STATUS_SUCCESS; int i; for (i = 0; i < qat_softc->numCyInstances; i++) { struct qat_ocf_instance *qat_instance; qat_instance = &qat_softc->cyInstHandles[i]; status = cpaCyStopInstance(qat_instance->cyInstHandle); if (CPA_STATUS_SUCCESS != status) { pr_err("QAT: stopping instance id: %d failed\n", i); continue; } qat_ocf_cookie_pool_deinit(qat_instance); mtx_destroy(&qat_instance->cyInstMtx); } qat_softc->numCyInstances = 0; return status; } static int qat_ocf_deinit(struct qat_ocf_softc *qat_softc) { int status = 0; CpaStatus cpaStatus; if (qat_softc->cryptodev_id >= 0) { crypto_unregister_all(qat_softc->cryptodev_id); qat_softc->cryptodev_id = -1; } /* Stop QAT instances */ cpaStatus = qat_ocf_stop_instances(qat_softc); if (CPA_STATUS_SUCCESS != cpaStatus) { device_printf(qat_softc->sc_dev, "unable to stop instances\n"); status = EIO; } return status; } static int qat_ocf_init(struct qat_ocf_softc *qat_softc) { int32_t cryptodev_id; /* Starting instances for OCF */ if (qat_ocf_start_instances(qat_softc, qat_softc->sc_dev)) { device_printf(qat_softc->sc_dev, "unable to get QAT IRQ instances\n"); goto fail; } /* Register only if instances available */ if (qat_softc->numCyInstances) { cryptodev_id = crypto_get_driverid(qat_softc->sc_dev, sizeof(struct qat_ocf_dsession), CRYPTOCAP_F_HARDWARE); if (cryptodev_id < 0) { device_printf(qat_softc->sc_dev, "cannot initialize!\n"); goto fail; } qat_softc->cryptodev_id = cryptodev_id; } return 0; fail: qat_ocf_deinit(qat_softc); return ENXIO; } static int qat_ocf_sysctl_handle(SYSCTL_HANDLER_ARGS) { struct qat_ocf_softc *qat_softc = NULL; int ret = 0; device_t dev = arg1; u_int enabled; qat_softc = device_get_softc(dev); enabled = qat_softc->enabled; ret = sysctl_handle_int(oidp, &enabled, 0, req); if (ret || !req->newptr) return (ret); if (qat_softc->enabled != enabled) { if (enabled) { ret = qat_ocf_init(qat_softc); } else { ret = qat_ocf_deinit(qat_softc); } if (!ret) qat_softc->enabled = enabled; } return ret; } static int qat_ocf_attach(device_t dev) { int status; struct qat_ocf_softc *qat_softc; qat_softc = device_get_softc(dev); qat_softc->sc_dev = dev; qat_softc->cryptodev_id = -1; qat_softc->enabled = 1; qat_softc->rc = SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev, 0, qat_ocf_sysctl_handle, "I", "QAT OCF support enablement"); if (!qat_softc->rc) return ENOMEM; if (qat_softc->enabled) { status = qat_ocf_init(qat_softc); if (status) { device_printf(dev, "qat_ocf init failed\n"); goto fail; } } return 0; fail: qat_ocf_deinit(qat_softc); return (ENXIO); } static int 
qat_ocf_detach(device_t dev) { struct qat_ocf_softc *qat_softc = device_get_softc(dev); return qat_ocf_deinit(qat_softc); } static device_method_t qat_ocf_methods[] = { DEVMETHOD(device_identify, qat_ocf_identify), DEVMETHOD(device_probe, qat_ocf_probe), DEVMETHOD(device_attach, qat_ocf_attach), DEVMETHOD(device_detach, qat_ocf_detach), /* Cryptodev interface */ DEVMETHOD(cryptodev_probesession, qat_ocf_probesession), DEVMETHOD(cryptodev_newsession, qat_ocf_newsession), DEVMETHOD(cryptodev_freesession, qat_ocf_freesession), DEVMETHOD(cryptodev_process, qat_ocf_process), DEVMETHOD_END }; static driver_t qat_ocf_driver = { .name = "qat_ocf", .methods = qat_ocf_methods, .size = sizeof(struct qat_ocf_softc), }; DRIVER_MODULE_ORDERED(qat, nexus, qat_ocf_driver, NULL, NULL, SI_ORDER_ANY); MODULE_VERSION(qat, 1); MODULE_DEPEND(qat, qat_c62x, 1, 1, 1); MODULE_DEPEND(qat, qat_200xx, 1, 1, 1); MODULE_DEPEND(qat, qat_c3xxx, 1, 1, 1); MODULE_DEPEND(qat, qat_c4xxx, 1, 1, 1); MODULE_DEPEND(qat, qat_dh895xcc, 1, 1, 1); MODULE_DEPEND(qat, qat_4xxx, 1, 1, 1); MODULE_DEPEND(qat, crypto, 1, 1, 1); MODULE_DEPEND(qat, qat_common, 1, 1, 1); MODULE_DEPEND(qat, qat_api, 1, 1, 1); MODULE_DEPEND(qat, linuxkpi, 1, 1, 1); diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index bf643e6da21f..4bed57b5afbf 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -1,727 +1,718 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Functions to provide access to special i386 instructions. * This in included in sys/systm.h, and that file should be * used in preference to this. 
*/ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ struct region_descriptor; #define readb(va) (*(volatile uint8_t *) (va)) #define readw(va) (*(volatile uint16_t *) (va)) #define readl(va) (*(volatile uint32_t *) (va)) #define writeb(va, d) (*(volatile uint8_t *) (va) = (d)) #define writew(va, d) (*(volatile uint16_t *) (va) = (d)) #define writel(va, d) (*(volatile uint32_t *) (va) = (d)) static __inline void breakpoint(void) { __asm __volatile("int $3"); } static __inline __pure2 u_int bsfl(u_int mask) { u_int result; __asm("bsfl %1,%0" : "=r" (result) : "rm" (mask) : "cc"); return (result); } -static __inline __pure2 u_int -bsrl(u_int mask) -{ - u_int result; - - __asm("bsrl %1,%0" : "=r" (result) : "rm" (mask) : "cc"); - return (result); -} - static __inline void clflush(u_long addr) { __asm __volatile("clflush %0" : : "m" (*(char *)addr)); } static __inline void clflushopt(u_long addr) { __asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr)); } static __inline void clts(void) { __asm __volatile("clts"); } static __inline void disable_intr(void) { __asm __volatile("cli" : : : "memory"); } #ifdef _KERNEL static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } #else static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile( "pushl\t%%ebx\n\t" "cpuid\n\t" "movl\t%%ebx,%1\n\t" "popl\t%%ebx" : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile( "pushl\t%%ebx\n\t" "cpuid\n\t" "movl\t%%ebx,%1\n\t" "popl\t%%ebx" : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } #endif static __inline void enable_intr(void) { __asm __volatile("sti"); } static __inline void cpu_monitor(const void *addr, u_long extensions, u_int hints) { __asm __volatile("monitor" : : "a" (addr), "c" (extensions), "d" (hints)); } static __inline void cpu_mwait(u_long extensions, u_int hints) { __asm __volatile("mwait" : : "a" (hints), "c" (extensions)); } static __inline void lfence(void) { __asm __volatile("lfence" : : : "memory"); } static __inline void mfence(void) { __asm __volatile("mfence" : : : "memory"); } static __inline void sfence(void) { __asm __volatile("sfence" : : : "memory"); } static __inline void halt(void) { __asm __volatile("hlt"); } static __inline u_char inb(u_int port) { u_char data; __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline u_int inl(u_int port) { u_int data; __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void insb(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insb" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insw(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insw" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insl(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insl" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void invd(void) { __asm __volatile("invd"); } static __inline u_short inw(u_int port) { u_short data; __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void outb(u_int port, u_char data) { __asm 
__volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outl(u_int port, u_int data) { __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outsb(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsb" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsw(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsw" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsl(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsl" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outw(u_int port, u_short data) { __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void ia32_pause(void) { __asm __volatile("pause"); } static __inline u_int read_eflags(void) { u_int ef; __asm __volatile("pushfl; popl %0" : "=r" (ef)); return (ef); } static __inline uint64_t rdmsr(u_int msr) { uint64_t rv; __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr)); return (rv); } static __inline uint32_t rdmsr32(u_int msr) { uint32_t low; __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "edx"); return (low); } static __inline uint64_t rdpmc(u_int pmc) { uint64_t rv; __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc)); return (rv); } static __inline uint64_t rdtsc(void) { uint64_t rv; __asm __volatile("rdtsc" : "=A" (rv)); return (rv); } static __inline uint64_t rdtsc_ordered_lfence(void) { lfence(); return (rdtsc()); } static __inline uint64_t rdtsc_ordered_mfence(void) { mfence(); return (rdtsc()); } static __inline uint64_t rdtscp(void) { uint64_t rv; __asm __volatile("rdtscp" : "=A" (rv) : : "ecx"); return (rv); } static __inline uint64_t rdtscp_aux(uint32_t *aux) { uint64_t rv; __asm __volatile("rdtscp" : "=A" (rv), "=c" (*aux)); return (rv); } static __inline uint32_t rdtsc32(void) { uint32_t rv; __asm __volatile("rdtsc" : "=a" (rv) : : "edx"); return (rv); } static __inline uint32_t rdtscp32(void) { uint32_t rv; __asm __volatile("rdtscp" : "=a" (rv) : : "ecx", "edx"); return (rv); } static __inline void wbinvd(void) { __asm __volatile("wbinvd"); } static __inline void write_eflags(u_int ef) { __asm __volatile("pushl %0; popfl" : : "r" (ef)); } static __inline void wrmsr(u_int msr, uint64_t newval) { __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); } static __inline void load_cr0(u_int data) { __asm __volatile("movl %0,%%cr0" : : "r" (data)); } static __inline u_int rcr0(void) { u_int data; __asm __volatile("movl %%cr0,%0" : "=r" (data)); return (data); } static __inline u_int rcr2(void) { u_int data; __asm __volatile("movl %%cr2,%0" : "=r" (data)); return (data); } static __inline void load_cr3(u_int data) { __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); } static __inline u_int rcr3(void) { u_int data; __asm __volatile("movl %%cr3,%0" : "=r" (data)); return (data); } static __inline void load_cr4(u_int data) { __asm __volatile("movl %0,%%cr4" : : "r" (data)); } static __inline u_int rcr4(void) { u_int data; __asm __volatile("movl %%cr4,%0" : "=r" (data)); return (data); } static __inline uint64_t rxcr(u_int reg) { u_int low, high; __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); return (low | ((uint64_t)high << 32)); } static __inline void load_xcr(u_int reg, uint64_t val) { u_int low, high; low = val; high = val >> 32; __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); } /* * Global TLB flush (except for thise for pages marked PG_G) */ static __inline void 
invltlb(void) { load_cr3(rcr3()); } /* * TLB flush for an individual page (even if it has PG_G). * Only works on 486+ CPUs (i386 does not have PG_G). */ static __inline void invlpg(u_int addr) { __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); } static __inline u_short rfs(void) { u_short sel; __asm __volatile("movw %%fs,%0" : "=rm" (sel)); return (sel); } static __inline uint64_t rgdt(void) { uint64_t gdtr; __asm __volatile("sgdt %0" : "=m" (gdtr)); return (gdtr); } static __inline u_short rgs(void) { u_short sel; __asm __volatile("movw %%gs,%0" : "=rm" (sel)); return (sel); } static __inline uint64_t ridt(void) { uint64_t idtr; __asm __volatile("sidt %0" : "=m" (idtr)); return (idtr); } static __inline u_short rldt(void) { u_short ldtr; __asm __volatile("sldt %0" : "=g" (ldtr)); return (ldtr); } static __inline u_short rss(void) { u_short sel; __asm __volatile("movw %%ss,%0" : "=rm" (sel)); return (sel); } static __inline u_short rtr(void) { u_short tr; __asm __volatile("str %0" : "=g" (tr)); return (tr); } static __inline void load_fs(u_short sel) { __asm __volatile("movw %0,%%fs" : : "rm" (sel)); } static __inline void load_gs(u_short sel) { __asm __volatile("movw %0,%%gs" : : "rm" (sel)); } static __inline void lidt(struct region_descriptor *addr) { __asm __volatile("lidt (%0)" : : "r" (addr)); } static __inline void lldt(u_short sel) { __asm __volatile("lldt %0" : : "r" (sel)); } static __inline void ltr(u_short sel) { __asm __volatile("ltr %0" : : "r" (sel)); } static __inline u_int rdr0(void) { u_int data; __asm __volatile("movl %%dr0,%0" : "=r" (data)); return (data); } static __inline void load_dr0(u_int dr0) { __asm __volatile("movl %0,%%dr0" : : "r" (dr0)); } static __inline u_int rdr1(void) { u_int data; __asm __volatile("movl %%dr1,%0" : "=r" (data)); return (data); } static __inline void load_dr1(u_int dr1) { __asm __volatile("movl %0,%%dr1" : : "r" (dr1)); } static __inline u_int rdr2(void) { u_int data; __asm __volatile("movl %%dr2,%0" : "=r" (data)); return (data); } static __inline void load_dr2(u_int dr2) { __asm __volatile("movl %0,%%dr2" : : "r" (dr2)); } static __inline u_int rdr3(void) { u_int data; __asm __volatile("movl %%dr3,%0" : "=r" (data)); return (data); } static __inline void load_dr3(u_int dr3) { __asm __volatile("movl %0,%%dr3" : : "r" (dr3)); } static __inline u_int rdr6(void) { u_int data; __asm __volatile("movl %%dr6,%0" : "=r" (data)); return (data); } static __inline void load_dr6(u_int dr6) { __asm __volatile("movl %0,%%dr6" : : "r" (dr6)); } static __inline u_int rdr7(void) { u_int data; __asm __volatile("movl %%dr7,%0" : "=r" (data)); return (data); } static __inline void load_dr7(u_int dr7) { __asm __volatile("movl %0,%%dr7" : : "r" (dr7)); } static __inline u_char read_cyrix_reg(u_char reg) { outb(0x22, reg); return inb(0x23); } static __inline void write_cyrix_reg(u_char reg, u_char data) { outb(0x22, reg); outb(0x23, data); } static __inline register_t intr_disable(void) { register_t eflags; eflags = read_eflags(); disable_intr(); return (eflags); } static __inline void intr_restore(register_t eflags) { write_eflags(eflags); } static __inline uint32_t rdpkru(void) { uint32_t res; __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx"); return (res); } static __inline void wrpkru(uint32_t mask) { __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0)); } void reset_dbregs(void); #ifdef _KERNEL int rdmsr_safe(u_int msr, uint64_t *val); int wrmsr_safe(u_int msr, uint64_t newval); #endif #endif /* !_MACHINE_CPUFUNC_H_ */ 
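A minimal sketch of the alignment math in the qat_ocf contigmalloc() call above, which now derives its power-of-two alignment from ilog2() rather than the removed bsrl(): assuming ilog2(x) evaluates to the index of the most significant set bit of x (the same value the removed bsrl()/bsrq() helpers produced), 1 << (ilog2(x - 1) + 1) rounds x up to the next power of two for any x > 1. The helpers below are illustrative stand-ins, not kernel APIs:

#include <assert.h>

/* Stand-in for ilog2()/bsrl(): index of the most significant set bit. */
static inline unsigned int
ilog2_sketch(unsigned long x)
{
	/* x must be non-zero. */
	return (8 * sizeof(x) - 1 - __builtin_clzl(x));
}

/* Next power of two >= x, valid for x > 1. */
static inline unsigned long
roundup_pow2_sketch(unsigned long x)
{
	return (1UL << (ilog2_sketch(x - 1) + 1));
}

int
main(void)
{
	assert(roundup_pow2_sketch(192) == 256);	/* odd-sized ctx rounds up */
	assert(roundup_pow2_sketch(4096) == 4096);	/* exact powers of two unchanged */
	return (0);
}

Because the computation starts from x - 1, a session context whose size is already a power of two keeps that size as its alignment, so the allocation is never aligned more strictly than necessary.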
diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index fe84a2a3c213..9f96255ea00e 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -1,3123 +1,3122 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski * Copyright (C) 2006 Semihalf, Marian Balakowicz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Some hw specific parts of this pmap were derived or influenced * by NetBSD's ibm4xx pmap module. More generic code is shared with * a few other pmap modules from the FreeBSD tree. */ /* * VM layout notes: * * Kernel and user threads run within one common virtual address space * defined by AS=0. * * 32-bit pmap: * Virtual address space layout: * ----------------------------- * 0x0000_0000 - 0x7fff_ffff : user process * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) * 0xc000_0000 - 0xc0ff_ffff : kernel reserved * 0xc000_0000 - data_end : kernel code+data, env, metadata etc. * 0xc100_0000 - 0xffff_ffff : KVA * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs * 0xc200_4000 - 0xc200_8fff : guard page + kstack0 * 0xc200_9000 - 0xfeef_ffff : actual free KVA space * * 64-bit pmap: * Virtual address space layout: * ----------------------------- * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process * 0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries * 0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region * 0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved * 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data * endkernel - msgbufp-1 : flat device tree * msgbufp - kernel_pdir-1 : message buffer * kernel_pdir - kernel_pp2d-1 : kernel page directory * kernel_pp2d - . 
: kernel pointers to page directory * pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy * crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs * ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables * virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map * 0xf000_0000_0000_0000 - +Maxmem : physmem map * - 0xffff_ffff_ffff_ffff : device direct map */ #include #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SPARSE_MAPDEV /* Use power-of-two mappings in mmu_booke_mapdev(), to save entries. */ #define POW2_MAPPINGS #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #define __debug_used #else #define debugf(fmt, args...) #define __debug_used __unused #endif #ifdef __powerpc64__ #define PRI0ptrX "016lx" #else #define PRI0ptrX "08x" #endif #define TODO panic("%s: not implemented", __func__); extern unsigned char _etext[]; extern unsigned char _end[]; extern uint32_t *bootinfo; vm_paddr_t kernload; vm_offset_t kernstart; vm_size_t kernsize; /* Message buffer and tables. */ static vm_offset_t data_start; static vm_size_t data_end; /* Phys/avail memory regions. */ static struct mem_region *availmem_regions; static int availmem_regions_sz; static struct mem_region *physmem_regions; static int physmem_regions_sz; #ifndef __powerpc64__ /* Reserved KVA space and mutex for mmu_booke_zero_page. */ static vm_offset_t zero_page_va; static struct mtx zero_page_mutex; /* Reserved KVA space and mutex for mmu_booke_copy_page. */ static vm_offset_t copy_page_src_va; static vm_offset_t copy_page_dst_va; static struct mtx copy_page_mutex; #endif static struct mtx tlbivax_mutex; /**************************************************************************/ /* PMAP */ /**************************************************************************/ static int mmu_booke_enter_locked(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int flags, int8_t psind); unsigned int kptbl_min; /* Index of the first kernel ptbl. */ static uma_zone_t ptbl_root_zone; /* * If user pmap is processed with mmu_booke_remove and the resident count * drops to 0, there are no more pages to remove, so we need not continue. */ #define PMAP_REMOVE_DONE(pmap) \ ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) extern int elf32_nxstack; #endif /**************************************************************************/ /* TLB and TID handling */ /**************************************************************************/ /* Translation ID busy table */ static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; /* * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 * core revisions and should be read from h/w registers during early config. 
*/ uint32_t tlb0_entries; uint32_t tlb0_ways; uint32_t tlb0_entries_per_way; uint32_t tlb1_entries; #define TLB0_ENTRIES (tlb0_entries) #define TLB0_WAYS (tlb0_ways) #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) #define TLB1_ENTRIES (tlb1_entries) static tlbtid_t tid_alloc(struct pmap *); #ifdef DDB #ifdef __powerpc64__ static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); #else static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); #endif #endif static void tlb1_read_entry(tlb_entry_t *, unsigned int); static void tlb1_write_entry(tlb_entry_t *, unsigned int); static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int); static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma); static vm_size_t tsize2size(unsigned int); static unsigned int size2tsize(vm_size_t); -static unsigned long ilog2(unsigned long); static void set_mas4_defaults(void); static inline void tlb0_flush_entry(vm_offset_t); static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); /**************************************************************************/ /* Page table management */ /**************************************************************************/ static struct rwlock_padalign pvh_global_lock; /* Data for the pv entry allocation mechanism */ static uma_zone_t pvzone; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t); static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, bool); static int pte_remove(pmap_t, vm_offset_t, uint8_t); static pte_t *pte_find(pmap_t, vm_offset_t); static void kernel_pte_alloc(vm_offset_t, vm_offset_t); static pv_entry_t pv_alloc(void); static void pv_free(pv_entry_t); static void pv_insert(pmap_t, vm_offset_t, vm_page_t); static void pv_remove(pmap_t, vm_offset_t, vm_page_t); static void booke_pmap_init_qpages(void); static inline void tlb_miss_lock(void); static inline void tlb_miss_unlock(void); #ifdef SMP extern tlb_entry_t __boot_tlb1[]; void pmap_bootstrap_ap(volatile uint32_t *); #endif /* * Kernel MMU interface */ static void mmu_booke_clear_modify(vm_page_t); static void mmu_booke_copy(pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); static void mmu_booke_copy_page(vm_page_t, vm_page_t); static void mmu_booke_copy_pages(vm_page_t *, vm_offset_t, vm_page_t *, vm_offset_t, int); static int mmu_booke_enter(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int flags, int8_t psind); static void mmu_booke_enter_object(pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); static void mmu_booke_enter_quick(pmap_t, vm_offset_t, vm_page_t, vm_prot_t); static vm_paddr_t mmu_booke_extract(pmap_t, vm_offset_t); static vm_page_t mmu_booke_extract_and_hold(pmap_t, vm_offset_t, vm_prot_t); static void mmu_booke_init(void); static bool mmu_booke_is_modified(vm_page_t); static bool mmu_booke_is_prefaultable(pmap_t, vm_offset_t); static bool mmu_booke_is_referenced(vm_page_t); static int mmu_booke_ts_referenced(vm_page_t); static vm_offset_t mmu_booke_map(vm_offset_t *, vm_paddr_t, vm_paddr_t, int); static int mmu_booke_mincore(pmap_t, vm_offset_t, vm_paddr_t *); static void mmu_booke_object_init_pt(pmap_t, vm_offset_t, vm_object_t, vm_pindex_t, vm_size_t); static bool mmu_booke_page_exists_quick(pmap_t, vm_page_t); static void 
mmu_booke_page_init(vm_page_t); static int mmu_booke_page_wired_mappings(vm_page_t); static int mmu_booke_pinit(pmap_t); static void mmu_booke_pinit0(pmap_t); static void mmu_booke_protect(pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); static void mmu_booke_qenter(vm_offset_t, vm_page_t *, int); static void mmu_booke_qremove(vm_offset_t, int); static void mmu_booke_release(pmap_t); static void mmu_booke_remove(pmap_t, vm_offset_t, vm_offset_t); static void mmu_booke_remove_all(vm_page_t); static void mmu_booke_remove_write(vm_page_t); static void mmu_booke_unwire(pmap_t, vm_offset_t, vm_offset_t); static void mmu_booke_zero_page(vm_page_t); static void mmu_booke_zero_page_area(vm_page_t, int, int); static void mmu_booke_activate(struct thread *); static void mmu_booke_deactivate(struct thread *); static void mmu_booke_bootstrap(vm_offset_t, vm_offset_t); static void *mmu_booke_mapdev(vm_paddr_t, vm_size_t); static void *mmu_booke_mapdev_attr(vm_paddr_t, vm_size_t, vm_memattr_t); static void mmu_booke_unmapdev(void *, vm_size_t); static vm_paddr_t mmu_booke_kextract(vm_offset_t); static void mmu_booke_kenter(vm_offset_t, vm_paddr_t); static void mmu_booke_kenter_attr(vm_offset_t, vm_paddr_t, vm_memattr_t); static void mmu_booke_kremove(vm_offset_t); static int mmu_booke_dev_direct_mapped(vm_paddr_t, vm_size_t); static void mmu_booke_sync_icache(pmap_t, vm_offset_t, vm_size_t); static void mmu_booke_dumpsys_map(vm_paddr_t pa, size_t, void **); static void mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t, void *); static void mmu_booke_scan_init(void); static vm_offset_t mmu_booke_quick_enter_page(vm_page_t m); static void mmu_booke_quick_remove_page(vm_offset_t addr); static int mmu_booke_change_attr(vm_offset_t addr, vm_size_t sz, vm_memattr_t mode); static int mmu_booke_decode_kernel_ptr(vm_offset_t addr, int *is_user, vm_offset_t *decoded_addr); static void mmu_booke_page_array_startup(long); static bool mmu_booke_page_is_mapped(vm_page_t m); static bool mmu_booke_ps_enabled(pmap_t pmap); static struct pmap_funcs mmu_booke_methods = { /* pmap dispatcher interface */ .clear_modify = mmu_booke_clear_modify, .copy = mmu_booke_copy, .copy_page = mmu_booke_copy_page, .copy_pages = mmu_booke_copy_pages, .enter = mmu_booke_enter, .enter_object = mmu_booke_enter_object, .enter_quick = mmu_booke_enter_quick, .extract = mmu_booke_extract, .extract_and_hold = mmu_booke_extract_and_hold, .init = mmu_booke_init, .is_modified = mmu_booke_is_modified, .is_prefaultable = mmu_booke_is_prefaultable, .is_referenced = mmu_booke_is_referenced, .ts_referenced = mmu_booke_ts_referenced, .map = mmu_booke_map, .mincore = mmu_booke_mincore, .object_init_pt = mmu_booke_object_init_pt, .page_exists_quick = mmu_booke_page_exists_quick, .page_init = mmu_booke_page_init, .page_wired_mappings = mmu_booke_page_wired_mappings, .pinit = mmu_booke_pinit, .pinit0 = mmu_booke_pinit0, .protect = mmu_booke_protect, .qenter = mmu_booke_qenter, .qremove = mmu_booke_qremove, .release = mmu_booke_release, .remove = mmu_booke_remove, .remove_all = mmu_booke_remove_all, .remove_write = mmu_booke_remove_write, .sync_icache = mmu_booke_sync_icache, .unwire = mmu_booke_unwire, .zero_page = mmu_booke_zero_page, .zero_page_area = mmu_booke_zero_page_area, .activate = mmu_booke_activate, .deactivate = mmu_booke_deactivate, .quick_enter_page = mmu_booke_quick_enter_page, .quick_remove_page = mmu_booke_quick_remove_page, .page_array_startup = mmu_booke_page_array_startup, .page_is_mapped = mmu_booke_page_is_mapped, .ps_enabled = 
mmu_booke_ps_enabled, /* Internal interfaces */ .bootstrap = mmu_booke_bootstrap, .dev_direct_mapped = mmu_booke_dev_direct_mapped, .mapdev = mmu_booke_mapdev, .mapdev_attr = mmu_booke_mapdev_attr, .kenter = mmu_booke_kenter, .kenter_attr = mmu_booke_kenter_attr, .kextract = mmu_booke_kextract, .kremove = mmu_booke_kremove, .unmapdev = mmu_booke_unmapdev, .change_attr = mmu_booke_change_attr, .decode_kernel_ptr = mmu_booke_decode_kernel_ptr, /* dumpsys() support */ .dumpsys_map_chunk = mmu_booke_dumpsys_map, .dumpsys_unmap_chunk = mmu_booke_dumpsys_unmap, .dumpsys_pa_init = mmu_booke_scan_init, }; MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods); #ifdef __powerpc64__ #include "pmap_64.c" #else #include "pmap_32.c" #endif static vm_offset_t tlb1_map_base = VM_MAPDEV_BASE; static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) { uint32_t attrib; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (MAS2_I | MAS2_G); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (MAS2_I); case VM_MEMATTR_WRITE_THROUGH: return (MAS2_W | MAS2_M); case VM_MEMATTR_CACHEABLE: return (MAS2_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. */ attrib = _TLB_ENTRY_IO; for (i = 0; i < physmem_regions_sz; i++) { if ((pa >= physmem_regions[i].mr_start) && (pa < (physmem_regions[i].mr_start + physmem_regions[i].mr_size))) { attrib = _TLB_ENTRY_MEM; break; } } return (attrib); } static inline void tlb_miss_lock(void) { #ifdef SMP struct pcpu *pc; if (!smp_started) return; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) { CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock); KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), ("tlb_miss_lock: tried to lock self")); tlb_lock(pc->pc_booke.tlb_lock); CTR1(KTR_PMAP, "%s: locked", __func__); } } #endif } static inline void tlb_miss_unlock(void) { #ifdef SMP struct pcpu *pc; if (!smp_started) return; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) { CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", __func__, pc->pc_cpuid); tlb_unlock(pc->pc_booke.tlb_lock); CTR1(KTR_PMAP, "%s: unlocked", __func__); } } #endif } /* Return number of entries in TLB0. */ static __inline void tlb0_get_tlbconf(void) { uint32_t tlb0_cfg; tlb0_cfg = mfspr(SPR_TLB0CFG); tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; tlb0_entries_per_way = tlb0_entries / tlb0_ways; } /* Return number of entries in TLB1. */ static __inline void tlb1_get_tlbconf(void) { uint32_t tlb1_cfg; tlb1_cfg = mfspr(SPR_TLB1CFG); tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; } /**************************************************************************/ /* Page table related */ /**************************************************************************/ /* Allocate pv_entry structure. */ pv_entry_t pv_alloc(void) { pv_entry_t pv; pv_entry_count++; if (pv_entry_count > pv_entry_high_water) pagedaemon_wakeup(0); /* XXX powerpc NUMA */ pv = uma_zalloc(pvzone, M_NOWAIT); return (pv); } /* Free pv_entry structure. */ static __inline void pv_free(pv_entry_t pve) { pv_entry_count--; uma_zfree(pvzone, pve); } /* Allocate and initialize pv_entry structure. 
*/ static void pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pve; //int su = (pmap == kernel_pmap); //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, // (u_int32_t)pmap, va, (u_int32_t)m); pve = pv_alloc(); if (pve == NULL) panic("pv_insert: no pv entries!"); pve->pv_pmap = pmap; pve->pv_va = va; /* add to pv_list */ PMAP_LOCK_ASSERT(pmap, MA_OWNED); rw_assert(&pvh_global_lock, RA_WLOCKED); TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); //debugf("pv_insert: e\n"); } /* Destroy pv entry. */ static void pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pve; //int su = (pmap == kernel_pmap); //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); PMAP_LOCK_ASSERT(pmap, MA_OWNED); rw_assert(&pvh_global_lock, RA_WLOCKED); /* find pv entry */ TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { /* remove from pv_list */ TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); /* free pv entry struct */ pv_free(pve); break; } } //debugf("pv_remove: e\n"); } /**************************************************************************/ /* PMAP related */ /**************************************************************************/ /* * This is called during booke_init, before the system is really initialized. */ static void mmu_booke_bootstrap(vm_offset_t start, vm_offset_t kernelend) { vm_paddr_t phys_kernelend; struct mem_region *mp, *mp1; int cnt, i, j; vm_paddr_t s, e, sz; vm_paddr_t physsz, hwphyssz; u_int phys_avail_count __debug_used; vm_size_t kstack0_sz; vm_paddr_t kstack0_phys; vm_offset_t kstack0; void *dpcpu; debugf("mmu_booke_bootstrap: entered\n"); /* Set interesting system properties */ #ifdef __powerpc64__ hw_direct_map = 1; #else hw_direct_map = 0; #endif #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) elf32_nxstack = 1; #endif /* Initialize invalidation mutex */ mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); /* Read TLB0 size and associativity. */ tlb0_get_tlbconf(); /* * Align kernel start and end address (kernel image). * Note that kernel end does not necessarily relate to kernsize. * kernsize is the size of the kernel that is actually mapped. */ data_start = round_page(kernelend); data_end = data_start; /* Allocate the dynamic per-cpu area. */ dpcpu = (void *)data_end; data_end += DPCPU_SIZE; /* Allocate space for the message buffer. */ msgbufp = (struct msgbuf *)data_end; data_end += msgbufsize; debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", (uintptr_t)msgbufp, data_end); data_end = round_page(data_end); data_end = round_page(mmu_booke_alloc_kernel_pgtables(data_end)); /* Retrieve phys/avail mem regions */ mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, &availmem_regions_sz); if (PHYS_AVAIL_ENTRIES < availmem_regions_sz) panic("mmu_booke_bootstrap: phys_avail too small"); data_end = round_page(data_end); vm_page_array = (vm_page_t)data_end; /* * Get a rough idea (upper bound) on the size of the page array. The * vm_page_array will not handle any more pages than we have in the * avail_regions array, and most likely much less. */ sz = 0; for (mp = availmem_regions; mp->mr_size; mp++) { sz += mp->mr_size; } sz = (round_page(sz) / (PAGE_SIZE + sizeof(struct vm_page))); data_end += round_page(sz * sizeof(struct vm_page)); /* Pre-round up to 1MB. 
This wastes some space, but saves TLB entries */ data_end = roundup2(data_end, 1 << 20); debugf(" data_end: 0x%"PRI0ptrX"\n", data_end); debugf(" kernstart: %#zx\n", kernstart); debugf(" kernsize: %#zx\n", kernsize); if (data_end - kernstart > kernsize) { kernsize += tlb1_mapin_region(kernstart + kernsize, kernload + kernsize, (data_end - kernstart) - kernsize, _TLB_ENTRY_MEM); } data_end = kernstart + kernsize; debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end); /* * Clear the structures - note we can only do it safely after the * possible additional TLB1 translations are in place (above) so that * all range up to the currently calculated 'data_end' is covered. */ bzero((void *)data_start, data_end - data_start); dpcpu_init(dpcpu, 0); /*******************************************************/ /* Set the start and end of kva. */ /*******************************************************/ virtual_avail = round_page(data_end); virtual_end = VM_MAX_KERNEL_ADDRESS; #ifndef __powerpc64__ /* Allocate KVA space for page zero/copy operations. */ zero_page_va = virtual_avail; virtual_avail += PAGE_SIZE; copy_page_src_va = virtual_avail; virtual_avail += PAGE_SIZE; copy_page_dst_va = virtual_avail; virtual_avail += PAGE_SIZE; debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va); debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va); debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va); /* Initialize page zero/copy mutexes. */ mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); /* Allocate KVA space for ptbl bufs. */ ptbl_buf_pool_vabase = virtual_avail; virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", ptbl_buf_pool_vabase, virtual_avail); #endif #ifdef __powerpc64__ /* Allocate KVA space for crashdumpmap. */ crashdumpmap = (caddr_t)virtual_avail; virtual_avail += MAXDUMPPGS * PAGE_SIZE; #endif /* Calculate corresponding physical addresses for the kernel region. */ phys_kernelend = kernload + kernsize; debugf("kernel image and allocated data:\n"); debugf(" kernload = 0x%09jx\n", (uintmax_t)kernload); debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart); debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize); /* * Remove kernel physical address range from avail regions list. Page * align all regions. Non-page aligned memory isn't very interesting * to us. Also, sort the entries for ascending addresses. */ sz = 0; cnt = availmem_regions_sz; debugf("processing avail regions:\n"); for (mp = availmem_regions; mp->mr_size; mp++) { s = mp->mr_start; e = mp->mr_start + mp->mr_size; debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e); /* Check whether this region holds all of the kernel. */ if (s < kernload && e > phys_kernelend) { availmem_regions[cnt].mr_start = phys_kernelend; availmem_regions[cnt++].mr_size = e - phys_kernelend; e = kernload; } /* Look whether this regions starts within the kernel. */ if (s >= kernload && s < phys_kernelend) { if (e <= phys_kernelend) goto empty; s = phys_kernelend; } /* Now look whether this region ends within the kernel. */ if (e > kernload && e <= phys_kernelend) { if (s >= kernload) goto empty; e = kernload; } /* Now page align the start and size of the region. */ s = round_page(s); e = trunc_page(e); if (e < s) e = s; sz = e - s; debugf("%09jx-%09jx = %jx\n", (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz); /* Check whether some memory is left here. 
*/ if (sz == 0) { empty: memmove(mp, mp + 1, (cnt - (mp - availmem_regions)) * sizeof(*mp)); cnt--; mp--; continue; } /* Do an insertion sort. */ for (mp1 = availmem_regions; mp1 < mp; mp1++) if (s < mp1->mr_start) break; if (mp1 < mp) { memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); mp1->mr_start = s; mp1->mr_size = sz; } else { mp->mr_start = s; mp->mr_size = sz; } } availmem_regions_sz = cnt; /*******************************************************/ /* Steal physical memory for kernel stack from the end */ /* of the first avail region */ /*******************************************************/ kstack0_sz = kstack_pages * PAGE_SIZE; kstack0_phys = availmem_regions[0].mr_start + availmem_regions[0].mr_size; kstack0_phys -= kstack0_sz; availmem_regions[0].mr_size -= kstack0_sz; /*******************************************************/ /* Fill in phys_avail table, based on availmem_regions */ /*******************************************************/ phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); debugf("fill in phys_avail:\n"); for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", (uintmax_t)availmem_regions[i].mr_start, (uintmax_t)availmem_regions[i].mr_start + availmem_regions[i].mr_size, (uintmax_t)availmem_regions[i].mr_size); if (hwphyssz != 0 && (physsz + availmem_regions[i].mr_size) >= hwphyssz) { debugf(" hw.physmem adjust\n"); if (physsz < hwphyssz) { phys_avail[j] = availmem_regions[i].mr_start; phys_avail[j + 1] = availmem_regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; dump_avail[j] = phys_avail[j]; dump_avail[j + 1] = phys_avail[j + 1]; } break; } phys_avail[j] = availmem_regions[i].mr_start; phys_avail[j + 1] = availmem_regions[i].mr_start + availmem_regions[i].mr_size; phys_avail_count++; physsz += availmem_regions[i].mr_size; dump_avail[j] = phys_avail[j]; dump_avail[j + 1] = phys_avail[j + 1]; } physmem = btoc(physsz); /* Calculate the last available physical address. */ for (i = 0; phys_avail[i + 2] != 0; i += 2) ; Maxmem = powerpc_btop(phys_avail[i + 1]); debugf("Maxmem = 0x%08lx\n", Maxmem); debugf("phys_avail_count = %d\n", phys_avail_count); debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); #ifdef __powerpc64__ /* * Map the physical memory contiguously in TLB1. * Round so it fits into a single mapping. */ tlb1_mapin_region(DMAP_BASE_ADDRESS, 0, phys_avail[i + 1], _TLB_ENTRY_MEM); #endif /*******************************************************/ /* Initialize (statically allocated) kernel pmap. */ /*******************************************************/ PMAP_LOCK_INIT(kernel_pmap); debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); kernel_pte_alloc(virtual_avail, kernstart); for (i = 0; i < MAXCPU; i++) { kernel_pmap->pm_tid[i] = TID_KERNEL; /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ tidbusy[i][TID_KERNEL] = kernel_pmap; } /* Mark kernel_pmap active on all CPUs */ CPU_FILL(&kernel_pmap->pm_active); /* * Initialize the global pv list lock. 
*/ rw_init(&pvh_global_lock, "pmap pv global"); /*******************************************************/ /* Final setup */ /*******************************************************/ /* Enter kstack0 into kernel map, provide guard page */ kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; thread0.td_kstack = kstack0; thread0.td_kstack_pages = kstack_pages; debugf("kstack_sz = 0x%08jx\n", (uintmax_t)kstack0_sz); debugf("kstack0_phys at 0x%09jx - 0x%09jx\n", (uintmax_t)kstack0_phys, (uintmax_t)kstack0_phys + kstack0_sz); debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", kstack0, kstack0 + kstack0_sz); virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; for (i = 0; i < kstack_pages; i++) { mmu_booke_kenter(kstack0, kstack0_phys); kstack0 += PAGE_SIZE; kstack0_phys += PAGE_SIZE; } pmap_bootstrapped = 1; debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); debugf("mmu_booke_bootstrap: exit\n"); } #ifdef SMP void tlb1_ap_prep(void) { tlb_entry_t *e, tmp; unsigned int i; /* Prepare TLB1 image for AP processors */ e = __boot_tlb1; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&tmp, i); if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) memcpy(e++, &tmp, sizeof(tmp)); } } void pmap_bootstrap_ap(volatile uint32_t *trcp __unused) { int i; /* * Finish TLB1 configuration: the BSP already set up its TLB1 and we * have the snapshot of its contents in the s/w __boot_tlb1[] table * created by tlb1_ap_prep(), so use these values directly to * (re)program AP's TLB1 hardware. * * Start at index 1 because index 0 has the kernel map. */ for (i = 1; i < TLB1_ENTRIES; i++) { if (__boot_tlb1[i].mas1 & MAS1_VALID) tlb1_write_entry(&__boot_tlb1[i], i); } set_mas4_defaults(); } #endif static void booke_pmap_init_qpages(void) { struct pcpu *pc; int i; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("pmap_init_qpages: unable to allocate KVA"); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); /* * Get the physical page address for the given pmap/virtual address. */ static vm_paddr_t mmu_booke_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa; PMAP_LOCK(pmap); pa = pte_vatopa(pmap, va); PMAP_UNLOCK(pmap); return (pa); } /* * Extract the physical page address associated with the given * kernel virtual address. */ static vm_paddr_t mmu_booke_kextract(vm_offset_t va) { tlb_entry_t e; vm_paddr_t p = 0; int i; #ifdef __powerpc64__ if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) return (DMAP_TO_PHYS(va)); #endif if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) p = pte_vatopa(kernel_pmap, va); if (p == 0) { /* Check TLB1 mappings */ for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (va >= e.virt && va < e.virt + e.size) return (e.phys + (va - e.virt)); } } return (p); } /* * Initialize the pmap module. * * Called by vm_mem_init(), to initialize any structures that the pmap system * needs to map virtual memory. */ static void mmu_booke_init(void) { int shpgperproc = PMAP_SHPGPERPROC; /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. 
*/ pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entry_max", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); uma_zone_reserve_kva(pvzone, pv_entry_max); /* Pre-fill pvzone with initial number of pv entries. */ uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); /* Create a UMA zone for page table roots. */ ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM); /* Initialize ptbl allocation. */ ptbl_init(); } /* * Map a list of wired pages into kernel virtual address space. This is * intended for temporary mappings which do not need page modification or * references recorded. Existing mappings in the region are overwritten. */ static void mmu_booke_qenter(vm_offset_t sva, vm_page_t *m, int count) { vm_offset_t va; va = sva; while (count-- > 0) { mmu_booke_kenter(va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } } /* * Remove page mappings from kernel virtual address space. Intended for * temporary mappings entered by mmu_booke_qenter. */ static void mmu_booke_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { mmu_booke_kremove(va); va += PAGE_SIZE; } } /* * Map a wired page into kernel virtual address space. */ static void mmu_booke_kenter(vm_offset_t va, vm_paddr_t pa) { mmu_booke_kenter_attr(va, pa, VM_MEMATTR_DEFAULT); } static void mmu_booke_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) { uint32_t flags; pte_t *pte; KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; flags |= PTE_PS_4KB; pte = pte_find(kernel_pmap, va); KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE")); mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); if (PTE_ISVALID(pte)) { CTR1(KTR_PMAP, "%s: replacing entry!", __func__); /* Flush entry from TLB0 */ tlb0_flush_entry(va); } *pte = PTE_RPN_FROM_PA(pa) | flags; //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); /* Flush the real memory from the instruction cache. */ if ((flags & (PTE_I | PTE_G)) == 0) __syncicache((void *)va, PAGE_SIZE); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } /* * Remove a page from kernel page table. */ static void mmu_booke_kremove(vm_offset_t va) { pte_t *pte; CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va); KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kremove: invalid va")); pte = pte_find(kernel_pmap, va); if (!PTE_ISVALID(pte)) { CTR1(KTR_PMAP, "%s: invalid pte", __func__); return; } mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); /* Invalidate entry in TLB0, update PTE. */ tlb0_flush_entry(va); *pte = 0; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } /* * Figure out where a given kernel pointer (usually in a fault) points * to from the VM's perspective, potentially remapping into userland's * address space. 
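/*
 * Worked example (illustration only) of the pv-entry sizing performed in
 * mmu_booke_init() above.  The values below are hypothetical; the real ones
 * come from PMAP_SHPGPERPROC, kern.maxproc and vm_cnt.v_page_count.
 */
#include <stdio.h>

int
main(void)
{
        long shpgperproc = 200;         /* assumed PMAP_SHPGPERPROC-style default */
        long maxproc = 16000;           /* hypothetical kern.maxproc */
        long v_page_count = 262144;     /* roughly 1GB of 4KB pages */
        long pv_entry_max, pv_entry_high_water;

        pv_entry_max = shpgperproc * maxproc + v_page_count;
        pv_entry_high_water = 9 * (pv_entry_max / 10);
        printf("pv_entry_max = %ld, high water = %ld\n",
            pv_entry_max, pv_entry_high_water);
        return (0);
}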
*/ static int mmu_booke_decode_kernel_ptr(vm_offset_t addr, int *is_user, vm_offset_t *decoded_addr) { if (trunc_page(addr) <= VM_MAXUSER_ADDRESS) *is_user = 1; else *is_user = 0; *decoded_addr = addr; return (0); } static bool mmu_booke_page_is_mapped(vm_page_t m) { return (!TAILQ_EMPTY(&(m)->md.pv_list)); } static bool mmu_booke_ps_enabled(pmap_t pmap __unused) { return (false); } /* * Initialize pmap associated with process 0. */ static void mmu_booke_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); mmu_booke_pinit(pmap); PCPU_SET(curpmap, pmap); } /* * Insert the given physical page at the specified virtual address in the * target physical map with the protection requested. If specified the page * will be wired down. */ static int mmu_booke_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { int error; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); error = mmu_booke_enter_locked(pmap, va, m, prot, flags, psind); PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); return (error); } static int mmu_booke_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) { pte_t *pte; vm_paddr_t pa; pte_t flags; int error, su, sync; pa = VM_PAGE_TO_PHYS(m); su = (pmap == kernel_pmap); sync = 0; //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " // "pa=0x%08x prot=0x%08x flags=%#x)\n", // (u_int32_t)pmap, su, pmap->pm_tid, // (u_int32_t)m, va, pa, prot, flags); if (su) { KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_enter_locked: kernel pmap, non kernel va")); } else { KASSERT((va <= VM_MAXUSER_ADDRESS), ("mmu_booke_enter_locked: user pmap, non user va")); } if ((m->oflags & VPO_UNMANAGED) == 0) { if ((pmap_flags & PMAP_ENTER_QUICK_LOCKED) == 0) VM_PAGE_OBJECT_BUSY_ASSERT(m); else VM_OBJECT_ASSERT_LOCKED(m->object); } PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * If there is an existing mapping, and the physical address has not * changed, must be protection or wiring change. */ if (((pte = pte_find(pmap, va)) != NULL) && (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { /* * Before actually updating pte->flags we calculate and * prepare its new value in a helper var. */ flags = *pte; flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); /* Wiring change, just update stats. */ if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { if (!PTE_ISWIRED(pte)) { flags |= PTE_WIRED; pmap->pm_stats.wired_count++; } } else { if (PTE_ISWIRED(pte)) { flags &= ~PTE_WIRED; pmap->pm_stats.wired_count--; } } if (prot & VM_PROT_WRITE) { /* Add write permissions. */ flags |= PTE_SW; if (!su) flags |= PTE_UW; if ((flags & PTE_MANAGED) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } else { /* Handle modified pages, sense modify status. */ /* * The PTE_MODIFIED flag could be set by underlying * TLB misses since we last read it (above), possibly * other CPUs could update it so we check in the PTE * directly rather than rely on that saved local flags * copy. */ if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); } if (prot & VM_PROT_EXECUTE) { flags |= PTE_SX; if (!su) flags |= PTE_UX; /* * Check existing flags for execute permissions: if we * are turning execute permissions on, icache should * be flushed. */ if ((*pte & (PTE_UX | PTE_SX)) == 0) sync++; } flags &= ~PTE_REFERENCED; /* * The new flags value is all calculated -- only now actually * update the PTE. 
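/*
 * Sketch of how mmu_booke_enter_locked() above turns the requested protection
 * into Book-E PTE permission bits: supervisor bits are always candidates,
 * user bits only for non-kernel pmaps.  The X_* values are placeholders; the
 * real PTE_* and VM_PROT_* constants live in the headers.
 */
#define X_PTE_SR        0x01
#define X_PTE_SW        0x02
#define X_PTE_SX        0x04
#define X_PTE_UR        0x08
#define X_PTE_UW        0x10
#define X_PTE_UX        0x20
#define X_PROT_WRITE    0x02    /* mirrors VM_PROT_WRITE */
#define X_PROT_EXECUTE  0x04    /* mirrors VM_PROT_EXECUTE */

static unsigned int
prot_to_pte_bits(int prot, int is_kernel_pmap)
{
        unsigned int flags;

        flags = X_PTE_SR;               /* always readable by the kernel */
        if (!is_kernel_pmap)
                flags |= X_PTE_UR;      /* user mappings get user read */
        if (prot & X_PROT_WRITE) {
                flags |= X_PTE_SW;
                if (!is_kernel_pmap)
                        flags |= X_PTE_UW;
        }
        if (prot & X_PROT_EXECUTE) {
                flags |= X_PTE_SX;
                if (!is_kernel_pmap)
                        flags |= X_PTE_UX;
        }
        return (flags);
}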
*/ mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte &= ~PTE_FLAGS_MASK; *pte |= flags; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } else { /* * If there is an existing mapping, but it's for a different * physical address, pte_enter() will delete the old mapping. */ //if ((pte != NULL) && PTE_ISVALID(pte)) // debugf("mmu_booke_enter_locked: replace\n"); //else // debugf("mmu_booke_enter_locked: new\n"); /* Now set up the flags and install the new mapping. */ flags = (PTE_SR | PTE_VALID); flags |= PTE_M; if (!su) flags |= PTE_UR; if (prot & VM_PROT_WRITE) { flags |= PTE_SW; if (!su) flags |= PTE_UW; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); } if (prot & VM_PROT_EXECUTE) { flags |= PTE_SX; if (!su) flags |= PTE_UX; } /* If its wired update stats. */ if ((pmap_flags & PMAP_ENTER_WIRED) != 0) flags |= PTE_WIRED; error = pte_enter(pmap, m, va, flags, (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); if (error != 0) return (KERN_RESOURCE_SHORTAGE); if ((flags & PMAP_ENTER_WIRED) != 0) pmap->pm_stats.wired_count++; /* Flush the real memory from the instruction cache. */ if (prot & VM_PROT_EXECUTE) sync++; } if (sync && (su || pmap == PCPU_GET(curpmap))) { __syncicache((void *)va, PAGE_SIZE); sync = 0; } return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ static void mmu_booke_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { mmu_booke_enter_locked(pmap, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); m = TAILQ_NEXT(m, listq); } PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); } static void mmu_booke_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); mmu_booke_enter_locked(pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly rounded to the page size. 
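/*
 * Illustrative arithmetic for mmu_booke_enter_object() above: each resident
 * page lands at the virtual address that preserves its offset from m_start
 * within the object.  The numbers in the comment are hypothetical; 4KB pages
 * are assumed.
 */
static unsigned long
object_page_va(unsigned long start, unsigned long m_start_pindex,
    unsigned long m_pindex)
{
        /* e.g. start 0x10010000, pindex 11 vs. 8 -> start + 3 * 4096. */
        return (start + (m_pindex - m_start_pindex) * 4096UL);
}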
*/ static void mmu_booke_remove(pmap_t pmap, vm_offset_t va, vm_offset_t endva) { pte_t *pte; uint8_t hold_flag; int su = (pmap == kernel_pmap); //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); if (su) { KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_remove: kernel pmap, non kernel va")); } else { KASSERT((va <= VM_MAXUSER_ADDRESS), ("mmu_booke_remove: user pmap, non user va")); } if (PMAP_REMOVE_DONE(pmap)) { //debugf("mmu_booke_remove: e (empty)\n"); return; } hold_flag = PTBL_HOLD_FLAG(pmap); //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); for (; va < endva; va += PAGE_SIZE) { pte = pte_find_next(pmap, &va); if ((pte == NULL) || !PTE_ISVALID(pte)) break; if (va >= endva) break; pte_remove(pmap, va, hold_flag); } PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); //debugf("mmu_booke_remove: e\n"); } /* * Remove physical page from all pmaps in which it resides. */ static void mmu_booke_remove_all(vm_page_t m) { pv_entry_t pv, pvn; uint8_t hold_flag; rw_wlock(&pvh_global_lock); TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_link, pvn) { PMAP_LOCK(pv->pv_pmap); hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); pte_remove(pv->pv_pmap, pv->pv_va, hold_flag); PMAP_UNLOCK(pv->pv_pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } /* * Map a range of physical addresses into kernel virtual address space. */ static vm_offset_t mmu_booke_map(vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva = *virt; vm_offset_t va = sva; #ifdef __powerpc64__ /* XXX: Handle memory not starting at 0x0. */ if (pa_end < ctob(Maxmem)) return (PHYS_TO_DMAP(pa_start)); #endif while (pa_start < pa_end) { mmu_booke_kenter(va, pa_start); va += PAGE_SIZE; pa_start += PAGE_SIZE; } *virt = va; return (sva); } /* * The pmap must be activated before it's address space can be accessed in any * way. */ static void mmu_booke_activate(struct thread *td) { pmap_t pmap; u_int cpuid; pmap = &td->td_proc->p_vmspace->vm_pmap; CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")", __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); sched_pin(); cpuid = PCPU_GET(cpuid); CPU_SET_ATOMIC(cpuid, &pmap->pm_active); PCPU_SET(curpmap, pmap); if (pmap->pm_tid[cpuid] == TID_NONE) tid_alloc(pmap); /* Load PID0 register with pmap tid value. */ mtspr(SPR_PID0, pmap->pm_tid[cpuid]); __asm __volatile("isync"); mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); sched_unpin(); CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); } /* * Deactivate the specified process's address space. */ static void mmu_booke_deactivate(struct thread *td) { pmap_t pmap; pmap = &td->td_proc->p_vmspace->vm_pmap; CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX, __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); PCPU_SET(curpmap, NULL); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
*/ static void mmu_booke_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * Set the physical protection on the specified range of this map as requested. */ static void mmu_booke_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va; vm_page_t m; pte_t *pte; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { mmu_booke_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (va = sva; va < eva; va += PAGE_SIZE) { if ((pte = pte_find(pmap, va)) != NULL) { if (PTE_ISVALID(pte)) { m = PHYS_TO_VM_PAGE(PTE_PA(pte)); mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); /* Handle modified pages. */ if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) vm_page_dirty(m); tlb0_flush_entry(va); *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } } } PMAP_UNLOCK(pmap); } /* * Clear the write and modified bits in each of the given page's mappings. */ static void mmu_booke_remove_write(vm_page_t m) { pv_entry_t pv; pte_t *pte; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_remove_write: page %p is not managed", m)); vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL) { if (PTE_ISVALID(pte)) { m = PHYS_TO_VM_PAGE(PTE_PA(pte)); mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); /* Handle modified pages. */ if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); /* Flush mapping from TLB0. */ *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } } PMAP_UNLOCK(pv->pv_pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. */ static vm_page_t mmu_booke_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pte_t *pte; vm_page_t m; uint32_t pte_wbit; m = NULL; PMAP_LOCK(pmap); pte = pte_find(pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) { if (pmap == kernel_pmap) pte_wbit = PTE_SW; else pte_wbit = PTE_UW; if ((*pte & pte_wbit) != 0 || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(PTE_PA(pte)); if (!vm_page_wire_mapped(m)) m = NULL; } } PMAP_UNLOCK(pmap); return (m); } /* * Initialize a vm_page's machine-dependent fields. */ static void mmu_booke_page_init(vm_page_t m) { m->md.pv_tracked = 0; TAILQ_INIT(&m->md.pv_list); } /* * Return whether or not the specified physical page was modified * in any of physical maps. */ static bool mmu_booke_is_modified(vm_page_t m) { pte_t *pte; pv_entry_t pv; bool rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_is_modified: page %p is not managed", m)); rv = false; /* * If the page is not busied then this check is racy. */ if (!pmap_page_is_write_mapped(m)) return (false); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL && PTE_ISVALID(pte)) { if (PTE_ISMODIFIED(pte)) rv = true; } PMAP_UNLOCK(pv->pv_pmap); if (rv) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * Return whether or not the specified virtual address is eligible * for prefault. */ static bool mmu_booke_is_prefaultable(pmap_t pmap, vm_offset_t addr) { return (false); } /* * Return whether or not the specified physical page was referenced * in any physical maps. 
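/*
 * Sketch of the access check in mmu_booke_extract_and_hold() above: a read is
 * always permitted, while a write request requires the supervisor or user
 * write bit depending on which pmap is involved.  The bit values here are
 * placeholders standing in for PTE_SW/PTE_UW.
 */
static int
mapping_permits(unsigned int pte_bits, int is_kernel_pmap, int want_write)
{
        unsigned int wbit = is_kernel_pmap ? 0x02 /* PTE_SW-like */
            : 0x10 /* PTE_UW-like */;

        return (!want_write || (pte_bits & wbit) != 0);
}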
*/ static bool mmu_booke_is_referenced(vm_page_t m) { pte_t *pte; pv_entry_t pv; bool rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_is_referenced: page %p is not managed", m)); rv = false; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL && PTE_ISVALID(pte)) { if (PTE_ISREFERENCED(pte)) rv = true; } PMAP_UNLOCK(pv->pv_pmap); if (rv) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * Clear the modify bits on the specified physical page. */ static void mmu_booke_clear_modify(vm_page_t m) { pte_t *pte; pv_entry_t pv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_clear_modify: page %p is not managed", m)); vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL && PTE_ISVALID(pte)) { mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { tlb0_flush_entry(pv->pv_va); *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | PTE_REFERENCED); } tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } PMAP_UNLOCK(pv->pv_pmap); } rw_wunlock(&pvh_global_lock); } /* * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * As an optimization, update the page's dirty field if a modified bit is * found while counting reference bits. This opportunistic update can be * performed at low cost and can eliminate the need for some future calls * to pmap_is_modified(). However, since this function stops after * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). */ static int mmu_booke_ts_referenced(vm_page_t m) { pte_t *pte; pv_entry_t pv; int count; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_ts_referenced: page %p is not managed", m)); count = 0; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL && PTE_ISVALID(pte)) { if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); if (PTE_ISREFERENCED(pte)) { mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(pv->pv_va); *pte &= ~PTE_REFERENCED; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); if (++count >= PMAP_TS_REFERENCED_MAX) { PMAP_UNLOCK(pv->pv_pmap); break; } } } PMAP_UNLOCK(pv->pv_pmap); } rw_wunlock(&pvh_global_lock); return (count); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range must * have the wired attribute set. In contrast, invalid mappings cannot have * the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, so * there is no need to invalidate any TLB entries. 
*/ static void mmu_booke_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va; pte_t *pte; PMAP_LOCK(pmap); for (va = sva; va < eva; va += PAGE_SIZE) { if ((pte = pte_find(pmap, va)) != NULL && PTE_ISVALID(pte)) { if (!PTE_ISWIRED(pte)) panic("mmu_booke_unwire: pte %p isn't wired", pte); *pte &= ~PTE_WIRED; pmap->pm_stats.wired_count--; } } PMAP_UNLOCK(pmap); } /* * Return true if the pmap's pv is one of the first 16 pvs linked to from this * page. This count may be changed upwards or downwards in the future; it is * only necessary that true be returned for a small subset of pmaps for proper * page aging. */ static bool mmu_booke_page_exists_quick(pmap_t pmap, vm_page_t m) { pv_entry_t pv; int loops; bool rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_page_exists_quick: page %p is not managed", m)); loops = 0; rv = false; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { if (pv->pv_pmap == pmap) { rv = true; break; } if (++loops >= 16) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * Return the number of managed mappings to the given physical page that are * wired. */ static int mmu_booke_page_wired_mappings(vm_page_t m) { pv_entry_t pv; pte_t *pte; int count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL) if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) count++; PMAP_UNLOCK(pv->pv_pmap); } rw_wunlock(&pvh_global_lock); return (count); } static int mmu_booke_dev_direct_mapped(vm_paddr_t pa, vm_size_t size) { int i; vm_offset_t va; /* * This currently does not work for entries that * overlap TLB1 entries. */ for (i = 0; i < TLB1_ENTRIES; i ++) { if (tlb1_iomapped(i, pa, size, &va) == 0) return (0); } return (EFAULT); } void mmu_booke_dumpsys_map(vm_paddr_t pa, size_t sz, void **va) { vm_paddr_t ppa; vm_offset_t ofs; vm_size_t gran; /* Minidumps are based on virtual memory addresses. */ if (do_minidump) { *va = (void *)(vm_offset_t)pa; return; } /* Raw physical memory dumps don't have a virtual address. */ /* We always map a 256MB page at 256M. */ gran = 256 * 1024 * 1024; ppa = rounddown2(pa, gran); ofs = pa - ppa; *va = (void *)gran; tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); if (sz > (gran - ofs)) tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, _TLB_ENTRY_IO); } void mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t sz, void *va) { vm_paddr_t ppa; vm_offset_t ofs; vm_size_t gran; tlb_entry_t e; int i; /* Minidumps are based on virtual memory addresses. */ /* Nothing to do... */ if (do_minidump) return; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) break; } /* Raw physical memory dumps don't have a virtual address. */ i--; e.mas1 = 0; e.mas2 = 0; e.mas3 = 0; tlb1_write_entry(&e, i); gran = 256 * 1024 * 1024; ppa = rounddown2(pa, gran); ofs = pa - ppa; if (sz > (gran - ofs)) { i--; e.mas1 = 0; e.mas2 = 0; e.mas3 = 0; tlb1_write_entry(&e, i); } } extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; void mmu_booke_scan_init(void) { vm_offset_t va; pte_t *pte; int i; if (!do_minidump) { /* Initialize phys. segments for dumpsys(). 
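/*
 * Worked example for the 256MB dump window set up by mmu_booke_dumpsys_map()
 * above (raw, non-minidump case).  The physical address is hypothetical.
 */
#include <stdio.h>

int
main(void)
{
        unsigned long long gran = 256ULL * 1024 * 1024;
        unsigned long long pa = 0x12345678ULL;          /* hypothetical */
        unsigned long long ppa = pa & ~(gran - 1);      /* rounddown2(pa, gran) */
        unsigned long long ofs = pa - ppa;

        /* 0x12345678 rounds down to 0x10000000, leaving offset 0x02345678. */
        printf("ppa = 0x%llx, ofs = 0x%llx\n", ppa, ofs);
        return (0);
}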
*/ memset(&dump_map, 0, sizeof(dump_map)); mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, &availmem_regions_sz); for (i = 0; i < physmem_regions_sz; i++) { dump_map[i].pa_start = physmem_regions[i].mr_start; dump_map[i].pa_size = physmem_regions[i].mr_size; } return; } /* Virtual segments for minidumps: */ memset(&dump_map, 0, sizeof(dump_map)); /* 1st: kernel .data and .bss. */ dump_map[0].pa_start = trunc_page((uintptr_t)_etext); dump_map[0].pa_size = round_page((uintptr_t)_end) - dump_map[0].pa_start; /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ dump_map[1].pa_start = data_start; dump_map[1].pa_size = data_end - data_start; /* 3rd: kernel VM. */ va = dump_map[1].pa_start + dump_map[1].pa_size; /* Find start of next chunk (from va). */ while (va < virtual_end) { /* Don't dump the buffer cache. */ if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { va = kmi.buffer_eva; continue; } pte = pte_find(kernel_pmap, va); if (pte != NULL && PTE_ISVALID(pte)) break; va += PAGE_SIZE; } if (va < virtual_end) { dump_map[2].pa_start = va; va += PAGE_SIZE; /* Find last page in chunk. */ while (va < virtual_end) { /* Don't run into the buffer cache. */ if (va == kmi.buffer_sva) break; pte = pte_find(kernel_pmap, va); if (pte == NULL || !PTE_ISVALID(pte)) break; va += PAGE_SIZE; } dump_map[2].pa_size = va - dump_map[2].pa_start; } } /* * Map a set of physical memory pages into the kernel virtual address space. * Return a pointer to where it is mapped. This routine is intended to be used * for mapping device memory, NOT real memory. */ static void * mmu_booke_mapdev(vm_paddr_t pa, vm_size_t size) { return (mmu_booke_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT)); } static int tlb1_find_pa(vm_paddr_t pa, tlb_entry_t *e) { int i; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(e, i); if ((e->mas1 & MAS1_VALID) == 0) continue; if (e->phys == pa) return (i); } return (-1); } static void * mmu_booke_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) { tlb_entry_t e; vm_paddr_t tmppa; #ifndef __powerpc64__ uintptr_t tmpva; #endif uintptr_t va, retva; vm_size_t sz; int i; int wimge; /* * Check if this is premapped in TLB1. */ sz = size; tmppa = pa; va = ~0; wimge = tlb_calc_wimg(pa, ma); for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED))) continue; if (tmppa >= e.phys && tmppa < e.phys + e.size) { va = e.virt + (pa - e.phys); tmppa = e.phys + e.size; sz -= MIN(sz, e.size - (pa - e.phys)); while (sz > 0 && (i = tlb1_find_pa(tmppa, &e)) != -1) { if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED))) break; sz -= MIN(sz, e.size); tmppa = e.phys + e.size; } if (sz != 0) break; return ((void *)va); } } size = roundup(size, PAGE_SIZE); #ifdef __powerpc64__ KASSERT(pa < VM_MAPDEV_PA_MAX, ("Unsupported physical address! %lx", pa)); va = VM_MAPDEV_BASE + pa; retva = va; #ifdef POW2_MAPPINGS /* * Align the mapping to a power of 2 size, taking into account that we * may need to increase the size multiple times to satisfy the size and * alignment requirements. * * This works in the general case because it's very rare (near never?) * to have different access properties (WIMG) within a single * power-of-two region. If a design does call for that, POW2_MAPPINGS * can be undefined, and exact mappings will be used instead. 
*/ sz = size; size = roundup2(size, 1 << ilog2(size)); while (rounddown2(va, size) + size < va + sz) size <<= 1; va = rounddown2(va, size); pa = rounddown2(pa, size); #endif #else /* * The device mapping area is between VM_MAXUSER_ADDRESS and * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. */ #ifdef SPARSE_MAPDEV /* * With a sparse mapdev, align to the largest starting region. This * could feasibly be optimized for a 'best-fit' alignment, but that * calculation could be very costly. * Align to the smaller of: * - first set bit in overlap of (pa & size mask) * - largest size envelope * * It's possible the device mapping may start at a PA that's not larger * than the size mask, so we need to offset in to maximize the TLB entry * range and minimize the number of used TLB entries. */ do { tmpva = tlb1_map_base; sz = ffsl((~((1 << flsl(size-1)) - 1)) & pa); sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); #endif va = atomic_fetchadd_int(&tlb1_map_base, size); retva = va; #endif if (tlb1_mapin_region(va, pa, size, tlb_calc_wimg(pa, ma)) != size) return (NULL); return ((void *)retva); } /* * 'Unmap' a range mapped by mmu_booke_mapdev(). */ static void mmu_booke_unmapdev(void *p, vm_size_t size) { #ifdef SUPPORTS_SHRINKING_TLB1 vm_offset_t base, offset, va; /* * Unmap only if this is inside kernel virtual space. */ va = (vm_offset_t)p; if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); mmu_booke_qremove(base, atop(size)); kva_free(base, size); } #endif } /* * mmu_booke_object_init_pt preloads the ptes for a given object into the * specified pmap. This eliminates the blast of soft faults on process startup * and immediately after an mmap. */ static void mmu_booke_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("mmu_booke_object_init_pt: non-device object")); } /* * Perform the pmap work for mincore. */ static int mmu_booke_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap) { /* XXX: this should be implemented at some point */ return (0); } static int mmu_booke_change_attr(vm_offset_t addr, vm_size_t sz, vm_memattr_t mode) { vm_offset_t va; pte_t *pte; int i, j; tlb_entry_t e; addr = trunc_page(addr); /* Only allow changes to mapped kernel addresses. This includes: * - KVA * - DMAP (powerpc64) * - Device mappings */ if (addr <= VM_MAXUSER_ADDRESS || #ifdef __powerpc64__ (addr >= tlb1_map_base && addr < DMAP_BASE_ADDRESS) || (addr > DMAP_MAX_ADDRESS && addr < VM_MIN_KERNEL_ADDRESS) || #else (addr >= tlb1_map_base && addr < VM_MIN_KERNEL_ADDRESS) || #endif (addr > VM_MAX_KERNEL_ADDRESS)) return (EINVAL); /* Check TLB1 mappings */ for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (addr >= e.virt && addr < e.virt + e.size) break; } if (i < TLB1_ENTRIES) { /* Only allow full mappings to be modified for now. */ /* Validate the range. 
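/*
 * Sketch of the POW2_MAPPINGS size selection in mmu_booke_mapdev_attr()
 * above: grow the mapping to a power of two large enough that one naturally
 * aligned block covers the whole [va, va + size) request.  ilog2() is
 * emulated here so the snippet stands alone.
 */
static unsigned long
pow2_map_size(unsigned long va, unsigned long size)
{
        unsigned long sz = size, p2;

        for (p2 = 1; (p2 << 1) <= size; p2 <<= 1)
                ;                               /* p2 = 1 << ilog2(size) */
        size = (size + p2 - 1) & ~(p2 - 1);     /* roundup2(size, p2) */
        while ((va & ~(size - 1)) + size < va + sz)
                size <<= 1;                     /* grow until the aligned block covers va..va+sz */
        return (size);
}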
*/ for (j = i, va = addr; va < addr + sz; va += e.size, j++) { tlb1_read_entry(&e, j); if (va != e.virt || (sz - (va - addr) < e.size)) return (EINVAL); } for (va = addr; va < addr + sz; va += e.size, i++) { tlb1_read_entry(&e, i); e.mas2 &= ~MAS2_WIMGE_MASK; e.mas2 |= tlb_calc_wimg(e.phys, mode); /* * Write it out to the TLB. Should really re-sync with other * cores. */ tlb1_write_entry(&e, i); } return (0); } /* Not in TLB1, try through pmap */ /* First validate the range. */ for (va = addr; va < addr + sz; va += PAGE_SIZE) { pte = pte_find(kernel_pmap, va); if (pte == NULL || !PTE_ISVALID(pte)) return (EINVAL); } mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); for (va = addr; va < addr + sz; va += PAGE_SIZE) { pte = pte_find(kernel_pmap, va); *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; tlb0_flush_entry(va); } tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); return (0); } static void mmu_booke_page_array_startup(long pages) { vm_page_array_size = pages; } /**************************************************************************/ /* TID handling */ /**************************************************************************/ /* * Allocate a TID. If necessary, steal one from someone else. * The new TID is flushed from the TLB before returning. */ static tlbtid_t tid_alloc(pmap_t pmap) { tlbtid_t tid; int thiscpu; KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); thiscpu = PCPU_GET(cpuid); tid = PCPU_GET(booke.tid_next); if (tid > TID_MAX) tid = TID_MIN; PCPU_SET(booke.tid_next, tid + 1); /* If we are stealing TID then clear the relevant pmap's field */ if (tidbusy[thiscpu][tid] != NULL) { CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; /* Flush all entries from TLB0 matching this TID. */ tid_flush(tid); } tidbusy[thiscpu][tid] = pmap; pmap->pm_tid[thiscpu] = tid; __asm __volatile("msync; isync"); CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, PCPU_GET(booke.tid_next)); return (tid); } /**************************************************************************/ /* TLB0 handling */ /**************************************************************************/ /* Convert TLB0 va and way number to tlb0[] table index. */ static inline unsigned int tlb0_tableidx(vm_offset_t va, unsigned int way) { unsigned int idx; idx = (way * TLB0_ENTRIES_PER_WAY); idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; return (idx); } /* * Invalidate TLB0 entry. */ static inline void tlb0_flush_entry(vm_offset_t va) { CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); mtx_assert(&tlbivax_mutex, MA_OWNED); __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); __asm __volatile("isync; msync"); __asm __volatile("tlbsync; msync"); CTR1(KTR_PMAP, "%s: e", __func__); } /**************************************************************************/ /* TLB1 handling */ /**************************************************************************/ /* * TLB1 mapping notes: * * TLB1[0] Kernel text and data. * TLB1[1-15] Additional kernel text and data mappings (if required), PCI * windows, other devices mappings. */ /* * Read an entry from given TLB1 slot. 
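/*
 * Sketch of the tlb0_tableidx() calculation above, using an assumed TLB0
 * geometry of 128 entries per way with the set index taken from EPN bits
 * 12..18.  The real values come from TLB0_ENTRIES_PER_WAY and the
 * MAS2_TLB0_ENTRY_IDX_* definitions.
 */
static unsigned int
example_tlb0_tableidx(unsigned long va, unsigned int way)
{
        const unsigned int entries_per_way = 128;       /* assumed */
        const unsigned long idx_mask = 0x7f000UL;       /* assumed: EPN bits 12..18 */

        return (way * entries_per_way + (unsigned int)((va & idx_mask) >> 12));
}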
*/ void tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) { register_t msr; uint32_t mas0; KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); msr = mfmsr(); __asm __volatile("wrteei 0"); mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); mtspr(SPR_MAS0, mas0); __asm __volatile("isync; tlbre"); entry->mas1 = mfspr(SPR_MAS1); entry->mas2 = mfspr(SPR_MAS2); entry->mas3 = mfspr(SPR_MAS3); switch ((mfpvr() >> 16) & 0xFFFF) { case FSL_E500v2: case FSL_E500mc: case FSL_E5500: case FSL_E6500: entry->mas7 = mfspr(SPR_MAS7); break; default: entry->mas7 = 0; break; } __asm __volatile("wrtee %0" :: "r"(msr)); entry->virt = entry->mas2 & MAS2_EPN_MASK; entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | (entry->mas3 & MAS3_RPN); entry->size = tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); } struct tlbwrite_args { tlb_entry_t *e; unsigned int idx; }; static uint32_t tlb1_find_free(void) { tlb_entry_t e; int i; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if ((e.mas1 & MAS1_VALID) == 0) return (i); } return (-1); } static void tlb1_purge_va_range(vm_offset_t va, vm_size_t size) { tlb_entry_t e; int i; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if ((e.mas1 & MAS1_VALID) == 0) continue; if ((e.mas2 & MAS2_EPN_MASK) >= va && (e.mas2 & MAS2_EPN_MASK) < va + size) { mtspr(SPR_MAS1, e.mas1 & ~MAS1_VALID); __asm __volatile("isync; tlbwe; isync; msync"); } } } static void tlb1_write_entry_int(void *arg) { struct tlbwrite_args *args = arg; uint32_t idx, mas0; idx = args->idx; if (idx == -1) { tlb1_purge_va_range(args->e->virt, args->e->size); idx = tlb1_find_free(); if (idx == -1) panic("No free TLB1 entries!\n"); } /* Select entry */ mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx); mtspr(SPR_MAS0, mas0); mtspr(SPR_MAS1, args->e->mas1); mtspr(SPR_MAS2, args->e->mas2); mtspr(SPR_MAS3, args->e->mas3); switch ((mfpvr() >> 16) & 0xFFFF) { case FSL_E500mc: case FSL_E5500: case FSL_E6500: mtspr(SPR_MAS8, 0); /* FALLTHROUGH */ case FSL_E500v2: mtspr(SPR_MAS7, args->e->mas7); break; default: break; } __asm __volatile("isync; tlbwe; isync; msync"); } static void tlb1_write_entry_sync(void *arg) { /* Empty synchronization point for smp_rendezvous(). */ } /* * Write given entry to TLB1 hardware. */ static void tlb1_write_entry(tlb_entry_t *e, unsigned int idx) { struct tlbwrite_args args; args.e = e; args.idx = idx; #ifdef SMP if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { mb(); smp_rendezvous(tlb1_write_entry_sync, tlb1_write_entry_int, tlb1_write_entry_sync, &args); } else #endif { register_t msr; msr = mfmsr(); __asm __volatile("wrteei 0"); tlb1_write_entry_int(&args); __asm __volatile("wrtee %0" :: "r"(msr)); } } /* * Convert TLB TSIZE value to mapped region size. */ static vm_size_t tsize2size(unsigned int tsize) { /* * size = 4^tsize KB * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) */ return ((1 << (2 * tsize)) * 1024); } /* * Convert region size (must be power of 4) to TLB TSIZE value. */ static unsigned int size2tsize(vm_size_t size) { return (ilog2(size) / 2 - 5); } /* * Register permanent kernel mapping in TLB1. * * Entries are created starting from index 0 (current free entry is * kept in tlb1_idx) and are not supposed to be invalidated. */ int tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, uint32_t flags) { tlb_entry_t e; uint32_t ts, tid; int tsize, index; /* First try to update an existing entry. */ for (index = 0; index < TLB1_ENTRIES; index++) { tlb1_read_entry(&e, index); /* Check if we're just updating the flags, and update them. 
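/*
 * Worked example for the tsize2size()/size2tsize() pair above:
 * tsize2size(t) = 4^t KB and size2tsize(s) = log2(s)/2 - 5, so e.g. a 16MB
 * region has log2 = 24, tsize = 24/2 - 5 = 7, and 4^7 KB = 16MB.
 */
#include <stdio.h>

int
main(void)
{
        unsigned int tsize;
        unsigned long bytes;

        for (tsize = 1; tsize <= 9; tsize++) {
                bytes = (1UL << (2 * tsize)) * 1024;    /* tsize2size() */
                printf("tsize %u -> %lu KB\n", tsize, bytes / 1024);
        }
        return (0);
}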
*/ if (e.phys == pa && e.virt == va && e.size == size) { e.mas2 = (va & MAS2_EPN_MASK) | flags; tlb1_write_entry(&e, index); return (0); } } /* Convert size to TSIZE */ tsize = size2tsize(size); tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK; /* XXX TS is hard coded to 0 for now as we only use single address space */ ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; e.phys = pa; e.virt = va; e.size = size; e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); e.mas2 = (va & MAS2_EPN_MASK) | flags; /* Set supervisor RWX permission bits */ e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; e.mas7 = (pa >> 32) & MAS7_RPN; tlb1_write_entry(&e, -1); return (0); } /* * Map in contiguous RAM region into the TLB1. */ static vm_size_t tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size, int wimge) { vm_offset_t base; vm_size_t mapped, sz, ssize; mapped = 0; base = va; ssize = size; while (size > 0) { sz = 1UL << (ilog2(size) & ~1); /* Align size to PA */ if (pa % sz != 0) { do { sz >>= 2; } while (pa % sz != 0); } /* Now align from there to VA */ if (va % sz != 0) { do { sz >>= 2; } while (va % sz != 0); } #ifdef __powerpc64__ /* * Clamp TLB1 entries to 4G. * * While the e6500 supports up to 1TB mappings, the e5500 * only supports up to 4G mappings. (0b1011) * * If any e6500 machines capable of supporting a very * large amount of memory appear in the future, we can * revisit this. * * For now, though, since we have plenty of space in TLB1, * always avoid creating entries larger than 4GB. */ sz = MIN(sz, 1UL << 32); #endif if (bootverbose) printf("Wiring VA=%p to PA=%jx (size=%lx)\n", (void *)va, (uintmax_t)pa, (long)sz); if (tlb1_set_entry(va, pa, sz, _TLB_ENTRY_SHARED | wimge) < 0) return (mapped); size -= sz; pa += sz; va += sz; } mapped = (va - base); if (bootverbose) printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n", mapped, mapped - ssize); return (mapped); } /* * TLB1 initialization routine, to be called after the very first * assembler level setup done in locore.S. */ void tlb1_init(void) { vm_offset_t mas2; uint32_t mas0, mas1, mas3, mas7; uint32_t tsz; tlb1_get_tlbconf(); mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0); mtspr(SPR_MAS0, mas0); __asm __volatile("isync; tlbre"); mas1 = mfspr(SPR_MAS1); mas2 = mfspr(SPR_MAS2); mas3 = mfspr(SPR_MAS3); mas7 = mfspr(SPR_MAS7); kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) | (mas3 & MAS3_RPN); tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; kernsize += (tsz > 0) ? tsize2size(tsz) : 0; kernstart = trunc_page(mas2); /* Setup TLB miss defaults */ set_mas4_defaults(); } /* * pmap_early_io_unmap() should be used in short conjunction with * pmap_early_io_map(), as in the following snippet: * * x = pmap_early_io_map(...); * * pmap_early_io_unmap(x, size); * * And avoiding more allocations between. 
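/*
 * Sketch of the chunk-size selection in tlb1_mapin_region() above: each TLB1
 * entry must be a power of four in size and naturally aligned, so the walk
 * repeatedly picks the largest power-of-four chunk that is aligned to both
 * the current PA and VA.  ilog2() is emulated so the snippet stands alone;
 * inputs are assumed to be at least page aligned.
 */
static unsigned long
tlb1_chunk_size(unsigned long va, unsigned long pa, unsigned long size)
{
        unsigned long sz, log2;

        for (log2 = 0; (1UL << (log2 + 1)) <= size; log2++)
                ;
        sz = 1UL << (log2 & ~1UL);      /* largest power of four <= size */
        while (pa % sz != 0)
                sz >>= 2;               /* shrink until aligned to PA ... */
        while (va % sz != 0)
                sz >>= 2;               /* ... and then to VA */
        return (sz);
}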
*/ void pmap_early_io_unmap(vm_offset_t va, vm_size_t size) { int i; tlb_entry_t e; vm_size_t isize; size = roundup(size, PAGE_SIZE); isize = size; for (i = 0; i < TLB1_ENTRIES && size > 0; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (va <= e.virt && (va + isize) >= (e.virt + e.size)) { size -= e.size; e.mas1 &= ~MAS1_VALID; tlb1_write_entry(&e, i); } } if (tlb1_map_base == va + isize) tlb1_map_base -= isize; } vm_offset_t pmap_early_io_map(vm_paddr_t pa, vm_size_t size) { vm_paddr_t pa_base; vm_offset_t va, sz; int i; tlb_entry_t e; KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!")); for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (pa >= e.phys && (pa + size) <= (e.phys + e.size)) return (e.virt + (pa - e.phys)); } pa_base = rounddown(pa, PAGE_SIZE); size = roundup(size + (pa - pa_base), PAGE_SIZE); tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1)); va = tlb1_map_base + (pa - pa_base); do { sz = 1 << (ilog2(size) & ~1); tlb1_set_entry(tlb1_map_base, pa_base, sz, _TLB_ENTRY_SHARED | _TLB_ENTRY_IO); size -= sz; pa_base += sz; tlb1_map_base += sz; } while (size > 0); return (va); } void pmap_track_page(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa; vm_page_t page; struct pv_entry *pve; va = trunc_page(va); pa = pmap_kextract(va); page = PHYS_TO_VM_PAGE(pa); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) { if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { goto out; } } page->md.pv_tracked = true; pv_insert(pmap, va, page); out: PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); } /* * Setup MAS4 defaults. * These values are loaded to MAS0-2 on a TLB miss. */ static void set_mas4_defaults(void) { uint32_t mas4; /* Defaults: TLB0, PID0, TSIZED=4K */ mas4 = MAS4_TLBSELD0; mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK; #ifdef SMP mas4 |= MAS4_MD; #endif mtspr(SPR_MAS4, mas4); __asm __volatile("isync"); } /* * Return 0 if the physical IO range is encompassed by one of the * the TLB1 entries, otherwise return related error code. */ static int tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va) { uint32_t prot; vm_paddr_t pa_start; vm_paddr_t pa_end; unsigned int entry_tsize; vm_size_t entry_size; tlb_entry_t e; *va = (vm_offset_t)NULL; tlb1_read_entry(&e, i); /* Skip invalid entries */ if (!(e.mas1 & MAS1_VALID)) return (EINVAL); /* * The entry must be cache-inhibited, guarded, and r/w * so it can function as an i/o page */ prot = e.mas2 & (MAS2_I | MAS2_G); if (prot != (MAS2_I | MAS2_G)) return (EPERM); prot = e.mas3 & (MAS3_SR | MAS3_SW); if (prot != (MAS3_SR | MAS3_SW)) return (EPERM); /* The address should be within the entry range. */ entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize")); entry_size = tsize2size(entry_tsize); pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | (e.mas3 & MAS3_RPN); pa_end = pa_start + entry_size; if ((pa < pa_start) || ((pa + size) > pa_end)) return (ERANGE); /* Return virtual address of this mapping. 
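/*
 * Usage sketch for the pmap_early_io_map()/pmap_early_io_unmap() pair above,
 * following the pattern described in the comment before them: map, touch the
 * device, unmap, with no other early allocations in between.  The device
 * physical address is hypothetical and kernel headers are assumed.
 */
static void
early_io_example(void)
{
        vm_offset_t va;

        va = pmap_early_io_map(0xfe11c500, PAGE_SIZE);  /* hypothetical UART */
        (void)*(volatile uint32_t *)va;                 /* touch a register */
        pmap_early_io_unmap(va, PAGE_SIZE);
}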
*/ *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start); return (0); } #ifdef DDB /* Print out contents of the MAS registers for each TLB0 entry */ static void #ifdef __powerpc64__ tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3, #else tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3, #endif uint32_t mas7) { int as; char desc[3]; tlbtid_t tid; vm_size_t size; unsigned int tsize; desc[2] = '\0'; if (mas1 & MAS1_VALID) desc[0] = 'V'; else desc[0] = ' '; if (mas1 & MAS1_IPROT) desc[1] = 'P'; else desc[1] = ' '; as = (mas1 & MAS1_TS_MASK) ? 1 : 0; tid = MAS1_GETTID(mas1); tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; size = 0; if (tsize) size = tsize2size(tsize); printf("%3d: (%s) [AS=%d] " "sz = 0x%jx tsz = %d tid = %d mas1 = 0x%08x " "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n", i, desc, as, (uintmax_t)size, tsize, tid, mas1, mas2, mas3, mas7); } DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries) { uint32_t mas0, mas1, mas3, mas7; #ifdef __powerpc64__ uint64_t mas2; #else uint32_t mas2; #endif int entryidx, way, idx; printf("TLB0 entries:\n"); for (way = 0; way < TLB0_WAYS; way ++) for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); mtspr(SPR_MAS0, mas0); mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; mtspr(SPR_MAS2, mas2); __asm __volatile("isync; tlbre"); mas1 = mfspr(SPR_MAS1); mas2 = mfspr(SPR_MAS2); mas3 = mfspr(SPR_MAS3); mas7 = mfspr(SPR_MAS7); idx = tlb0_tableidx(mas2, way); tlb_print_entry(idx, mas1, mas2, mas3, mas7); } } /* * Print out contents of the MAS registers for each TLB1 entry */ DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries) { uint32_t mas0, mas1, mas3, mas7; #ifdef __powerpc64__ uint64_t mas2; #else uint32_t mas2; #endif int i; printf("TLB1 entries:\n"); for (i = 0; i < TLB1_ENTRIES; i++) { mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i); mtspr(SPR_MAS0, mas0); __asm __volatile("isync; tlbre"); mas1 = mfspr(SPR_MAS1); mas2 = mfspr(SPR_MAS2); mas3 = mfspr(SPR_MAS3); mas7 = mfspr(SPR_MAS7); tlb_print_entry(i, mas1, mas2, mas3, mas7); } } #endif diff --git a/sys/powerpc/booke/pmap_32.c b/sys/powerpc/booke/pmap_32.c index 580c54c3642f..efeefb6a91c5 100644 --- a/sys/powerpc/booke/pmap_32.c +++ b/sys/powerpc/booke/pmap_32.c @@ -1,1001 +1,988 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (C) 2020 Justin Hibbits * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski * Copyright (C) 2006 Semihalf, Marian Balakowicz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Some hw specific parts of this pmap were derived or influenced * by NetBSD's ibm4xx pmap module. More generic code is shared with * a few other pmap modules from the FreeBSD tree. */ /* * VM layout notes: * * Kernel and user threads run within one common virtual address space * defined by AS=0. * * 32-bit pmap: * Virtual address space layout: * ----------------------------- * 0x0000_0000 - 0x7fff_ffff : user process * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) * 0xc000_0000 - 0xffff_efff : KVA */ #include #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PRI0ptrX "08x" /* Reserved KVA space and mutex for mmu_booke_zero_page. */ static vm_offset_t zero_page_va; static struct mtx zero_page_mutex; /* Reserved KVA space and mutex for mmu_booke_copy_page. */ static vm_offset_t copy_page_src_va; static vm_offset_t copy_page_dst_va; static struct mtx copy_page_mutex; static vm_offset_t kernel_ptbl_root; static unsigned int kernel_ptbls; /* Number of KVA ptbls. */ /**************************************************************************/ /* PMAP */ /**************************************************************************/ #define VM_MAPDEV_BASE ((vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE) static void tid_flush(tlbtid_t tid); -static unsigned long ilog2(unsigned long); /**************************************************************************/ /* Page table management */ /**************************************************************************/ #define PMAP_ROOT_SIZE (sizeof(pte_t**) * PDIR_NENTRIES) static void ptbl_init(void); static struct ptbl_buf *ptbl_buf_alloc(void); static void ptbl_buf_free(struct ptbl_buf *); static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); static pte_t *ptbl_alloc(pmap_t, unsigned int, bool); static void ptbl_free(pmap_t, unsigned int); static void ptbl_hold(pmap_t, unsigned int); static int ptbl_unhold(pmap_t, unsigned int); static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t); static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, bool); static int pte_remove(pmap_t, vm_offset_t, uint8_t); static pte_t *pte_find(pmap_t, vm_offset_t); struct ptbl_buf { TAILQ_ENTRY(ptbl_buf) link; /* list link */ vm_offset_t kva; /* va of mapping */ }; /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ #define PTBL_BUFS (128 * 16) /* ptbl free list and a lock used for access synchronization. */ static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; static struct mtx ptbl_buf_freelist_lock; /* Base address of kva space allocated fot ptbl bufs. */ static vm_offset_t ptbl_buf_pool_vabase; /* Pointer to ptbl_buf structures. 
*/ static struct ptbl_buf *ptbl_bufs; /**************************************************************************/ /* Page table related */ /**************************************************************************/ /* Initialize pool of kva ptbl buffers. */ static void ptbl_init(void) { int i; CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); TAILQ_INIT(&ptbl_buf_freelist); for (i = 0; i < PTBL_BUFS; i++) { ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); } } /* Get a ptbl_buf from the freelist. */ static struct ptbl_buf * ptbl_buf_alloc(void) { struct ptbl_buf *buf; mtx_lock(&ptbl_buf_freelist_lock); buf = TAILQ_FIRST(&ptbl_buf_freelist); if (buf != NULL) TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); mtx_unlock(&ptbl_buf_freelist_lock); CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); return (buf); } /* Return ptbl buff to free pool. */ static void ptbl_buf_free(struct ptbl_buf *buf) { CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); mtx_lock(&ptbl_buf_freelist_lock); TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); mtx_unlock(&ptbl_buf_freelist_lock); } /* * Search the list of allocated ptbl bufs and find on list of allocated ptbls */ static void ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) { struct ptbl_buf *pbuf; CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); PMAP_LOCK_ASSERT(pmap, MA_OWNED); TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) if (pbuf->kva == (vm_offset_t)ptbl) { /* Remove from pmap ptbl buf list. */ TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); /* Free corresponding ptbl buf. */ ptbl_buf_free(pbuf); break; } } /* Allocate page table. */ static pte_t * ptbl_alloc(pmap_t pmap, unsigned int pdir_idx, bool nosleep) { vm_page_t mtbl[PTBL_PAGES]; vm_page_t m; struct ptbl_buf *pbuf; unsigned int pidx; pte_t *ptbl; int i, j; CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, (pmap == kernel_pmap), pdir_idx); KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), ("ptbl_alloc: invalid pdir_idx")); KASSERT((pmap->pm_pdir[pdir_idx] == NULL), ("pte_alloc: valid ptbl entry exists!")); pbuf = ptbl_buf_alloc(); if (pbuf == NULL) panic("pte_alloc: couldn't alloc kernel virtual memory"); ptbl = (pte_t *)pbuf->kva; CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); for (i = 0; i < PTBL_PAGES; i++) { pidx = (PTBL_PAGES * pdir_idx) + i; while ((m = vm_page_alloc_noobj(VM_ALLOC_WIRED)) == NULL) { if (nosleep) { ptbl_free_pmap_ptbl(pmap, ptbl); for (j = 0; j < i; j++) vm_page_free(mtbl[j]); vm_wire_sub(i); return (NULL); } PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); vm_wait(NULL); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } m->pindex = pidx; mtbl[i] = m; } /* Map allocated pages into kernel_pmap. */ mmu_booke_qenter((vm_offset_t)ptbl, mtbl, PTBL_PAGES); /* Zero whole ptbl. */ bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); /* Add pbuf to the pmap ptbl bufs list. */ TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); return (ptbl); } /* Free ptbl pages and invalidate pdir entry. 
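/*
 * Worked example for the ptbl_buf KVA carving done in ptbl_init() above:
 * buffer i starts PTBL_PAGES pages past buffer i - 1.  The pool base and the
 * PTBL_PAGES value below are assumptions for illustration.
 */
#include <stdio.h>

int
main(void)
{
        unsigned long vabase = 0xc0400000UL;    /* hypothetical pool base */
        unsigned long ptbl_pages = 2;           /* assumed PTBL_PAGES */
        unsigned long page_size = 4096;
        int i;

        for (i = 0; i < 4; i++)
                printf("ptbl_bufs[%d].kva = 0x%lx\n", i,
                    vabase + i * ptbl_pages * page_size);
        return (0);
}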
*/ static void ptbl_free(pmap_t pmap, unsigned int pdir_idx) { pte_t *ptbl; vm_paddr_t pa; vm_offset_t va; vm_page_t m; int i; CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, (pmap == kernel_pmap), pdir_idx); KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), ("ptbl_free: invalid pdir_idx")); ptbl = pmap->pm_pdir[pdir_idx]; CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); /* * Invalidate the pdir entry as soon as possible, so that other CPUs * don't attempt to look up the page tables we are releasing. */ mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); pmap->pm_pdir[pdir_idx] = NULL; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); for (i = 0; i < PTBL_PAGES; i++) { va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); pa = pte_vatopa(kernel_pmap, va); m = PHYS_TO_VM_PAGE(pa); vm_page_free_zero(m); vm_wire_sub(1); mmu_booke_kremove(va); } ptbl_free_pmap_ptbl(pmap, ptbl); } /* * Decrement ptbl pages hold count and attempt to free ptbl pages. * Called when removing pte entry from ptbl. * * Return 1 if ptbl pages were freed. */ static int ptbl_unhold(pmap_t pmap, unsigned int pdir_idx) { pte_t *ptbl; vm_paddr_t pa; vm_page_t m; int i; CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, (pmap == kernel_pmap), pdir_idx); KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), ("ptbl_unhold: invalid pdir_idx")); KASSERT((pmap != kernel_pmap), ("ptbl_unhold: unholding kernel ptbl!")); ptbl = pmap->pm_pdir[pdir_idx]; //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), ("ptbl_unhold: non kva ptbl")); /* decrement hold count */ for (i = 0; i < PTBL_PAGES; i++) { pa = pte_vatopa(kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE)); m = PHYS_TO_VM_PAGE(pa); m->ref_count--; } /* * Free ptbl pages if there are no pte etries in this ptbl. * ref_count has the same value for all ptbl pages, so check the last * page. */ if (m->ref_count == 0) { ptbl_free(pmap, pdir_idx); //debugf("ptbl_unhold: e (freed ptbl)\n"); return (1); } return (0); } /* * Increment hold count for ptbl pages. This routine is used when a new pte * entry is being inserted into the ptbl. */ static void ptbl_hold(pmap_t pmap, unsigned int pdir_idx) { vm_paddr_t pa; pte_t *ptbl; vm_page_t m; int i; CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, pdir_idx); KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), ("ptbl_hold: invalid pdir_idx")); KASSERT((pmap != kernel_pmap), ("ptbl_hold: holding kernel ptbl!")); ptbl = pmap->pm_pdir[pdir_idx]; KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); for (i = 0; i < PTBL_PAGES; i++) { pa = pte_vatopa(kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE)); m = PHYS_TO_VM_PAGE(pa); m->ref_count++; } } /* * Clean pte entry, try to free page table page if requested. * * Return 1 if ptbl pages were freed, otherwise return 0. */ static int pte_remove(pmap_t pmap, vm_offset_t va, uint8_t flags) { unsigned int pdir_idx = PDIR_IDX(va); unsigned int ptbl_idx = PTBL_IDX(va); vm_page_t m; pte_t *ptbl; pte_t *pte; //int su = (pmap == kernel_pmap); //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", // su, (u_int32_t)pmap, va, flags); ptbl = pmap->pm_pdir[pdir_idx]; KASSERT(ptbl, ("pte_remove: null ptbl")); pte = &ptbl[ptbl_idx]; if (pte == NULL || !PTE_ISVALID(pte)) return (0); if (PTE_ISWIRED(pte)) pmap->pm_stats.wired_count--; /* Get vm_page_t for mapped pte. */ m = PHYS_TO_VM_PAGE(PTE_PA(pte)); /* Handle managed entry. 
*/ if (PTE_ISMANAGED(pte)) { if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); if (PTE_ISREFERENCED(pte)) vm_page_aflag_set(m, PGA_REFERENCED); pv_remove(pmap, va, m); } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { /* * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is * used. This is needed by the NCSW support code for fast * VA<->PA translation. */ pv_remove(pmap, va, m); if (TAILQ_EMPTY(&m->md.pv_list)) m->md.pv_tracked = false; } mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte = 0; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); pmap->pm_stats.resident_count--; if (flags & PTBL_UNHOLD) { //debugf("pte_remove: e (unhold)\n"); return (ptbl_unhold(pmap, pdir_idx)); } //debugf("pte_remove: e\n"); return (0); } /* * Insert PTE for a given page and virtual address. */ static int pte_enter(pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, bool nosleep) { unsigned int pdir_idx = PDIR_IDX(va); unsigned int ptbl_idx = PTBL_IDX(va); pte_t *ptbl, *pte, pte_tmp; CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, pmap == kernel_pmap, pmap, va); /* Get the page table pointer. */ ptbl = pmap->pm_pdir[pdir_idx]; if (ptbl == NULL) { /* Allocate page table pages. */ ptbl = ptbl_alloc(pmap, pdir_idx, nosleep); if (ptbl == NULL) { KASSERT(nosleep, ("nosleep and NULL ptbl")); return (ENOMEM); } pmap->pm_pdir[pdir_idx] = ptbl; pte = &ptbl[ptbl_idx]; } else { /* * Check if there is valid mapping for requested * va, if there is, remove it. */ pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; if (PTE_ISVALID(pte)) { pte_remove(pmap, va, PTBL_HOLD); } else { /* * pte is not used, increment hold count * for ptbl pages. */ if (pmap != kernel_pmap) ptbl_hold(pmap, pdir_idx); } } /* * Insert pv_entry into pv_list for mapped page if part of managed * memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { flags |= PTE_MANAGED; /* Create and insert pv entry. */ pv_insert(pmap, va, m); } pmap->pm_stats.resident_count++; pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte = pte_tmp; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); return (0); } /* Return the pa for the given pmap/va. */ static vm_paddr_t pte_vatopa(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa = 0; pte_t *pte; pte = pte_find(pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); return (pa); } /* Get a pointer to a PTE in a page table. */ static pte_t * pte_find(pmap_t pmap, vm_offset_t va) { unsigned int pdir_idx = PDIR_IDX(va); unsigned int ptbl_idx = PTBL_IDX(va); KASSERT((pmap != NULL), ("pte_find: invalid pmap")); if (pmap->pm_pdir[pdir_idx]) return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); return (NULL); } /* Get a pointer to a PTE in a page table, or the next closest (greater) one. */ static __inline pte_t * pte_find_next(pmap_t pmap, vm_offset_t *pva) { vm_offset_t va; pte_t **pdir; pte_t *pte; unsigned long i, j; KASSERT((pmap != NULL), ("pte_find: invalid pmap")); va = *pva; i = PDIR_IDX(va); j = PTBL_IDX(va); pdir = pmap->pm_pdir; for (; i < PDIR_NENTRIES; i++, j = 0) { if (pdir[i] == NULL) continue; for (; j < PTBL_NENTRIES; j++) { pte = &pdir[i][j]; if (!PTE_ISVALID(pte)) continue; *pva = PDIR_SIZE * i + PAGE_SIZE * j; return (pte); } } return (NULL); } /* Set up kernel page tables. 
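/*
 * Sketch of the two-level index split used by pte_find() above, assuming a
 * geometry of 1024 page-directory entries covering 4MB each and 1024 PTEs
 * per page table (4KB pages).  The real values come from the PDIR_ and
 * PTBL_ macros in the pte headers.
 */
static unsigned int
example_pdir_idx(unsigned long va)
{
        return ((unsigned int)(va >> 22));              /* 4MB per directory entry */
}

static unsigned int
example_ptbl_idx(unsigned long va)
{
        return ((unsigned int)((va >> 12) & 0x3ff));    /* 1024 PTEs per table */
}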
*/ static void kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr) { pte_t *pte; vm_offset_t va; vm_offset_t pdir_start; int i; kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; kernel_pmap->pm_pdir = (pte_t **)kernel_ptbl_root; pdir_start = kernel_ptbl_root + PDIR_NENTRIES * sizeof(pte_t); /* Initialize kernel pdir */ for (i = 0; i < kernel_ptbls; i++) { kernel_pmap->pm_pdir[kptbl_min + i] = (pte_t *)(pdir_start + (i * PAGE_SIZE * PTBL_PAGES)); } /* * Fill in PTEs covering kernel code and data. They are not required * for address translation, as this area is covered by static TLB1 * entries, but for pte_vatopa() to work correctly with kernel area * addresses. */ for (va = addr; va < data_end; va += PAGE_SIZE) { pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); powerpc_sync(); *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID | PTE_PS_4KB; } } static vm_offset_t mmu_booke_alloc_kernel_pgtables(vm_offset_t data_end) { /* Allocate space for ptbl_bufs. */ ptbl_bufs = (struct ptbl_buf *)data_end; data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", (uintptr_t)ptbl_bufs, data_end); data_end = round_page(data_end); kernel_ptbl_root = data_end; data_end += PDIR_NENTRIES * sizeof(pte_t*); /* Allocate PTE tables for kernel KVA. */ kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, PDIR_SIZE); data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; debugf(" kernel ptbls: %d\n", kernel_ptbls); debugf(" kernel pdir at %#jx end = %#jx\n", (uintmax_t)kernel_ptbl_root, (uintmax_t)data_end); return (data_end); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ static int mmu_booke_pinit(pmap_t pmap) { int i; CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, curthread->td_proc->p_pid, curthread->td_proc->p_comm); KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); for (i = 0; i < MAXCPU; i++) pmap->pm_tid[i] = TID_NONE; CPU_ZERO(&kernel_pmap->pm_active); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK); bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); TAILQ_INIT(&pmap->pm_ptbl_list); return (1); } /* * Release any resources held by the given physical map. * Called when a pmap initialized by mmu_booke_pinit is being released. * Should only be called if the map contains no valid mappings. */ static void mmu_booke_release(pmap_t pmap) { KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); uma_zfree(ptbl_root_zone, pmap->pm_pdir); } static void mmu_booke_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { pte_t *pte; vm_paddr_t pa = 0; int sync_sz, valid; pmap_t pmap; vm_page_t m; vm_offset_t addr; int active; rw_wlock(&pvh_global_lock); pmap = PCPU_GET(curpmap); active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; while (sz > 0) { PMAP_LOCK(pm); pte = pte_find(pm, va); valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; if (valid) pa = PTE_PA(pte); PMAP_UNLOCK(pm); sync_sz = PAGE_SIZE - (va & PAGE_MASK); sync_sz = min(sync_sz, sz); if (valid) { if (!active) { /* * Create a mapping in the active pmap. * * XXX: We use the zero page here, because * it isn't likely to be in use. * If we ever decide to support * security.bsd.map_at_zero on Book-E, change * this to some other address that isn't * normally mappable. 
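The kernel page-table reservation above is plain arithmetic: round the kernel VA span up to whole ptbls with howmany() and advance data_end by that many ptbl allocations. A self-contained illustration with made-up sizes (the KVA span, PDIR_SIZE, and PTBL_PAGES below are placeholders, not the real configuration):

/*
 * Sizing sketch: how many ptbls cover a KVA span, and how many bytes of
 * bootstrap memory they take.  All constants are assumed for the demo.
 */
#include <stdio.h>

#define demo_howmany(x, y)	(((x) + ((y) - 1)) / (y))
#define DEMO_PAGE_SIZE		4096UL
#define DEMO_PTBL_PAGES		2UL		/* assumed pages per ptbl */
#define DEMO_PDIR_SIZE		(1UL << 22)	/* assumed VA span per ptbl */

int
main(void)
{
	unsigned long kva_size = 1UL << 30;	/* assumed 1 GB of KVA */
	unsigned long kernel_ptbls = demo_howmany(kva_size, DEMO_PDIR_SIZE);

	printf("kernel ptbls: %lu, reserved bytes: %lu\n",
	    kernel_ptbls, kernel_ptbls * DEMO_PTBL_PAGES * DEMO_PAGE_SIZE);
	return (0);
}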
*/ addr = 0; m = PHYS_TO_VM_PAGE(pa); PMAP_LOCK(pmap); pte_enter(pmap, m, addr, PTE_SR | PTE_VALID, false); __syncicache((void *)(addr + (va & PAGE_MASK)), sync_sz); pte_remove(pmap, addr, PTBL_UNHOLD); PMAP_UNLOCK(pmap); } else __syncicache((void *)va, sync_sz); } va += sync_sz; sz -= sync_sz; } rw_wunlock(&pvh_global_lock); } /* * mmu_booke_zero_page_area zeros the specified hardware page by * mapping it into virtual memory and using bzero to clear * its contents. * * off and size must reside within a single page. */ static void mmu_booke_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va; /* XXX KASSERT off and size are within a single page? */ mtx_lock(&zero_page_mutex); va = zero_page_va; mmu_booke_kenter(va, VM_PAGE_TO_PHYS(m)); bzero((caddr_t)va + off, size); mmu_booke_kremove(va); mtx_unlock(&zero_page_mutex); } /* * mmu_booke_zero_page zeros the specified hardware page. */ static void mmu_booke_zero_page(vm_page_t m) { vm_offset_t off, va; va = zero_page_va; mtx_lock(&zero_page_mutex); mmu_booke_kenter(va, VM_PAGE_TO_PHYS(m)); for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); mmu_booke_kremove(va); mtx_unlock(&zero_page_mutex); } /* * mmu_booke_copy_page copies the specified (machine independent) page by * mapping the page into virtual memory and using memcopy to copy the page, * one machine dependent page at a time. */ static void mmu_booke_copy_page(vm_page_t sm, vm_page_t dm) { vm_offset_t sva, dva; sva = copy_page_src_va; dva = copy_page_dst_va; mtx_lock(&copy_page_mutex); mmu_booke_kenter(sva, VM_PAGE_TO_PHYS(sm)); mmu_booke_kenter(dva, VM_PAGE_TO_PHYS(dm)); memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); mmu_booke_kremove(dva); mmu_booke_kremove(sva); mtx_unlock(&copy_page_mutex); } static inline void mmu_booke_copy_pages(vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; mtx_lock(&copy_page_mutex); while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); mmu_booke_kenter(copy_page_src_va, VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); a_cp = (char *)copy_page_src_va + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); mmu_booke_kenter(copy_page_dst_va, VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); b_cp = (char *)copy_page_dst_va + b_pg_offset; bcopy(a_cp, b_cp, cnt); mmu_booke_kremove(copy_page_dst_va); mmu_booke_kremove(copy_page_src_va); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } mtx_unlock(&copy_page_mutex); } static vm_offset_t mmu_booke_quick_enter_page(vm_page_t m) { vm_paddr_t paddr; vm_offset_t qaddr; uint32_t flags; pte_t *pte; paddr = VM_PAGE_TO_PHYS(m); flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; flags |= PTE_PS_4KB; critical_enter(); qaddr = PCPU_GET(qmap_addr); pte = pte_find(kernel_pmap, qaddr); KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy")); /* * XXX: tlbivax is broadcast to other cores, but qaddr should * not be present in other TLBs. Is there a better instruction * sequence to use? Or just forget it & use mmu_booke_kenter()... */ __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK)); __asm __volatile("isync; msync"); *pte = PTE_RPN_FROM_PA(paddr) | flags; /* Flush the real memory from the instruction cache.
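mmu_booke_copy_pages() and mmu_booke_sync_icache() both clamp each iteration so it never crosses a page boundary on either the source or the destination side. A small sketch of that clamping, assuming 4 KB pages and arbitrary sample offsets:

/*
 * Chunking sketch: each pass handles at most the bytes left in the
 * current source page and the current destination page.  Offsets and
 * the transfer size are arbitrary demo values.
 */
#include <stdio.h>

#define DEMO_PAGE_SIZE	4096
#define DEMO_PAGE_MASK	(DEMO_PAGE_SIZE - 1)
#define demo_min(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	int a_offset = 0x0f80, b_offset = 0x1ff0, xfersize = 0x2400;

	while (xfersize > 0) {
		int cnt = demo_min(xfersize,
		    DEMO_PAGE_SIZE - (a_offset & DEMO_PAGE_MASK));

		cnt = demo_min(cnt,
		    DEMO_PAGE_SIZE - (b_offset & DEMO_PAGE_MASK));
		printf("copy %#x bytes: src off %#x, dst off %#x\n",
		    cnt, a_offset, b_offset);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
	return (0);
}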
*/ if ((flags & (PTE_I | PTE_G)) == 0) __syncicache((void *)qaddr, PAGE_SIZE); return (qaddr); } static void mmu_booke_quick_remove_page(vm_offset_t addr) { pte_t *pte; pte = pte_find(kernel_pmap, addr); KASSERT(PCPU_GET(qmap_addr) == addr, ("mmu_booke_quick_remove_page: invalid address")); KASSERT(*pte != 0, ("mmu_booke_quick_remove_page: PTE not in use")); *pte = 0; critical_exit(); } /**************************************************************************/ /* TID handling */ /**************************************************************************/ -/* - * Return the largest uint value log such that 2^log <= num. - */ -static unsigned long -ilog2(unsigned long num) -{ - long lz; - - __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); - return (31 - lz); -} - /* * Invalidate all TLB0 entries which match the given TID. Note this is * dedicated for cases when invalidations should NOT be propagated to other * CPUs. */ static void tid_flush(tlbtid_t tid) { register_t msr; uint32_t mas0, mas1, mas2; int entry, way; /* Don't evict kernel translations */ if (tid == TID_KERNEL) return; msr = mfmsr(); __asm __volatile("wrteei 0"); /* * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use * it for PID invalidation. */ switch ((mfpvr() >> 16) & 0xffff) { case FSL_E500mc: case FSL_E5500: case FSL_E6500: mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); /* tlbilxpid */ __asm __volatile("isync; .long 0x7c200024; isync; msync"); __asm __volatile("wrtee %0" :: "r"(msr)); return; } for (way = 0; way < TLB0_WAYS; way++) for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); mtspr(SPR_MAS0, mas0); mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT; mtspr(SPR_MAS2, mas2); __asm __volatile("isync; tlbre"); mas1 = mfspr(SPR_MAS1); if (!(mas1 & MAS1_VALID)) continue; if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid) continue; mas1 &= ~MAS1_VALID; mtspr(SPR_MAS1, mas1); __asm __volatile("isync; tlbwe; isync; msync"); } __asm __volatile("wrtee %0" :: "r"(msr)); } diff --git a/sys/powerpc/booke/pmap_64.c b/sys/powerpc/booke/pmap_64.c index 802f37e921a4..affa08ebee3f 100644 --- a/sys/powerpc/booke/pmap_64.c +++ b/sys/powerpc/booke/pmap_64.c @@ -1,786 +1,773 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (C) 2020 Justin Hibbits * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski * Copyright (C) 2006 Semihalf, Marian Balakowicz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Some hw specific parts of this pmap were derived or influenced * by NetBSD's ibm4xx pmap module. More generic code is shared with * a few other pmap modules from the FreeBSD tree. */ /* * VM layout notes: * * Kernel and user threads run within one common virtual address space * defined by AS=0. * * 64-bit pmap: * Virtual address space layout: * ----------------------------- * 0x0000_0000_0000_0000 - 0x3fff_ffff_ffff_ffff : user process * 0x4000_0000_0000_0000 - 0x7fff_ffff_ffff_ffff : unused * 0x8000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : mmio region * 0xc000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : direct map * 0xe000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : KVA */ #include #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif #define PRI0ptrX "016lx" /**************************************************************************/ /* PMAP */ /**************************************************************************/ unsigned int kernel_pdirs; static uma_zone_t ptbl_root_zone; static pte_t ****kernel_ptbl_root; /* * Base of the pmap_mapdev() region. On 32-bit it immediately follows the * userspace address range. On On 64-bit it's far above, at (1 << 63), and * ranges up to the DMAP, giving 62 bits of PA allowed. This is far larger than * the widest Book-E address bus, the e6500 has a 40-bit PA space. This allows * us to map akin to the DMAP, with addresses identical to the PA, offset by the * base. 
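As this comment describes, a 64-bit mapdev address is simply the physical address offset by a fixed base, bounded so the region never reaches the DMAP. A minimal sketch of that scheme; the helper name and error handling are invented for illustration and mirror only the two defines that follow.

/*
 * Identity-plus-offset sketch for the mapdev region.  The constants
 * mirror VM_MAPDEV_BASE/VM_MAPDEV_PA_MAX below; demo_mapdev_va() is a
 * made-up helper, not a pmap interface.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_MAPDEV_BASE	UINT64_C(0x8000000000000000)
#define DEMO_MAPDEV_PA_MAX	UINT64_C(0x4000000000000000)

static uint64_t
demo_mapdev_va(uint64_t pa)
{
	if (pa >= DEMO_MAPDEV_PA_MAX)
		return (0);		/* would encroach on the DMAP */
	return (DEMO_MAPDEV_BASE + pa);
}

int
main(void)
{
	uint64_t pa = UINT64_C(0xfe0000000);	/* made-up device PA */

	printf("pa %#" PRIx64 " maps at %#" PRIx64 "\n",
	    pa, demo_mapdev_va(pa));
	return (0);
}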
*/ #define VM_MAPDEV_BASE 0x8000000000000000 #define VM_MAPDEV_PA_MAX 0x4000000000000000 /* Don't encroach on DMAP */ static void tid_flush(tlbtid_t tid); -static unsigned long ilog2(unsigned long); /**************************************************************************/ /* Page table management */ /**************************************************************************/ #define PMAP_ROOT_SIZE (sizeof(pte_t****) * PG_ROOT_NENTRIES) static pte_t *ptbl_alloc(pmap_t pmap, vm_offset_t va, bool nosleep, bool *is_new); static void ptbl_hold(pmap_t, pte_t *); static int ptbl_unhold(pmap_t, vm_offset_t); static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t); static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, bool); static int pte_remove(pmap_t, vm_offset_t, uint8_t); static pte_t *pte_find(pmap_t, vm_offset_t); static pte_t *pte_find_next(pmap_t, vm_offset_t *); static void kernel_pte_alloc(vm_offset_t, vm_offset_t); /**************************************************************************/ /* Page table related */ /**************************************************************************/ /* Allocate a page, to be used in a page table. */ static vm_offset_t mmu_booke_alloc_page(pmap_t pmap, unsigned int idx, bool nosleep) { vm_page_t m; int req; req = VM_ALLOC_WIRED | VM_ALLOC_ZERO; while ((m = vm_page_alloc_noobj(req)) == NULL) { if (nosleep) return (0); PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); vm_wait(NULL); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } m->pindex = idx; return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } /* Initialize pool of kva ptbl buffers. */ static void ptbl_init(void) { } /* Get a pointer to a PTE in a page table. */ static __inline pte_t * pte_find(pmap_t pmap, vm_offset_t va) { pte_t ***pdir_l1; pte_t **pdir; pte_t *ptbl; KASSERT((pmap != NULL), ("pte_find: invalid pmap")); pdir_l1 = pmap->pm_root[PG_ROOT_IDX(va)]; if (pdir_l1 == NULL) return (NULL); pdir = pdir_l1[PDIR_L1_IDX(va)]; if (pdir == NULL) return (NULL); ptbl = pdir[PDIR_IDX(va)]; return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); } /* Get a pointer to a PTE in a page table, or the next closest (greater) one. */ static __inline pte_t * pte_find_next(pmap_t pmap, vm_offset_t *pva) { vm_offset_t va; pte_t ****pm_root; pte_t *pte; unsigned long i, j, k, l; KASSERT((pmap != NULL), ("pte_find: invalid pmap")); va = *pva; i = PG_ROOT_IDX(va); j = PDIR_L1_IDX(va); k = PDIR_IDX(va); l = PTBL_IDX(va); pm_root = pmap->pm_root; /* truncate the VA for later. 
*/ va &= ~((1UL << (PG_ROOT_H + 1)) - 1); for (; i < PG_ROOT_NENTRIES; i++, j = 0, k = 0, l = 0) { if (pm_root[i] == 0) continue; for (; j < PDIR_L1_NENTRIES; j++, k = 0, l = 0) { if (pm_root[i][j] == 0) continue; for (; k < PDIR_NENTRIES; k++, l = 0) { if (pm_root[i][j][k] == NULL) continue; for (; l < PTBL_NENTRIES; l++) { pte = &pm_root[i][j][k][l]; if (!PTE_ISVALID(pte)) continue; *pva = va + PG_ROOT_SIZE * i + PDIR_L1_SIZE * j + PDIR_SIZE * k + PAGE_SIZE * l; return (pte); } } } } return (NULL); } static bool unhold_free_page(pmap_t pmap, vm_page_t m) { if (vm_page_unwire_noq(m)) { vm_page_free_zero(m); return (true); } return (false); } static vm_offset_t get_pgtbl_page(pmap_t pmap, vm_offset_t *ptr_tbl, uint32_t index, bool nosleep, bool hold_parent, bool *isnew) { vm_offset_t page; vm_page_t m; page = ptr_tbl[index]; KASSERT(page != 0 || pmap != kernel_pmap, ("NULL page table page found in kernel pmap!")); if (page == 0) { page = mmu_booke_alloc_page(pmap, index, nosleep); if (ptr_tbl[index] == 0) { *isnew = true; ptr_tbl[index] = page; if (hold_parent) { m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)ptr_tbl)); m->ref_count++; } return (page); } m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS(page)); page = ptr_tbl[index]; vm_page_unwire_noq(m); vm_page_free_zero(m); } *isnew = false; return (page); } /* Allocate page table. */ static pte_t* ptbl_alloc(pmap_t pmap, vm_offset_t va, bool nosleep, bool *is_new) { unsigned int pg_root_idx = PG_ROOT_IDX(va); unsigned int pdir_l1_idx = PDIR_L1_IDX(va); unsigned int pdir_idx = PDIR_IDX(va); vm_offset_t pdir_l1, pdir, ptbl; /* When holding a parent, no need to hold the root index pages. */ pdir_l1 = get_pgtbl_page(pmap, (vm_offset_t *)pmap->pm_root, pg_root_idx, nosleep, false, is_new); if (pdir_l1 == 0) return (NULL); pdir = get_pgtbl_page(pmap, (vm_offset_t *)pdir_l1, pdir_l1_idx, nosleep, !*is_new, is_new); if (pdir == 0) return (NULL); ptbl = get_pgtbl_page(pmap, (vm_offset_t *)pdir, pdir_idx, nosleep, !*is_new, is_new); return ((pte_t *)ptbl); } /* * Decrement ptbl pages hold count and attempt to free ptbl pages. Called * when removing pte entry from ptbl. * * Return 1 if ptbl pages were freed. */ static int ptbl_unhold(pmap_t pmap, vm_offset_t va) { pte_t *ptbl; vm_page_t m; u_int pg_root_idx; pte_t ***pdir_l1; u_int pdir_l1_idx; pte_t **pdir; u_int pdir_idx; pg_root_idx = PG_ROOT_IDX(va); pdir_l1_idx = PDIR_L1_IDX(va); pdir_idx = PDIR_IDX(va); KASSERT((pmap != kernel_pmap), ("ptbl_unhold: unholding kernel ptbl!")); pdir_l1 = pmap->pm_root[pg_root_idx]; pdir = pdir_l1[pdir_l1_idx]; ptbl = pdir[pdir_idx]; /* decrement hold count */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); if (!unhold_free_page(pmap, m)) return (0); pdir[pdir_idx] = NULL; m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) pdir)); if (!unhold_free_page(pmap, m)) return (1); pdir_l1[pdir_l1_idx] = NULL; m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) pdir_l1)); if (!unhold_free_page(pmap, m)) return (1); pmap->pm_root[pg_root_idx] = NULL; return (1); } /* * Increment hold count for ptbl pages. This routine is used when new pte * entry is being inserted into ptbl. */ static void ptbl_hold(pmap_t pmap, pte_t *ptbl) { vm_page_t m; KASSERT((pmap != kernel_pmap), ("ptbl_hold: holding kernel ptbl!")); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); m->ref_count++; } /* * Clean pte entry, try to free page table page if requested. * * Return 1 if ptbl pages were freed, otherwise return 0. 
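pte_find_next() above rebuilds the virtual address from its four level indices as PG_ROOT_SIZE * i + PDIR_L1_SIZE * j + PDIR_SIZE * k + PAGE_SIZE * l. The sketch below demonstrates that round trip; the level widths (9 bits per level over 4 KB pages) are assumed stand-ins, not the real Book-E constants.

/*
 * Index/VA round-trip sketch for a four-level table.  The split used
 * here is assumed for the demo; only the reconstruction identity is the
 * point.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT		12
#define DEMO_LVL_BITS		9
#define DEMO_PTBL_IDX(va)	(((va) >> DEMO_PAGE_SHIFT) & 0x1ff)
#define DEMO_PDIR_IDX(va)	(((va) >> (DEMO_PAGE_SHIFT + DEMO_LVL_BITS)) & 0x1ff)
#define DEMO_PDIR_L1_IDX(va)	(((va) >> (DEMO_PAGE_SHIFT + 2 * DEMO_LVL_BITS)) & 0x1ff)
#define DEMO_PG_ROOT_IDX(va)	(((va) >> (DEMO_PAGE_SHIFT + 3 * DEMO_LVL_BITS)) & 0x1ff)
#define DEMO_PAGE_SIZE		(UINT64_C(1) << DEMO_PAGE_SHIFT)
#define DEMO_PDIR_SIZE		(UINT64_C(1) << (DEMO_PAGE_SHIFT + DEMO_LVL_BITS))
#define DEMO_PDIR_L1_SIZE	(UINT64_C(1) << (DEMO_PAGE_SHIFT + 2 * DEMO_LVL_BITS))
#define DEMO_PG_ROOT_SIZE	(UINT64_C(1) << (DEMO_PAGE_SHIFT + 3 * DEMO_LVL_BITS))

int
main(void)
{
	uint64_t va = UINT64_C(0x123456789abc);
	uint64_t rebuilt = DEMO_PG_ROOT_SIZE * DEMO_PG_ROOT_IDX(va) +
	    DEMO_PDIR_L1_SIZE * DEMO_PDIR_L1_IDX(va) +
	    DEMO_PDIR_SIZE * DEMO_PDIR_IDX(va) +
	    DEMO_PAGE_SIZE * DEMO_PTBL_IDX(va);

	/* rebuilt equals va with the page offset cleared. */
	printf("va %#" PRIx64 " -> rebuilt %#" PRIx64 "\n", va, rebuilt);
	return (0);
}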
*/ static int pte_remove(pmap_t pmap, vm_offset_t va, u_int8_t flags) { vm_page_t m; pte_t *pte; pte = pte_find(pmap, va); KASSERT(pte != NULL, ("%s: NULL pte for va %#jx, pmap %p", __func__, (uintmax_t)va, pmap)); if (!PTE_ISVALID(pte)) return (0); /* Get vm_page_t for mapped pte. */ m = PHYS_TO_VM_PAGE(PTE_PA(pte)); if (PTE_ISWIRED(pte)) pmap->pm_stats.wired_count--; /* Handle managed entry. */ if (PTE_ISMANAGED(pte)) { /* Handle modified pages. */ if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); /* Referenced pages. */ if (PTE_ISREFERENCED(pte)) vm_page_aflag_set(m, PGA_REFERENCED); /* Remove pv_entry from pv_list. */ pv_remove(pmap, va, m); } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { pv_remove(pmap, va, m); if (TAILQ_EMPTY(&m->md.pv_list)) m->md.pv_tracked = false; } mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte = 0; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); pmap->pm_stats.resident_count--; if (flags & PTBL_UNHOLD) { return (ptbl_unhold(pmap, va)); } return (0); } /* * Insert PTE for a given page and virtual address. */ static int pte_enter(pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, bool nosleep) { unsigned int ptbl_idx = PTBL_IDX(va); pte_t *ptbl, *pte, pte_tmp; bool is_new; /* Get the page directory pointer. */ ptbl = ptbl_alloc(pmap, va, nosleep, &is_new); if (ptbl == NULL) { KASSERT(nosleep, ("nosleep and NULL ptbl")); return (ENOMEM); } if (is_new) { pte = &ptbl[ptbl_idx]; } else { /* * Check if there is valid mapping for requested va, if there * is, remove it. */ pte = &ptbl[ptbl_idx]; if (PTE_ISVALID(pte)) { pte_remove(pmap, va, PTBL_HOLD); } else { /* * pte is not used, increment hold count for ptbl * pages. */ if (pmap != kernel_pmap) ptbl_hold(pmap, ptbl); } } /* * Insert pv_entry into pv_list for mapped page if part of managed * memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { flags |= PTE_MANAGED; /* Create and insert pv entry. */ pv_insert(pmap, va, m); } pmap->pm_stats.resident_count++; pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); pte_tmp |= (PTE_VALID | flags); mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte = pte_tmp; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); return (0); } /* Return the pa for the given pmap/va. 
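pte_enter() above packs the PTE from the page's physical frame plus flag bits, and pte_vatopa() just below recovers the physical address by combining the frame stored in the PTE with the page offset taken from the VA. A round-trip demo with an invented bit layout; the real PTE_RPN_FROM_PA()/PTE_PA() encoding comes from the Book-E pte headers, not from this sketch.

/*
 * PTE pack/unpack sketch.  The flag bits and frame placement are
 * assumed for the demo (frame page-aligned, low bits free for flags).
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_MASK		UINT64_C(0xfff)
#define DEMO_PTE_VALID		UINT64_C(0x001)	/* assumed flag bit */
#define DEMO_PTE_SR		UINT64_C(0x002)	/* assumed flag bit */
#define DEMO_RPN_FROM_PA(pa)	((pa) & ~DEMO_PAGE_MASK)
#define DEMO_PA(pte)		((pte) & ~DEMO_PAGE_MASK)

int
main(void)
{
	uint64_t pa = UINT64_C(0x2f000), va = UINT64_C(0xc0001234);
	uint64_t pte = DEMO_RPN_FROM_PA(pa) | DEMO_PTE_VALID | DEMO_PTE_SR;

	/* pte_vatopa() style: frame from the PTE, offset from the VA. */
	printf("pte %#" PRIx64 " -> pa %#" PRIx64 "\n",
	    pte, DEMO_PA(pte) | (va & DEMO_PAGE_MASK));
	return (0);
}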
*/ static vm_paddr_t pte_vatopa(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa = 0; pte_t *pte; pte = pte_find(pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); return (pa); } /* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ static void kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr) { pte_t *pte; vm_size_t kva_size; int kernel_pdirs, kernel_pgtbls, pdir_l1s; vm_offset_t va, l1_va, pdir_va, ptbl_va; int i, j, k; kva_size = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; kernel_pmap->pm_root = kernel_ptbl_root; pdir_l1s = howmany(kva_size, PG_ROOT_SIZE); kernel_pdirs = howmany(kva_size, PDIR_L1_SIZE); kernel_pgtbls = howmany(kva_size, PDIR_SIZE); /* Initialize kernel pdir */ l1_va = (vm_offset_t)kernel_ptbl_root + round_page(PG_ROOT_NENTRIES * sizeof(pte_t ***)); pdir_va = l1_va + pdir_l1s * PAGE_SIZE; ptbl_va = pdir_va + kernel_pdirs * PAGE_SIZE; if (bootverbose) { printf("ptbl_root_va: %#lx\n", (vm_offset_t)kernel_ptbl_root); printf("l1_va: %#lx (%d entries)\n", l1_va, pdir_l1s); printf("pdir_va: %#lx(%d entries)\n", pdir_va, kernel_pdirs); printf("ptbl_va: %#lx(%d entries)\n", ptbl_va, kernel_pgtbls); } va = VM_MIN_KERNEL_ADDRESS; for (i = PG_ROOT_IDX(va); i < PG_ROOT_IDX(va) + pdir_l1s; i++, l1_va += PAGE_SIZE) { kernel_pmap->pm_root[i] = (pte_t ***)l1_va; for (j = 0; j < PDIR_L1_NENTRIES && va < VM_MAX_KERNEL_ADDRESS; j++, pdir_va += PAGE_SIZE) { kernel_pmap->pm_root[i][j] = (pte_t **)pdir_va; for (k = 0; k < PDIR_NENTRIES && va < VM_MAX_KERNEL_ADDRESS; k++, va += PDIR_SIZE, ptbl_va += PAGE_SIZE) kernel_pmap->pm_root[i][j][k] = (pte_t *)ptbl_va; } } /* * Fill in PTEs covering kernel code and data. They are not required * for address translation, as this area is covered by static TLB1 * entries, but for pte_vatopa() to work correctly with kernel area * addresses. */ for (va = addr; va < data_end; va += PAGE_SIZE) { pte = &(kernel_pmap->pm_root[PG_ROOT_IDX(va)][PDIR_L1_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID | PTE_PS_4KB; } } static vm_offset_t mmu_booke_alloc_kernel_pgtables(vm_offset_t data_end) { vm_size_t kva_size = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; kernel_ptbl_root = (pte_t ****)data_end; data_end += round_page(PG_ROOT_NENTRIES * sizeof(pte_t ***)); data_end += howmany(kva_size, PG_ROOT_SIZE) * PAGE_SIZE; data_end += howmany(kva_size, PDIR_L1_SIZE) * PAGE_SIZE; data_end += howmany(kva_size, PDIR_SIZE) * PAGE_SIZE; return (data_end); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ static int mmu_booke_pinit(pmap_t pmap) { int i; CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, curthread->td_proc->p_pid, curthread->td_proc->p_comm); KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); for (i = 0; i < MAXCPU; i++) pmap->pm_tid[i] = TID_NONE; CPU_ZERO(&kernel_pmap->pm_active); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_root = uma_zalloc(ptbl_root_zone, M_WAITOK); bzero(pmap->pm_root, sizeof(pte_t **) * PG_ROOT_NENTRIES); return (1); } /* * Release any resources held by the given physical map. * Called when a pmap initialized by mmu_booke_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ static void mmu_booke_release(pmap_t pmap) { KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); #ifdef INVARIANTS /* * Verify that all page directories are gone. * Protects against reference count leakage. */ for (int i = 0; i < PG_ROOT_NENTRIES; i++) KASSERT(pmap->pm_root[i] == 0, ("Index %d on root page %p is non-zero!\n", i, pmap->pm_root)); #endif uma_zfree(ptbl_root_zone, pmap->pm_root); } static void mmu_booke_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { pte_t *pte; vm_paddr_t pa = 0; int sync_sz, valid; while (sz > 0) { PMAP_LOCK(pm); pte = pte_find(pm, va); valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; if (valid) pa = PTE_PA(pte); PMAP_UNLOCK(pm); sync_sz = PAGE_SIZE - (va & PAGE_MASK); sync_sz = min(sync_sz, sz); if (valid) { pa += (va & PAGE_MASK); __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); } va += sync_sz; sz -= sync_sz; } } /* * mmu_booke_zero_page_area zeros the specified hardware page by * mapping it into virtual memory and using bzero to clear * its contents. * * off and size must reside within a single page. */ static void mmu_booke_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va; /* XXX KASSERT off and size are within a single page? */ va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); bzero((caddr_t)va + off, size); } /* * mmu_booke_zero_page zeros the specified hardware page. */ static void mmu_booke_zero_page(vm_page_t m) { vm_offset_t off, va; va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); } /* * mmu_booke_copy_page copies the specified (machine independent) page by * mapping the page into virtual memory and using memcopy to copy the page, * one machine dependent page at a time. */ static void mmu_booke_copy_page(vm_page_t sm, vm_page_t dm) { vm_offset_t sva, dva; sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm)); dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm)); memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); } static inline void mmu_booke_copy_pages(vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; vm_page_t pa, pb; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; pa = ma[a_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; pb = mb[b_offset >> PAGE_SHIFT]; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) + a_pg_offset); b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) + b_pg_offset); bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } static vm_offset_t mmu_booke_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } static void mmu_booke_quick_remove_page(vm_offset_t addr) { } /**************************************************************************/ /* TID handling */ /**************************************************************************/ -/* - * Return the largest uint value log such that 2^log <= num. - */ -static unsigned long -ilog2(unsigned long num) -{ - long lz; - - __asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num)); - return (63 - lz); -} - /* * Invalidate all TLB0 entries which match the given TID. Note this is * dedicated for cases when invalidations should NOT be propagated to other * CPUs. 
*/ static void tid_flush(tlbtid_t tid) { register_t msr; /* Don't evict kernel translations */ if (tid == TID_KERNEL) return; msr = mfmsr(); __asm __volatile("wrteei 0"); /* * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use * it for PID invalidation. */ mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); __asm __volatile("isync; .long 0x7c200024; isync; msync"); __asm __volatile("wrtee %0" :: "r"(msr)); } diff --git a/sys/sys/libkern.h b/sys/sys/libkern.h index 6fbb97e48dac..84d982c43a76 100644 --- a/sys/sys/libkern.h +++ b/sys/sys/libkern.h @@ -1,293 +1,399 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_LIBKERN_H_ #define _SYS_LIBKERN_H_ #include #ifdef _KERNEL #include #endif #ifndef LIBKERN_INLINE #define LIBKERN_INLINE static __inline #define LIBKERN_BODY #endif /* BCD conversions. */ extern u_char const bcd2bin_data[]; extern u_char const bin2bcd_data[]; extern char const hex2ascii_data[]; #define LIBKERN_LEN_BCD2BIN 154 #define LIBKERN_LEN_BIN2BCD 100 #define LIBKERN_LEN_HEX2ASCII 36 static inline u_char bcd2bin(int bcd) { KASSERT(bcd >= 0 && bcd < LIBKERN_LEN_BCD2BIN, ("invalid bcd %d", bcd)); return (bcd2bin_data[bcd]); } static inline u_char bin2bcd(int bin) { KASSERT(bin >= 0 && bin < LIBKERN_LEN_BIN2BCD, ("invalid bin %d", bin)); return (bin2bcd_data[bin]); } static inline char hex2ascii(int hex) { KASSERT(hex >= 0 && hex < LIBKERN_LEN_HEX2ASCII, ("invalid hex %d", hex)); return (hex2ascii_data[hex]); } static inline bool validbcd(int bcd) { return (bcd == 0 || (bcd > 0 && bcd <= 0x99 && bcd2bin_data[bcd] != 0)); } static __inline int imax(int a, int b) { return (a > b ? a : b); } static __inline int imin(int a, int b) { return (a < b ? a : b); } static __inline long lmax(long a, long b) { return (a > b ? a : b); } static __inline long lmin(long a, long b) { return (a < b ? a : b); } static __inline u_int max(u_int a, u_int b) { return (a > b ? a : b); } static __inline u_int min(u_int a, u_int b) { return (a < b ? 
a : b); } static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b ? a : b); } static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b ? a : b); } static __inline u_quad_t uqmax(u_quad_t a, u_quad_t b) { return (a > b ? a : b); } static __inline u_quad_t uqmin(u_quad_t a, u_quad_t b) { return (a < b ? a : b); } static __inline u_long ulmax(u_long a, u_long b) { return (a > b ? a : b); } static __inline u_long ulmin(u_long a, u_long b) { return (a < b ? a : b); } static __inline __uintmax_t ummax(__uintmax_t a, __uintmax_t b) { return (a > b ? a : b); } static __inline __uintmax_t ummin(__uintmax_t a, __uintmax_t b) { return (a < b ? a : b); } static __inline off_t omax(off_t a, off_t b) { return (a > b ? a : b); } static __inline off_t omin(off_t a, off_t b) { return (a < b ? a : b); } static __inline int abs(int a) { return (a < 0 ? -a : a); } static __inline long labs(long a) { return (a < 0 ? -a : a); } static __inline int64_t abs64(int64_t a) { return (a < 0 ? -a : a); } static __inline quad_t qabs(quad_t a) { return (a < 0 ? -a : a); } #ifndef RANDOM_FENESTRASX #define ARC4_ENTR_NONE 0 /* Don't have entropy yet. */ #define ARC4_ENTR_HAVE 1 /* Have entropy. */ #define ARC4_ENTR_SEED 2 /* Reseeding. */ extern int arc4rand_iniseed_state; #endif /* Prototypes for non-quad routines. */ struct malloc_type; uint32_t arc4random(void); void arc4random_buf(void *, size_t); uint32_t arc4random_uniform(uint32_t); void arc4rand(void *, u_int, int); int timingsafe_bcmp(const void *, const void *, size_t); void *bsearch(const void *, const void *, size_t, size_t, int (*)(const void *, const void *)); /* * MHTODO: remove the 'HAVE_INLINE_FOO' defines once use of these flags has * been purged everywhere. For now we provide them unconditionally. */ #define HAVE_INLINE_FFS #define HAVE_INLINE_FFSL #define HAVE_INLINE_FFSLL #define HAVE_INLINE_FLS #define HAVE_INLINE_FLSL #define HAVE_INLINE_FLSLL static __inline __pure2 int ffs(int mask) { return (__builtin_ffs((u_int)mask)); } static __inline __pure2 int ffsl(long mask) { return (__builtin_ffsl((u_long)mask)); } static __inline __pure2 int ffsll(long long mask) { return (__builtin_ffsll((unsigned long long)mask)); } static __inline __pure2 int fls(int mask) { return (mask == 0 ? 0 : 8 * sizeof(mask) - __builtin_clz((u_int)mask)); } static __inline __pure2 int flsl(long mask) { return (mask == 0 ? 0 : 8 * sizeof(mask) - __builtin_clzl((u_long)mask)); } static __inline __pure2 int flsll(long long mask) { return (mask == 0 ? 0 : 8 * sizeof(mask) - __builtin_clzll((unsigned long long)mask)); } +static __inline __pure2 int +ilog2_int(int n) +{ + + MPASS(n != 0); + return (8 * sizeof(n) - 1 - __builtin_clz((u_int)n)); +} + +static __inline __pure2 int +ilog2_long(long n) +{ + + MPASS(n != 0); + return (8 * sizeof(n) - 1 - __builtin_clzl((u_long)n)); +} + +static __inline __pure2 int +ilog2_long_long(long long n) +{ + + MPASS(n != 0); + return (8 * sizeof(n) - 1 - + __builtin_clzll((unsigned long long)n)); +} + +#define ilog2_var(n) \ + _Generic((n), \ + default: ilog2_int, \ + long: ilog2_long, \ + unsigned long: ilog2_long, \ + long long: ilog2_long_long, \ + unsigned long long: ilog2_long_long \ + )(n) + +#define ilog2(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n) < 1 ? -1 : \ + (n) & (1ULL << 63) ? 63 : \ + (n) & (1ULL << 62) ? 62 : \ + (n) & (1ULL << 61) ? 61 : \ + (n) & (1ULL << 60) ? 60 : \ + (n) & (1ULL << 59) ? 59 : \ + (n) & (1ULL << 58) ? 58 : \ + (n) & (1ULL << 57) ? 57 : \ + (n) & (1ULL << 56) ? 56 : \ + (n) & (1ULL << 55) ? 
55 : \ + (n) & (1ULL << 54) ? 54 : \ + (n) & (1ULL << 53) ? 53 : \ + (n) & (1ULL << 52) ? 52 : \ + (n) & (1ULL << 51) ? 51 : \ + (n) & (1ULL << 50) ? 50 : \ + (n) & (1ULL << 49) ? 49 : \ + (n) & (1ULL << 48) ? 48 : \ + (n) & (1ULL << 47) ? 47 : \ + (n) & (1ULL << 46) ? 46 : \ + (n) & (1ULL << 45) ? 45 : \ + (n) & (1ULL << 44) ? 44 : \ + (n) & (1ULL << 43) ? 43 : \ + (n) & (1ULL << 42) ? 42 : \ + (n) & (1ULL << 41) ? 41 : \ + (n) & (1ULL << 40) ? 40 : \ + (n) & (1ULL << 39) ? 39 : \ + (n) & (1ULL << 38) ? 38 : \ + (n) & (1ULL << 37) ? 37 : \ + (n) & (1ULL << 36) ? 36 : \ + (n) & (1ULL << 35) ? 35 : \ + (n) & (1ULL << 34) ? 34 : \ + (n) & (1ULL << 33) ? 33 : \ + (n) & (1ULL << 32) ? 32 : \ + (n) & (1ULL << 31) ? 31 : \ + (n) & (1ULL << 30) ? 30 : \ + (n) & (1ULL << 29) ? 29 : \ + (n) & (1ULL << 28) ? 28 : \ + (n) & (1ULL << 27) ? 27 : \ + (n) & (1ULL << 26) ? 26 : \ + (n) & (1ULL << 25) ? 25 : \ + (n) & (1ULL << 24) ? 24 : \ + (n) & (1ULL << 23) ? 23 : \ + (n) & (1ULL << 22) ? 22 : \ + (n) & (1ULL << 21) ? 21 : \ + (n) & (1ULL << 20) ? 20 : \ + (n) & (1ULL << 19) ? 19 : \ + (n) & (1ULL << 18) ? 18 : \ + (n) & (1ULL << 17) ? 17 : \ + (n) & (1ULL << 16) ? 16 : \ + (n) & (1ULL << 15) ? 15 : \ + (n) & (1ULL << 14) ? 14 : \ + (n) & (1ULL << 13) ? 13 : \ + (n) & (1ULL << 12) ? 12 : \ + (n) & (1ULL << 11) ? 11 : \ + (n) & (1ULL << 10) ? 10 : \ + (n) & (1ULL << 9) ? 9 : \ + (n) & (1ULL << 8) ? 8 : \ + (n) & (1ULL << 7) ? 7 : \ + (n) & (1ULL << 6) ? 6 : \ + (n) & (1ULL << 5) ? 5 : \ + (n) & (1ULL << 4) ? 4 : \ + (n) & (1ULL << 3) ? 3 : \ + (n) & (1ULL << 2) ? 2 : \ + (n) & (1ULL << 1) ? 1 : \ + (n) & (1ULL << 0) ? 0 : \ + -1) : \ + ilog2_var(n) \ +) + #define bitcount64(x) __bitcount64((uint64_t)(x)) #define bitcount32(x) __bitcount32((uint32_t)(x)) #define bitcount16(x) __bitcount16((uint16_t)(x)) #define bitcountl(x) __bitcountl((u_long)(x)) #define bitcount(x) __bitcount((u_int)(x)) int fnmatch(const char *, const char *, int); int locc(int, char *, u_int); void *memchr(const void *s, int c, size_t n); void *memcchr(const void *s, int c, size_t n); void *memmem(const void *l, size_t l_len, const void *s, size_t s_len); void qsort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)); void qsort_r(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *, void *), void *thunk); u_long random(void); int scanc(u_int, const u_char *, const u_char *, int); int strcasecmp(const char *, const char *); char *strcasestr(const char *, const char *); char *strcat(char * __restrict, const char * __restrict); char *strchr(const char *, int); char *strchrnul(const char *, int); int strcmp(const char *, const char *); char *strcpy(char * __restrict, const char * __restrict); char *strdup_flags(const char *__restrict, struct malloc_type *, int); size_t strcspn(const char *, const char *) __pure; char *strdup(const char *__restrict, struct malloc_type *); char *strncat(char *, const char *, size_t); char *strndup(const char *__restrict, size_t, struct malloc_type *); size_t strlcat(char *, const char *, size_t); size_t strlcpy(char *, const char *, size_t); size_t strlen(const char *); int strncasecmp(const char *, const char *, size_t); int strncmp(const char *, const char *, size_t); char *strncpy(char * __restrict, const char * __restrict, size_t); size_t strnlen(const char *, size_t); char *strnstr(const char *, const char *, size_t); char *strrchr(const char *, int); char *strsep(char **, const char *delim); size_t strspn(const char *, const char *); char 
*strstr(const char *, const char *); int strvalid(const char *, size_t); #ifdef SAN_NEEDS_INTERCEPTORS #ifndef SAN_INTERCEPTOR #define SAN_INTERCEPTOR(func) \ __CONCAT(SAN_INTERCEPTOR_PREFIX, __CONCAT(_, func)) #endif char *SAN_INTERCEPTOR(strcpy)(char *, const char *); int SAN_INTERCEPTOR(strcmp)(const char *, const char *); size_t SAN_INTERCEPTOR(strlen)(const char *); #ifndef SAN_RUNTIME #define strcpy(d, s) SAN_INTERCEPTOR(strcpy)((d), (s)) #define strcmp(s1, s2) SAN_INTERCEPTOR(strcmp)((s1), (s2)) #define strlen(s) SAN_INTERCEPTOR(strlen)(s) #endif /* !SAN_RUNTIME */ #else /* !SAN_NEEDS_INTERCEPTORS */ #define strcpy(d, s) __builtin_strcpy((d), (s)) #define strcmp(s1, s2) __builtin_strcmp((s1), (s2)) #define strlen(s) __builtin_strlen((s)) #endif /* SAN_NEEDS_INTERCEPTORS */ static __inline char * index(const char *p, int ch) { return (strchr(p, ch)); } static __inline char * rindex(const char *p, int ch) { return (strrchr(p, ch)); } static __inline int64_t signed_extend64(uint64_t bitmap, int lsb, int width) { return ((int64_t)(bitmap << (63 - lsb - (width - 1)))) >> (63 - (width - 1)); } static __inline int32_t signed_extend32(uint32_t bitmap, int lsb, int width) { return ((int32_t)(bitmap << (31 - lsb - (width - 1)))) >> (31 - (width - 1)); } /* fnmatch() return values. */ #define FNM_NOMATCH 1 /* Match failed. */ /* fnmatch() flags. */ #define FNM_NOESCAPE 0x01 /* Disable backslash escaping. */ #define FNM_PATHNAME 0x02 /* Slash must be matched by slash. */ #define FNM_PERIOD 0x04 /* Period must be matched by period. */ #define FNM_LEADING_DIR 0x08 /* Ignore / after Imatch. */ #define FNM_CASEFOLD 0x10 /* Case insensitive search. */ #define FNM_IGNORECASE FNM_CASEFOLD #define FNM_FILE_NAME FNM_PATHNAME #endif /* !_SYS_LIBKERN_H_ */
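For run-time arguments the new libkern ilog2() reduces, through the _Generic dispatch in ilog2_var(), to 8 * sizeof(n) - 1 - clz(n), while constant arguments fold at compile time through the ternary chain. A small out-of-kernel harness that checks that identity against a naive shift loop:

/*
 * Out-of-kernel check of the floor-log2 identity used by the ilog2_*()
 * inlines: for nonzero n, 8*sizeof(n) - 1 - clz(n) is the index of the
 * highest set bit.  This is a test harness, not the libkern code.
 */
#include <assert.h>
#include <stdio.h>

static int
ilog2_ref(unsigned long long n)
{
	int log = -1;

	while (n != 0) {
		n >>= 1;
		log++;
	}
	return (log);
}

int
main(void)
{
	unsigned long long v[] = { 1, 2, 3, 4, 5, 8, 4096, 0x123456789abcdefULL };

	for (unsigned int i = 0; i < sizeof(v) / sizeof(v[0]); i++) {
		int fast = 8 * (int)sizeof(v[i]) - 1 - __builtin_clzll(v[i]);

		assert(fast == ilog2_ref(v[i]));
		printf("ilog2(%#llx) = %d\n", v[i], fast);
	}
	return (0);
}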