Index: head/sys/compat/linprocfs/linprocfs.c =================================================================== --- head/sys/compat/linprocfs/linprocfs.c (revision 263101) +++ head/sys/compat/linprocfs/linprocfs.c (revision 263102) @@ -1,1479 +1,1480 @@ /*- * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav * Copyright (c) 1999 Pierre Beyssac * Copyright (c) 1993 Jan-Simon Pendry * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)procfs_status.c 8.4 (Berkeley) 6/15/94 */ #include "opt_compat.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) || defined(__amd64__) #include #include #endif /* __i386__ || __amd64__ */ #ifdef COMPAT_FREEBSD32 #include #endif #include #include #include #include #include #include /* * Various conversion macros */ #define T2J(x) ((long)(((x) * 100ULL) / (stathz ? stathz : hz))) /* ticks to jiffies */ #define T2CS(x) ((unsigned long)(((x) * 100ULL) / (stathz ? stathz : hz))) /* ticks to centiseconds */ #define T2S(x) ((x) / (stathz ? stathz : hz)) /* ticks to seconds */ #define B2K(x) ((x) >> 10) /* bytes to kbytes */ #define B2P(x) ((x) >> PAGE_SHIFT) /* bytes to pages */ #define P2B(x) ((x) << PAGE_SHIFT) /* pages to bytes */ #define P2K(x) ((x) << (PAGE_SHIFT - 10)) /* pages to kbytes */ #define TV2J(x) ((x)->tv_sec * 100UL + (x)->tv_usec / 10000) /** * @brief Mapping of ki_stat in struct kinfo_proc to the linux state * * The linux procfs state field displays one of the characters RSDZTW to * denote running, sleeping in an interruptible wait, waiting in an * uninterruptible disk sleep, a zombie process, process is being traced * or stopped, or process is paging respectively. * * Our struct kinfo_proc contains the variable ki_stat which contains a * value out of SIDL, SRUN, SSLEEP, SSTOP, SZOMB, SWAIT and SLOCK. * * This character array is used with ki_stati-1 as an index and tries to * map our states to suitable linux states. */ static char linux_state[] = "RRSTZDD"; /* * Filler function for proc/meminfo */ static int linprocfs_domeminfo(PFS_FILL_ARGS) { unsigned long memtotal; /* total memory in bytes */ unsigned long memused; /* used memory in bytes */ unsigned long memfree; /* free memory in bytes */ unsigned long memshared; /* shared memory ??? */ unsigned long buffers, cached; /* buffer / cache memory ??? */ unsigned long long swaptotal; /* total swap space in bytes */ unsigned long long swapused; /* used swap space in bytes */ unsigned long long swapfree; /* free swap space in bytes */ vm_object_t object; int i, j; memtotal = physmem * PAGE_SIZE; /* * The correct thing here would be: * memfree = cnt.v_free_count * PAGE_SIZE; memused = memtotal - memfree; * * but it might mislead linux binaries into thinking there * is very little memory left, so we cheat and tell them that * all memory that isn't wired down is free. */ memused = cnt.v_wire_count * PAGE_SIZE; memfree = memtotal - memused; swap_pager_status(&i, &j); swaptotal = (unsigned long long)i * PAGE_SIZE; swapused = (unsigned long long)j * PAGE_SIZE; swapfree = swaptotal - swapused; memshared = 0; mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) if (object->shadow_count > 1) memshared += object->resident_page_count; mtx_unlock(&vm_object_list_mtx); memshared *= PAGE_SIZE; /* * We'd love to be able to write: * buffers = bufspace; * * but bufspace is internal to vfs_bio.c and we don't feel * like unstaticizing it just for linprocfs's sake. */ buffers = 0; cached = cnt.v_cache_count * PAGE_SIZE; sbuf_printf(sb, " total: used: free: shared: buffers: cached:\n" "Mem: %lu %lu %lu %lu %lu %lu\n" "Swap: %llu %llu %llu\n" "MemTotal: %9lu kB\n" "MemFree: %9lu kB\n" "MemShared:%9lu kB\n" "Buffers: %9lu kB\n" "Cached: %9lu kB\n" "SwapTotal:%9llu kB\n" "SwapFree: %9llu kB\n", memtotal, memused, memfree, memshared, buffers, cached, swaptotal, swapused, swapfree, B2K(memtotal), B2K(memfree), B2K(memshared), B2K(buffers), B2K(cached), B2K(swaptotal), B2K(swapfree)); return (0); } #if defined(__i386__) || defined(__amd64__) /* * Filler function for proc/cpuinfo (i386 & amd64 version) */ static int linprocfs_docpuinfo(PFS_FILL_ARGS) { int hw_model[2]; char model[128]; uint64_t freq; size_t size; int class, fqmhz, fqkhz; int i; /* * We default the flags to include all non-conflicting flags, * and the Intel versions of conflicting flags. */ static char *flags[] = { "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", "sep", "sep", "mtrr", "pge", "mca", "cmov", "pat", "pse36", "pn", "b19", "b20", "b21", "mmxext", "mmx", "fxsr", "xmm", "sse2", "b27", "b28", "b29", "3dnowext", "3dnow" }; switch (cpu_class) { #ifdef __i386__ case CPUCLASS_286: class = 2; break; case CPUCLASS_386: class = 3; break; case CPUCLASS_486: class = 4; break; case CPUCLASS_586: class = 5; break; case CPUCLASS_686: class = 6; break; default: class = 0; break; #else /* __amd64__ */ default: class = 15; break; #endif } hw_model[0] = CTL_HW; hw_model[1] = HW_MODEL; model[0] = '\0'; size = sizeof(model); if (kernel_sysctl(td, hw_model, 2, &model, &size, 0, 0, 0, 0) != 0) strcpy(model, "unknown"); for (i = 0; i < mp_ncpus; ++i) { sbuf_printf(sb, "processor\t: %d\n" "vendor_id\t: %.20s\n" "cpu family\t: %u\n" "model\t\t: %u\n" "model name\t: %s\n" "stepping\t: %u\n\n", i, cpu_vendor, CPUID_TO_FAMILY(cpu_id), CPUID_TO_MODEL(cpu_id), model, cpu_id & CPUID_STEPPING); /* XXX per-cpu vendor / class / model / id? */ } sbuf_cat(sb, "flags\t\t:"); #ifdef __i386__ switch (cpu_vendor_id) { case CPU_VENDOR_AMD: if (class < 6) flags[16] = "fcmov"; break; case CPU_VENDOR_CYRIX: flags[24] = "cxmmx"; break; } #endif for (i = 0; i < 32; i++) if (cpu_feature & (1 << i)) sbuf_printf(sb, " %s", flags[i]); sbuf_cat(sb, "\n"); freq = atomic_load_acq_64(&tsc_freq); if (freq != 0) { fqmhz = (freq + 4999) / 1000000; fqkhz = ((freq + 4999) / 10000) % 100; sbuf_printf(sb, "cpu MHz\t\t: %d.%02d\n" "bogomips\t: %d.%02d\n", fqmhz, fqkhz, fqmhz, fqkhz); } return (0); } #endif /* __i386__ || __amd64__ */ /* * Filler function for proc/mtab * * This file doesn't exist in Linux' procfs, but is included here so * users can symlink /compat/linux/etc/mtab to /proc/mtab */ static int linprocfs_domtab(PFS_FILL_ARGS) { struct nameidata nd; struct mount *mp; const char *lep; char *dlep, *flep, *mntto, *mntfrom, *fstype; size_t lep_len; int error; /* resolve symlinks etc. in the emulation tree prefix */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, linux_emul_path, td); flep = NULL; error = namei(&nd); lep = linux_emul_path; if (error == 0) { if (vn_fullpath(td, nd.ni_vp, &dlep, &flep) == 0) lep = dlep; vrele(nd.ni_vp); } lep_len = strlen(lep); mtx_lock(&mountlist_mtx); error = 0; TAILQ_FOREACH(mp, &mountlist, mnt_list) { /* determine device name */ mntfrom = mp->mnt_stat.f_mntfromname; /* determine mount point */ mntto = mp->mnt_stat.f_mntonname; if (strncmp(mntto, lep, lep_len) == 0 && mntto[lep_len] == '/') mntto += lep_len; /* determine fs type */ fstype = mp->mnt_stat.f_fstypename; if (strcmp(fstype, pn->pn_info->pi_name) == 0) mntfrom = fstype = "proc"; else if (strcmp(fstype, "procfs") == 0) continue; if (strcmp(fstype, "linsysfs") == 0) { sbuf_printf(sb, "/sys %s sysfs %s", mntto, mp->mnt_stat.f_flags & MNT_RDONLY ? "ro" : "rw"); } else { /* For Linux msdosfs is called vfat */ if (strcmp(fstype, "msdosfs") == 0) fstype = "vfat"; sbuf_printf(sb, "%s %s %s %s", mntfrom, mntto, fstype, mp->mnt_stat.f_flags & MNT_RDONLY ? "ro" : "rw"); } #define ADD_OPTION(opt, name) \ if (mp->mnt_stat.f_flags & (opt)) sbuf_printf(sb, "," name); ADD_OPTION(MNT_SYNCHRONOUS, "sync"); ADD_OPTION(MNT_NOEXEC, "noexec"); ADD_OPTION(MNT_NOSUID, "nosuid"); ADD_OPTION(MNT_UNION, "union"); ADD_OPTION(MNT_ASYNC, "async"); ADD_OPTION(MNT_SUIDDIR, "suiddir"); ADD_OPTION(MNT_NOSYMFOLLOW, "nosymfollow"); ADD_OPTION(MNT_NOATIME, "noatime"); #undef ADD_OPTION /* a real Linux mtab will also show NFS options */ sbuf_printf(sb, " 0 0\n"); } mtx_unlock(&mountlist_mtx); free(flep, M_TEMP); return (error); } /* * Filler function for proc/partitions * */ static int linprocfs_dopartitions(PFS_FILL_ARGS) { struct g_class *cp; struct g_geom *gp; struct g_provider *pp; struct nameidata nd; const char *lep; char *dlep, *flep; size_t lep_len; int error; int major, minor; /* resolve symlinks etc. in the emulation tree prefix */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, linux_emul_path, td); flep = NULL; error = namei(&nd); lep = linux_emul_path; if (error == 0) { if (vn_fullpath(td, nd.ni_vp, &dlep, &flep) == 0) lep = dlep; vrele(nd.ni_vp); } lep_len = strlen(lep); g_topology_lock(); error = 0; sbuf_printf(sb, "major minor #blocks name rio rmerge rsect " "ruse wio wmerge wsect wuse running use aveq\n"); LIST_FOREACH(cp, &g_classes, class) { if (strcmp(cp->name, "DISK") == 0 || strcmp(cp->name, "PART") == 0) LIST_FOREACH(gp, &cp->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (linux_driver_get_major_minor( pp->name, &major, &minor) != 0) { major = 0; minor = 0; } sbuf_printf(sb, "%d %d %lld %s " "%d %d %d %d %d " "%d %d %d %d %d %d\n", major, minor, (long long)pp->mediasize, pp->name, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } } } g_topology_unlock(); free(flep, M_TEMP); return (error); } /* * Filler function for proc/stat */ static int linprocfs_dostat(PFS_FILL_ARGS) { struct pcpu *pcpu; long cp_time[CPUSTATES]; long *cp; int i; read_cpu_time(cp_time); sbuf_printf(sb, "cpu %ld %ld %ld %ld\n", T2J(cp_time[CP_USER]), T2J(cp_time[CP_NICE]), T2J(cp_time[CP_SYS] /*+ cp_time[CP_INTR]*/), T2J(cp_time[CP_IDLE])); CPU_FOREACH(i) { pcpu = pcpu_find(i); cp = pcpu->pc_cp_time; sbuf_printf(sb, "cpu%d %ld %ld %ld %ld\n", i, T2J(cp[CP_USER]), T2J(cp[CP_NICE]), T2J(cp[CP_SYS] /*+ cp[CP_INTR]*/), T2J(cp[CP_IDLE])); } sbuf_printf(sb, "disk 0 0 0 0\n" "page %u %u\n" "swap %u %u\n" "intr %u\n" "ctxt %u\n" "btime %lld\n", cnt.v_vnodepgsin, cnt.v_vnodepgsout, cnt.v_swappgsin, cnt.v_swappgsout, cnt.v_intr, cnt.v_swtch, (long long)boottime.tv_sec); return (0); } static int linprocfs_doswaps(PFS_FILL_ARGS) { struct xswdev xsw; uintmax_t total, used; int n; char devname[SPECNAMELEN + 1]; sbuf_printf(sb, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); mtx_lock(&Giant); for (n = 0; ; n++) { if (swap_dev_info(n, &xsw, devname, sizeof(devname)) != 0) break; total = (uintmax_t)xsw.xsw_nblks * PAGE_SIZE / 1024; used = (uintmax_t)xsw.xsw_used * PAGE_SIZE / 1024; /* * The space and not tab after the device name is on * purpose. Linux does so. */ sbuf_printf(sb, "/dev/%-34s unknown\t\t%jd\t%jd\t-1\n", devname, total, used); } mtx_unlock(&Giant); return (0); } /* * Filler function for proc/uptime */ static int linprocfs_douptime(PFS_FILL_ARGS) { long cp_time[CPUSTATES]; struct timeval tv; getmicrouptime(&tv); read_cpu_time(cp_time); sbuf_printf(sb, "%lld.%02ld %ld.%02lu\n", (long long)tv.tv_sec, tv.tv_usec / 10000, T2S(cp_time[CP_IDLE] / mp_ncpus), T2CS(cp_time[CP_IDLE] / mp_ncpus) % 100); return (0); } /* * Get OS build date */ static void linprocfs_osbuild(struct thread *td, struct sbuf *sb) { #if 0 char osbuild[256]; char *cp1, *cp2; strncpy(osbuild, version, 256); osbuild[255] = '\0'; cp1 = strstr(osbuild, "\n"); cp2 = strstr(osbuild, ":"); if (cp1 && cp2) { *cp1 = *cp2 = '\0'; cp1 = strstr(osbuild, "#"); } else cp1 = NULL; if (cp1) sbuf_printf(sb, "%s%s", cp1, cp2 + 1); else #endif sbuf_cat(sb, "#4 Sun Dec 18 04:30:00 CET 1977"); } /* * Get OS builder */ static void linprocfs_osbuilder(struct thread *td, struct sbuf *sb) { #if 0 char builder[256]; char *cp; cp = strstr(version, "\n "); if (cp) { strncpy(builder, cp + 5, 256); builder[255] = '\0'; cp = strstr(builder, ":"); if (cp) *cp = '\0'; } if (cp) sbuf_cat(sb, builder); else #endif sbuf_cat(sb, "des@freebsd.org"); } /* * Filler function for proc/version */ static int linprocfs_doversion(PFS_FILL_ARGS) { char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); sbuf_printf(sb, "%s version %s (", osname, osrelease); linprocfs_osbuilder(td, sb); sbuf_cat(sb, ") (gcc version " __VERSION__ ") "); linprocfs_osbuild(td, sb); sbuf_cat(sb, "\n"); return (0); } /* * Filler function for proc/loadavg */ static int linprocfs_doloadavg(PFS_FILL_ARGS) { sbuf_printf(sb, "%d.%02d %d.%02d %d.%02d %d/%d %d\n", (int)(averunnable.ldavg[0] / averunnable.fscale), (int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100), (int)(averunnable.ldavg[1] / averunnable.fscale), (int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100), (int)(averunnable.ldavg[2] / averunnable.fscale), (int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100), 1, /* number of running tasks */ nprocs, /* number of tasks */ lastpid /* the last pid */ ); return (0); } /* * Filler function for proc/pid/stat */ static int linprocfs_doprocstat(PFS_FILL_ARGS) { struct kinfo_proc kp; char state; static int ratelimit = 0; vm_offset_t startcode, startdata; PROC_LOCK(p); fill_kinfo_proc(p, &kp); if (p->p_vmspace) { startcode = (vm_offset_t)p->p_vmspace->vm_taddr; startdata = (vm_offset_t)p->p_vmspace->vm_daddr; } else { startcode = 0; startdata = 0; }; sbuf_printf(sb, "%d", p->p_pid); #define PS_ADD(name, fmt, arg) sbuf_printf(sb, " " fmt, arg) PS_ADD("comm", "(%s)", p->p_comm); if (kp.ki_stat > sizeof(linux_state)) { state = 'R'; if (ratelimit == 0) { printf("linprocfs: don't know how to handle unknown FreeBSD state %d/%zd, mapping to R\n", kp.ki_stat, sizeof(linux_state)); ++ratelimit; } } else state = linux_state[kp.ki_stat - 1]; PS_ADD("state", "%c", state); PS_ADD("ppid", "%d", p->p_pptr ? p->p_pptr->p_pid : 0); PS_ADD("pgrp", "%d", p->p_pgid); PS_ADD("session", "%d", p->p_session->s_sid); PROC_UNLOCK(p); PS_ADD("tty", "%d", kp.ki_tdev); PS_ADD("tpgid", "%d", kp.ki_tpgid); PS_ADD("flags", "%u", 0); /* XXX */ PS_ADD("minflt", "%lu", kp.ki_rusage.ru_minflt); PS_ADD("cminflt", "%lu", kp.ki_rusage_ch.ru_minflt); PS_ADD("majflt", "%lu", kp.ki_rusage.ru_majflt); PS_ADD("cmajflt", "%lu", kp.ki_rusage_ch.ru_majflt); PS_ADD("utime", "%ld", TV2J(&kp.ki_rusage.ru_utime)); PS_ADD("stime", "%ld", TV2J(&kp.ki_rusage.ru_stime)); PS_ADD("cutime", "%ld", TV2J(&kp.ki_rusage_ch.ru_utime)); PS_ADD("cstime", "%ld", TV2J(&kp.ki_rusage_ch.ru_stime)); PS_ADD("priority", "%d", kp.ki_pri.pri_user); PS_ADD("nice", "%d", kp.ki_nice); /* 19 (nicest) to -19 */ PS_ADD("0", "%d", 0); /* removed field */ PS_ADD("itrealvalue", "%d", 0); /* XXX */ PS_ADD("starttime", "%lu", TV2J(&kp.ki_start) - TV2J(&boottime)); PS_ADD("vsize", "%ju", P2K((uintmax_t)kp.ki_size)); PS_ADD("rss", "%ju", (uintmax_t)kp.ki_rssize); PS_ADD("rlim", "%lu", kp.ki_rusage.ru_maxrss); PS_ADD("startcode", "%ju", (uintmax_t)startcode); PS_ADD("endcode", "%ju", (uintmax_t)startdata); PS_ADD("startstack", "%u", 0); /* XXX */ PS_ADD("kstkesp", "%u", 0); /* XXX */ PS_ADD("kstkeip", "%u", 0); /* XXX */ PS_ADD("signal", "%u", 0); /* XXX */ PS_ADD("blocked", "%u", 0); /* XXX */ PS_ADD("sigignore", "%u", 0); /* XXX */ PS_ADD("sigcatch", "%u", 0); /* XXX */ PS_ADD("wchan", "%u", 0); /* XXX */ PS_ADD("nswap", "%lu", kp.ki_rusage.ru_nswap); PS_ADD("cnswap", "%lu", kp.ki_rusage_ch.ru_nswap); PS_ADD("exitsignal", "%d", 0); /* XXX */ PS_ADD("processor", "%u", kp.ki_lastcpu); PS_ADD("rt_priority", "%u", 0); /* XXX */ /* >= 2.5.19 */ PS_ADD("policy", "%u", kp.ki_pri.pri_class); /* >= 2.5.19 */ #undef PS_ADD sbuf_putc(sb, '\n'); return (0); } /* * Filler function for proc/pid/statm */ static int linprocfs_doprocstatm(PFS_FILL_ARGS) { struct kinfo_proc kp; segsz_t lsize; PROC_LOCK(p); fill_kinfo_proc(p, &kp); PROC_UNLOCK(p); /* * See comments in linprocfs_doprocstatus() regarding the * computation of lsize. */ /* size resident share trs drs lrs dt */ sbuf_printf(sb, "%ju ", B2P((uintmax_t)kp.ki_size)); sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_rssize); sbuf_printf(sb, "%ju ", (uintmax_t)0); /* XXX */ sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_tsize); sbuf_printf(sb, "%ju ", (uintmax_t)(kp.ki_dsize + kp.ki_ssize)); lsize = B2P(kp.ki_size) - kp.ki_dsize - kp.ki_ssize - kp.ki_tsize - 1; sbuf_printf(sb, "%ju ", (uintmax_t)lsize); sbuf_printf(sb, "%ju\n", (uintmax_t)0); /* XXX */ return (0); } /* * Filler function for proc/pid/status */ static int linprocfs_doprocstatus(PFS_FILL_ARGS) { struct kinfo_proc kp; char *state; segsz_t lsize; struct thread *td2; struct sigacts *ps; int i; PROC_LOCK(p); td2 = FIRST_THREAD_IN_PROC(p); /* XXXKSE pretend only one thread */ if (P_SHOULDSTOP(p)) { state = "T (stopped)"; } else { switch(p->p_state) { case PRS_NEW: state = "I (idle)"; break; case PRS_NORMAL: if (p->p_flag & P_WEXIT) { state = "X (exiting)"; break; } switch(td2->td_state) { case TDS_INHIBITED: state = "S (sleeping)"; break; case TDS_RUNQ: case TDS_RUNNING: state = "R (running)"; break; default: state = "? (unknown)"; break; } break; case PRS_ZOMBIE: state = "Z (zombie)"; break; default: state = "? (unknown)"; break; } } fill_kinfo_proc(p, &kp); sbuf_printf(sb, "Name:\t%s\n", p->p_comm); /* XXX escape */ sbuf_printf(sb, "State:\t%s\n", state); /* * Credentials */ sbuf_printf(sb, "Pid:\t%d\n", p->p_pid); sbuf_printf(sb, "PPid:\t%d\n", p->p_pptr ? p->p_pptr->p_pid : 0); sbuf_printf(sb, "Uid:\t%d %d %d %d\n", p->p_ucred->cr_ruid, p->p_ucred->cr_uid, p->p_ucred->cr_svuid, /* FreeBSD doesn't have fsuid */ p->p_ucred->cr_uid); sbuf_printf(sb, "Gid:\t%d %d %d %d\n", p->p_ucred->cr_rgid, p->p_ucred->cr_gid, p->p_ucred->cr_svgid, /* FreeBSD doesn't have fsgid */ p->p_ucred->cr_gid); sbuf_cat(sb, "Groups:\t"); for (i = 0; i < p->p_ucred->cr_ngroups; i++) sbuf_printf(sb, "%d ", p->p_ucred->cr_groups[i]); PROC_UNLOCK(p); sbuf_putc(sb, '\n'); /* * Memory * * While our approximation of VmLib may not be accurate (I * don't know of a simple way to verify it, and I'm not sure * it has much meaning anyway), I believe it's good enough. * * The same code that could (I think) accurately compute VmLib * could also compute VmLck, but I don't really care enough to * implement it. Submissions are welcome. */ sbuf_printf(sb, "VmSize:\t%8ju kB\n", B2K((uintmax_t)kp.ki_size)); sbuf_printf(sb, "VmLck:\t%8u kB\n", P2K(0)); /* XXX */ sbuf_printf(sb, "VmRSS:\t%8ju kB\n", P2K((uintmax_t)kp.ki_rssize)); sbuf_printf(sb, "VmData:\t%8ju kB\n", P2K((uintmax_t)kp.ki_dsize)); sbuf_printf(sb, "VmStk:\t%8ju kB\n", P2K((uintmax_t)kp.ki_ssize)); sbuf_printf(sb, "VmExe:\t%8ju kB\n", P2K((uintmax_t)kp.ki_tsize)); lsize = B2P(kp.ki_size) - kp.ki_dsize - kp.ki_ssize - kp.ki_tsize - 1; sbuf_printf(sb, "VmLib:\t%8ju kB\n", P2K((uintmax_t)lsize)); /* * Signal masks * * We support up to 128 signals, while Linux supports 32, * but we only define 32 (the same 32 as Linux, to boot), so * just show the lower 32 bits of each mask. XXX hack. * * NB: on certain platforms (Sparc at least) Linux actually * supports 64 signals, but this code is a long way from * running on anything but i386, so ignore that for now. */ PROC_LOCK(p); sbuf_printf(sb, "SigPnd:\t%08x\n", p->p_siglist.__bits[0]); /* * I can't seem to find out where the signal mask is in * relation to struct proc, so SigBlk is left unimplemented. */ sbuf_printf(sb, "SigBlk:\t%08x\n", 0); /* XXX */ ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); sbuf_printf(sb, "SigIgn:\t%08x\n", ps->ps_sigignore.__bits[0]); sbuf_printf(sb, "SigCgt:\t%08x\n", ps->ps_sigcatch.__bits[0]); mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); /* * Linux also prints the capability masks, but we don't have * capabilities yet, and when we do get them they're likely to * be meaningless to Linux programs, so we lie. XXX */ sbuf_printf(sb, "CapInh:\t%016x\n", 0); sbuf_printf(sb, "CapPrm:\t%016x\n", 0); sbuf_printf(sb, "CapEff:\t%016x\n", 0); return (0); } /* * Filler function for proc/pid/cwd */ static int linprocfs_doproccwd(PFS_FILL_ARGS) { char *fullpath = "unknown"; char *freepath = NULL; vn_fullpath(td, p->p_fd->fd_cdir, &fullpath, &freepath); sbuf_printf(sb, "%s", fullpath); if (freepath) free(freepath, M_TEMP); return (0); } /* * Filler function for proc/pid/root */ static int linprocfs_doprocroot(PFS_FILL_ARGS) { struct vnode *rvp; char *fullpath = "unknown"; char *freepath = NULL; rvp = jailed(p->p_ucred) ? p->p_fd->fd_jdir : p->p_fd->fd_rdir; vn_fullpath(td, rvp, &fullpath, &freepath); sbuf_printf(sb, "%s", fullpath); if (freepath) free(freepath, M_TEMP); return (0); } /* * Filler function for proc/pid/cmdline */ static int linprocfs_doproccmdline(PFS_FILL_ARGS) { int ret; PROC_LOCK(p); if ((ret = p_cansee(td, p)) != 0) { PROC_UNLOCK(p); return (ret); } /* * Mimic linux behavior and pass only processes with usermode * address space as valid. Return zero silently otherwize. */ if (p->p_vmspace == &vmspace0) { PROC_UNLOCK(p); return (0); } if (p->p_args != NULL) { sbuf_bcpy(sb, p->p_args->ar_args, p->p_args->ar_length); PROC_UNLOCK(p); return (0); } if ((p->p_flag & P_SYSTEM) != 0) { PROC_UNLOCK(p); return (0); } PROC_UNLOCK(p); ret = proc_getargv(td, p, sb); return (ret); } /* * Filler function for proc/pid/environ */ static int linprocfs_doprocenviron(PFS_FILL_ARGS) { int ret; PROC_LOCK(p); if ((ret = p_candebug(td, p)) != 0) { PROC_UNLOCK(p); return (ret); } /* * Mimic linux behavior and pass only processes with usermode * address space as valid. Return zero silently otherwize. */ if (p->p_vmspace == &vmspace0) { PROC_UNLOCK(p); return (0); } if ((p->p_flag & P_SYSTEM) != 0) { PROC_UNLOCK(p); return (0); } PROC_UNLOCK(p); ret = proc_getenvv(td, p, sb); return (ret); } /* * Filler function for proc/pid/maps */ static int linprocfs_doprocmaps(PFS_FILL_ARGS) { struct vmspace *vm; vm_map_t map; vm_map_entry_t entry, tmp_entry; vm_object_t obj, tobj, lobj; vm_offset_t e_start, e_end; vm_ooffset_t off = 0; vm_prot_t e_prot; unsigned int last_timestamp; char *name = "", *freename = NULL; ino_t ino; int ref_count, shadow_count, flags; int error; struct vnode *vp; struct vattr vat; PROC_LOCK(p); error = p_candebug(td, p); PROC_UNLOCK(p); if (error) return (error); if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); error = 0; vm = vmspace_acquire_ref(p); if (vm == NULL) return (ESRCH); map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { name = ""; freename = NULL; if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; e_prot = entry->protection; e_start = entry->start; e_end = entry->end; obj = entry->object.vm_object; for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } last_timestamp = map->timestamp; vm_map_unlock_read(map); ino = 0; if (lobj) { off = IDX_TO_OFF(lobj->size); if (lobj->type == OBJT_VNODE) { vp = lobj->handle; if (vp) vref(vp); } else vp = NULL; if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); flags = obj->flags; ref_count = obj->ref_count; shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp) { vn_fullpath(td, vp, &name, &freename); vn_lock(vp, LK_SHARED | LK_RETRY); VOP_GETATTR(vp, &vat, td->td_ucred); ino = vat.va_fileid; vput(vp); } } else { flags = 0; ref_count = 0; shadow_count = 0; } /* * format: * start, end, access, offset, major, minor, inode, name. */ error = sbuf_printf(sb, "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n", (u_long)e_start, (u_long)e_end, (e_prot & VM_PROT_READ)?"r":"-", (e_prot & VM_PROT_WRITE)?"w":"-", (e_prot & VM_PROT_EXECUTE)?"x":"-", "p", (u_long)off, 0, 0, (u_long)ino, *name ? " " : "", name ); if (freename) free(freename, M_TEMP); vm_map_lock_read(map); if (error == -1) { error = 0; break; } if (last_timestamp != map->timestamp) { /* * Look again for the entry because the map was * modified while it was unlocked. Specifically, * the entry may have been clipped, merged, or deleted. */ vm_map_lookup_entry(map, e_end - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); return (error); } /* * Filler function for proc/net/dev */ static int linprocfs_donetdev(PFS_FILL_ARGS) { char ifname[16]; /* XXX LINUX_IFNAMSIZ */ struct ifnet *ifp; sbuf_printf(sb, "%6s|%58s|%s\n" "%6s|%58s|%58s\n", "Inter-", " Receive", " Transmit", " face", "bytes packets errs drop fifo frame compressed multicast", "bytes packets errs drop fifo colls carrier compressed"); CURVNET_SET(TD_TO_VNET(curthread)); IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { linux_ifname(ifp, ifname, sizeof ifname); sbuf_printf(sb, "%6.6s: ", ifname); - sbuf_printf(sb, "%7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu ", - ifp->if_ibytes, /* rx_bytes */ - ifp->if_ipackets, /* rx_packets */ - ifp->if_ierrors, /* rx_errors */ - ifp->if_iqdrops, /* rx_dropped + - * rx_missed_errors */ - 0UL, /* rx_fifo_errors */ - 0UL, /* rx_length_errors + - * rx_over_errors + - * rx_crc_errors + - * rx_frame_errors */ - 0UL, /* rx_compressed */ - ifp->if_imcasts); /* multicast, XXX-BZ rx only? */ - sbuf_printf(sb, "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", - ifp->if_obytes, /* tx_bytes */ - ifp->if_opackets, /* tx_packets */ - ifp->if_oerrors, /* tx_errors */ - 0UL, /* tx_dropped */ - 0UL, /* tx_fifo_errors */ - ifp->if_collisions, /* collisions */ - 0UL, /* tx_carrier_errors + - * tx_aborted_errors + - * tx_window_errors + - * tx_heartbeat_errors */ - 0UL); /* tx_compressed */ + sbuf_printf(sb, "%7ju %7ju %4ju %4ju %4lu %5lu %10lu %9ju ", + (uintmax_t )ifp->if_ibytes, /* rx_bytes */ + (uintmax_t )ifp->if_ipackets, /* rx_packets */ + (uintmax_t )ifp->if_ierrors, /* rx_errors */ + (uintmax_t )ifp->if_iqdrops, /* rx_dropped + + * rx_missed_errors */ + 0UL, /* rx_fifo_errors */ + 0UL, /* rx_length_errors + + * rx_over_errors + + * rx_crc_errors + + * rx_frame_errors */ + 0UL, /* rx_compressed */ + (uintmax_t )ifp->if_imcasts); /* multicast, + * XXX-BZ rx only? */ + sbuf_printf(sb, "%8ju %7ju %4ju %4lu %4lu %5ju %7lu %10lu\n", + (uintmax_t )ifp->if_obytes, /* tx_bytes */ + (uintmax_t )ifp->if_opackets, /* tx_packets */ + (uintmax_t )ifp->if_oerrors, /* tx_errors */ + 0UL, /* tx_dropped */ + 0UL, /* tx_fifo_errors */ + (uintmax_t )ifp->if_collisions, /* collisions */ + 0UL, /* tx_carrier_errors + + * tx_aborted_errors + + * tx_window_errors + + * tx_heartbeat_errors*/ + 0UL); /* tx_compressed */ } IFNET_RUNLOCK(); CURVNET_RESTORE(); return (0); } /* * Filler function for proc/sys/kernel/osrelease */ static int linprocfs_doosrelease(PFS_FILL_ARGS) { char osrelease[LINUX_MAX_UTSNAME]; linux_get_osrelease(td, osrelease); sbuf_printf(sb, "%s\n", osrelease); return (0); } /* * Filler function for proc/sys/kernel/ostype */ static int linprocfs_doostype(PFS_FILL_ARGS) { char osname[LINUX_MAX_UTSNAME]; linux_get_osname(td, osname); sbuf_printf(sb, "%s\n", osname); return (0); } /* * Filler function for proc/sys/kernel/version */ static int linprocfs_doosbuild(PFS_FILL_ARGS) { linprocfs_osbuild(td, sb); sbuf_cat(sb, "\n"); return (0); } /* * Filler function for proc/sys/kernel/msgmni */ static int linprocfs_domsgmni(PFS_FILL_ARGS) { sbuf_printf(sb, "%d\n", msginfo.msgmni); return (0); } /* * Filler function for proc/sys/kernel/pid_max */ static int linprocfs_dopid_max(PFS_FILL_ARGS) { sbuf_printf(sb, "%i\n", PID_MAX); return (0); } /* * Filler function for proc/sys/kernel/sem */ static int linprocfs_dosem(PFS_FILL_ARGS) { sbuf_printf(sb, "%d %d %d %d\n", seminfo.semmsl, seminfo.semmns, seminfo.semopm, seminfo.semmni); return (0); } /* * Filler function for proc/scsi/device_info */ static int linprocfs_doscsidevinfo(PFS_FILL_ARGS) { return (0); } /* * Filler function for proc/scsi/scsi */ static int linprocfs_doscsiscsi(PFS_FILL_ARGS) { return (0); } extern struct cdevsw *cdevsw[]; /* * Filler function for proc/devices */ static int linprocfs_dodevices(PFS_FILL_ARGS) { char *char_devices; sbuf_printf(sb, "Character devices:\n"); char_devices = linux_get_char_devices(); sbuf_printf(sb, "%s", char_devices); linux_free_get_char_devices(char_devices); sbuf_printf(sb, "\nBlock devices:\n"); return (0); } /* * Filler function for proc/cmdline */ static int linprocfs_docmdline(PFS_FILL_ARGS) { sbuf_printf(sb, "BOOT_IMAGE=%s", kernelname); sbuf_printf(sb, " ro root=302\n"); return (0); } /* * Filler function for proc/filesystems */ static int linprocfs_dofilesystems(PFS_FILL_ARGS) { struct vfsconf *vfsp; mtx_lock(&Giant); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_flags & VFCF_SYNTHETIC) sbuf_printf(sb, "nodev"); sbuf_printf(sb, "\t%s\n", vfsp->vfc_name); } mtx_unlock(&Giant); return(0); } #if 0 /* * Filler function for proc/modules */ static int linprocfs_domodules(PFS_FILL_ARGS) { struct linker_file *lf; TAILQ_FOREACH(lf, &linker_files, link) { sbuf_printf(sb, "%-20s%8lu%4d\n", lf->filename, (unsigned long)lf->size, lf->refs); } return (0); } #endif /* * Filler function for proc/pid/fd */ static int linprocfs_dofdescfs(PFS_FILL_ARGS) { if (p == curproc) sbuf_printf(sb, "/dev/fd"); else sbuf_printf(sb, "unknown"); return (0); } /* * Filler function for proc/sys/kernel/random/uuid */ static int linprocfs_douuid(PFS_FILL_ARGS) { struct uuid uuid; kern_uuidgen(&uuid, 1); sbuf_printf_uuid(sb, &uuid); sbuf_printf(sb, "\n"); return(0); } /* * Constructor */ static int linprocfs_init(PFS_INIT_ARGS) { struct pfs_node *root; struct pfs_node *dir; root = pi->pi_root; /* /proc/... */ pfs_create_file(root, "cmdline", &linprocfs_docmdline, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "devices", &linprocfs_dodevices, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "filesystems", &linprocfs_dofilesystems, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "loadavg", &linprocfs_doloadavg, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "meminfo", &linprocfs_domeminfo, NULL, NULL, NULL, PFS_RD); #if 0 pfs_create_file(root, "modules", &linprocfs_domodules, NULL, NULL, NULL, PFS_RD); #endif pfs_create_file(root, "mounts", &linprocfs_domtab, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "mtab", &linprocfs_domtab, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "partitions", &linprocfs_dopartitions, NULL, NULL, NULL, PFS_RD); pfs_create_link(root, "self", &procfs_docurproc, NULL, NULL, NULL, 0); pfs_create_file(root, "stat", &linprocfs_dostat, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "swaps", &linprocfs_doswaps, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "uptime", &linprocfs_douptime, NULL, NULL, NULL, PFS_RD); pfs_create_file(root, "version", &linprocfs_doversion, NULL, NULL, NULL, PFS_RD); /* /proc/net/... */ dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0); pfs_create_file(dir, "dev", &linprocfs_donetdev, NULL, NULL, NULL, PFS_RD); /* /proc//... */ dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP); pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline, NULL, NULL, NULL, PFS_RD); pfs_create_link(dir, "cwd", &linprocfs_doproccwd, NULL, NULL, NULL, 0); pfs_create_file(dir, "environ", &linprocfs_doprocenviron, NULL, NULL, NULL, PFS_RD); pfs_create_link(dir, "exe", &procfs_doprocfile, NULL, &procfs_notsystem, NULL, 0); pfs_create_file(dir, "maps", &linprocfs_doprocmaps, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "mem", &procfs_doprocmem, &procfs_attr, &procfs_candebug, NULL, PFS_RDWR|PFS_RAW); pfs_create_link(dir, "root", &linprocfs_doprocroot, NULL, NULL, NULL, 0); pfs_create_file(dir, "stat", &linprocfs_doprocstat, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "statm", &linprocfs_doprocstatm, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "status", &linprocfs_doprocstatus, NULL, NULL, NULL, PFS_RD); pfs_create_link(dir, "fd", &linprocfs_dofdescfs, NULL, NULL, NULL, 0); /* /proc/scsi/... */ dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0); pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi, NULL, NULL, NULL, PFS_RD); /* /proc/sys/... */ dir = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0); /* /proc/sys/kernel/... */ dir = pfs_create_dir(dir, "kernel", NULL, NULL, NULL, 0); pfs_create_file(dir, "osrelease", &linprocfs_doosrelease, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "ostype", &linprocfs_doostype, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "version", &linprocfs_doosbuild, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "msgmni", &linprocfs_domsgmni, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "pid_max", &linprocfs_dopid_max, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, "sem", &linprocfs_dosem, NULL, NULL, NULL, PFS_RD); /* /proc/sys/kernel/random/... */ dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0); pfs_create_file(dir, "uuid", &linprocfs_douuid, NULL, NULL, NULL, PFS_RD); return (0); } /* * Destructor */ static int linprocfs_uninit(PFS_INIT_ARGS) { /* nothing to do, pseudofs will GC */ return (0); } PSEUDOFS(linprocfs, 1, 0); MODULE_DEPEND(linprocfs, linux, 1, 1, 1); MODULE_DEPEND(linprocfs, procfs, 1, 1, 1); MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1); MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1); Index: head/sys/dev/bxe/bxe.c =================================================================== --- head/sys/dev/bxe/bxe.c (revision 263101) +++ head/sys/dev/bxe/bxe.c (revision 263102) @@ -1,18819 +1,18815 @@ /*- * Copyright (c) 2007-2013 Broadcom Corporation. All rights reserved. * * Eric Davis * David Christensen * Gary Zambrano * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Broadcom Corporation nor the name of its contributors * may be used to endorse or promote products derived from this software * without specific prior written consent. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define BXE_DRIVER_VERSION "1.78.78" #include "bxe.h" #include "ecore_sp.h" #include "ecore_init.h" #include "ecore_init_ops.h" #include "57710_int_offsets.h" #include "57711_int_offsets.h" #include "57712_int_offsets.h" /* * CTLTYPE_U64 and sysctl_handle_64 were added in r217616. Define these * explicitly here for older kernels that don't include this changeset. */ #ifndef CTLTYPE_U64 #define CTLTYPE_U64 CTLTYPE_QUAD #define sysctl_handle_64 sysctl_handle_quad #endif /* * CSUM_TCP_IPV6 and CSUM_UDP_IPV6 were added in r236170. Define these * here as zero(0) for older kernels that don't include this changeset * thereby masking the functionality. */ #ifndef CSUM_TCP_IPV6 #define CSUM_TCP_IPV6 0 #define CSUM_UDP_IPV6 0 #endif /* * pci_find_cap was added in r219865. Re-define this at pci_find_extcap * for older kernels that don't include this changeset. */ #if __FreeBSD_version < 900035 #define pci_find_cap pci_find_extcap #endif #define BXE_DEF_SB_ATT_IDX 0x0001 #define BXE_DEF_SB_IDX 0x0002 /* * FLR Support - bxe_pf_flr_clnup() is called during nic_load in the per * function HW initialization. */ #define FLR_WAIT_USEC 10000 /* 10 msecs */ #define FLR_WAIT_INTERVAL 50 /* usecs */ #define FLR_POLL_CNT (FLR_WAIT_USEC / FLR_WAIT_INTERVAL) /* 200 */ struct pbf_pN_buf_regs { int pN; uint32_t init_crd; uint32_t crd; uint32_t crd_freed; }; struct pbf_pN_cmd_regs { int pN; uint32_t lines_occup; uint32_t lines_freed; }; /* * PCI Device ID Table used by bxe_probe(). */ #define BXE_DEVDESC_MAX 64 static struct bxe_device_type bxe_devs[] = { { BRCM_VENDORID, CHIP_NUM_57710, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57710 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57711, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57711 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57711E, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57711E 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57712, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57712 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57712_MF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57712 MF 10GbE" }, #if 0 { BRCM_VENDORID, CHIP_NUM_57712_VF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57712 VF 10GbE" }, #endif { BRCM_VENDORID, CHIP_NUM_57800, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57800 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57800_MF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57800 MF 10GbE" }, #if 0 { BRCM_VENDORID, CHIP_NUM_57800_VF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57800 VF 10GbE" }, #endif { BRCM_VENDORID, CHIP_NUM_57810, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57810 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57810_MF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57810 MF 10GbE" }, #if 0 { BRCM_VENDORID, CHIP_NUM_57810_VF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57810 VF 10GbE" }, #endif { BRCM_VENDORID, CHIP_NUM_57811, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57811 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57811_MF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57811 MF 10GbE" }, #if 0 { BRCM_VENDORID, CHIP_NUM_57811_VF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57811 VF 10GbE" }, #endif { BRCM_VENDORID, CHIP_NUM_57840_4_10, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57840 4x10GbE" }, #if 0 { BRCM_VENDORID, CHIP_NUM_57840_2_20, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57840 2x20GbE" }, #endif { BRCM_VENDORID, CHIP_NUM_57840_MF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57840 MF 10GbE" }, #if 0 { BRCM_VENDORID, CHIP_NUM_57840_VF, PCI_ANY_ID, PCI_ANY_ID, "Broadcom NetXtreme II BCM57840 VF 10GbE" }, #endif { 0, 0, 0, 0, NULL } }; MALLOC_DECLARE(M_BXE_ILT); MALLOC_DEFINE(M_BXE_ILT, "bxe_ilt", "bxe ILT pointer"); /* * FreeBSD device entry points. */ static int bxe_probe(device_t); static int bxe_attach(device_t); static int bxe_detach(device_t); static int bxe_shutdown(device_t); /* * FreeBSD KLD module/device interface event handler method. */ static device_method_t bxe_methods[] = { /* Device interface (device_if.h) */ DEVMETHOD(device_probe, bxe_probe), DEVMETHOD(device_attach, bxe_attach), DEVMETHOD(device_detach, bxe_detach), DEVMETHOD(device_shutdown, bxe_shutdown), #if 0 DEVMETHOD(device_suspend, bxe_suspend), DEVMETHOD(device_resume, bxe_resume), #endif /* Bus interface (bus_if.h) */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), KOBJMETHOD_END }; /* * FreeBSD KLD Module data declaration */ static driver_t bxe_driver = { "bxe", /* module name */ bxe_methods, /* event handler */ sizeof(struct bxe_softc) /* extra data */ }; /* * FreeBSD dev class is needed to manage dev instances and * to associate with a bus type */ static devclass_t bxe_devclass; MODULE_DEPEND(bxe, pci, 1, 1, 1); MODULE_DEPEND(bxe, ether, 1, 1, 1); DRIVER_MODULE(bxe, pci, bxe_driver, bxe_devclass, 0, 0); /* resources needed for unloading a previously loaded device */ #define BXE_PREV_WAIT_NEEDED 1 struct mtx bxe_prev_mtx; MTX_SYSINIT(bxe_prev_mtx, &bxe_prev_mtx, "bxe_prev_lock", MTX_DEF); struct bxe_prev_list_node { LIST_ENTRY(bxe_prev_list_node) node; uint8_t bus; uint8_t slot; uint8_t path; uint8_t aer; /* XXX automatic error recovery */ uint8_t undi; }; static LIST_HEAD(, bxe_prev_list_node) bxe_prev_list = LIST_HEAD_INITIALIZER(bxe_prev_list); static int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */ /* Tunable device values... */ SYSCTL_NODE(_hw, OID_AUTO, bxe, CTLFLAG_RD, 0, "bxe driver parameters"); /* Debug */ unsigned long bxe_debug = 0; TUNABLE_ULONG("hw.bxe.debug", &bxe_debug); SYSCTL_ULONG(_hw_bxe, OID_AUTO, debug, (CTLFLAG_RDTUN), &bxe_debug, 0, "Debug logging mode"); /* Interrupt Mode: 0 (IRQ), 1 (MSI/IRQ), and 2 (MSI-X/MSI/IRQ) */ static int bxe_interrupt_mode = INTR_MODE_MSIX; TUNABLE_INT("hw.bxe.interrupt_mode", &bxe_interrupt_mode); SYSCTL_INT(_hw_bxe, OID_AUTO, interrupt_mode, CTLFLAG_RDTUN, &bxe_interrupt_mode, 0, "Interrupt (MSI-X/MSI/INTx) mode"); /* Number of Queues: 0 (Auto) or 1 to 16 (fixed queue number) */ static int bxe_queue_count = 4; TUNABLE_INT("hw.bxe.queue_count", &bxe_queue_count); SYSCTL_INT(_hw_bxe, OID_AUTO, queue_count, CTLFLAG_RDTUN, &bxe_queue_count, 0, "Multi-Queue queue count"); /* max number of buffers per queue (default RX_BD_USABLE) */ static int bxe_max_rx_bufs = 0; TUNABLE_INT("hw.bxe.max_rx_bufs", &bxe_max_rx_bufs); SYSCTL_INT(_hw_bxe, OID_AUTO, max_rx_bufs, CTLFLAG_RDTUN, &bxe_max_rx_bufs, 0, "Maximum Number of Rx Buffers Per Queue"); /* Host interrupt coalescing RX tick timer (usecs) */ static int bxe_hc_rx_ticks = 25; TUNABLE_INT("hw.bxe.hc_rx_ticks", &bxe_hc_rx_ticks); SYSCTL_INT(_hw_bxe, OID_AUTO, hc_rx_ticks, CTLFLAG_RDTUN, &bxe_hc_rx_ticks, 0, "Host Coalescing Rx ticks"); /* Host interrupt coalescing TX tick timer (usecs) */ static int bxe_hc_tx_ticks = 50; TUNABLE_INT("hw.bxe.hc_tx_ticks", &bxe_hc_tx_ticks); SYSCTL_INT(_hw_bxe, OID_AUTO, hc_tx_ticks, CTLFLAG_RDTUN, &bxe_hc_tx_ticks, 0, "Host Coalescing Tx ticks"); /* Maximum number of Rx packets to process at a time */ static int bxe_rx_budget = 0xffffffff; TUNABLE_INT("hw.bxe.rx_budget", &bxe_rx_budget); SYSCTL_INT(_hw_bxe, OID_AUTO, rx_budget, CTLFLAG_TUN, &bxe_rx_budget, 0, "Rx processing budget"); /* Maximum LRO aggregation size */ static int bxe_max_aggregation_size = 0; TUNABLE_INT("hw.bxe.max_aggregation_size", &bxe_max_aggregation_size); SYSCTL_INT(_hw_bxe, OID_AUTO, max_aggregation_size, CTLFLAG_TUN, &bxe_max_aggregation_size, 0, "max aggregation size"); /* PCI MRRS: -1 (Auto), 0 (128B), 1 (256B), 2 (512B), 3 (1KB) */ static int bxe_mrrs = -1; TUNABLE_INT("hw.bxe.mrrs", &bxe_mrrs); SYSCTL_INT(_hw_bxe, OID_AUTO, mrrs, CTLFLAG_RDTUN, &bxe_mrrs, 0, "PCIe maximum read request size"); /* AutoGrEEEn: 0 (hardware default), 1 (force on), 2 (force off) */ static int bxe_autogreeen = 0; TUNABLE_INT("hw.bxe.autogreeen", &bxe_autogreeen); SYSCTL_INT(_hw_bxe, OID_AUTO, autogreeen, CTLFLAG_RDTUN, &bxe_autogreeen, 0, "AutoGrEEEn support"); /* 4-tuple RSS support for UDP: 0 (disabled), 1 (enabled) */ static int bxe_udp_rss = 0; TUNABLE_INT("hw.bxe.udp_rss", &bxe_udp_rss); SYSCTL_INT(_hw_bxe, OID_AUTO, udp_rss, CTLFLAG_RDTUN, &bxe_udp_rss, 0, "UDP RSS support"); #define STAT_NAME_LEN 32 /* no stat names below can be longer than this */ #define STATS_OFFSET32(stat_name) \ (offsetof(struct bxe_eth_stats, stat_name) / 4) #define Q_STATS_OFFSET32(stat_name) \ (offsetof(struct bxe_eth_q_stats, stat_name) / 4) static const struct { uint32_t offset; uint32_t size; uint32_t flags; #define STATS_FLAGS_PORT 1 #define STATS_FLAGS_FUNC 2 /* MF only cares about function stats */ #define STATS_FLAGS_BOTH (STATS_FLAGS_FUNC | STATS_FLAGS_PORT) char string[STAT_NAME_LEN]; } bxe_eth_stats_arr[] = { { STATS_OFFSET32(total_bytes_received_hi), 8, STATS_FLAGS_BOTH, "rx_bytes" }, { STATS_OFFSET32(error_bytes_received_hi), 8, STATS_FLAGS_BOTH, "rx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_bcast_packets" }, { STATS_OFFSET32(rx_stat_dot3statsfcserrors_hi), 8, STATS_FLAGS_PORT, "rx_crc_errors" }, { STATS_OFFSET32(rx_stat_dot3statsalignmenterrors_hi), 8, STATS_FLAGS_PORT, "rx_align_errors" }, { STATS_OFFSET32(rx_stat_etherstatsundersizepkts_hi), 8, STATS_FLAGS_PORT, "rx_undersize_packets" }, { STATS_OFFSET32(etherstatsoverrsizepkts_hi), 8, STATS_FLAGS_PORT, "rx_oversize_packets" }, { STATS_OFFSET32(rx_stat_etherstatsfragments_hi), 8, STATS_FLAGS_PORT, "rx_fragments" }, { STATS_OFFSET32(rx_stat_etherstatsjabbers_hi), 8, STATS_FLAGS_PORT, "rx_jabbers" }, { STATS_OFFSET32(no_buff_discard_hi), 8, STATS_FLAGS_BOTH, "rx_discards" }, { STATS_OFFSET32(mac_filter_discard), 4, STATS_FLAGS_PORT, "rx_filtered_packets" }, { STATS_OFFSET32(mf_tag_discard), 4, STATS_FLAGS_PORT, "rx_mf_tag_discard" }, { STATS_OFFSET32(pfc_frames_received_hi), 8, STATS_FLAGS_PORT, "pfc_frames_received" }, { STATS_OFFSET32(pfc_frames_sent_hi), 8, STATS_FLAGS_PORT, "pfc_frames_sent" }, { STATS_OFFSET32(brb_drop_hi), 8, STATS_FLAGS_PORT, "rx_brb_discard" }, { STATS_OFFSET32(brb_truncate_hi), 8, STATS_FLAGS_PORT, "rx_brb_truncate" }, { STATS_OFFSET32(pause_frames_received_hi), 8, STATS_FLAGS_PORT, "rx_pause_frames" }, { STATS_OFFSET32(rx_stat_maccontrolframesreceived_hi), 8, STATS_FLAGS_PORT, "rx_mac_ctrl_frames" }, { STATS_OFFSET32(nig_timer_max), 4, STATS_FLAGS_PORT, "rx_constant_pause_events" }, { STATS_OFFSET32(total_bytes_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bytes" }, { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), 8, STATS_FLAGS_PORT, "tx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bcast_packets" }, { STATS_OFFSET32(tx_stat_dot3statsinternalmactransmiterrors_hi), 8, STATS_FLAGS_PORT, "tx_mac_errors" }, { STATS_OFFSET32(rx_stat_dot3statscarriersenseerrors_hi), 8, STATS_FLAGS_PORT, "tx_carrier_errors" }, { STATS_OFFSET32(tx_stat_dot3statssinglecollisionframes_hi), 8, STATS_FLAGS_PORT, "tx_single_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsmultiplecollisionframes_hi), 8, STATS_FLAGS_PORT, "tx_multi_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsdeferredtransmissions_hi), 8, STATS_FLAGS_PORT, "tx_deferred" }, { STATS_OFFSET32(tx_stat_dot3statsexcessivecollisions_hi), 8, STATS_FLAGS_PORT, "tx_excess_collisions" }, { STATS_OFFSET32(tx_stat_dot3statslatecollisions_hi), 8, STATS_FLAGS_PORT, "tx_late_collisions" }, { STATS_OFFSET32(tx_stat_etherstatscollisions_hi), 8, STATS_FLAGS_PORT, "tx_total_collisions" }, { STATS_OFFSET32(tx_stat_etherstatspkts64octets_hi), 8, STATS_FLAGS_PORT, "tx_64_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts65octetsto127octets_hi), 8, STATS_FLAGS_PORT, "tx_65_to_127_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts128octetsto255octets_hi), 8, STATS_FLAGS_PORT, "tx_128_to_255_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts256octetsto511octets_hi), 8, STATS_FLAGS_PORT, "tx_256_to_511_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts512octetsto1023octets_hi), 8, STATS_FLAGS_PORT, "tx_512_to_1023_byte_packets" }, { STATS_OFFSET32(etherstatspkts1024octetsto1522octets_hi), 8, STATS_FLAGS_PORT, "tx_1024_to_1522_byte_packets" }, { STATS_OFFSET32(etherstatspktsover1522octets_hi), 8, STATS_FLAGS_PORT, "tx_1523_to_9022_byte_packets" }, { STATS_OFFSET32(pause_frames_sent_hi), 8, STATS_FLAGS_PORT, "tx_pause_frames" }, { STATS_OFFSET32(total_tpa_aggregations_hi), 8, STATS_FLAGS_FUNC, "tpa_aggregations" }, { STATS_OFFSET32(total_tpa_aggregated_frames_hi), 8, STATS_FLAGS_FUNC, "tpa_aggregated_frames"}, { STATS_OFFSET32(total_tpa_bytes_hi), 8, STATS_FLAGS_FUNC, "tpa_bytes"}, #if 0 { STATS_OFFSET32(recoverable_error), 4, STATS_FLAGS_FUNC, "recoverable_errors" }, { STATS_OFFSET32(unrecoverable_error), 4, STATS_FLAGS_FUNC, "unrecoverable_errors" }, #endif { STATS_OFFSET32(eee_tx_lpi), 4, STATS_FLAGS_PORT, "eee_tx_lpi"}, { STATS_OFFSET32(rx_calls), 4, STATS_FLAGS_FUNC, "rx_calls"}, { STATS_OFFSET32(rx_pkts), 4, STATS_FLAGS_FUNC, "rx_pkts"}, { STATS_OFFSET32(rx_tpa_pkts), 4, STATS_FLAGS_FUNC, "rx_tpa_pkts"}, { STATS_OFFSET32(rx_soft_errors), 4, STATS_FLAGS_FUNC, "rx_soft_errors"}, { STATS_OFFSET32(rx_hw_csum_errors), 4, STATS_FLAGS_FUNC, "rx_hw_csum_errors"}, { STATS_OFFSET32(rx_ofld_frames_csum_ip), 4, STATS_FLAGS_FUNC, "rx_ofld_frames_csum_ip"}, { STATS_OFFSET32(rx_ofld_frames_csum_tcp_udp), 4, STATS_FLAGS_FUNC, "rx_ofld_frames_csum_tcp_udp"}, { STATS_OFFSET32(rx_budget_reached), 4, STATS_FLAGS_FUNC, "rx_budget_reached"}, { STATS_OFFSET32(tx_pkts), 4, STATS_FLAGS_FUNC, "tx_pkts"}, { STATS_OFFSET32(tx_soft_errors), 4, STATS_FLAGS_FUNC, "tx_soft_errors"}, { STATS_OFFSET32(tx_ofld_frames_csum_ip), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_ip"}, { STATS_OFFSET32(tx_ofld_frames_csum_tcp), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_tcp"}, { STATS_OFFSET32(tx_ofld_frames_csum_udp), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_udp"}, { STATS_OFFSET32(tx_ofld_frames_lso), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_lso"}, { STATS_OFFSET32(tx_ofld_frames_lso_hdr_splits), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_lso_hdr_splits"}, { STATS_OFFSET32(tx_encap_failures), 4, STATS_FLAGS_FUNC, "tx_encap_failures"}, { STATS_OFFSET32(tx_hw_queue_full), 4, STATS_FLAGS_FUNC, "tx_hw_queue_full"}, { STATS_OFFSET32(tx_hw_max_queue_depth), 4, STATS_FLAGS_FUNC, "tx_hw_max_queue_depth"}, { STATS_OFFSET32(tx_dma_mapping_failure), 4, STATS_FLAGS_FUNC, "tx_dma_mapping_failure"}, { STATS_OFFSET32(tx_max_drbr_queue_depth), 4, STATS_FLAGS_FUNC, "tx_max_drbr_queue_depth"}, { STATS_OFFSET32(tx_window_violation_std), 4, STATS_FLAGS_FUNC, "tx_window_violation_std"}, { STATS_OFFSET32(tx_window_violation_tso), 4, STATS_FLAGS_FUNC, "tx_window_violation_tso"}, #if 0 { STATS_OFFSET32(tx_unsupported_tso_request_ipv6), 4, STATS_FLAGS_FUNC, "tx_unsupported_tso_request_ipv6"}, { STATS_OFFSET32(tx_unsupported_tso_request_not_tcp), 4, STATS_FLAGS_FUNC, "tx_unsupported_tso_request_not_tcp"}, #endif { STATS_OFFSET32(tx_chain_lost_mbuf), 4, STATS_FLAGS_FUNC, "tx_chain_lost_mbuf"}, { STATS_OFFSET32(tx_frames_deferred), 4, STATS_FLAGS_FUNC, "tx_frames_deferred"}, { STATS_OFFSET32(tx_queue_xoff), 4, STATS_FLAGS_FUNC, "tx_queue_xoff"}, { STATS_OFFSET32(mbuf_defrag_attempts), 4, STATS_FLAGS_FUNC, "mbuf_defrag_attempts"}, { STATS_OFFSET32(mbuf_defrag_failures), 4, STATS_FLAGS_FUNC, "mbuf_defrag_failures"}, { STATS_OFFSET32(mbuf_rx_bd_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_bd_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_bd_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_bd_mapping_failed"}, { STATS_OFFSET32(mbuf_rx_tpa_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_tpa_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_tpa_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_tpa_mapping_failed"}, { STATS_OFFSET32(mbuf_rx_sge_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_sge_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_sge_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_sge_mapping_failed"}, { STATS_OFFSET32(mbuf_alloc_tx), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tx"}, { STATS_OFFSET32(mbuf_alloc_rx), 4, STATS_FLAGS_FUNC, "mbuf_alloc_rx"}, { STATS_OFFSET32(mbuf_alloc_sge), 4, STATS_FLAGS_FUNC, "mbuf_alloc_sge"}, { STATS_OFFSET32(mbuf_alloc_tpa), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tpa"} }; static const struct { uint32_t offset; uint32_t size; char string[STAT_NAME_LEN]; } bxe_eth_q_stats_arr[] = { { Q_STATS_OFFSET32(total_bytes_received_hi), 8, "rx_bytes" }, { Q_STATS_OFFSET32(total_unicast_packets_received_hi), 8, "rx_ucast_packets" }, { Q_STATS_OFFSET32(total_multicast_packets_received_hi), 8, "rx_mcast_packets" }, { Q_STATS_OFFSET32(total_broadcast_packets_received_hi), 8, "rx_bcast_packets" }, { Q_STATS_OFFSET32(no_buff_discard_hi), 8, "rx_discards" }, { Q_STATS_OFFSET32(total_bytes_transmitted_hi), 8, "tx_bytes" }, { Q_STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, "tx_ucast_packets" }, { Q_STATS_OFFSET32(total_multicast_packets_transmitted_hi), 8, "tx_mcast_packets" }, { Q_STATS_OFFSET32(total_broadcast_packets_transmitted_hi), 8, "tx_bcast_packets" }, { Q_STATS_OFFSET32(total_tpa_aggregations_hi), 8, "tpa_aggregations" }, { Q_STATS_OFFSET32(total_tpa_aggregated_frames_hi), 8, "tpa_aggregated_frames"}, { Q_STATS_OFFSET32(total_tpa_bytes_hi), 8, "tpa_bytes"}, { Q_STATS_OFFSET32(rx_calls), 4, "rx_calls"}, { Q_STATS_OFFSET32(rx_pkts), 4, "rx_pkts"}, { Q_STATS_OFFSET32(rx_tpa_pkts), 4, "rx_tpa_pkts"}, { Q_STATS_OFFSET32(rx_soft_errors), 4, "rx_soft_errors"}, { Q_STATS_OFFSET32(rx_hw_csum_errors), 4, "rx_hw_csum_errors"}, { Q_STATS_OFFSET32(rx_ofld_frames_csum_ip), 4, "rx_ofld_frames_csum_ip"}, { Q_STATS_OFFSET32(rx_ofld_frames_csum_tcp_udp), 4, "rx_ofld_frames_csum_tcp_udp"}, { Q_STATS_OFFSET32(rx_budget_reached), 4, "rx_budget_reached"}, { Q_STATS_OFFSET32(tx_pkts), 4, "tx_pkts"}, { Q_STATS_OFFSET32(tx_soft_errors), 4, "tx_soft_errors"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_ip), 4, "tx_ofld_frames_csum_ip"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_tcp), 4, "tx_ofld_frames_csum_tcp"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_udp), 4, "tx_ofld_frames_csum_udp"}, { Q_STATS_OFFSET32(tx_ofld_frames_lso), 4, "tx_ofld_frames_lso"}, { Q_STATS_OFFSET32(tx_ofld_frames_lso_hdr_splits), 4, "tx_ofld_frames_lso_hdr_splits"}, { Q_STATS_OFFSET32(tx_encap_failures), 4, "tx_encap_failures"}, { Q_STATS_OFFSET32(tx_hw_queue_full), 4, "tx_hw_queue_full"}, { Q_STATS_OFFSET32(tx_hw_max_queue_depth), 4, "tx_hw_max_queue_depth"}, { Q_STATS_OFFSET32(tx_dma_mapping_failure), 4, "tx_dma_mapping_failure"}, { Q_STATS_OFFSET32(tx_max_drbr_queue_depth), 4, "tx_max_drbr_queue_depth"}, { Q_STATS_OFFSET32(tx_window_violation_std), 4, "tx_window_violation_std"}, { Q_STATS_OFFSET32(tx_window_violation_tso), 4, "tx_window_violation_tso"}, #if 0 { Q_STATS_OFFSET32(tx_unsupported_tso_request_ipv6), 4, "tx_unsupported_tso_request_ipv6"}, { Q_STATS_OFFSET32(tx_unsupported_tso_request_not_tcp), 4, "tx_unsupported_tso_request_not_tcp"}, #endif { Q_STATS_OFFSET32(tx_chain_lost_mbuf), 4, "tx_chain_lost_mbuf"}, { Q_STATS_OFFSET32(tx_frames_deferred), 4, "tx_frames_deferred"}, { Q_STATS_OFFSET32(tx_queue_xoff), 4, "tx_queue_xoff"}, { Q_STATS_OFFSET32(mbuf_defrag_attempts), 4, "mbuf_defrag_attempts"}, { Q_STATS_OFFSET32(mbuf_defrag_failures), 4, "mbuf_defrag_failures"}, { Q_STATS_OFFSET32(mbuf_rx_bd_alloc_failed), 4, "mbuf_rx_bd_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_bd_mapping_failed), 4, "mbuf_rx_bd_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_rx_tpa_alloc_failed), 4, "mbuf_rx_tpa_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_tpa_mapping_failed), 4, "mbuf_rx_tpa_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_rx_sge_alloc_failed), 4, "mbuf_rx_sge_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_sge_mapping_failed), 4, "mbuf_rx_sge_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_alloc_tx), 4, "mbuf_alloc_tx"}, { Q_STATS_OFFSET32(mbuf_alloc_rx), 4, "mbuf_alloc_rx"}, { Q_STATS_OFFSET32(mbuf_alloc_sge), 4, "mbuf_alloc_sge"}, { Q_STATS_OFFSET32(mbuf_alloc_tpa), 4, "mbuf_alloc_tpa"} }; #define BXE_NUM_ETH_STATS ARRAY_SIZE(bxe_eth_stats_arr) #define BXE_NUM_ETH_Q_STATS ARRAY_SIZE(bxe_eth_q_stats_arr) static void bxe_cmng_fns_init(struct bxe_softc *sc, uint8_t read_cfg, uint8_t cmng_type); static int bxe_get_cmng_fns_mode(struct bxe_softc *sc); static void storm_memset_cmng(struct bxe_softc *sc, struct cmng_init *cmng, uint8_t port); static void bxe_set_reset_global(struct bxe_softc *sc); static void bxe_set_reset_in_progress(struct bxe_softc *sc); static uint8_t bxe_reset_is_done(struct bxe_softc *sc, int engine); static uint8_t bxe_clear_pf_load(struct bxe_softc *sc); static uint8_t bxe_chk_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print); static void bxe_int_disable(struct bxe_softc *sc); static int bxe_release_leader_lock(struct bxe_softc *sc); static void bxe_pf_disable(struct bxe_softc *sc); static void bxe_free_fp_buffers(struct bxe_softc *sc); static inline void bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t rx_bd_prod, uint16_t rx_cq_prod, uint16_t rx_sge_prod); static void bxe_link_report_locked(struct bxe_softc *sc); static void bxe_link_report(struct bxe_softc *sc); static void bxe_link_status_update(struct bxe_softc *sc); static void bxe_periodic_callout_func(void *xsc); static void bxe_periodic_start(struct bxe_softc *sc); static void bxe_periodic_stop(struct bxe_softc *sc); static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t prev_index, uint16_t index); static int bxe_alloc_rx_tpa_mbuf(struct bxe_fastpath *fp, int queue); static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index); static uint8_t bxe_txeof(struct bxe_softc *sc, struct bxe_fastpath *fp); static void bxe_task_fp(struct bxe_fastpath *fp); static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m, uint8_t contents); static int bxe_alloc_mem(struct bxe_softc *sc); static void bxe_free_mem(struct bxe_softc *sc); static int bxe_alloc_fw_stats_mem(struct bxe_softc *sc); static void bxe_free_fw_stats_mem(struct bxe_softc *sc); static int bxe_interrupt_attach(struct bxe_softc *sc); static void bxe_interrupt_detach(struct bxe_softc *sc); static void bxe_set_rx_mode(struct bxe_softc *sc); static int bxe_init_locked(struct bxe_softc *sc); static int bxe_stop_locked(struct bxe_softc *sc); static __noinline int bxe_nic_load(struct bxe_softc *sc, int load_mode); static __noinline int bxe_nic_unload(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link); static void bxe_handle_sp_tq(void *context, int pending); static void bxe_handle_rx_mode_tq(void *context, int pending); static void bxe_handle_fp_tq(void *context, int pending); /* calculate crc32 on a buffer (NOTE: crc32_length MUST be aligned to 8) */ uint32_t calc_crc32(uint8_t *crc32_packet, uint32_t crc32_length, uint32_t crc32_seed, uint8_t complement) { uint32_t byte = 0; uint32_t bit = 0; uint8_t msb = 0; uint32_t temp = 0; uint32_t shft = 0; uint8_t current_byte = 0; uint32_t crc32_result = crc32_seed; const uint32_t CRC32_POLY = 0x1edc6f41; if ((crc32_packet == NULL) || (crc32_length == 0) || ((crc32_length % 8) != 0)) { return (crc32_result); } for (byte = 0; byte < crc32_length; byte = byte + 1) { current_byte = crc32_packet[byte]; for (bit = 0; bit < 8; bit = bit + 1) { /* msb = crc32_result[31]; */ msb = (uint8_t)(crc32_result >> 31); crc32_result = crc32_result << 1; /* it (msb != current_byte[bit]) */ if (msb != (0x1 & (current_byte >> bit))) { crc32_result = crc32_result ^ CRC32_POLY; /* crc32_result[0] = 1 */ crc32_result |= 1; } } } /* Last step is to: * 1. "mirror" every bit * 2. swap the 4 bytes * 3. complement each bit */ /* Mirror */ temp = crc32_result; shft = sizeof(crc32_result) * 8 - 1; for (crc32_result >>= 1; crc32_result; crc32_result >>= 1) { temp <<= 1; temp |= crc32_result & 1; shft-- ; } /* temp[31-bit] = crc32_result[bit] */ temp <<= shft; /* Swap */ /* crc32_result = {temp[7:0], temp[15:8], temp[23:16], temp[31:24]} */ { uint32_t t0, t1, t2, t3; t0 = (0x000000ff & (temp >> 24)); t1 = (0x0000ff00 & (temp >> 8)); t2 = (0x00ff0000 & (temp << 8)); t3 = (0xff000000 & (temp << 24)); crc32_result = t0 | t1 | t2 | t3; } /* Complement */ if (complement) { crc32_result = ~crc32_result; } return (crc32_result); } int bxe_test_bit(int nr, volatile unsigned long *addr) { return ((atomic_load_acq_long(addr) & (1 << nr)) != 0); } void bxe_set_bit(unsigned int nr, volatile unsigned long *addr) { atomic_set_acq_long(addr, (1 << nr)); } void bxe_clear_bit(int nr, volatile unsigned long *addr) { atomic_clear_acq_long(addr, (1 << nr)); } int bxe_test_and_set_bit(int nr, volatile unsigned long *addr) { unsigned long x; nr = (1 << nr); do { x = *addr; } while (atomic_cmpset_acq_long(addr, x, x | nr) == 0); // if (x & nr) bit_was_set; else bit_was_not_set; return (x & nr); } int bxe_test_and_clear_bit(int nr, volatile unsigned long *addr) { unsigned long x; nr = (1 << nr); do { x = *addr; } while (atomic_cmpset_acq_long(addr, x, x & ~nr) == 0); // if (x & nr) bit_was_set; else bit_was_not_set; return (x & nr); } int bxe_cmpxchg(volatile int *addr, int old, int new) { int x; do { x = *addr; } while (atomic_cmpset_acq_int(addr, old, new) == 0); return (x); } /* * Get DMA memory from the OS. * * Validates that the OS has provided DMA buffers in response to a * bus_dmamap_load call and saves the physical address of those buffers. * When the callback is used the OS will return 0 for the mapping function * (bus_dmamap_load) so we use the value of map_arg->maxsegs to pass any * failures back to the caller. * * Returns: * Nothing. */ static void bxe_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct bxe_dma *dma = arg; if (error) { dma->paddr = 0; dma->nseg = 0; BLOGE(dma->sc, "Failed DMA alloc '%s' (%d)!\n", dma->msg, error); } else { dma->paddr = segs->ds_addr; dma->nseg = nseg; #if 0 BLOGD(dma->sc, DBG_LOAD, "DMA alloc '%s': vaddr=%p paddr=%p nseg=%d size=%lu\n", dma->msg, dma->vaddr, (void *)dma->paddr, dma->nseg, dma->size); #endif } } /* * Allocate a block of memory and map it for DMA. No partial completions * allowed and release any resources acquired if we can't acquire all * resources. * * Returns: * 0 = Success, !0 = Failure */ int bxe_dma_alloc(struct bxe_softc *sc, bus_size_t size, struct bxe_dma *dma, const char *msg) { int rc; if (dma->size > 0) { BLOGE(sc, "dma block '%s' already has size %lu\n", msg, (unsigned long)dma->size); return (1); } memset(dma, 0, sizeof(*dma)); /* sanity */ dma->sc = sc; dma->size = size; snprintf(dma->msg, sizeof(dma->msg), "%s", msg); rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ BCM_PAGE_SIZE, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ size, /* max map size */ 1, /* num discontinuous */ size, /* max seg size */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &dma->tag); /* returned dma tag */ if (rc != 0) { BLOGE(sc, "Failed to create dma tag for '%s' (%d)\n", msg, rc); memset(dma, 0, sizeof(*dma)); return (1); } rc = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, (BUS_DMA_NOWAIT | BUS_DMA_ZERO), &dma->map); if (rc != 0) { BLOGE(sc, "Failed to alloc dma mem for '%s' (%d)\n", msg, rc); bus_dma_tag_destroy(dma->tag); memset(dma, 0, sizeof(*dma)); return (1); } rc = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, bxe_dma_map_addr, /* BLOGD in here */ dma, BUS_DMA_NOWAIT); if (rc != 0) { BLOGE(sc, "Failed to load dma map for '%s' (%d)\n", msg, rc); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); bus_dma_tag_destroy(dma->tag); memset(dma, 0, sizeof(*dma)); return (1); } return (0); } void bxe_dma_free(struct bxe_softc *sc, struct bxe_dma *dma) { if (dma->size > 0) { #if 0 BLOGD(sc, DBG_LOAD, "DMA free '%s': vaddr=%p paddr=%p nseg=%d size=%lu\n", dma->msg, dma->vaddr, (void *)dma->paddr, dma->nseg, dma->size); #endif DBASSERT(sc, (dma->tag != NULL), ("dma tag is NULL")); bus_dmamap_sync(dma->tag, dma->map, (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); bus_dma_tag_destroy(dma->tag); } memset(dma, 0, sizeof(*dma)); } /* * These indirect read and write routines are only during init. * The locking is handled by the MCP. */ void bxe_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val) { pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, addr, 4); pci_write_config(sc->dev, PCICFG_GRC_DATA, val, 4); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); } uint32_t bxe_reg_rd_ind(struct bxe_softc *sc, uint32_t addr) { uint32_t val; pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, addr, 4); val = pci_read_config(sc->dev, PCICFG_GRC_DATA, 4); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); return (val); } #if 0 void bxe_dp_dmae(struct bxe_softc *sc, struct dmae_command *dmae, int msglvl) { uint32_t src_type = dmae->opcode & DMAE_COMMAND_SRC; switch (dmae->opcode & DMAE_COMMAND_DST) { case DMAE_CMD_DST_PCI: if (src_type == DMAE_CMD_SRC_PCI) DP(msglvl, "DMAE: opcode 0x%08x\n" "src [%x:%08x], len [%d*4], dst [%x:%08x]\n" "comp_addr [%x:%08x], comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo, dmae->len, dmae->dst_addr_hi, dmae->dst_addr_lo, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); else DP(msglvl, "DMAE: opcode 0x%08x\n" "src [%08x], len [%d*4], dst [%x:%08x]\n" "comp_addr [%x:%08x], comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_lo >> 2, dmae->len, dmae->dst_addr_hi, dmae->dst_addr_lo, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); break; case DMAE_CMD_DST_GRC: if (src_type == DMAE_CMD_SRC_PCI) DP(msglvl, "DMAE: opcode 0x%08x\n" "src [%x:%08x], len [%d*4], dst_addr [%08x]\n" "comp_addr [%x:%08x], comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo, dmae->len, dmae->dst_addr_lo >> 2, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); else DP(msglvl, "DMAE: opcode 0x%08x\n" "src [%08x], len [%d*4], dst [%08x]\n" "comp_addr [%x:%08x], comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_lo >> 2, dmae->len, dmae->dst_addr_lo >> 2, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); break; default: if (src_type == DMAE_CMD_SRC_PCI) DP(msglvl, "DMAE: opcode 0x%08x\n" "src_addr [%x:%08x] len [%d * 4] dst_addr [none]\n" "comp_addr [%x:%08x] comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo, dmae->len, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); else DP(msglvl, "DMAE: opcode 0x%08x\n" "src_addr [%08x] len [%d * 4] dst_addr [none]\n" "comp_addr [%x:%08x] comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_lo >> 2, dmae->len, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); break; } } #endif static int bxe_acquire_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; int cnt; /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGE(sc, "resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE\n", resource); return (-1); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + (func * 8)); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + ((func - 6) * 8)); } /* validate the resource is not already taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { BLOGE(sc, "resource in use (status 0x%x bit 0x%x)\n", lock_status, resource_bit); return (-1); } /* try every 5ms for 5 seconds */ for (cnt = 0; cnt < 1000; cnt++) { REG_WR(sc, (hw_lock_control_reg + 4), resource_bit); lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { return (0); } DELAY(5000); } BLOGE(sc, "Resource lock timeout!\n"); return (-1); } static int bxe_release_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGE(sc, "resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE\n", resource); return (-1); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + (func * 8)); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + ((func - 6) * 8)); } /* validate the resource is currently taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (!(lock_status & resource_bit)) { BLOGE(sc, "resource not in use (status 0x%x bit 0x%x)\n", lock_status, resource_bit); return (-1); } REG_WR(sc, hw_lock_control_reg, resource_bit); return (0); } /* * Per pf misc lock must be acquired before the per port mcp lock. Otherwise, * had we done things the other way around, if two pfs from the same port * would attempt to access nvram at the same time, we could run into a * scenario such as: * pf A takes the port lock. * pf B succeeds in taking the same lock since they are from the same port. * pf A takes the per pf misc lock. Performs eeprom access. * pf A finishes. Unlocks the per pf misc lock. * Pf B takes the lock and proceeds to perform it's own access. * pf A unlocks the per port lock, while pf B is still working (!). * mcp takes the per port lock and corrupts pf B's access (and/or has it's own * access corrupted by pf B).* */ static int bxe_acquire_nvram_lock(struct bxe_softc *sc) { int port = SC_PORT(sc); int count, i; uint32_t val = 0; /* acquire HW lock: protect against other PFs in PF Direct Assignment */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_NVRAM); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* request access to nvram interface */ REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_SET1 << port)); for (i = 0; i < count*10; i++) { val = REG_RD(sc, MCP_REG_MCPR_NVM_SW_ARB); if (val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port)) { break; } DELAY(5); } if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { BLOGE(sc, "Cannot get access to nvram interface\n"); return (-1); } return (0); } static int bxe_release_nvram_lock(struct bxe_softc *sc) { int port = SC_PORT(sc); int count, i; uint32_t val = 0; /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* relinquish nvram interface */ REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << port)); for (i = 0; i < count*10; i++) { val = REG_RD(sc, MCP_REG_MCPR_NVM_SW_ARB); if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { break; } DELAY(5); } if (val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port)) { BLOGE(sc, "Cannot free access to nvram interface\n"); return (-1); } /* release HW lock: protect against other PFs in PF Direct Assignment */ bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_NVRAM); return (0); } static void bxe_enable_nvram_access(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE); /* enable both bits, even on read */ REG_WR(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE, (val | MCPR_NVM_ACCESS_ENABLE_EN | MCPR_NVM_ACCESS_ENABLE_WR_EN)); } static void bxe_disable_nvram_access(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE); /* disable both bits, even after read */ REG_WR(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE, (val & ~(MCPR_NVM_ACCESS_ENABLE_EN | MCPR_NVM_ACCESS_ENABLE_WR_EN))); } static int bxe_nvram_read_dword(struct bxe_softc *sc, uint32_t offset, uint32_t *ret_val, uint32_t cmd_flags) { int count, i, rc; uint32_t val; /* build the command word */ cmd_flags |= MCPR_NVM_COMMAND_DOIT; /* need to clear DONE bit separately */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, MCPR_NVM_COMMAND_DONE); /* address of the NVRAM to read from */ REG_WR(sc, MCP_REG_MCPR_NVM_ADDR, (offset & MCPR_NVM_ADDR_NVM_ADDR_VALUE)); /* issue a read command */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, cmd_flags); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* wait for completion */ *ret_val = 0; rc = -1; for (i = 0; i < count; i++) { DELAY(5); val = REG_RD(sc, MCP_REG_MCPR_NVM_COMMAND); if (val & MCPR_NVM_COMMAND_DONE) { val = REG_RD(sc, MCP_REG_MCPR_NVM_READ); /* we read nvram data in cpu order * but ethtool sees it as an array of bytes * converting to big-endian will do the work */ *ret_val = htobe32(val); rc = 0; break; } } if (rc == -1) { BLOGE(sc, "nvram read timeout expired\n"); } return (rc); } static int bxe_nvram_read(struct bxe_softc *sc, uint32_t offset, uint8_t *ret_buf, int buf_size) { uint32_t cmd_flags; uint32_t val; int rc; if ((offset & 0x03) || (buf_size & 0x03) || (buf_size == 0)) { BLOGE(sc, "Invalid parameter, offset 0x%x buf_size 0x%x\n", offset, buf_size); return (-1); } if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); /* read the first word(s) */ cmd_flags = MCPR_NVM_COMMAND_FIRST; while ((buf_size > sizeof(uint32_t)) && (rc == 0)) { rc = bxe_nvram_read_dword(sc, offset, &val, cmd_flags); memcpy(ret_buf, &val, 4); /* advance to the next dword */ offset += sizeof(uint32_t); ret_buf += sizeof(uint32_t); buf_size -= sizeof(uint32_t); cmd_flags = 0; } if (rc == 0) { cmd_flags |= MCPR_NVM_COMMAND_LAST; rc = bxe_nvram_read_dword(sc, offset, &val, cmd_flags); memcpy(ret_buf, &val, 4); } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } static int bxe_nvram_write_dword(struct bxe_softc *sc, uint32_t offset, uint32_t val, uint32_t cmd_flags) { int count, i, rc; /* build the command word */ cmd_flags |= (MCPR_NVM_COMMAND_DOIT | MCPR_NVM_COMMAND_WR); /* need to clear DONE bit separately */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, MCPR_NVM_COMMAND_DONE); /* write the data */ REG_WR(sc, MCP_REG_MCPR_NVM_WRITE, val); /* address of the NVRAM to write to */ REG_WR(sc, MCP_REG_MCPR_NVM_ADDR, (offset & MCPR_NVM_ADDR_NVM_ADDR_VALUE)); /* issue the write command */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, cmd_flags); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* wait for completion */ rc = -1; for (i = 0; i < count; i++) { DELAY(5); val = REG_RD(sc, MCP_REG_MCPR_NVM_COMMAND); if (val & MCPR_NVM_COMMAND_DONE) { rc = 0; break; } } if (rc == -1) { BLOGE(sc, "nvram write timeout expired\n"); } return (rc); } #define BYTE_OFFSET(offset) (8 * (offset & 0x03)) static int bxe_nvram_write1(struct bxe_softc *sc, uint32_t offset, uint8_t *data_buf, int buf_size) { uint32_t cmd_flags; uint32_t align_offset; uint32_t val; int rc; if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); cmd_flags = (MCPR_NVM_COMMAND_FIRST | MCPR_NVM_COMMAND_LAST); align_offset = (offset & ~0x03); rc = bxe_nvram_read_dword(sc, align_offset, &val, cmd_flags); if (rc == 0) { val &= ~(0xff << BYTE_OFFSET(offset)); val |= (*data_buf << BYTE_OFFSET(offset)); /* nvram data is returned as an array of bytes * convert it back to cpu order */ val = be32toh(val); rc = bxe_nvram_write_dword(sc, align_offset, val, cmd_flags); } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } static int bxe_nvram_write(struct bxe_softc *sc, uint32_t offset, uint8_t *data_buf, int buf_size) { uint32_t cmd_flags; uint32_t val; uint32_t written_so_far; int rc; if (buf_size == 1) { return (bxe_nvram_write1(sc, offset, data_buf, buf_size)); } if ((offset & 0x03) || (buf_size & 0x03) /* || (buf_size == 0) */) { BLOGE(sc, "Invalid parameter, offset 0x%x buf_size 0x%x\n", offset, buf_size); return (-1); } if (buf_size == 0) { return (0); /* nothing to do */ } if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); written_so_far = 0; cmd_flags = MCPR_NVM_COMMAND_FIRST; while ((written_so_far < buf_size) && (rc == 0)) { if (written_so_far == (buf_size - sizeof(uint32_t))) { cmd_flags |= MCPR_NVM_COMMAND_LAST; } else if (((offset + 4) % NVRAM_PAGE_SIZE) == 0) { cmd_flags |= MCPR_NVM_COMMAND_LAST; } else if ((offset % NVRAM_PAGE_SIZE) == 0) { cmd_flags |= MCPR_NVM_COMMAND_FIRST; } memcpy(&val, data_buf, 4); rc = bxe_nvram_write_dword(sc, offset, val, cmd_flags); /* advance to the next dword */ offset += sizeof(uint32_t); data_buf += sizeof(uint32_t); written_so_far += sizeof(uint32_t); cmd_flags = 0; } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } /* copy command into DMAE command memory and set DMAE command Go */ void bxe_post_dmae(struct bxe_softc *sc, struct dmae_command *dmae, int idx) { uint32_t cmd_offset; int i; cmd_offset = (DMAE_REG_CMD_MEM + (sizeof(struct dmae_command) * idx)); for (i = 0; i < ((sizeof(struct dmae_command) / 4)); i++) { REG_WR(sc, (cmd_offset + (i * 4)), *(((uint32_t *)dmae) + i)); } REG_WR(sc, dmae_reg_go_c[idx], 1); } uint32_t bxe_dmae_opcode_add_comp(uint32_t opcode, uint8_t comp_type) { return (opcode | ((comp_type << DMAE_COMMAND_C_DST_SHIFT) | DMAE_COMMAND_C_TYPE_ENABLE)); } uint32_t bxe_dmae_opcode_clr_src_reset(uint32_t opcode) { return (opcode & ~DMAE_COMMAND_SRC_RESET); } uint32_t bxe_dmae_opcode(struct bxe_softc *sc, uint8_t src_type, uint8_t dst_type, uint8_t with_comp, uint8_t comp_type) { uint32_t opcode = 0; opcode |= ((src_type << DMAE_COMMAND_SRC_SHIFT) | (dst_type << DMAE_COMMAND_DST_SHIFT)); opcode |= (DMAE_COMMAND_SRC_RESET | DMAE_COMMAND_DST_RESET); opcode |= (SC_PORT(sc) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0); opcode |= ((SC_VN(sc) << DMAE_COMMAND_E1HVN_SHIFT) | (SC_VN(sc) << DMAE_COMMAND_DST_VN_SHIFT)); opcode |= (DMAE_COM_SET_ERR << DMAE_COMMAND_ERR_POLICY_SHIFT); #ifdef __BIG_ENDIAN opcode |= DMAE_CMD_ENDIANITY_B_DW_SWAP; #else opcode |= DMAE_CMD_ENDIANITY_DW_SWAP; #endif if (with_comp) { opcode = bxe_dmae_opcode_add_comp(opcode, comp_type); } return (opcode); } static void bxe_prep_dmae_with_comp(struct bxe_softc *sc, struct dmae_command *dmae, uint8_t src_type, uint8_t dst_type) { memset(dmae, 0, sizeof(struct dmae_command)); /* set the opcode */ dmae->opcode = bxe_dmae_opcode(sc, src_type, dst_type, TRUE, DMAE_COMP_PCI); /* fill in the completion parameters */ dmae->comp_addr_lo = U64_LO(BXE_SP_MAPPING(sc, wb_comp)); dmae->comp_addr_hi = U64_HI(BXE_SP_MAPPING(sc, wb_comp)); dmae->comp_val = DMAE_COMP_VAL; } /* issue a DMAE command over the init channel and wait for completion */ static int bxe_issue_dmae_with_comp(struct bxe_softc *sc, struct dmae_command *dmae) { uint32_t *wb_comp = BXE_SP(sc, wb_comp); int timeout = CHIP_REV_IS_SLOW(sc) ? 400000 : 4000; BXE_DMAE_LOCK(sc); /* reset completion */ *wb_comp = 0; /* post the command on the channel used for initializations */ bxe_post_dmae(sc, dmae, INIT_DMAE_C(sc)); /* wait for completion */ DELAY(5); while ((*wb_comp & ~DMAE_PCI_ERR_FLAG) != DMAE_COMP_VAL) { if (!timeout || (sc->recovery_state != BXE_RECOVERY_DONE && sc->recovery_state != BXE_RECOVERY_NIC_LOADING)) { BLOGE(sc, "DMAE timeout!\n"); BXE_DMAE_UNLOCK(sc); return (DMAE_TIMEOUT); } timeout--; DELAY(50); } if (*wb_comp & DMAE_PCI_ERR_FLAG) { BLOGE(sc, "DMAE PCI error!\n"); BXE_DMAE_UNLOCK(sc); return (DMAE_PCI_ERROR); } BXE_DMAE_UNLOCK(sc); return (0); } void bxe_read_dmae(struct bxe_softc *sc, uint32_t src_addr, uint32_t len32) { struct dmae_command dmae; uint32_t *data; int i, rc; DBASSERT(sc, (len32 <= 4), ("DMAE read length is %d", len32)); if (!sc->dmae_ready) { data = BXE_SP(sc, wb_data[0]); for (i = 0; i < len32; i++) { data[i] = (CHIP_IS_E1(sc)) ? bxe_reg_rd_ind(sc, (src_addr + (i * 4))) : REG_RD(sc, (src_addr + (i * 4))); } return; } /* set opcode and fixed command fields */ bxe_prep_dmae_with_comp(sc, &dmae, DMAE_SRC_GRC, DMAE_DST_PCI); /* fill in addresses and len */ dmae.src_addr_lo = (src_addr >> 2); /* GRC addr has dword resolution */ dmae.src_addr_hi = 0; dmae.dst_addr_lo = U64_LO(BXE_SP_MAPPING(sc, wb_data)); dmae.dst_addr_hi = U64_HI(BXE_SP_MAPPING(sc, wb_data)); dmae.len = len32; /* issue the command and wait for completion */ if ((rc = bxe_issue_dmae_with_comp(sc, &dmae)) != 0) { bxe_panic(sc, ("DMAE failed (%d)\n", rc)); }; } void bxe_write_dmae(struct bxe_softc *sc, bus_addr_t dma_addr, uint32_t dst_addr, uint32_t len32) { struct dmae_command dmae; int rc; if (!sc->dmae_ready) { DBASSERT(sc, (len32 <= 4), ("DMAE not ready and length is %d", len32)); if (CHIP_IS_E1(sc)) { ecore_init_ind_wr(sc, dst_addr, BXE_SP(sc, wb_data[0]), len32); } else { ecore_init_str_wr(sc, dst_addr, BXE_SP(sc, wb_data[0]), len32); } return; } /* set opcode and fixed command fields */ bxe_prep_dmae_with_comp(sc, &dmae, DMAE_SRC_PCI, DMAE_DST_GRC); /* fill in addresses and len */ dmae.src_addr_lo = U64_LO(dma_addr); dmae.src_addr_hi = U64_HI(dma_addr); dmae.dst_addr_lo = (dst_addr >> 2); /* GRC addr has dword resolution */ dmae.dst_addr_hi = 0; dmae.len = len32; /* issue the command and wait for completion */ if ((rc = bxe_issue_dmae_with_comp(sc, &dmae)) != 0) { bxe_panic(sc, ("DMAE failed (%d)\n", rc)); } } void bxe_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { int dmae_wr_max = DMAE_LEN32_WR_MAX(sc); int offset = 0; while (len > dmae_wr_max) { bxe_write_dmae(sc, (phys_addr + offset), /* src DMA address */ (addr + offset), /* dst GRC address */ dmae_wr_max); offset += (dmae_wr_max * 4); len -= dmae_wr_max; } bxe_write_dmae(sc, (phys_addr + offset), /* src DMA address */ (addr + offset), /* dst GRC address */ len); } void bxe_set_ctx_validation(struct bxe_softc *sc, struct eth_context *cxt, uint32_t cid) { /* ustorm cxt validation */ cxt->ustorm_ag_context.cdu_usage = CDU_RSRVD_VALUE_TYPE_A(HW_CID(sc, cid), CDU_REGION_NUMBER_UCM_AG, ETH_CONNECTION_TYPE); /* xcontext validation */ cxt->xstorm_ag_context.cdu_reserved = CDU_RSRVD_VALUE_TYPE_A(HW_CID(sc, cid), CDU_REGION_NUMBER_XCM_AG, ETH_CONNECTION_TYPE); } static void bxe_storm_memset_hc_timeout(struct bxe_softc *sc, uint8_t port, uint8_t fw_sb_id, uint8_t sb_index, uint8_t ticks) { uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index)); REG_WR8(sc, addr, ticks); BLOGD(sc, DBG_LOAD, "port %d fw_sb_id %d sb_index %d ticks %d\n", port, fw_sb_id, sb_index, ticks); } static void bxe_storm_memset_hc_disable(struct bxe_softc *sc, uint8_t port, uint16_t fw_sb_id, uint8_t sb_index, uint8_t disable) { uint32_t enable_flag = (disable) ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT); uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index)); uint8_t flags; /* clear and set */ flags = REG_RD8(sc, addr); flags &= ~HC_INDEX_DATA_HC_ENABLED; flags |= enable_flag; REG_WR8(sc, addr, flags); BLOGD(sc, DBG_LOAD, "port %d fw_sb_id %d sb_index %d disable %d\n", port, fw_sb_id, sb_index, disable); } void bxe_update_coalesce_sb_index(struct bxe_softc *sc, uint8_t fw_sb_id, uint8_t sb_index, uint8_t disable, uint16_t usec) { int port = SC_PORT(sc); uint8_t ticks = (usec / 4); /* XXX ??? */ bxe_storm_memset_hc_timeout(sc, port, fw_sb_id, sb_index, ticks); disable = (disable) ? 1 : ((usec) ? 0 : 1); bxe_storm_memset_hc_disable(sc, port, fw_sb_id, sb_index, disable); } void elink_cb_udelay(struct bxe_softc *sc, uint32_t usecs) { DELAY(usecs); } uint32_t elink_cb_reg_read(struct bxe_softc *sc, uint32_t reg_addr) { return (REG_RD(sc, reg_addr)); } void elink_cb_reg_write(struct bxe_softc *sc, uint32_t reg_addr, uint32_t val) { REG_WR(sc, reg_addr, val); } void elink_cb_reg_wb_write(struct bxe_softc *sc, uint32_t offset, uint32_t *wb_write, uint16_t len) { REG_WR_DMAE(sc, offset, wb_write, len); } void elink_cb_reg_wb_read(struct bxe_softc *sc, uint32_t offset, uint32_t *wb_write, uint16_t len) { REG_RD_DMAE(sc, offset, wb_write, len); } uint8_t elink_cb_path_id(struct bxe_softc *sc) { return (SC_PATH(sc)); } void elink_cb_event_log(struct bxe_softc *sc, const elink_log_id_t elink_log_id, ...) { /* XXX */ #if 0 //va_list ap; va_start(ap, elink_log_id); _XXX_(sc, lm_log_id, ap); va_end(ap); #endif BLOGI(sc, "ELINK EVENT LOG (%d)\n", elink_log_id); } static int bxe_set_spio(struct bxe_softc *sc, int spio, uint32_t mode) { uint32_t spio_reg; /* Only 2 SPIOs are configurable */ if ((spio != MISC_SPIO_SPIO4) && (spio != MISC_SPIO_SPIO5)) { BLOGE(sc, "Invalid SPIO 0x%x\n", spio); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_SPIO); /* read SPIO and mask except the float bits */ spio_reg = (REG_RD(sc, MISC_REG_SPIO) & MISC_SPIO_FLOAT); switch (mode) { case MISC_SPIO_OUTPUT_LOW: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> output low\n", spio); /* clear FLOAT and set CLR */ spio_reg &= ~(spio << MISC_SPIO_FLOAT_POS); spio_reg |= (spio << MISC_SPIO_CLR_POS); break; case MISC_SPIO_OUTPUT_HIGH: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> output high\n", spio); /* clear FLOAT and set SET */ spio_reg &= ~(spio << MISC_SPIO_FLOAT_POS); spio_reg |= (spio << MISC_SPIO_SET_POS); break; case MISC_SPIO_INPUT_HI_Z: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> input\n", spio); /* set FLOAT */ spio_reg |= (spio << MISC_SPIO_FLOAT_POS); break; default: break; } REG_WR(sc, MISC_REG_SPIO, spio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_SPIO); return (0); } static int bxe_gpio_read(struct bxe_softc *sc, int gpio_num, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d\n", gpio_num); return (-1); } /* read GPIO value */ gpio_reg = REG_RD(sc, MISC_REG_GPIO); /* get the requested pin value */ return ((gpio_reg & gpio_mask) == gpio_mask) ? 1 : 0; } static int bxe_gpio_write(struct bxe_softc *sc, int gpio_num, uint32_t mode, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d\n", gpio_num); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO and mask except the float bits */ gpio_reg = (REG_RD(sc, MISC_REG_GPIO) & MISC_REGISTERS_GPIO_FLOAT); switch (mode) { case MISC_REGISTERS_GPIO_OUTPUT_LOW: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> output low\n", gpio_num, gpio_shift); /* clear FLOAT and set CLR */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_CLR_POS); break; case MISC_REGISTERS_GPIO_OUTPUT_HIGH: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> output high\n", gpio_num, gpio_shift); /* clear FLOAT and set SET */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_SET_POS); break; case MISC_REGISTERS_GPIO_INPUT_HI_Z: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> input\n", gpio_num, gpio_shift); /* set FLOAT */ gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); break; default: break; } REG_WR(sc, MISC_REG_GPIO, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } static int bxe_gpio_mult_write(struct bxe_softc *sc, uint8_t pins, uint32_t mode) { uint32_t gpio_reg; /* any port swapping should be handled by caller */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO and mask except the float bits */ gpio_reg = REG_RD(sc, MISC_REG_GPIO); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_CLR_POS); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_SET_POS); switch (mode) { case MISC_REGISTERS_GPIO_OUTPUT_LOW: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> output low\n", pins); /* set CLR */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_CLR_POS); break; case MISC_REGISTERS_GPIO_OUTPUT_HIGH: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> output high\n", pins); /* set SET */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_SET_POS); break; case MISC_REGISTERS_GPIO_INPUT_HI_Z: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> input\n", pins); /* set FLOAT */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_FLOAT_POS); break; default: BLOGE(sc, "Invalid GPIO mode assignment %d\n", mode); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (-1); } REG_WR(sc, MISC_REG_GPIO, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } static int bxe_gpio_int_write(struct bxe_softc *sc, int gpio_num, uint32_t mode, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d\n", gpio_num); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO int */ gpio_reg = REG_RD(sc, MISC_REG_GPIO_INT); switch (mode) { case MISC_REGISTERS_GPIO_INT_OUTPUT_CLR: BLOGD(sc, DBG_PHY, "Clear GPIO INT %d (shift %d) -> output low\n", gpio_num, gpio_shift); /* clear SET and set CLR */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_INT_SET_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_INT_CLR_POS); break; case MISC_REGISTERS_GPIO_INT_OUTPUT_SET: BLOGD(sc, DBG_PHY, "Set GPIO INT %d (shift %d) -> output high\n", gpio_num, gpio_shift); /* clear CLR and set SET */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_INT_CLR_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_INT_SET_POS); break; default: break; } REG_WR(sc, MISC_REG_GPIO_INT, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } uint32_t elink_cb_gpio_read(struct bxe_softc *sc, uint16_t gpio_num, uint8_t port) { return (bxe_gpio_read(sc, gpio_num, port)); } uint8_t elink_cb_gpio_write(struct bxe_softc *sc, uint16_t gpio_num, uint8_t mode, /* 0=low 1=high */ uint8_t port) { return (bxe_gpio_write(sc, gpio_num, mode, port)); } uint8_t elink_cb_gpio_mult_write(struct bxe_softc *sc, uint8_t pins, uint8_t mode) /* 0=low 1=high */ { return (bxe_gpio_mult_write(sc, pins, mode)); } uint8_t elink_cb_gpio_int_write(struct bxe_softc *sc, uint16_t gpio_num, uint8_t mode, /* 0=low 1=high */ uint8_t port) { return (bxe_gpio_int_write(sc, gpio_num, mode, port)); } void elink_cb_notify_link_changed(struct bxe_softc *sc) { REG_WR(sc, (MISC_REG_AEU_GENERAL_ATTN_12 + (SC_FUNC(sc) * sizeof(uint32_t))), 1); } /* send the MCP a request, block until there is a reply */ uint32_t elink_cb_fw_command(struct bxe_softc *sc, uint32_t command, uint32_t param) { int mb_idx = SC_FW_MB_IDX(sc); uint32_t seq; uint32_t rc = 0; uint32_t cnt = 1; uint8_t delay = CHIP_REV_IS_SLOW(sc) ? 100 : 10; BXE_FWMB_LOCK(sc); seq = ++sc->fw_seq; SHMEM_WR(sc, func_mb[mb_idx].drv_mb_param, param); SHMEM_WR(sc, func_mb[mb_idx].drv_mb_header, (command | seq)); BLOGD(sc, DBG_PHY, "wrote command 0x%08x to FW MB param 0x%08x\n", (command | seq), param); /* Let the FW do it's magic. GIve it up to 5 seconds... */ do { DELAY(delay * 1000); rc = SHMEM_RD(sc, func_mb[mb_idx].fw_mb_header); } while ((seq != (rc & FW_MSG_SEQ_NUMBER_MASK)) && (cnt++ < 500)); BLOGD(sc, DBG_PHY, "[after %d ms] read 0x%x seq 0x%x from FW MB\n", cnt*delay, rc, seq); /* is this a reply to our command? */ if (seq == (rc & FW_MSG_SEQ_NUMBER_MASK)) { rc &= FW_MSG_CODE_MASK; } else { /* Ruh-roh! */ BLOGE(sc, "FW failed to respond!\n"); // XXX bxe_fw_dump(sc); rc = 0; } BXE_FWMB_UNLOCK(sc); return (rc); } static uint32_t bxe_fw_command(struct bxe_softc *sc, uint32_t command, uint32_t param) { return (elink_cb_fw_command(sc, command, param)); } static void __storm_memset_dma_mapping(struct bxe_softc *sc, uint32_t addr, bus_addr_t mapping) { REG_WR(sc, addr, U64_LO(mapping)); REG_WR(sc, (addr + 4), U64_HI(mapping)); } static void storm_memset_spq_addr(struct bxe_softc *sc, bus_addr_t mapping, uint16_t abs_fid) { uint32_t addr = (XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PAGE_BASE_OFFSET(abs_fid)); __storm_memset_dma_mapping(sc, addr, mapping); } static void storm_memset_vf_to_pf(struct bxe_softc *sc, uint16_t abs_fid, uint16_t pf_id) { REG_WR8(sc, (BAR_XSTRORM_INTMEM + XSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_USTRORM_INTMEM + USTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); } static void storm_memset_func_en(struct bxe_softc *sc, uint16_t abs_fid, uint8_t enable) { REG_WR8(sc, (BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(abs_fid)), enable); } static void storm_memset_eq_data(struct bxe_softc *sc, struct event_ring_data *eq_data, uint16_t pfid) { uint32_t addr; size_t size; addr = (BAR_CSTRORM_INTMEM + CSTORM_EVENT_RING_DATA_OFFSET(pfid)); size = sizeof(struct event_ring_data); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)eq_data); } static void storm_memset_eq_prod(struct bxe_softc *sc, uint16_t eq_prod, uint16_t pfid) { uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_EVENT_RING_PROD_OFFSET(pfid)); REG_WR16(sc, addr, eq_prod); } /* * Post a slowpath command. * * A slowpath command is used to propogate a configuration change through * the controller in a controlled manner, allowing each STORM processor and * other H/W blocks to phase in the change. The commands sent on the * slowpath are referred to as ramrods. Depending on the ramrod used the * completion of the ramrod will occur in different ways. Here's a * breakdown of ramrods and how they complete: * * RAMROD_CMD_ID_ETH_PORT_SETUP * Used to setup the leading connection on a port. Completes on the * Receive Completion Queue (RCQ) of that port (typically fp[0]). * * RAMROD_CMD_ID_ETH_CLIENT_SETUP * Used to setup an additional connection on a port. Completes on the * RCQ of the multi-queue/RSS connection being initialized. * * RAMROD_CMD_ID_ETH_STAT_QUERY * Used to force the storm processors to update the statistics database * in host memory. This ramrod is send on the leading connection CID and * completes as an index increment of the CSTORM on the default status * block. * * RAMROD_CMD_ID_ETH_UPDATE * Used to update the state of the leading connection, usually to udpate * the RSS indirection table. Completes on the RCQ of the leading * connection. (Not currently used under FreeBSD until OS support becomes * available.) * * RAMROD_CMD_ID_ETH_HALT * Used when tearing down a connection prior to driver unload. Completes * on the RCQ of the multi-queue/RSS connection being torn down. Don't * use this on the leading connection. * * RAMROD_CMD_ID_ETH_SET_MAC * Sets the Unicast/Broadcast/Multicast used by the port. Completes on * the RCQ of the leading connection. * * RAMROD_CMD_ID_ETH_CFC_DEL * Used when tearing down a conneciton prior to driver unload. Completes * on the RCQ of the leading connection (since the current connection * has been completely removed from controller memory). * * RAMROD_CMD_ID_ETH_PORT_DEL * Used to tear down the leading connection prior to driver unload, * typically fp[0]. Completes as an index increment of the CSTORM on the * default status block. * * RAMROD_CMD_ID_ETH_FORWARD_SETUP * Used for connection offload. Completes on the RCQ of the multi-queue * RSS connection that is being offloaded. (Not currently used under * FreeBSD.) * * There can only be one command pending per function. * * Returns: * 0 = Success, !0 = Failure. */ /* must be called under the spq lock */ static inline struct eth_spe *bxe_sp_get_next(struct bxe_softc *sc) { struct eth_spe *next_spe = sc->spq_prod_bd; if (sc->spq_prod_bd == sc->spq_last_bd) { /* wrap back to the first eth_spq */ sc->spq_prod_bd = sc->spq; sc->spq_prod_idx = 0; } else { sc->spq_prod_bd++; sc->spq_prod_idx++; } return (next_spe); } /* must be called under the spq lock */ static inline void bxe_sp_prod_update(struct bxe_softc *sc) { int func = SC_FUNC(sc); /* * Make sure that BD data is updated before writing the producer. * BD data is written to the memory, the producer is read from the * memory, thus we need a full memory barrier to ensure the ordering. */ mb(); REG_WR16(sc, (BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func)), sc->spq_prod_idx); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); } /** * bxe_is_contextless_ramrod - check if the current command ends on EQ * * @cmd: command to check * @cmd_type: command type */ static inline int bxe_is_contextless_ramrod(int cmd, int cmd_type) { if ((cmd_type == NONE_CONNECTION_TYPE) || (cmd == RAMROD_CMD_ID_ETH_FORWARD_SETUP) || (cmd == RAMROD_CMD_ID_ETH_CLASSIFICATION_RULES) || (cmd == RAMROD_CMD_ID_ETH_FILTER_RULES) || (cmd == RAMROD_CMD_ID_ETH_MULTICAST_RULES) || (cmd == RAMROD_CMD_ID_ETH_SET_MAC) || (cmd == RAMROD_CMD_ID_ETH_RSS_UPDATE)) { return (TRUE); } else { return (FALSE); } } /** * bxe_sp_post - place a single command on an SP ring * * @sc: driver handle * @command: command to place (e.g. SETUP, FILTER_RULES, etc.) * @cid: SW CID the command is related to * @data_hi: command private data address (high 32 bits) * @data_lo: command private data address (low 32 bits) * @cmd_type: command type (e.g. NONE, ETH) * * SP data is handled as if it's always an address pair, thus data fields are * not swapped to little endian in upper functions. Instead this function swaps * data as if it's two uint32 fields. */ int bxe_sp_post(struct bxe_softc *sc, int command, int cid, uint32_t data_hi, uint32_t data_lo, int cmd_type) { struct eth_spe *spe; uint16_t type; int common; common = bxe_is_contextless_ramrod(command, cmd_type); BXE_SP_LOCK(sc); if (common) { if (!atomic_load_acq_long(&sc->eq_spq_left)) { BLOGE(sc, "EQ ring is full!\n"); BXE_SP_UNLOCK(sc); return (-1); } } else { if (!atomic_load_acq_long(&sc->cq_spq_left)) { BLOGE(sc, "SPQ ring is full!\n"); BXE_SP_UNLOCK(sc); return (-1); } } spe = bxe_sp_get_next(sc); /* CID needs port number to be encoded int it */ spe->hdr.conn_and_cmd_data = htole32((command << SPE_HDR_CMD_ID_SHIFT) | HW_CID(sc, cid)); type = (cmd_type << SPE_HDR_CONN_TYPE_SHIFT) & SPE_HDR_CONN_TYPE; /* TBD: Check if it works for VFs */ type |= ((SC_FUNC(sc) << SPE_HDR_FUNCTION_ID_SHIFT) & SPE_HDR_FUNCTION_ID); spe->hdr.type = htole16(type); spe->data.update_data_addr.hi = htole32(data_hi); spe->data.update_data_addr.lo = htole32(data_lo); /* * It's ok if the actual decrement is issued towards the memory * somewhere between the lock and unlock. Thus no more explict * memory barrier is needed. */ if (common) { atomic_subtract_acq_long(&sc->eq_spq_left, 1); } else { atomic_subtract_acq_long(&sc->cq_spq_left, 1); } BLOGD(sc, DBG_SP, "SPQE -> %#jx\n", (uintmax_t)sc->spq_dma.paddr); BLOGD(sc, DBG_SP, "FUNC_RDATA -> %p / %#jx\n", BXE_SP(sc, func_rdata), (uintmax_t)BXE_SP_MAPPING(sc, func_rdata)); BLOGD(sc, DBG_SP, "SPQE[%x] (%x:%x) (cmd, common?) (%d,%d) hw_cid %x data (%x:%x) type(0x%x) left (CQ, EQ) (%lx,%lx)\n", sc->spq_prod_idx, (uint32_t)U64_HI(sc->spq_dma.paddr), (uint32_t)(U64_LO(sc->spq_dma.paddr) + (uint8_t *)sc->spq_prod_bd - (uint8_t *)sc->spq), command, common, HW_CID(sc, cid), data_hi, data_lo, type, atomic_load_acq_long(&sc->cq_spq_left), atomic_load_acq_long(&sc->eq_spq_left)); bxe_sp_prod_update(sc); BXE_SP_UNLOCK(sc); return (0); } /** * bxe_debug_print_ind_table - prints the indirection table configuration. * * @sc: driver hanlde * @p: pointer to rss configuration */ #if 0 static void bxe_debug_print_ind_table(struct bxe_softc *sc, struct ecore_config_rss_params *p) { int i; BLOGD(sc, DBG_LOAD, "Setting indirection table to:\n"); BLOGD(sc, DBG_LOAD, " 0x0000: "); for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) { BLOGD(sc, DBG_LOAD, "0x%02x ", p->ind_table[i]); /* Print 4 bytes in a line */ if ((i + 1 < T_ETH_INDIRECTION_TABLE_SIZE) && (((i + 1) & 0x3) == 0)) { BLOGD(sc, DBG_LOAD, "\n"); BLOGD(sc, DBG_LOAD, "0x%04x: ", i + 1); } } BLOGD(sc, DBG_LOAD, "\n"); } #endif /* * FreeBSD Device probe function. * * Compares the device found to the driver's list of supported devices and * reports back to the bsd loader whether this is the right driver for the device. * This is the driver entry function called from the "kldload" command. * * Returns: * BUS_PROBE_DEFAULT on success, positive value on failure. */ static int bxe_probe(device_t dev) { struct bxe_softc *sc; struct bxe_device_type *t; char *descbuf; uint16_t did, sdid, svid, vid; /* Find our device structure */ sc = device_get_softc(dev); sc->dev = dev; t = bxe_devs; /* Get the data for the device to be probed. */ vid = pci_get_vendor(dev); did = pci_get_device(dev); svid = pci_get_subvendor(dev); sdid = pci_get_subdevice(dev); BLOGD(sc, DBG_LOAD, "%s(); VID = 0x%04X, DID = 0x%04X, SVID = 0x%04X, " "SDID = 0x%04X\n", __FUNCTION__, vid, did, svid, sdid); /* Look through the list of known devices for a match. */ while (t->bxe_name != NULL) { if ((vid == t->bxe_vid) && (did == t->bxe_did) && ((svid == t->bxe_svid) || (t->bxe_svid == PCI_ANY_ID)) && ((sdid == t->bxe_sdid) || (t->bxe_sdid == PCI_ANY_ID))) { descbuf = malloc(BXE_DEVDESC_MAX, M_TEMP, M_NOWAIT); if (descbuf == NULL) return (ENOMEM); /* Print out the device identity. */ snprintf(descbuf, BXE_DEVDESC_MAX, "%s (%c%d) BXE v:%s\n", t->bxe_name, (((pci_read_config(dev, PCIR_REVID, 4) & 0xf0) >> 4) + 'A'), (pci_read_config(dev, PCIR_REVID, 4) & 0xf), BXE_DRIVER_VERSION); device_set_desc_copy(dev, descbuf); free(descbuf, M_TEMP); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static void bxe_init_mutexes(struct bxe_softc *sc) { #ifdef BXE_CORE_LOCK_SX snprintf(sc->core_sx_name, sizeof(sc->core_sx_name), "bxe%d_core_lock", sc->unit); sx_init(&sc->core_sx, sc->core_sx_name); #else snprintf(sc->core_mtx_name, sizeof(sc->core_mtx_name), "bxe%d_core_lock", sc->unit); mtx_init(&sc->core_mtx, sc->core_mtx_name, NULL, MTX_DEF); #endif snprintf(sc->sp_mtx_name, sizeof(sc->sp_mtx_name), "bxe%d_sp_lock", sc->unit); mtx_init(&sc->sp_mtx, sc->sp_mtx_name, NULL, MTX_DEF); snprintf(sc->dmae_mtx_name, sizeof(sc->dmae_mtx_name), "bxe%d_dmae_lock", sc->unit); mtx_init(&sc->dmae_mtx, sc->dmae_mtx_name, NULL, MTX_DEF); snprintf(sc->port.phy_mtx_name, sizeof(sc->port.phy_mtx_name), "bxe%d_phy_lock", sc->unit); mtx_init(&sc->port.phy_mtx, sc->port.phy_mtx_name, NULL, MTX_DEF); snprintf(sc->fwmb_mtx_name, sizeof(sc->fwmb_mtx_name), "bxe%d_fwmb_lock", sc->unit); mtx_init(&sc->fwmb_mtx, sc->fwmb_mtx_name, NULL, MTX_DEF); snprintf(sc->print_mtx_name, sizeof(sc->print_mtx_name), "bxe%d_print_lock", sc->unit); mtx_init(&(sc->print_mtx), sc->print_mtx_name, NULL, MTX_DEF); snprintf(sc->stats_mtx_name, sizeof(sc->stats_mtx_name), "bxe%d_stats_lock", sc->unit); mtx_init(&(sc->stats_mtx), sc->stats_mtx_name, NULL, MTX_DEF); snprintf(sc->mcast_mtx_name, sizeof(sc->mcast_mtx_name), "bxe%d_mcast_lock", sc->unit); mtx_init(&(sc->mcast_mtx), sc->mcast_mtx_name, NULL, MTX_DEF); } static void bxe_release_mutexes(struct bxe_softc *sc) { #ifdef BXE_CORE_LOCK_SX sx_destroy(&sc->core_sx); #else if (mtx_initialized(&sc->core_mtx)) { mtx_destroy(&sc->core_mtx); } #endif if (mtx_initialized(&sc->sp_mtx)) { mtx_destroy(&sc->sp_mtx); } if (mtx_initialized(&sc->dmae_mtx)) { mtx_destroy(&sc->dmae_mtx); } if (mtx_initialized(&sc->port.phy_mtx)) { mtx_destroy(&sc->port.phy_mtx); } if (mtx_initialized(&sc->fwmb_mtx)) { mtx_destroy(&sc->fwmb_mtx); } if (mtx_initialized(&sc->print_mtx)) { mtx_destroy(&sc->print_mtx); } if (mtx_initialized(&sc->stats_mtx)) { mtx_destroy(&sc->stats_mtx); } if (mtx_initialized(&sc->mcast_mtx)) { mtx_destroy(&sc->mcast_mtx); } } static void bxe_tx_disable(struct bxe_softc* sc) { struct ifnet *ifp = sc->ifnet; /* tell the stack the driver is stopped and TX queue is full */ if (ifp != NULL) { ifp->if_drv_flags = 0; } } static void bxe_drv_pulse(struct bxe_softc *sc) { SHMEM_WR(sc, func_mb[SC_FW_MB_IDX(sc)].drv_pulse_mb, sc->fw_drv_pulse_wr_seq); } static inline uint16_t bxe_tx_avail(struct bxe_softc *sc, struct bxe_fastpath *fp) { int16_t used; uint16_t prod; uint16_t cons; prod = fp->tx_bd_prod; cons = fp->tx_bd_cons; used = SUB_S16(prod, cons); #if 0 KASSERT((used < 0), ("used tx bds < 0")); KASSERT((used > sc->tx_ring_size), ("used tx bds > tx_ring_size")); KASSERT(((sc->tx_ring_size - used) > MAX_TX_AVAIL), ("invalid number of tx bds used")); #endif return (int16_t)(sc->tx_ring_size) - used; } static inline int bxe_tx_queue_has_work(struct bxe_fastpath *fp) { uint16_t hw_cons; mb(); /* status block fields can change */ hw_cons = le16toh(*fp->tx_cons_sb); return (hw_cons != fp->tx_pkt_cons); } static inline uint8_t bxe_has_tx_work(struct bxe_fastpath *fp) { /* expand this for multi-cos if ever supported */ return (bxe_tx_queue_has_work(fp)) ? TRUE : FALSE; } static inline int bxe_has_rx_work(struct bxe_fastpath *fp) { uint16_t rx_cq_cons_sb; mb(); /* status block fields can change */ rx_cq_cons_sb = le16toh(*fp->rx_cq_cons_sb); if ((rx_cq_cons_sb & RCQ_MAX) == RCQ_MAX) rx_cq_cons_sb++; return (fp->rx_cq_cons != rx_cq_cons_sb); } static void bxe_sp_event(struct bxe_softc *sc, struct bxe_fastpath *fp, union eth_rx_cqe *rr_cqe) { int cid = SW_CID(rr_cqe->ramrod_cqe.conn_and_cmd_data); int command = CQE_CMD(rr_cqe->ramrod_cqe.conn_and_cmd_data); enum ecore_queue_cmd drv_cmd = ECORE_Q_CMD_MAX; struct ecore_queue_sp_obj *q_obj = &BXE_SP_OBJ(sc, fp).q_obj; BLOGD(sc, DBG_SP, "fp=%d cid=%d got ramrod #%d state is %x type is %d\n", fp->index, cid, command, sc->state, rr_cqe->ramrod_cqe.ramrod_type); #if 0 /* * If cid is within VF range, replace the slowpath object with the * one corresponding to this VF */ if ((cid >= BXE_FIRST_VF_CID) && (cid < BXE_FIRST_VF_CID + BXE_VF_CIDS)) { bxe_iov_set_queue_sp_obj(sc, cid, &q_obj); } #endif switch (command) { case (RAMROD_CMD_ID_ETH_CLIENT_UPDATE): BLOGD(sc, DBG_SP, "got UPDATE ramrod. CID %d\n", cid); drv_cmd = ECORE_Q_CMD_UPDATE; break; case (RAMROD_CMD_ID_ETH_CLIENT_SETUP): BLOGD(sc, DBG_SP, "got MULTI[%d] setup ramrod\n", cid); drv_cmd = ECORE_Q_CMD_SETUP; break; case (RAMROD_CMD_ID_ETH_TX_QUEUE_SETUP): BLOGD(sc, DBG_SP, "got MULTI[%d] tx-only setup ramrod\n", cid); drv_cmd = ECORE_Q_CMD_SETUP_TX_ONLY; break; case (RAMROD_CMD_ID_ETH_HALT): BLOGD(sc, DBG_SP, "got MULTI[%d] halt ramrod\n", cid); drv_cmd = ECORE_Q_CMD_HALT; break; case (RAMROD_CMD_ID_ETH_TERMINATE): BLOGD(sc, DBG_SP, "got MULTI[%d] teminate ramrod\n", cid); drv_cmd = ECORE_Q_CMD_TERMINATE; break; case (RAMROD_CMD_ID_ETH_EMPTY): BLOGD(sc, DBG_SP, "got MULTI[%d] empty ramrod\n", cid); drv_cmd = ECORE_Q_CMD_EMPTY; break; default: BLOGD(sc, DBG_SP, "ERROR: unexpected MC reply (%d) on fp[%d]\n", command, fp->index); return; } if ((drv_cmd != ECORE_Q_CMD_MAX) && q_obj->complete_cmd(sc, q_obj, drv_cmd)) { /* * q_obj->complete_cmd() failure means that this was * an unexpected completion. * * In this case we don't want to increase the sc->spq_left * because apparently we haven't sent this command the first * place. */ // bxe_panic(sc, ("Unexpected SP completion\n")); return; } #if 0 /* SRIOV: reschedule any 'in_progress' operations */ bxe_iov_sp_event(sc, cid, TRUE); #endif atomic_add_acq_long(&sc->cq_spq_left, 1); BLOGD(sc, DBG_SP, "sc->cq_spq_left 0x%lx\n", atomic_load_acq_long(&sc->cq_spq_left)); #if 0 if ((drv_cmd == ECORE_Q_CMD_UPDATE) && (IS_FCOE_FP(fp)) && (!!bxe_test_bit(ECORE_AFEX_FCOE_Q_UPDATE_PENDING, &sc->sp_state))) { /* * If Queue update ramrod is completed for last Queue in AFEX VIF set * flow, then ACK MCP at the end. Mark pending ACK to MCP bit to * prevent case that both bits are cleared. At the end of load/unload * driver checks that sp_state is cleared and this order prevents * races. */ bxe_set_bit(ECORE_AFEX_PENDING_VIFSET_MCP_ACK, &sc->sp_state); wmb(); bxe_clear_bit(ECORE_AFEX_FCOE_Q_UPDATE_PENDING, &sc->sp_state); /* schedule the sp task as MCP ack is required */ bxe_schedule_sp_task(sc); } #endif } /* * The current mbuf is part of an aggregation. Move the mbuf into the TPA * aggregation queue, put an empty mbuf back onto the receive chain, and mark * the current aggregation queue as in-progress. */ static void bxe_tpa_start(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t queue, uint16_t cons, uint16_t prod, struct eth_fast_path_rx_cqe *cqe) { struct bxe_sw_rx_bd tmp_bd; struct bxe_sw_rx_bd *rx_buf; struct eth_rx_bd *rx_bd; int max_agg_queues; struct bxe_sw_tpa_info *tpa_info = &fp->rx_tpa_info[queue]; uint16_t index; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA START " "cons=%d prod=%d\n", fp->index, queue, cons, prod); max_agg_queues = MAX_AGG_QS(sc); KASSERT((queue < max_agg_queues), ("fp[%02d] invalid aggr queue (%d >= %d)!", fp->index, queue, max_agg_queues)); KASSERT((tpa_info->state == BXE_TPA_STATE_STOP), ("fp[%02d].tpa[%02d] starting aggr on queue not stopped!", fp->index, queue)); /* copy the existing mbuf and mapping from the TPA pool */ tmp_bd = tpa_info->bd; if (tmp_bd.m == NULL) { BLOGE(sc, "fp[%02d].tpa[%02d] mbuf not allocated!\n", fp->index, queue); /* XXX Error handling? */ return; } /* change the TPA queue to the start state */ tpa_info->state = BXE_TPA_STATE_START; tpa_info->placement_offset = cqe->placement_offset; tpa_info->parsing_flags = le16toh(cqe->pars_flags.flags); tpa_info->vlan_tag = le16toh(cqe->vlan_tag); tpa_info->len_on_bd = le16toh(cqe->len_on_bd); fp->rx_tpa_queue_used |= (1 << queue); /* * If all the buffer descriptors are filled with mbufs then fill in * the current consumer index with a new BD. Else if a maximum Rx * buffer limit is imposed then fill in the next producer index. */ index = (sc->max_rx_bufs != RX_BD_USABLE) ? prod : cons; /* move the received mbuf and mapping to TPA pool */ tpa_info->bd = fp->rx_mbuf_chain[cons]; /* release any existing RX BD mbuf mappings */ if (cons != index) { rx_buf = &fp->rx_mbuf_chain[cons]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* * We get here when the maximum number of rx buffers is less than * RX_BD_USABLE. The mbuf is already saved above so it's OK to NULL * it out here without concern of a memory leak. */ fp->rx_mbuf_chain[cons].m = NULL; } /* update the Rx SW BD with the mbuf info from the TPA pool */ fp->rx_mbuf_chain[index] = tmp_bd; /* update the Rx BD with the empty mbuf phys address from the TPA pool */ rx_bd = &fp->rx_chain[index]; rx_bd->addr_hi = htole32(U64_HI(tpa_info->seg.ds_addr)); rx_bd->addr_lo = htole32(U64_LO(tpa_info->seg.ds_addr)); } /* * When a TPA aggregation is completed, loop through the individual mbufs * of the aggregation, combining them into a single mbuf which will be sent * up the stack. Refill all freed SGEs with mbufs as we go along. */ static int bxe_fill_frag_mbuf(struct bxe_softc *sc, struct bxe_fastpath *fp, struct bxe_sw_tpa_info *tpa_info, uint16_t queue, uint16_t pages, struct mbuf *m, struct eth_end_agg_rx_cqe *cqe, uint16_t cqe_idx) { struct mbuf *m_frag; uint32_t frag_len, frag_size, i; uint16_t sge_idx; int rc = 0; int j; frag_size = le16toh(cqe->pkt_len) - tpa_info->len_on_bd; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill len_on_bd=%d frag_size=%d pages=%d\n", fp->index, queue, tpa_info->len_on_bd, frag_size, pages); /* make sure the aggregated frame is not too big to handle */ if (pages > 8 * PAGES_PER_SGE) { BLOGE(sc, "fp[%02d].sge[0x%04x] has too many pages (%d)! " "pkt_len=%d len_on_bd=%d frag_size=%d\n", fp->index, cqe_idx, pages, le16toh(cqe->pkt_len), tpa_info->len_on_bd, frag_size); bxe_panic(sc, ("sge page count error\n")); return (EINVAL); } /* * Scan through the scatter gather list pulling individual mbufs into a * single mbuf for the host stack. */ for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) { sge_idx = RX_SGE(le16toh(cqe->sgl_or_raw_data.sgl[j])); /* * Firmware gives the indices of the SGE as if the ring is an array * (meaning that the "next" element will consume 2 indices). */ frag_len = min(frag_size, (uint32_t)(SGE_PAGES)); BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill i=%d j=%d " "sge_idx=%d frag_size=%d frag_len=%d\n", fp->index, queue, i, j, sge_idx, frag_size, frag_len); m_frag = fp->rx_sge_mbuf_chain[sge_idx].m; /* allocate a new mbuf for the SGE */ rc = bxe_alloc_rx_sge_mbuf(fp, sge_idx); if (rc) { /* Leave all remaining SGEs in the ring! */ return (rc); } /* update the fragment length */ m_frag->m_len = frag_len; /* concatenate the fragment to the head mbuf */ m_cat(m, m_frag); fp->eth_q_stats.mbuf_alloc_sge--; /* update the TPA mbuf size and remaining fragment size */ m->m_pkthdr.len += frag_len; frag_size -= frag_len; } BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill done frag_size=%d\n", fp->index, queue, frag_size); return (rc); } static inline void bxe_clear_sge_mask_next_elems(struct bxe_fastpath *fp) { int i, j; for (i = 1; i <= RX_SGE_NUM_PAGES; i++) { int idx = RX_SGE_TOTAL_PER_PAGE * i - 1; for (j = 0; j < 2; j++) { BIT_VEC64_CLEAR_BIT(fp->sge_mask, idx); idx--; } } } static inline void bxe_init_sge_ring_bit_mask(struct bxe_fastpath *fp) { /* set the mask to all 1's, it's faster to compare to 0 than to 0xf's */ memset(fp->sge_mask, 0xff, sizeof(fp->sge_mask)); /* * Clear the two last indices in the page to 1. These are the indices that * correspond to the "next" element, hence will never be indicated and * should be removed from the calculations. */ bxe_clear_sge_mask_next_elems(fp); } static inline void bxe_update_last_max_sge(struct bxe_fastpath *fp, uint16_t idx) { uint16_t last_max = fp->last_max_sge; if (SUB_S16(idx, last_max) > 0) { fp->last_max_sge = idx; } } static inline void bxe_update_sge_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t sge_len, struct eth_end_agg_rx_cqe *cqe) { uint16_t last_max, last_elem, first_elem; uint16_t delta = 0; uint16_t i; if (!sge_len) { return; } /* first mark all used pages */ for (i = 0; i < sge_len; i++) { BIT_VEC64_CLEAR_BIT(fp->sge_mask, RX_SGE(le16toh(cqe->sgl_or_raw_data.sgl[i]))); } BLOGD(sc, DBG_LRO, "fp[%02d] fp_cqe->sgl[%d] = %d\n", fp->index, sge_len - 1, le16toh(cqe->sgl_or_raw_data.sgl[sge_len - 1])); /* assume that the last SGE index is the biggest */ bxe_update_last_max_sge(fp, le16toh(cqe->sgl_or_raw_data.sgl[sge_len - 1])); last_max = RX_SGE(fp->last_max_sge); last_elem = last_max >> BIT_VEC64_ELEM_SHIFT; first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT; /* if ring is not full */ if (last_elem + 1 != first_elem) { last_elem++; } /* now update the prod */ for (i = first_elem; i != last_elem; i = RX_SGE_NEXT_MASK_ELEM(i)) { if (__predict_true(fp->sge_mask[i])) { break; } fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK; delta += BIT_VEC64_ELEM_SZ; } if (delta > 0) { fp->rx_sge_prod += delta; /* clear page-end entries */ bxe_clear_sge_mask_next_elems(fp); } BLOGD(sc, DBG_LRO, "fp[%02d] fp->last_max_sge=%d fp->rx_sge_prod=%d\n", fp->index, fp->last_max_sge, fp->rx_sge_prod); } /* * The aggregation on the current TPA queue has completed. Pull the individual * mbuf fragments together into a single mbuf, perform all necessary checksum * calculations, and send the resuting mbuf to the stack. */ static void bxe_tpa_stop(struct bxe_softc *sc, struct bxe_fastpath *fp, struct bxe_sw_tpa_info *tpa_info, uint16_t queue, uint16_t pages, struct eth_end_agg_rx_cqe *cqe, uint16_t cqe_idx) { struct ifnet *ifp = sc->ifnet; struct mbuf *m; int rc = 0; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] pad=%d pkt_len=%d pages=%d vlan=%d\n", fp->index, queue, tpa_info->placement_offset, le16toh(cqe->pkt_len), pages, tpa_info->vlan_tag); m = tpa_info->bd.m; /* allocate a replacement before modifying existing mbuf */ rc = bxe_alloc_rx_tpa_mbuf(fp, queue); if (rc) { /* drop the frame and log an error */ fp->eth_q_stats.rx_soft_errors++; goto bxe_tpa_stop_exit; } /* we have a replacement, fixup the current mbuf */ m_adj(m, tpa_info->placement_offset); m->m_pkthdr.len = m->m_len = tpa_info->len_on_bd; /* mark the checksums valid (taken care of by the firmware) */ fp->eth_q_stats.rx_ofld_frames_csum_ip++; fp->eth_q_stats.rx_ofld_frames_csum_tcp_udp++; m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); /* aggregate all of the SGEs into a single mbuf */ rc = bxe_fill_frag_mbuf(sc, fp, tpa_info, queue, pages, m, cqe, cqe_idx); if (rc) { /* drop the packet and log an error */ fp->eth_q_stats.rx_soft_errors++; m_freem(m); } else { if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN) { m->m_pkthdr.ether_vtag = tpa_info->vlan_tag; m->m_flags |= M_VLANTAG; } /* assign packet to this interface interface */ m->m_pkthdr.rcvif = ifp; #if __FreeBSD_version >= 800000 /* specify what RSS queue was used for this flow */ m->m_pkthdr.flowid = fp->index; m->m_flags |= M_FLOWID; #endif ifp->if_ipackets++; fp->eth_q_stats.rx_tpa_pkts++; /* pass the frame to the stack */ (*ifp->if_input)(ifp, m); } /* we passed an mbuf up the stack or dropped the frame */ fp->eth_q_stats.mbuf_alloc_tpa--; bxe_tpa_stop_exit: fp->rx_tpa_info[queue].state = BXE_TPA_STATE_STOP; fp->rx_tpa_queue_used &= ~(1 << queue); } static uint8_t bxe_rxeof(struct bxe_softc *sc, struct bxe_fastpath *fp) { struct ifnet *ifp = sc->ifnet; uint16_t bd_cons, bd_prod, bd_prod_fw, comp_ring_cons; uint16_t hw_cq_cons, sw_cq_cons, sw_cq_prod; int rx_pkts = 0; int rc; BXE_FP_RX_LOCK(fp); /* CQ "next element" is of the size of the regular element */ hw_cq_cons = le16toh(*fp->rx_cq_cons_sb); if ((hw_cq_cons & RCQ_USABLE_PER_PAGE) == RCQ_USABLE_PER_PAGE) { hw_cq_cons++; } bd_cons = fp->rx_bd_cons; bd_prod = fp->rx_bd_prod; bd_prod_fw = bd_prod; sw_cq_cons = fp->rx_cq_cons; sw_cq_prod = fp->rx_cq_prod; /* * Memory barrier necessary as speculative reads of the rx * buffer can be ahead of the index in the status block */ rmb(); BLOGD(sc, DBG_RX, "fp[%02d] Rx START hw_cq_cons=%u sw_cq_cons=%u\n", fp->index, hw_cq_cons, sw_cq_cons); while (sw_cq_cons != hw_cq_cons) { struct bxe_sw_rx_bd *rx_buf = NULL; union eth_rx_cqe *cqe; struct eth_fast_path_rx_cqe *cqe_fp; uint8_t cqe_fp_flags; enum eth_rx_cqe_type cqe_fp_type; uint16_t len, pad; struct mbuf *m = NULL; comp_ring_cons = RCQ(sw_cq_cons); bd_prod = RX_BD(bd_prod); bd_cons = RX_BD(bd_cons); cqe = &fp->rcq_chain[comp_ring_cons]; cqe_fp = &cqe->fast_path_cqe; cqe_fp_flags = cqe_fp->type_error_flags; cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE; BLOGD(sc, DBG_RX, "fp[%02d] Rx hw_cq_cons=%d hw_sw_cons=%d " "BD prod=%d cons=%d CQE type=0x%x err=0x%x " "status=0x%x rss_hash=0x%x vlan=0x%x len=%u\n", fp->index, hw_cq_cons, sw_cq_cons, bd_prod, bd_cons, CQE_TYPE(cqe_fp_flags), cqe_fp_flags, cqe_fp->status_flags, le32toh(cqe_fp->rss_hash_result), le16toh(cqe_fp->vlan_tag), le16toh(cqe_fp->pkt_len_or_gro_seg_len)); /* is this a slowpath msg? */ if (__predict_false(CQE_TYPE_SLOW(cqe_fp_type))) { bxe_sp_event(sc, fp, cqe); goto next_cqe; } rx_buf = &fp->rx_mbuf_chain[bd_cons]; if (!CQE_TYPE_FAST(cqe_fp_type)) { struct bxe_sw_tpa_info *tpa_info; uint16_t frag_size, pages; uint8_t queue; #if 0 /* sanity check */ if (!fp->tpa_enable && (CQE_TYPE_START(cqe_fp_type) || CQE_TYPE_STOP(cqe_fp_type))) { BLOGE(sc, "START/STOP packet while !tpa_enable type (0x%x)\n", CQE_TYPE(cqe_fp_type)); } #endif if (CQE_TYPE_START(cqe_fp_type)) { bxe_tpa_start(sc, fp, cqe_fp->queue_index, bd_cons, bd_prod, cqe_fp); m = NULL; /* packet not ready yet */ goto next_rx; } KASSERT(CQE_TYPE_STOP(cqe_fp_type), ("CQE type is not STOP! (0x%x)\n", cqe_fp_type)); queue = cqe->end_agg_cqe.queue_index; tpa_info = &fp->rx_tpa_info[queue]; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA STOP\n", fp->index, queue); frag_size = (le16toh(cqe->end_agg_cqe.pkt_len) - tpa_info->len_on_bd); pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT; bxe_tpa_stop(sc, fp, tpa_info, queue, pages, &cqe->end_agg_cqe, comp_ring_cons); bxe_update_sge_prod(sc, fp, pages, &cqe->end_agg_cqe); goto next_cqe; } /* non TPA */ /* is this an error packet? */ if (__predict_false(cqe_fp_flags & ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG)) { BLOGE(sc, "flags 0x%x rx packet %u\n", cqe_fp_flags, sw_cq_cons); fp->eth_q_stats.rx_soft_errors++; goto next_rx; } len = le16toh(cqe_fp->pkt_len_or_gro_seg_len); pad = cqe_fp->placement_offset; m = rx_buf->m; if (__predict_false(m == NULL)) { BLOGE(sc, "No mbuf in rx chain descriptor %d for fp[%02d]\n", bd_cons, fp->index); goto next_rx; } /* XXX double copy if packet length under a threshold */ /* * If all the buffer descriptors are filled with mbufs then fill in * the current consumer index with a new BD. Else if a maximum Rx * buffer limit is imposed then fill in the next producer index. */ rc = bxe_alloc_rx_bd_mbuf(fp, bd_cons, (sc->max_rx_bufs != RX_BD_USABLE) ? bd_prod : bd_cons); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] rx chain (%d)\n", fp->index, rc); fp->eth_q_stats.rx_soft_errors++; if (sc->max_rx_bufs != RX_BD_USABLE) { /* copy this consumer index to the producer index */ memcpy(&fp->rx_mbuf_chain[bd_prod], rx_buf, sizeof(struct bxe_sw_rx_bd)); memset(rx_buf, 0, sizeof(struct bxe_sw_rx_bd)); } goto next_rx; } /* current mbuf was detached from the bd */ fp->eth_q_stats.mbuf_alloc_rx--; /* we allocated a replacement mbuf, fixup the current one */ m_adj(m, pad); m->m_pkthdr.len = m->m_len = len; /* assign packet to this interface interface */ m->m_pkthdr.rcvif = ifp; /* assume no hardware checksum has complated */ m->m_pkthdr.csum_flags = 0; /* validate checksum if offload enabled */ if (ifp->if_capenable & IFCAP_RXCSUM) { /* check for a valid IP frame */ if (!(cqe->fast_path_cqe.status_flags & ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG)) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (__predict_false(cqe_fp_flags & ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG)) { fp->eth_q_stats.rx_hw_csum_errors++; } else { fp->eth_q_stats.rx_ofld_frames_csum_ip++; m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } } /* check for a valid TCP/UDP frame */ if (!(cqe->fast_path_cqe.status_flags & ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) { if (__predict_false(cqe_fp_flags & ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) { fp->eth_q_stats.rx_hw_csum_errors++; } else { fp->eth_q_stats.rx_ofld_frames_csum_tcp_udp++; m->m_pkthdr.csum_data = 0xFFFF; m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } } } /* if there is a VLAN tag then flag that info */ if (cqe->fast_path_cqe.pars_flags.flags & PARSING_FLAGS_VLAN) { m->m_pkthdr.ether_vtag = cqe->fast_path_cqe.vlan_tag; m->m_flags |= M_VLANTAG; } #if __FreeBSD_version >= 800000 /* specify what RSS queue was used for this flow */ m->m_pkthdr.flowid = fp->index; m->m_flags |= M_FLOWID; #endif next_rx: bd_cons = RX_BD_NEXT(bd_cons); bd_prod = RX_BD_NEXT(bd_prod); bd_prod_fw = RX_BD_NEXT(bd_prod_fw); /* pass the frame to the stack */ if (__predict_true(m != NULL)) { ifp->if_ipackets++; rx_pkts++; (*ifp->if_input)(ifp, m); } next_cqe: sw_cq_prod = RCQ_NEXT(sw_cq_prod); sw_cq_cons = RCQ_NEXT(sw_cq_cons); /* limit spinning on the queue */ if (rx_pkts == sc->rx_budget) { fp->eth_q_stats.rx_budget_reached++; break; } } /* while work to do */ fp->rx_bd_cons = bd_cons; fp->rx_bd_prod = bd_prod_fw; fp->rx_cq_cons = sw_cq_cons; fp->rx_cq_prod = sw_cq_prod; /* Update producers */ bxe_update_rx_prod(sc, fp, bd_prod_fw, sw_cq_prod, fp->rx_sge_prod); fp->eth_q_stats.rx_pkts += rx_pkts; fp->eth_q_stats.rx_calls++; BXE_FP_RX_UNLOCK(fp); return (sw_cq_cons != hw_cq_cons); } static uint16_t bxe_free_tx_pkt(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t idx) { struct bxe_sw_tx_bd *tx_buf = &fp->tx_mbuf_chain[idx]; struct eth_tx_start_bd *tx_start_bd; uint16_t bd_idx = TX_BD(tx_buf->first_bd); uint16_t new_cons; int nbd; /* unmap the mbuf from non-paged memory */ bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); tx_start_bd = &fp->tx_chain[bd_idx].start_bd; nbd = le16toh(tx_start_bd->nbd) - 1; #if 0 if ((nbd - 1) > (MAX_MBUF_FRAGS + 2)) { bxe_panic(sc, ("BAD nbd!\n")); } #endif new_cons = (tx_buf->first_bd + nbd); #if 0 struct eth_tx_bd *tx_data_bd; /* * The following code doesn't do anything but is left here * for clarity on what the new value of new_cons skipped. */ /* get the next bd */ bd_idx = TX_BD(TX_BD_NEXT(bd_idx)); /* skip the parse bd */ --nbd; bd_idx = TX_BD(TX_BD_NEXT(bd_idx)); /* skip the TSO split header bd since they have no mapping */ if (tx_buf->flags & BXE_TSO_SPLIT_BD) { --nbd; bd_idx = TX_BD(TX_BD_NEXT(bd_idx)); } /* now free frags */ while (nbd > 0) { tx_data_bd = &fp->tx_chain[bd_idx].reg_bd; if (--nbd) { bd_idx = TX_BD(TX_BD_NEXT(bd_idx)); } } #endif /* free the mbuf */ if (__predict_true(tx_buf->m != NULL)) { m_freem(tx_buf->m); fp->eth_q_stats.mbuf_alloc_tx--; } else { fp->eth_q_stats.tx_chain_lost_mbuf++; } tx_buf->m = NULL; tx_buf->first_bd = 0; return (new_cons); } /* transmit timeout watchdog */ static int bxe_watchdog(struct bxe_softc *sc, struct bxe_fastpath *fp) { BXE_FP_TX_LOCK(fp); if ((fp->watchdog_timer == 0) || (--fp->watchdog_timer)) { BXE_FP_TX_UNLOCK(fp); return (0); } BLOGE(sc, "TX watchdog timeout on fp[%02d], resetting!\n", fp->index); BXE_FP_TX_UNLOCK(fp); atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_REINIT); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); return (-1); } /* processes transmit completions */ static uint8_t bxe_txeof(struct bxe_softc *sc, struct bxe_fastpath *fp) { struct ifnet *ifp = sc->ifnet; uint16_t bd_cons, hw_cons, sw_cons, pkt_cons; uint16_t tx_bd_avail; BXE_FP_TX_LOCK_ASSERT(fp); bd_cons = fp->tx_bd_cons; hw_cons = le16toh(*fp->tx_cons_sb); sw_cons = fp->tx_pkt_cons; while (sw_cons != hw_cons) { pkt_cons = TX_BD(sw_cons); BLOGD(sc, DBG_TX, "TX: fp[%d]: hw_cons=%u sw_cons=%u pkt_cons=%u\n", fp->index, hw_cons, sw_cons, pkt_cons); bd_cons = bxe_free_tx_pkt(sc, fp, pkt_cons); sw_cons++; } fp->tx_pkt_cons = sw_cons; fp->tx_bd_cons = bd_cons; BLOGD(sc, DBG_TX, "TX done: fp[%d]: hw_cons=%u sw_cons=%u sw_prod=%u\n", fp->index, hw_cons, fp->tx_pkt_cons, fp->tx_pkt_prod); mb(); tx_bd_avail = bxe_tx_avail(sc, fp); if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; } else { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } if (fp->tx_pkt_prod != fp->tx_pkt_cons) { /* reset the watchdog timer if there are pending transmits */ fp->watchdog_timer = BXE_TX_TIMEOUT; return (TRUE); } else { /* clear watchdog when there are no pending transmits */ fp->watchdog_timer = 0; return (FALSE); } } static void bxe_drain_tx_queues(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i, count; /* wait until all TX fastpath tasks have completed */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; count = 1000; while (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); if (count == 0) { BLOGE(sc, "Timeout waiting for fp[%d] " "transmits to complete!\n", i); bxe_panic(sc, ("tx drain failure\n")); return; } count--; DELAY(1000); rmb(); } } return; } static int bxe_del_all_macs(struct bxe_softc *sc, struct ecore_vlan_mac_obj *mac_obj, int mac_type, uint8_t wait_for_comp) { unsigned long ramrod_flags = 0, vlan_mac_flags = 0; int rc; /* wait for completion of requested */ if (wait_for_comp) { bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); } /* Set the mac type of addresses we want to clear */ bxe_set_bit(mac_type, &vlan_mac_flags); rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc < 0) { BLOGE(sc, "Failed to delete MACs (%d)\n", rc); } return (rc); } static int bxe_fill_accept_flags(struct bxe_softc *sc, uint32_t rx_mode, unsigned long *rx_accept_flags, unsigned long *tx_accept_flags) { /* Clear the flags first */ *rx_accept_flags = 0; *tx_accept_flags = 0; switch (rx_mode) { case BXE_RX_MODE_NONE: /* * 'drop all' supersedes any accept flags that may have been * passed to the function. */ break; case BXE_RX_MODE_NORMAL: bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags); /* internal switching mode */ bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags); break; case BXE_RX_MODE_ALLMULTI: bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags); /* internal switching mode */ bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags); break; case BXE_RX_MODE_PROMISC: /* * According to deffinition of SI mode, iface in promisc mode * should receive matched and unmatched (in resolution of port) * unicast packets. */ bxe_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags); /* internal switching mode */ bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags); bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags); if (IS_MF_SI(sc)) { bxe_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags); } else { bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags); } break; default: BLOGE(sc, "Unknown rx_mode (%d)\n", rx_mode); return (-1); } /* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */ if (rx_mode != BXE_RX_MODE_NONE) { bxe_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags); bxe_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags); } return (0); } static int bxe_set_q_rx_mode(struct bxe_softc *sc, uint8_t cl_id, unsigned long rx_mode_flags, unsigned long rx_accept_flags, unsigned long tx_accept_flags, unsigned long ramrod_flags) { struct ecore_rx_mode_ramrod_params ramrod_param; int rc; memset(&ramrod_param, 0, sizeof(ramrod_param)); /* Prepare ramrod parameters */ ramrod_param.cid = 0; ramrod_param.cl_id = cl_id; ramrod_param.rx_mode_obj = &sc->rx_mode_obj; ramrod_param.func_id = SC_FUNC(sc); ramrod_param.pstate = &sc->sp_state; ramrod_param.state = ECORE_FILTER_RX_MODE_PENDING; ramrod_param.rdata = BXE_SP(sc, rx_mode_rdata); ramrod_param.rdata_mapping = BXE_SP_MAPPING(sc, rx_mode_rdata); bxe_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state); ramrod_param.ramrod_flags = ramrod_flags; ramrod_param.rx_mode_flags = rx_mode_flags; ramrod_param.rx_accept_flags = rx_accept_flags; ramrod_param.tx_accept_flags = tx_accept_flags; rc = ecore_config_rx_mode(sc, &ramrod_param); if (rc < 0) { BLOGE(sc, "Set rx_mode %d failed\n", sc->rx_mode); return (rc); } return (0); } static int bxe_set_storm_rx_mode(struct bxe_softc *sc) { unsigned long rx_mode_flags = 0, ramrod_flags = 0; unsigned long rx_accept_flags = 0, tx_accept_flags = 0; int rc; rc = bxe_fill_accept_flags(sc, sc->rx_mode, &rx_accept_flags, &tx_accept_flags); if (rc) { return (rc); } bxe_set_bit(RAMROD_RX, &ramrod_flags); bxe_set_bit(RAMROD_TX, &ramrod_flags); /* XXX ensure all fastpath have same cl_id and/or move it to bxe_softc */ return (bxe_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags, rx_accept_flags, tx_accept_flags, ramrod_flags)); } /* returns the "mcp load_code" according to global load_count array */ static int bxe_nic_load_no_mcp(struct bxe_softc *sc) { int path = SC_PATH(sc); int port = SC_PORT(sc); BLOGI(sc, "NO MCP - load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); load_count[path][0]++; load_count[path][1 + port]++; BLOGI(sc, "NO MCP - new load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); if (load_count[path][0] == 1) { return (FW_MSG_CODE_DRV_LOAD_COMMON); } else if (load_count[path][1 + port] == 1) { return (FW_MSG_CODE_DRV_LOAD_PORT); } else { return (FW_MSG_CODE_DRV_LOAD_FUNCTION); } } /* returns the "mcp load_code" according to global load_count array */ static int bxe_nic_unload_no_mcp(struct bxe_softc *sc) { int port = SC_PORT(sc); int path = SC_PATH(sc); BLOGI(sc, "NO MCP - load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); load_count[path][0]--; load_count[path][1 + port]--; BLOGI(sc, "NO MCP - new load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); if (load_count[path][0] == 0) { return (FW_MSG_CODE_DRV_UNLOAD_COMMON); } else if (load_count[path][1 + port] == 0) { return (FW_MSG_CODE_DRV_UNLOAD_PORT); } else { return (FW_MSG_CODE_DRV_UNLOAD_FUNCTION); } } /* request unload mode from the MCP: COMMON, PORT or FUNCTION */ static uint32_t bxe_send_unload_req(struct bxe_softc *sc, int unload_mode) { uint32_t reset_code = 0; #if 0 int port = SC_PORT(sc); int path = SC_PATH(sc); #endif /* Select the UNLOAD request mode */ if (unload_mode == UNLOAD_NORMAL) { reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; } #if 0 else if (sc->flags & BXE_NO_WOL_FLAG) { reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP; } else if (sc->wol) { uint32_t emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0; uint8_t *mac_addr = sc->dev->dev_addr; uint32_t val; uint16_t pmc; /* * The mac address is written to entries 1-4 to * preserve entry 0 which is used by the PMF */ uint8_t entry = (SC_VN(sc) + 1)*8; val = (mac_addr[0] << 8) | mac_addr[1]; EMAC_WR(sc, EMAC_REG_EMAC_MAC_MATCH + entry, val); val = (mac_addr[2] << 24) | (mac_addr[3] << 16) | (mac_addr[4] << 8) | mac_addr[5]; EMAC_WR(sc, EMAC_REG_EMAC_MAC_MATCH + entry + 4, val); /* Enable the PME and clear the status */ pmc = pci_read_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), 2); pmc |= PCIM_PSTAT_PMEENABLE | PCIM_PSTAT_PME; pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), pmc, 4); reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_EN; } #endif else { reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; } /* Send the request to the MCP */ if (!BXE_NOMCP(sc)) { reset_code = bxe_fw_command(sc, reset_code, 0); } else { reset_code = bxe_nic_unload_no_mcp(sc); } return (reset_code); } /* send UNLOAD_DONE command to the MCP */ static void bxe_send_unload_done(struct bxe_softc *sc, uint8_t keep_link) { uint32_t reset_param = keep_link ? DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET : 0; /* Report UNLOAD_DONE to MCP */ if (!BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, reset_param); } } static int bxe_func_wait_started(struct bxe_softc *sc) { int tout = 50; if (!sc->port.pmf) { return (0); } /* * (assumption: No Attention from MCP at this stage) * PMF probably in the middle of TX disable/enable transaction * 1. Sync IRS for default SB * 2. Sync SP queue - this guarantees us that attention handling started * 3. Wait, that TX disable/enable transaction completes * * 1+2 guarantee that if DCBX attention was scheduled it already changed * pending bit of transaction from STARTED-->TX_STOPPED, if we already * received completion for the transaction the state is TX_STOPPED. * State will return to STARTED after completion of TX_STOPPED-->STARTED * transaction. */ /* XXX make sure default SB ISR is done */ /* need a way to synchronize an irq (intr_mtx?) */ /* XXX flush any work queues */ while (ecore_func_get_state(sc, &sc->func_obj) != ECORE_F_STATE_STARTED && tout--) { DELAY(20000); } if (ecore_func_get_state(sc, &sc->func_obj) != ECORE_F_STATE_STARTED) { /* * Failed to complete the transaction in a "good way" * Force both transactions with CLR bit. */ struct ecore_func_state_params func_params = { NULL }; BLOGE(sc, "Unexpected function state! " "Forcing STARTED-->TX_STOPPED-->STARTED\n"); func_params.f_obj = &sc->func_obj; bxe_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags); /* STARTED-->TX_STOPPED */ func_params.cmd = ECORE_F_CMD_TX_STOP; ecore_func_state_change(sc, &func_params); /* TX_STOPPED-->STARTED */ func_params.cmd = ECORE_F_CMD_TX_START; return (ecore_func_state_change(sc, &func_params)); } return (0); } static int bxe_stop_queue(struct bxe_softc *sc, int index) { struct bxe_fastpath *fp = &sc->fp[index]; struct ecore_queue_state_params q_params = { NULL }; int rc; BLOGD(sc, DBG_LOAD, "stopping queue %d cid %d\n", index, fp->index); q_params.q_obj = &sc->sp_objs[fp->index].q_obj; /* We want to wait for completion in this context */ bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); /* Stop the primary connection: */ /* ...halt the connection */ q_params.cmd = ECORE_Q_CMD_HALT; rc = ecore_queue_state_change(sc, &q_params); if (rc) { return (rc); } /* ...terminate the connection */ q_params.cmd = ECORE_Q_CMD_TERMINATE; memset(&q_params.params.terminate, 0, sizeof(q_params.params.terminate)); q_params.params.terminate.cid_index = FIRST_TX_COS_INDEX; rc = ecore_queue_state_change(sc, &q_params); if (rc) { return (rc); } /* ...delete cfc entry */ q_params.cmd = ECORE_Q_CMD_CFC_DEL; memset(&q_params.params.cfc_del, 0, sizeof(q_params.params.cfc_del)); q_params.params.cfc_del.cid_index = FIRST_TX_COS_INDEX; return (ecore_queue_state_change(sc, &q_params)); } /* wait for the outstanding SP commands */ static inline uint8_t bxe_wait_sp_comp(struct bxe_softc *sc, unsigned long mask) { unsigned long tmp; int tout = 5000; /* wait for 5 secs tops */ while (tout--) { mb(); if (!(atomic_load_acq_long(&sc->sp_state) & mask)) { return (TRUE); } DELAY(1000); } mb(); tmp = atomic_load_acq_long(&sc->sp_state); if (tmp & mask) { BLOGE(sc, "Filtering completion timed out: " "sp_state 0x%lx, mask 0x%lx\n", tmp, mask); return (FALSE); } return (FALSE); } static int bxe_func_stop(struct bxe_softc *sc) { struct ecore_func_state_params func_params = { NULL }; int rc; /* prepare parameters for function state transitions */ bxe_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_STOP; /* * Try to stop the function the 'good way'. If it fails (in case * of a parity error during bxe_chip_cleanup()) and we are * not in a debug mode, perform a state transaction in order to * enable further HW_RESET transaction. */ rc = ecore_func_state_change(sc, &func_params); if (rc) { BLOGE(sc, "FUNC_STOP ramrod failed. " "Running a dry transaction\n"); bxe_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags); return (ecore_func_state_change(sc, &func_params)); } return (0); } static int bxe_reset_hw(struct bxe_softc *sc, uint32_t load_code) { struct ecore_func_state_params func_params = { NULL }; /* Prepare parameters for function state transitions */ bxe_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_HW_RESET; func_params.params.hw_init.load_phase = load_code; return (ecore_func_state_change(sc, &func_params)); } static void bxe_int_disable_sync(struct bxe_softc *sc, int disable_hw) { if (disable_hw) { /* prevent the HW from sending interrupts */ bxe_int_disable(sc); } /* XXX need a way to synchronize ALL irqs (intr_mtx?) */ /* make sure all ISRs are done */ /* XXX make sure sp_task is not running */ /* cancel and flush work queues */ } static void bxe_chip_cleanup(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link) { int port = SC_PORT(sc); struct ecore_mcast_ramrod_params rparam = { NULL }; uint32_t reset_code; int i, rc = 0; bxe_drain_tx_queues(sc); /* give HW time to discard old tx messages */ DELAY(1000); /* Clean all ETH MACs */ rc = bxe_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_ETH_MAC, FALSE); if (rc < 0) { BLOGE(sc, "Failed to delete all ETH MACs (%d)\n", rc); } /* Clean up UC list */ rc = bxe_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_UC_LIST_MAC, TRUE); if (rc < 0) { BLOGE(sc, "Failed to delete UC MACs list (%d)\n", rc); } /* Disable LLH */ if (!CHIP_IS_E1(sc)) { REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 0); } /* Set "drop all" to stop Rx */ /* * We need to take the BXE_MCAST_LOCK() here in order to prevent * a race between the completion code and this code. */ BXE_MCAST_LOCK(sc); if (bxe_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) { bxe_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state); } else { bxe_set_storm_rx_mode(sc); } /* Clean up multicast configuration */ rparam.mcast_obj = &sc->mcast_obj; rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to send DEL MCAST command (%d)\n", rc); } BXE_MCAST_UNLOCK(sc); // XXX bxe_iov_chip_cleanup(sc); /* * Send the UNLOAD_REQUEST to the MCP. This will return if * this function should perform FUNCTION, PORT, or COMMON HW * reset. */ reset_code = bxe_send_unload_req(sc, unload_mode); /* * (assumption: No Attention from MCP at this stage) * PMF probably in the middle of TX disable/enable transaction */ rc = bxe_func_wait_started(sc); if (rc) { BLOGE(sc, "bxe_func_wait_started failed\n"); } /* * Close multi and leading connections * Completions for ramrods are collected in a synchronous way */ for (i = 0; i < sc->num_queues; i++) { if (bxe_stop_queue(sc, i)) { goto unload_error; } } /* * If SP settings didn't get completed so far - something * very wrong has happen. */ if (!bxe_wait_sp_comp(sc, ~0x0UL)) { BLOGE(sc, "Common slow path ramrods got stuck!\n"); } unload_error: rc = bxe_func_stop(sc); if (rc) { BLOGE(sc, "Function stop failed!\n"); } /* disable HW interrupts */ bxe_int_disable_sync(sc, TRUE); /* detach interrupts */ bxe_interrupt_detach(sc); /* Reset the chip */ rc = bxe_reset_hw(sc, reset_code); if (rc) { BLOGE(sc, "Hardware reset failed\n"); } /* Report UNLOAD_DONE to MCP */ bxe_send_unload_done(sc, keep_link); } static void bxe_disable_close_the_gate(struct bxe_softc *sc) { uint32_t val; int port = SC_PORT(sc); BLOGD(sc, DBG_LOAD, "Disabling 'close the gates'\n"); if (CHIP_IS_E1(sc)) { uint32_t addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; val = REG_RD(sc, addr); val &= ~(0x300); REG_WR(sc, addr, val); } else { val = REG_RD(sc, MISC_REG_AEU_GENERAL_MASK); val &= ~(MISC_AEU_GENERAL_MASK_REG_AEU_PXP_CLOSE_MASK | MISC_AEU_GENERAL_MASK_REG_AEU_NIG_CLOSE_MASK); REG_WR(sc, MISC_REG_AEU_GENERAL_MASK, val); } } /* * Cleans the object that have internal lists without sending * ramrods. Should be run when interrutps are disabled. */ static void bxe_squeeze_objects(struct bxe_softc *sc) { unsigned long ramrod_flags = 0, vlan_mac_flags = 0; struct ecore_mcast_ramrod_params rparam = { NULL }; struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj; int rc; /* Cleanup MACs' object first... */ /* Wait for completion of requested */ bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Perform a dry cleanup */ bxe_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags); /* Clean ETH primary MAC */ bxe_set_bit(ECORE_ETH_MAC, &vlan_mac_flags); rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc != 0) { BLOGE(sc, "Failed to clean ETH MACs (%d)\n", rc); } /* Cleanup UC list */ vlan_mac_flags = 0; bxe_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags); rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc != 0) { BLOGE(sc, "Failed to clean UC list MACs (%d)\n", rc); } /* Now clean mcast object... */ rparam.mcast_obj = &sc->mcast_obj; bxe_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags); /* Add a DEL command... */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to send DEL MCAST command (%d)\n", rc); } /* now wait until all pending commands are cleared */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); while (rc != 0) { if (rc < 0) { BLOGE(sc, "Failed to clean MCAST object (%d)\n", rc); return; } rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); } } /* stop the controller */ static __noinline int bxe_nic_unload(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link) { uint8_t global = FALSE; uint32_t val; BXE_CORE_LOCK_ASSERT(sc); BLOGD(sc, DBG_LOAD, "Starting NIC unload...\n"); /* mark driver as unloaded in shmem2 */ if (IS_PF(sc) && SHMEM2_HAS(sc, drv_capabilities_flag)) { val = SHMEM2_RD(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)]); SHMEM2_WR(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)], val & ~DRV_FLAGS_CAPABILITIES_LOADED_L2); } if (IS_PF(sc) && sc->recovery_state != BXE_RECOVERY_DONE && (sc->state == BXE_STATE_CLOSED || sc->state == BXE_STATE_ERROR)) { /* * We can get here if the driver has been unloaded * during parity error recovery and is either waiting for a * leader to complete or for other functions to unload and * then ifconfig down has been issued. In this case we want to * unload and let other functions to complete a recovery * process. */ sc->recovery_state = BXE_RECOVERY_DONE; sc->is_leader = 0; bxe_release_leader_lock(sc); mb(); BLOGD(sc, DBG_LOAD, "Releasing a leadership...\n"); BLOGE(sc, "Can't unload in closed or error state\n"); return (-1); } /* * Nothing to do during unload if previous bxe_nic_load() * did not completed succesfully - all resourses are released. */ if ((sc->state == BXE_STATE_CLOSED) || (sc->state == BXE_STATE_ERROR)) { return (0); } sc->state = BXE_STATE_CLOSING_WAITING_HALT; mb(); /* stop tx */ bxe_tx_disable(sc); sc->rx_mode = BXE_RX_MODE_NONE; /* XXX set rx mode ??? */ if (IS_PF(sc)) { /* set ALWAYS_ALIVE bit in shmem */ sc->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; bxe_drv_pulse(sc); bxe_stats_handle(sc, STATS_EVENT_STOP); bxe_save_statistics(sc); } /* wait till consumers catch up with producers in all queues */ bxe_drain_tx_queues(sc); /* if VF indicate to PF this function is going down (PF will delete sp * elements and clear initializations */ if (IS_VF(sc)) { ; /* bxe_vfpf_close_vf(sc); */ } else if (unload_mode != UNLOAD_RECOVERY) { /* if this is a normal/close unload need to clean up chip */ bxe_chip_cleanup(sc, unload_mode, keep_link); } else { /* Send the UNLOAD_REQUEST to the MCP */ bxe_send_unload_req(sc, unload_mode); /* * Prevent transactions to host from the functions on the * engine that doesn't reset global blocks in case of global * attention once gloabl blocks are reset and gates are opened * (the engine which leader will perform the recovery * last). */ if (!CHIP_IS_E1x(sc)) { bxe_pf_disable(sc); } /* disable HW interrupts */ bxe_int_disable_sync(sc, TRUE); /* detach interrupts */ bxe_interrupt_detach(sc); /* Report UNLOAD_DONE to MCP */ bxe_send_unload_done(sc, FALSE); } /* * At this stage no more interrupts will arrive so we may safely clean * the queue'able objects here in case they failed to get cleaned so far. */ if (IS_PF(sc)) { bxe_squeeze_objects(sc); } /* There should be no more pending SP commands at this stage */ sc->sp_state = 0; sc->port.pmf = 0; bxe_free_fp_buffers(sc); if (IS_PF(sc)) { bxe_free_mem(sc); } bxe_free_fw_stats_mem(sc); sc->state = BXE_STATE_CLOSED; /* * Check if there are pending parity attentions. If there are - set * RECOVERY_IN_PROGRESS. */ if (IS_PF(sc) && bxe_chk_parity_attn(sc, &global, FALSE)) { bxe_set_reset_in_progress(sc); /* Set RESET_IS_GLOBAL if needed */ if (global) { bxe_set_reset_global(sc); } } /* * The last driver must disable a "close the gate" if there is no * parity attention or "process kill" pending. */ if (IS_PF(sc) && !bxe_clear_pf_load(sc) && bxe_reset_is_done(sc, SC_PATH(sc))) { bxe_disable_close_the_gate(sc); } BLOGD(sc, DBG_LOAD, "Ended NIC unload\n"); return (0); } /* * Called by the OS to set various media options (i.e. link, speed, etc.) when * the user runs "ifconfig bxe media ..." or "ifconfig bxe mediaopt ...". */ static int bxe_ifmedia_update(struct ifnet *ifp) { struct bxe_softc *sc = (struct bxe_softc *)ifp->if_softc; struct ifmedia *ifm; ifm = &sc->ifmedia; /* We only support Ethernet media type. */ if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) { return (EINVAL); } switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_CX4: case IFM_10G_SR: case IFM_10G_T: case IFM_10G_TWINAX: default: /* We don't support changing the media type. */ BLOGD(sc, DBG_LOAD, "Invalid media type (%d)\n", IFM_SUBTYPE(ifm->ifm_media)); return (EINVAL); } return (0); } /* * Called by the OS to get the current media status (i.e. link, speed, etc.). */ static void bxe_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct bxe_softc *sc = ifp->if_softc; /* Report link down if the driver isn't running. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { ifmr->ifm_active |= IFM_NONE; return; } /* Setup the default interface info. */ ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->link_vars.link_up) { ifmr->ifm_status |= IFM_ACTIVE; } else { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_active |= sc->media; if (sc->link_vars.duplex == DUPLEX_FULL) { ifmr->ifm_active |= IFM_FDX; } else { ifmr->ifm_active |= IFM_HDX; } } static int bxe_ioctl_nvram(struct bxe_softc *sc, uint32_t priv_op, struct ifreq *ifr) { struct bxe_nvram_data nvdata_base; struct bxe_nvram_data *nvdata; int len; int error = 0; copyin(ifr->ifr_data, &nvdata_base, sizeof(nvdata_base)); len = (sizeof(struct bxe_nvram_data) + nvdata_base.len - sizeof(uint32_t)); if (len > sizeof(struct bxe_nvram_data)) { if ((nvdata = (struct bxe_nvram_data *) malloc(len, M_DEVBUF, (M_NOWAIT | M_ZERO))) == NULL) { BLOGE(sc, "BXE_IOC_RD_NVRAM malloc failed\n"); return (1); } memcpy(nvdata, &nvdata_base, sizeof(struct bxe_nvram_data)); } else { nvdata = &nvdata_base; } if (priv_op == BXE_IOC_RD_NVRAM) { BLOGD(sc, DBG_IOCTL, "IOC_RD_NVRAM 0x%x %d\n", nvdata->offset, nvdata->len); error = bxe_nvram_read(sc, nvdata->offset, (uint8_t *)nvdata->value, nvdata->len); copyout(nvdata, ifr->ifr_data, len); } else { /* BXE_IOC_WR_NVRAM */ BLOGD(sc, DBG_IOCTL, "IOC_WR_NVRAM 0x%x %d\n", nvdata->offset, nvdata->len); copyin(ifr->ifr_data, nvdata, len); error = bxe_nvram_write(sc, nvdata->offset, (uint8_t *)nvdata->value, nvdata->len); } if (len > sizeof(struct bxe_nvram_data)) { free(nvdata, M_DEVBUF); } return (error); } static int bxe_ioctl_stats_show(struct bxe_softc *sc, uint32_t priv_op, struct ifreq *ifr) { const size_t str_size = (BXE_NUM_ETH_STATS * STAT_NAME_LEN); const size_t stats_size = (BXE_NUM_ETH_STATS * sizeof(uint64_t)); caddr_t p_tmp; uint32_t *offset; int i; switch (priv_op) { case BXE_IOC_STATS_SHOW_NUM: memset(ifr->ifr_data, 0, sizeof(union bxe_stats_show_data)); ((union bxe_stats_show_data *)ifr->ifr_data)->desc.num = BXE_NUM_ETH_STATS; ((union bxe_stats_show_data *)ifr->ifr_data)->desc.len = STAT_NAME_LEN; return (0); case BXE_IOC_STATS_SHOW_STR: memset(ifr->ifr_data, 0, str_size); p_tmp = ifr->ifr_data; for (i = 0; i < BXE_NUM_ETH_STATS; i++) { strcpy(p_tmp, bxe_eth_stats_arr[i].string); p_tmp += STAT_NAME_LEN; } return (0); case BXE_IOC_STATS_SHOW_CNT: memset(ifr->ifr_data, 0, stats_size); p_tmp = ifr->ifr_data; for (i = 0; i < BXE_NUM_ETH_STATS; i++) { offset = ((uint32_t *)&sc->eth_stats + bxe_eth_stats_arr[i].offset); switch (bxe_eth_stats_arr[i].size) { case 4: *((uint64_t *)p_tmp) = (uint64_t)*offset; break; case 8: *((uint64_t *)p_tmp) = HILO_U64(*offset, *(offset + 1)); break; default: *((uint64_t *)p_tmp) = 0; } p_tmp += sizeof(uint64_t); } return (0); default: return (-1); } } static void bxe_handle_chip_tq(void *context, int pending) { struct bxe_softc *sc = (struct bxe_softc *)context; long work = atomic_load_acq_long(&sc->chip_tq_flags); switch (work) { case CHIP_TQ_START: if ((sc->ifnet->if_flags & IFF_UP) && !(sc->ifnet->if_drv_flags & IFF_DRV_RUNNING)) { /* start the interface */ BLOGD(sc, DBG_LOAD, "Starting the interface...\n"); BXE_CORE_LOCK(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } break; case CHIP_TQ_STOP: if (!(sc->ifnet->if_flags & IFF_UP) && (sc->ifnet->if_drv_flags & IFF_DRV_RUNNING)) { /* bring down the interface */ BLOGD(sc, DBG_LOAD, "Stopping the interface...\n"); bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_stop_locked(sc); BXE_CORE_UNLOCK(sc); } break; case CHIP_TQ_REINIT: if (sc->ifnet->if_drv_flags & IFF_DRV_RUNNING) { /* restart the interface */ BLOGD(sc, DBG_LOAD, "Restarting the interface...\n"); bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_stop_locked(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } break; default: break; } } /* * Handles any IOCTL calls from the operating system. * * Returns: * 0 = Success, >0 Failure */ static int bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct bxe_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct bxe_nvram_data *nvdata; uint32_t priv_op; int mask = 0; int reinit = 0; int error = 0; int mtu_min = (ETH_MIN_PACKET_SIZE - ETH_HLEN); int mtu_max = (MJUM9BYTES - ETH_OVERHEAD - IP_HEADER_ALIGNMENT_PADDING); switch (command) { case SIOCSIFMTU: BLOGD(sc, DBG_IOCTL, "Received SIOCSIFMTU ioctl (mtu=%d)\n", ifr->ifr_mtu); if (sc->mtu == ifr->ifr_mtu) { /* nothing to change */ break; } if ((ifr->ifr_mtu < mtu_min) || (ifr->ifr_mtu > mtu_max)) { BLOGE(sc, "Unsupported MTU size %d (range is %d-%d)\n", ifr->ifr_mtu, mtu_min, mtu_max); error = EINVAL; break; } atomic_store_rel_int((volatile unsigned int *)&sc->mtu, (unsigned long)ifr->ifr_mtu); atomic_store_rel_long((volatile unsigned long *)&ifp->if_mtu, (unsigned long)ifr->ifr_mtu); reinit = 1; break; case SIOCSIFFLAGS: /* toggle the interface state up or down */ BLOGD(sc, DBG_IOCTL, "Received SIOCSIFFLAGS ioctl\n"); /* check if the interface is up */ if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* set the receive mode flags */ bxe_set_rx_mode(sc); } else { atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_START); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_STOP); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); } } break; case SIOCADDMULTI: case SIOCDELMULTI: /* add/delete multicast addresses */ BLOGD(sc, DBG_IOCTL, "Received SIOCADDMULTI/SIOCDELMULTI ioctl\n"); /* check if the interface is up */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* set the receive mode flags */ bxe_set_rx_mode(sc); } break; case SIOCSIFCAP: /* find out which capabilities have changed */ mask = (ifr->ifr_reqcap ^ ifp->if_capenable); BLOGD(sc, DBG_IOCTL, "Received SIOCSIFCAP ioctl (mask=0x%08x)\n", mask); /* toggle the LRO capabilites enable flag */ if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; BLOGD(sc, DBG_IOCTL, "Turning LRO %s\n", (ifp->if_capenable & IFCAP_LRO) ? "ON" : "OFF"); reinit = 1; } /* toggle the TXCSUM checksum capabilites enable flag */ if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; BLOGD(sc, DBG_IOCTL, "Turning TXCSUM %s\n", (ifp->if_capenable & IFCAP_TXCSUM) ? "ON" : "OFF"); if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } else { ifp->if_hwassist = 0; } } /* toggle the RXCSUM checksum capabilities enable flag */ if (mask & IFCAP_RXCSUM) { ifp->if_capenable ^= IFCAP_RXCSUM; BLOGD(sc, DBG_IOCTL, "Turning RXCSUM %s\n", (ifp->if_capenable & IFCAP_RXCSUM) ? "ON" : "OFF"); if (ifp->if_capenable & IFCAP_RXCSUM) { ifp->if_hwassist = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } else { ifp->if_hwassist = 0; } } /* toggle TSO4 capabilities enabled flag */ if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; BLOGD(sc, DBG_IOCTL, "Turning TSO4 %s\n", (ifp->if_capenable & IFCAP_TSO4) ? "ON" : "OFF"); } /* toggle TSO6 capabilities enabled flag */ if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; BLOGD(sc, DBG_IOCTL, "Turning TSO6 %s\n", (ifp->if_capenable & IFCAP_TSO6) ? "ON" : "OFF"); } /* toggle VLAN_HWTSO capabilities enabled flag */ if (mask & IFCAP_VLAN_HWTSO) { ifp->if_capenable ^= IFCAP_VLAN_HWTSO; BLOGD(sc, DBG_IOCTL, "Turning VLAN_HWTSO %s\n", (ifp->if_capenable & IFCAP_VLAN_HWTSO) ? "ON" : "OFF"); } /* toggle VLAN_HWCSUM capabilities enabled flag */ if (mask & IFCAP_VLAN_HWCSUM) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWCSUM is not supported!\n"); error = EINVAL; } /* toggle VLAN_MTU capabilities enable flag */ if (mask & IFCAP_VLAN_MTU) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_MTU is not supported!\n"); error = EINVAL; } /* toggle VLAN_HWTAGGING capabilities enabled flag */ if (mask & IFCAP_VLAN_HWTAGGING) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWTAGGING is not supported!\n"); error = EINVAL; } /* toggle VLAN_HWFILTER capabilities enabled flag */ if (mask & IFCAP_VLAN_HWFILTER) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWFILTER is not supported!\n"); error = EINVAL; } /* XXX not yet... * IFCAP_WOL_MAGIC */ break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: /* set/get interface media */ BLOGD(sc, DBG_IOCTL, "Received SIOCSIFMEDIA/SIOCGIFMEDIA ioctl (cmd=%lu)\n", (command & 0xff)); error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCGPRIVATE_0: copyin(ifr->ifr_data, &priv_op, sizeof(priv_op)); switch (priv_op) { case BXE_IOC_RD_NVRAM: case BXE_IOC_WR_NVRAM: nvdata = (struct bxe_nvram_data *)ifr->ifr_data; BLOGD(sc, DBG_IOCTL, "Received Private NVRAM ioctl addr=0x%x size=%u\n", nvdata->offset, nvdata->len); error = bxe_ioctl_nvram(sc, priv_op, ifr); break; case BXE_IOC_STATS_SHOW_NUM: case BXE_IOC_STATS_SHOW_STR: case BXE_IOC_STATS_SHOW_CNT: BLOGD(sc, DBG_IOCTL, "Received Private Stats ioctl (%d)\n", priv_op); error = bxe_ioctl_stats_show(sc, priv_op, ifr); break; default: BLOGW(sc, "Received Private Unknown ioctl (%d)\n", priv_op); error = EINVAL; break; } break; default: BLOGD(sc, DBG_IOCTL, "Received Unknown Ioctl (cmd=%lu)\n", (command & 0xff)); error = ether_ioctl(ifp, command, data); break; } if (reinit && (sc->ifnet->if_drv_flags & IFF_DRV_RUNNING)) { BLOGD(sc, DBG_LOAD | DBG_IOCTL, "Re-initializing hardware from IOCTL change\n"); atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_REINIT); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); } return (error); } static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m, uint8_t contents) { char * type; int i = 0; if (!(sc->debug & DBG_MBUF)) { return; } if (m == NULL) { BLOGD(sc, DBG_MBUF, "mbuf: null pointer\n"); return; } while (m) { BLOGD(sc, DBG_MBUF, "%02d: mbuf=%p m_len=%d m_flags=0x%b m_data=%p\n", i, m, m->m_len, m->m_flags, M_FLAG_BITS, m->m_data); if (m->m_flags & M_PKTHDR) { BLOGD(sc, DBG_MBUF, "%02d: - m_pkthdr: tot_len=%d flags=0x%b csum_flags=%b\n", i, m->m_pkthdr.len, m->m_flags, M_FLAG_BITS, (int)m->m_pkthdr.csum_flags, CSUM_BITS); } if (m->m_flags & M_EXT) { switch (m->m_ext.ext_type) { case EXT_CLUSTER: type = "EXT_CLUSTER"; break; case EXT_SFBUF: type = "EXT_SFBUF"; break; case EXT_JUMBOP: type = "EXT_JUMBOP"; break; case EXT_JUMBO9: type = "EXT_JUMBO9"; break; case EXT_JUMBO16: type = "EXT_JUMBO16"; break; case EXT_PACKET: type = "EXT_PACKET"; break; case EXT_MBUF: type = "EXT_MBUF"; break; case EXT_NET_DRV: type = "EXT_NET_DRV"; break; case EXT_MOD_TYPE: type = "EXT_MOD_TYPE"; break; case EXT_DISPOSABLE: type = "EXT_DISPOSABLE"; break; case EXT_EXTREF: type = "EXT_EXTREF"; break; default: type = "UNKNOWN"; break; } BLOGD(sc, DBG_MBUF, "%02d: - m_ext: %p ext_size=%d type=%s\n", i, m->m_ext.ext_buf, m->m_ext.ext_size, type); } if (contents) { bxe_dump_mbuf_data(sc, "mbuf data", m, TRUE); } m = m->m_next; i++; } } /* * Checks to ensure the 13 bd sliding window is >= MSS for TSO. * Check that (13 total bds - 3 bds) = 10 bd window >= MSS. * The window: 3 bds are = 1 for headers BD + 2 for parse BD and last BD * The headers comes in a seperate bd in FreeBSD so 13-3=10. * Returns: 0 if OK to send, 1 if packet needs further defragmentation */ static int bxe_chktso_window(struct bxe_softc *sc, int nsegs, bus_dma_segment_t *segs, struct mbuf *m) { uint32_t num_wnds, wnd_size, wnd_sum; int32_t frag_idx, wnd_idx; unsigned short lso_mss; int defrag; defrag = 0; wnd_sum = 0; wnd_size = 10; num_wnds = nsegs - wnd_size; lso_mss = htole16(m->m_pkthdr.tso_segsz); /* * Total header lengths Eth+IP+TCP in first FreeBSD mbuf so calculate the * first window sum of data while skipping the first assuming it is the * header in FreeBSD. */ for (frag_idx = 1; (frag_idx <= wnd_size); frag_idx++) { wnd_sum += htole16(segs[frag_idx].ds_len); } /* check the first 10 bd window size */ if (wnd_sum < lso_mss) { return (1); } /* run through the windows */ for (wnd_idx = 0; wnd_idx < num_wnds; wnd_idx++, frag_idx++) { /* subtract the first mbuf->m_len of the last wndw(-header) */ wnd_sum -= htole16(segs[wnd_idx+1].ds_len); /* add the next mbuf len to the len of our new window */ wnd_sum += htole16(segs[frag_idx].ds_len); if (wnd_sum < lso_mss) { return (1); } } return (0); } static uint8_t bxe_set_pbd_csum_e2(struct bxe_fastpath *fp, struct mbuf *m, uint32_t *parsing_data) { struct ether_vlan_header *eh = NULL; struct ip *ip4 = NULL; struct ip6_hdr *ip6 = NULL; caddr_t ip = NULL; struct tcphdr *th = NULL; int e_hlen, ip_hlen, l4_off; uint16_t proto; if (m->m_pkthdr.csum_flags == CSUM_IP) { /* no L4 checksum offload needed */ return (0); } /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { e_hlen = (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); proto = ntohs(eh->evl_proto); } else { e_hlen = ETHER_HDR_LEN; proto = ntohs(eh->evl_encap_proto); } switch (proto) { case ETHERTYPE_IP: /* get the IP header, if mbuf len < 20 then header in next mbuf */ ip4 = (m->m_len < sizeof(struct ip)) ? (struct ip *)m->m_next->m_data : (struct ip *)(m->m_data + e_hlen); /* ip_hl is number of 32-bit words */ ip_hlen = (ip4->ip_hl << 2); ip = (caddr_t)ip4; break; case ETHERTYPE_IPV6: /* get the IPv6 header, if mbuf len < 40 then header in next mbuf */ ip6 = (m->m_len < sizeof(struct ip6_hdr)) ? (struct ip6_hdr *)m->m_next->m_data : (struct ip6_hdr *)(m->m_data + e_hlen); /* XXX cannot support offload with IPv6 extensions */ ip_hlen = sizeof(struct ip6_hdr); ip = (caddr_t)ip6; break; default: /* We can't offload in this case... */ /* XXX error stat ??? */ return (0); } /* XXX assuming L4 header is contiguous to IPv4/IPv6 in the same mbuf */ l4_off = (e_hlen + ip_hlen); *parsing_data |= (((l4_off >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_tcp++; th = (struct tcphdr *)(ip + ip_hlen); /* th_off is number of 32-bit words */ *parsing_data |= ((th->th_off << ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) & ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW); return (l4_off + (th->th_off << 2)); /* entire header length */ } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_udp++; return (l4_off + sizeof(struct udphdr)); /* entire header length */ } else { /* XXX error stat ??? */ return (0); } } static uint8_t bxe_set_pbd_csum(struct bxe_fastpath *fp, struct mbuf *m, struct eth_tx_parse_bd_e1x *pbd) { struct ether_vlan_header *eh = NULL; struct ip *ip4 = NULL; struct ip6_hdr *ip6 = NULL; caddr_t ip = NULL; struct tcphdr *th = NULL; struct udphdr *uh = NULL; int e_hlen, ip_hlen; uint16_t proto; uint8_t hlen; uint16_t tmp_csum; uint32_t *tmp_uh; /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { e_hlen = (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); proto = ntohs(eh->evl_proto); } else { e_hlen = ETHER_HDR_LEN; proto = ntohs(eh->evl_encap_proto); } switch (proto) { case ETHERTYPE_IP: /* get the IP header, if mbuf len < 20 then header in next mbuf */ ip4 = (m->m_len < sizeof(struct ip)) ? (struct ip *)m->m_next->m_data : (struct ip *)(m->m_data + e_hlen); /* ip_hl is number of 32-bit words */ ip_hlen = (ip4->ip_hl << 1); ip = (caddr_t)ip4; break; case ETHERTYPE_IPV6: /* get the IPv6 header, if mbuf len < 40 then header in next mbuf */ ip6 = (m->m_len < sizeof(struct ip6_hdr)) ? (struct ip6_hdr *)m->m_next->m_data : (struct ip6_hdr *)(m->m_data + e_hlen); /* XXX cannot support offload with IPv6 extensions */ ip_hlen = (sizeof(struct ip6_hdr) >> 1); ip = (caddr_t)ip6; break; default: /* We can't offload in this case... */ /* XXX error stat ??? */ return (0); } hlen = (e_hlen >> 1); /* note that rest of global_data is indirectly zeroed here */ if (m->m_flags & M_VLANTAG) { pbd->global_data = htole16(hlen | (1 << ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT)); } else { pbd->global_data = htole16(hlen); } pbd->ip_hlen_w = ip_hlen; hlen += pbd->ip_hlen_w; /* XXX assuming L4 header is contiguous to IPv4/IPv6 in the same mbuf */ if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { th = (struct tcphdr *)(ip + (ip_hlen << 1)); /* th_off is number of 32-bit words */ hlen += (uint16_t)(th->th_off << 1); } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { uh = (struct udphdr *)(ip + (ip_hlen << 1)); hlen += (sizeof(struct udphdr) / 2); } else { /* valid case as only CSUM_IP was set */ return (0); } pbd->total_hlen_w = htole16(hlen); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_tcp++; pbd->tcp_pseudo_csum = ntohs(th->th_sum); } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_udp++; /* * Everest1 (i.e. 57710, 57711, 57711E) does not natively support UDP * checksums and does not know anything about the UDP header and where * the checksum field is located. It only knows about TCP. Therefore * we "lie" to the hardware for outgoing UDP packets w/ checksum * offload. Since the checksum field offset for TCP is 16 bytes and * for UDP it is 6 bytes we pass a pointer to the hardware that is 10 * bytes less than the start of the UDP header. This allows the * hardware to write the checksum in the correct spot. But the * hardware will compute a checksum which includes the last 10 bytes * of the IP header. To correct this we tweak the stack computed * pseudo checksum by folding in the calculation of the inverse * checksum for those final 10 bytes of the IP header. This allows * the correct checksum to be computed by the hardware. */ /* set pointer 10 bytes before UDP header */ tmp_uh = (uint32_t *)((uint8_t *)uh - 10); /* calculate a pseudo header checksum over the first 10 bytes */ tmp_csum = in_pseudo(*tmp_uh, *(tmp_uh + 1), *(uint16_t *)(tmp_uh + 2)); pbd->tcp_pseudo_csum = ntohs(in_addword(uh->uh_sum, ~tmp_csum)); } return (hlen * 2); /* entire header length, number of bytes */ } static void bxe_set_pbd_lso_e2(struct mbuf *m, uint32_t *parsing_data) { *parsing_data |= ((m->m_pkthdr.tso_segsz << ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) & ETH_TX_PARSE_BD_E2_LSO_MSS); /* XXX test for IPv6 with extension header... */ #if 0 struct ip6_hdr *ip6; if (ip6 && ip6->ip6_nxt == 'some ipv6 extension header') *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR; #endif } static void bxe_set_pbd_lso(struct mbuf *m, struct eth_tx_parse_bd_e1x *pbd) { struct ether_vlan_header *eh = NULL; struct ip *ip = NULL; struct tcphdr *th = NULL; int e_hlen; /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ e_hlen = (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ? (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) : ETHER_HDR_LEN; /* get the IP and TCP header, with LSO entire header in first mbuf */ /* XXX assuming IPv4 */ ip = (struct ip *)(m->m_data + e_hlen); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); pbd->lso_mss = htole16(m->m_pkthdr.tso_segsz); pbd->tcp_send_seq = ntohl(th->th_seq); pbd->tcp_flags = ((ntohl(((uint32_t *)th)[3]) >> 16) & 0xff); #if 1 /* XXX IPv4 */ pbd->ip_id = ntohs(ip->ip_id); pbd->tcp_pseudo_csum = ntohs(in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP))); #else /* XXX IPv6 */ pbd->tcp_pseudo_csum = ntohs(in_pseudo(&ip6->ip6_src, &ip6->ip6_dst, htons(IPPROTO_TCP))); #endif pbd->global_data |= htole16(ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN); } /* * Encapsulte an mbuf cluster into the tx bd chain and makes the memory * visible to the controller. * * If an mbuf is submitted to this routine and cannot be given to the * controller (e.g. it has too many fragments) then the function may free * the mbuf and return to the caller. * * Returns: * 0 = Success, !0 = Failure * Note the side effect that an mbuf may be freed if it causes a problem. */ static int bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) { bus_dma_segment_t segs[32]; struct mbuf *m0; struct bxe_sw_tx_bd *tx_buf; struct eth_tx_parse_bd_e1x *pbd_e1x = NULL; struct eth_tx_parse_bd_e2 *pbd_e2 = NULL; /* struct eth_tx_parse_2nd_bd *pbd2 = NULL; */ struct eth_tx_bd *tx_data_bd; struct eth_tx_bd *tx_total_pkt_size_bd; struct eth_tx_start_bd *tx_start_bd; uint16_t bd_prod, pkt_prod, total_pkt_size; uint8_t mac_type; int defragged, error, nsegs, rc, nbds, vlan_off, ovlan; struct bxe_softc *sc; uint16_t tx_bd_avail; struct ether_vlan_header *eh; uint32_t pbd_e2_parsing_data = 0; uint8_t hlen = 0; int tmp_bd; int i; sc = fp->sc; M_ASSERTPKTHDR(*m_head); m0 = *m_head; rc = defragged = nbds = ovlan = vlan_off = total_pkt_size = 0; tx_start_bd = NULL; tx_data_bd = NULL; tx_total_pkt_size_bd = NULL; /* get the H/W pointer for packets and BDs */ pkt_prod = fp->tx_pkt_prod; bd_prod = fp->tx_bd_prod; mac_type = UNICAST_ADDRESS; /* map the mbuf into the next open DMAable memory */ tx_buf = &fp->tx_mbuf_chain[TX_BD(pkt_prod)]; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); /* mapping errors */ if(__predict_false(error != 0)) { fp->eth_q_stats.tx_dma_mapping_failure++; if (error == ENOMEM) { /* resource issue, try again later */ rc = ENOMEM; } else if (error == EFBIG) { /* possibly recoverable with defragmentation */ fp->eth_q_stats.mbuf_defrag_attempts++; m0 = m_defrag(*m_head, M_NOWAIT); if (m0 == NULL) { fp->eth_q_stats.mbuf_defrag_failures++; rc = ENOBUFS; } else { /* defrag successful, try mapping again */ *m_head = m0; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { fp->eth_q_stats.tx_dma_mapping_failure++; rc = error; } } } else { /* unknown, unrecoverable mapping error */ BLOGE(sc, "Unknown TX mapping error rc=%d\n", error); bxe_dump_mbuf(sc, m0, FALSE); rc = error; } goto bxe_tx_encap_continue; } tx_bd_avail = bxe_tx_avail(sc, fp); /* make sure there is enough room in the send queue */ if (__predict_false(tx_bd_avail < (nsegs + 2))) { /* Recoverable, try again later. */ fp->eth_q_stats.tx_hw_queue_full++; bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); rc = ENOMEM; goto bxe_tx_encap_continue; } /* capture the current H/W TX chain high watermark */ if (__predict_false(fp->eth_q_stats.tx_hw_max_queue_depth < (TX_BD_USABLE - tx_bd_avail))) { fp->eth_q_stats.tx_hw_max_queue_depth = (TX_BD_USABLE - tx_bd_avail); } /* make sure it fits in the packet window */ if (__predict_false(nsegs > BXE_MAX_SEGMENTS)) { /* * The mbuf may be to big for the controller to handle. If the frame * is a TSO frame we'll need to do an additional check. */ if (m0->m_pkthdr.csum_flags & CSUM_TSO) { if (bxe_chktso_window(sc, nsegs, segs, m0) == 0) { goto bxe_tx_encap_continue; /* OK to send */ } else { fp->eth_q_stats.tx_window_violation_tso++; } } else { fp->eth_q_stats.tx_window_violation_std++; } /* lets try to defragment this mbuf and remap it */ fp->eth_q_stats.mbuf_defrag_attempts++; bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); m0 = m_defrag(*m_head, M_NOWAIT); if (m0 == NULL) { fp->eth_q_stats.mbuf_defrag_failures++; /* Ugh, just drop the frame... :( */ rc = ENOBUFS; } else { /* defrag successful, try mapping again */ *m_head = m0; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { fp->eth_q_stats.tx_dma_mapping_failure++; /* No sense in trying to defrag/copy chain, drop it. :( */ rc = error; } else { /* if the chain is still too long then drop it */ if (__predict_false(nsegs > BXE_MAX_SEGMENTS)) { bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); rc = ENODEV; } } } } bxe_tx_encap_continue: /* Check for errors */ if (rc) { if (rc == ENOMEM) { /* recoverable try again later */ } else { fp->eth_q_stats.tx_soft_errors++; fp->eth_q_stats.mbuf_alloc_tx--; m_freem(*m_head); *m_head = NULL; } return (rc); } /* set flag according to packet type (UNICAST_ADDRESS is default) */ if (m0->m_flags & M_BCAST) { mac_type = BROADCAST_ADDRESS; } else if (m0->m_flags & M_MCAST) { mac_type = MULTICAST_ADDRESS; } /* store the mbuf into the mbuf ring */ tx_buf->m = m0; tx_buf->first_bd = fp->tx_bd_prod; tx_buf->flags = 0; /* prepare the first transmit (start) BD for the mbuf */ tx_start_bd = &fp->tx_chain[TX_BD(bd_prod)].start_bd; BLOGD(sc, DBG_TX, "sending pkt_prod=%u tx_buf=%p next_idx=%u bd=%u tx_start_bd=%p\n", pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, tx_start_bd); tx_start_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); tx_start_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); tx_start_bd->nbytes = htole16(segs[0].ds_len); total_pkt_size += tx_start_bd->nbytes; tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; tx_start_bd->general_data = (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); /* all frames have at least Start BD + Parsing BD */ nbds = nsegs + 1; tx_start_bd->nbd = htole16(nbds); if (m0->m_flags & M_VLANTAG) { tx_start_bd->vlan_or_ethertype = htole16(m0->m_pkthdr.ether_vtag); tx_start_bd->bd_flags.as_bitfield |= (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); } else { /* vf tx, start bd must hold the ethertype for fw to enforce it */ if (IS_VF(sc)) { /* map ethernet header to find type and header length */ eh = mtod(m0, struct ether_vlan_header *); tx_start_bd->vlan_or_ethertype = eh->evl_encap_proto; } else { /* used by FW for packet accounting */ tx_start_bd->vlan_or_ethertype = htole16(fp->tx_pkt_prod); #if 0 /* * If NPAR-SD is active then FW should do the tagging regardless * of value of priority. Otherwise, if priority indicates this is * a control packet we need to indicate to FW to avoid tagging. */ if (!IS_MF_AFEX(sc) && (mbuf priority == PRIO_CONTROL)) { SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_FORCE_VLAN_MODE, 1); } #endif } } /* * add a parsing BD from the chain. The parsing BD is always added * though it is only used for TSO and chksum */ bd_prod = TX_BD_NEXT(bd_prod); if (m0->m_pkthdr.csum_flags) { if (m0->m_pkthdr.csum_flags & CSUM_IP) { fp->eth_q_stats.tx_ofld_frames_csum_ip++; tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IP_CSUM; } if (m0->m_pkthdr.csum_flags & CSUM_TCP_IPV6) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_IPV6 | ETH_TX_BD_FLAGS_L4_CSUM); } else if (m0->m_pkthdr.csum_flags & CSUM_UDP_IPV6) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_IPV6 | ETH_TX_BD_FLAGS_IS_UDP | ETH_TX_BD_FLAGS_L4_CSUM); } else if ((m0->m_pkthdr.csum_flags & CSUM_TCP) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM; } else if (m0->m_pkthdr.csum_flags & CSUM_UDP) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_L4_CSUM | ETH_TX_BD_FLAGS_IS_UDP); } } if (!CHIP_IS_E1x(sc)) { pbd_e2 = &fp->tx_chain[TX_BD(bd_prod)].parse_bd_e2; memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2)); if (m0->m_pkthdr.csum_flags) { hlen = bxe_set_pbd_csum_e2(fp, m0, &pbd_e2_parsing_data); } #if 0 /* * Add the MACs to the parsing BD if the module param was * explicitly set, if this is a vf, or in switch independent * mode. */ if (sc->flags & BXE_TX_SWITCHING || IS_VF(sc) || IS_MF_SI(sc)) { eh = mtod(m0, struct ether_vlan_header *); bxe_set_fw_mac_addr(&pbd_e2->data.mac_addr.src_hi, &pbd_e2->data.mac_addr.src_mid, &pbd_e2->data.mac_addr.src_lo, eh->evl_shost); bxe_set_fw_mac_addr(&pbd_e2->data.mac_addr.dst_hi, &pbd_e2->data.mac_addr.dst_mid, &pbd_e2->data.mac_addr.dst_lo, eh->evl_dhost); } #endif SET_FLAG(pbd_e2_parsing_data, ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, mac_type); } else { uint16_t global_data = 0; pbd_e1x = &fp->tx_chain[TX_BD(bd_prod)].parse_bd_e1x; memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x)); if (m0->m_pkthdr.csum_flags) { hlen = bxe_set_pbd_csum(fp, m0, pbd_e1x); } SET_FLAG(global_data, ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE, mac_type); pbd_e1x->global_data |= htole16(global_data); } /* setup the parsing BD with TSO specific info */ if (m0->m_pkthdr.csum_flags & CSUM_TSO) { fp->eth_q_stats.tx_ofld_frames_lso++; tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO; if (__predict_false(tx_start_bd->nbytes > hlen)) { fp->eth_q_stats.tx_ofld_frames_lso_hdr_splits++; /* split the first BD into header/data making the fw job easy */ nbds++; tx_start_bd->nbd = htole16(nbds); tx_start_bd->nbytes = htole16(hlen); bd_prod = TX_BD_NEXT(bd_prod); /* new transmit BD after the tx_parse_bd */ tx_data_bd = &fp->tx_chain[TX_BD(bd_prod)].reg_bd; tx_data_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr + hlen)); tx_data_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr + hlen)); tx_data_bd->nbytes = htole16(segs[0].ds_len - hlen); if (tx_total_pkt_size_bd == NULL) { tx_total_pkt_size_bd = tx_data_bd; } BLOGD(sc, DBG_TX, "TSO split header size is %d (%x:%x) nbds %d\n", le16toh(tx_start_bd->nbytes), le32toh(tx_start_bd->addr_hi), le32toh(tx_start_bd->addr_lo), nbds); } if (!CHIP_IS_E1x(sc)) { bxe_set_pbd_lso_e2(m0, &pbd_e2_parsing_data); } else { bxe_set_pbd_lso(m0, pbd_e1x); } } if (pbd_e2_parsing_data) { pbd_e2->parsing_data = htole32(pbd_e2_parsing_data); } /* prepare remaining BDs, start tx bd contains first seg/frag */ for (i = 1; i < nsegs ; i++) { bd_prod = TX_BD_NEXT(bd_prod); tx_data_bd = &fp->tx_chain[TX_BD(bd_prod)].reg_bd; tx_data_bd->addr_lo = htole32(U64_LO(segs[i].ds_addr)); tx_data_bd->addr_hi = htole32(U64_HI(segs[i].ds_addr)); tx_data_bd->nbytes = htole16(segs[i].ds_len); if (tx_total_pkt_size_bd == NULL) { tx_total_pkt_size_bd = tx_data_bd; } total_pkt_size += tx_data_bd->nbytes; } BLOGD(sc, DBG_TX, "last bd %p\n", tx_data_bd); if (tx_total_pkt_size_bd != NULL) { tx_total_pkt_size_bd->total_pkt_bytes = total_pkt_size; } if (__predict_false(sc->debug & DBG_TX)) { tmp_bd = tx_buf->first_bd; for (i = 0; i < nbds; i++) { if (i == 0) { BLOGD(sc, DBG_TX, "TX Strt: %p bd=%d nbd=%d vlan=0x%x " "bd_flags=0x%x hdr_nbds=%d\n", tx_start_bd, tmp_bd, le16toh(tx_start_bd->nbd), le16toh(tx_start_bd->vlan_or_ethertype), tx_start_bd->bd_flags.as_bitfield, (tx_start_bd->general_data & ETH_TX_START_BD_HDR_NBDS)); } else if (i == 1) { if (pbd_e1x) { BLOGD(sc, DBG_TX, "-> Prse: %p bd=%d global=0x%x ip_hlen_w=%u " "ip_id=%u lso_mss=%u tcp_flags=0x%x csum=0x%x " "tcp_seq=%u total_hlen_w=%u\n", pbd_e1x, tmp_bd, pbd_e1x->global_data, pbd_e1x->ip_hlen_w, pbd_e1x->ip_id, pbd_e1x->lso_mss, pbd_e1x->tcp_flags, pbd_e1x->tcp_pseudo_csum, pbd_e1x->tcp_send_seq, le16toh(pbd_e1x->total_hlen_w)); } else { /* if (pbd_e2) */ BLOGD(sc, DBG_TX, "-> Parse: %p bd=%d dst=%02x:%02x:%02x " "src=%02x:%02x:%02x parsing_data=0x%x\n", pbd_e2, tmp_bd, pbd_e2->data.mac_addr.dst_hi, pbd_e2->data.mac_addr.dst_mid, pbd_e2->data.mac_addr.dst_lo, pbd_e2->data.mac_addr.src_hi, pbd_e2->data.mac_addr.src_mid, pbd_e2->data.mac_addr.src_lo, pbd_e2->parsing_data); } } if (i != 1) { /* skip parse db as it doesn't hold data */ tx_data_bd = &fp->tx_chain[TX_BD(tmp_bd)].reg_bd; BLOGD(sc, DBG_TX, "-> Frag: %p bd=%d nbytes=%d hi=0x%x lo: 0x%x\n", tx_data_bd, tmp_bd, le16toh(tx_data_bd->nbytes), le32toh(tx_data_bd->addr_hi), le32toh(tx_data_bd->addr_lo)); } tmp_bd = TX_BD_NEXT(tmp_bd); } } BLOGD(sc, DBG_TX, "doorbell: nbds=%d bd=%u\n", nbds, bd_prod); /* update TX BD producer index value for next TX */ bd_prod = TX_BD_NEXT(bd_prod); /* * If the chain of tx_bd's describing this frame is adjacent to or spans * an eth_tx_next_bd element then we need to increment the nbds value. */ if (TX_BD_IDX(bd_prod) < nbds) { nbds++; } /* don't allow reordering of writes for nbd and packets */ mb(); fp->tx_db.data.prod += nbds; /* producer points to the next free tx_bd at this point */ fp->tx_pkt_prod++; fp->tx_bd_prod = bd_prod; DOORBELL(sc, fp->index, fp->tx_db.raw); fp->eth_q_stats.tx_pkts++; /* Prevent speculative reads from getting ahead of the status block. */ bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_READ); /* Prevent speculative reads from getting ahead of the doorbell. */ bus_space_barrier(sc->bar[BAR2].tag, sc->bar[BAR2].handle, 0, 0, BUS_SPACE_BARRIER_READ); return (0); } static void bxe_tx_start_locked(struct bxe_softc *sc, struct ifnet *ifp, struct bxe_fastpath *fp) { struct mbuf *m = NULL; int tx_count = 0; uint16_t tx_bd_avail; BXE_FP_TX_LOCK_ASSERT(fp); /* keep adding entries while there are frames to send */ while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { /* * check for any frames to send * dequeue can still be NULL even if queue is not empty */ IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (__predict_false(m == NULL)) { break; } /* the mbuf now belongs to us */ fp->eth_q_stats.mbuf_alloc_tx++; /* * Put the frame into the transmit ring. If we don't have room, * place the mbuf back at the head of the TX queue, set the * OACTIVE flag, and wait for the NIC to drain the chain. */ if (__predict_false(bxe_tx_encap(fp, &m))) { fp->eth_q_stats.tx_encap_failures++; if (m != NULL) { /* mark the TX queue as full and return the frame */ ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m); fp->eth_q_stats.mbuf_alloc_tx--; fp->eth_q_stats.tx_queue_xoff++; } /* stop looking for more work */ break; } /* the frame was enqueued successfully */ tx_count++; /* send a copy of the frame to any BPF listeners. */ BPF_MTAP(ifp, m); tx_bd_avail = bxe_tx_avail(sc, fp); /* handle any completions if we're running low */ if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */ bxe_txeof(sc, fp); if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { break; } } } /* all TX packets were dequeued and/or the tx ring is full */ if (tx_count > 0) { /* reset the TX watchdog timeout timer */ fp->watchdog_timer = BXE_TX_TIMEOUT; } } /* Legacy (non-RSS) dispatch routine */ static void bxe_tx_start(struct ifnet *ifp) { struct bxe_softc *sc; struct bxe_fastpath *fp; sc = ifp->if_softc; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { BLOGW(sc, "Interface not running, ignoring transmit request\n"); return; } if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { BLOGW(sc, "Interface TX queue is full, ignoring transmit request\n"); return; } if (!sc->link_vars.link_up) { BLOGW(sc, "Interface link is down, ignoring transmit request\n"); return; } fp = &sc->fp[0]; BXE_FP_TX_LOCK(fp); bxe_tx_start_locked(sc, ifp, fp); BXE_FP_TX_UNLOCK(fp); } #if __FreeBSD_version >= 800000 static int bxe_tx_mq_start_locked(struct bxe_softc *sc, struct ifnet *ifp, struct bxe_fastpath *fp, struct mbuf *m) { struct buf_ring *tx_br = fp->tx_br; struct mbuf *next; int depth, rc, tx_count; uint16_t tx_bd_avail; rc = tx_count = 0; if (!tx_br) { BLOGE(sc, "Multiqueue TX and no buf_ring!\n"); return (EINVAL); } /* fetch the depth of the driver queue */ depth = drbr_inuse(ifp, tx_br); if (depth > fp->eth_q_stats.tx_max_drbr_queue_depth) { fp->eth_q_stats.tx_max_drbr_queue_depth = depth; } BXE_FP_TX_LOCK_ASSERT(fp); if (m == NULL) { /* no new work, check for pending frames */ next = drbr_dequeue(ifp, tx_br); } else if (drbr_needs_enqueue(ifp, tx_br)) { /* have both new and pending work, maintain packet order */ rc = drbr_enqueue(ifp, tx_br, m); if (rc != 0) { fp->eth_q_stats.tx_soft_errors++; goto bxe_tx_mq_start_locked_exit; } next = drbr_dequeue(ifp, tx_br); } else { /* new work only and nothing pending */ next = m; } /* keep adding entries while there are frames to send */ while (next != NULL) { /* the mbuf now belongs to us */ fp->eth_q_stats.mbuf_alloc_tx++; /* * Put the frame into the transmit ring. If we don't have room, * place the mbuf back at the head of the TX queue, set the * OACTIVE flag, and wait for the NIC to drain the chain. */ rc = bxe_tx_encap(fp, &next); if (__predict_false(rc != 0)) { fp->eth_q_stats.tx_encap_failures++; if (next != NULL) { /* mark the TX queue as full and save the frame */ ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* XXX this may reorder the frame */ rc = drbr_enqueue(ifp, tx_br, next); fp->eth_q_stats.mbuf_alloc_tx--; fp->eth_q_stats.tx_frames_deferred++; } /* stop looking for more work */ break; } /* the transmit frame was enqueued successfully */ tx_count++; /* send a copy of the frame to any BPF listeners */ BPF_MTAP(ifp, next); tx_bd_avail = bxe_tx_avail(sc, fp); /* handle any completions if we're running low */ if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */ bxe_txeof(sc, fp); if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { break; } } next = drbr_dequeue(ifp, tx_br); } /* all TX packets were dequeued and/or the tx ring is full */ if (tx_count > 0) { /* reset the TX watchdog timeout timer */ fp->watchdog_timer = BXE_TX_TIMEOUT; } bxe_tx_mq_start_locked_exit: return (rc); } /* Multiqueue (TSS) dispatch routine. */ static int bxe_tx_mq_start(struct ifnet *ifp, struct mbuf *m) { struct bxe_softc *sc = ifp->if_softc; struct bxe_fastpath *fp; int fp_index, rc; fp_index = 0; /* default is the first queue */ /* change the queue if using flow ID */ if ((m->m_flags & M_FLOWID) != 0) { fp_index = (m->m_pkthdr.flowid % sc->num_queues); } fp = &sc->fp[fp_index]; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { BLOGW(sc, "Interface not running, ignoring transmit request\n"); return (ENETDOWN); } if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { BLOGW(sc, "Interface TX queue is full, ignoring transmit request\n"); return (EBUSY); } if (!sc->link_vars.link_up) { BLOGW(sc, "Interface link is down, ignoring transmit request\n"); return (ENETDOWN); } /* XXX change to TRYLOCK here and if failed then schedule taskqueue */ BXE_FP_TX_LOCK(fp); rc = bxe_tx_mq_start_locked(sc, ifp, fp, m); BXE_FP_TX_UNLOCK(fp); return (rc); } static void bxe_mq_flush(struct ifnet *ifp) { struct bxe_softc *sc = ifp->if_softc; struct bxe_fastpath *fp; struct mbuf *m; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->state != BXE_FP_STATE_OPEN) { BLOGD(sc, DBG_LOAD, "Not clearing fp[%02d] buf_ring (state=%d)\n", fp->index, fp->state); continue; } if (fp->tx_br != NULL) { BLOGD(sc, DBG_LOAD, "Clearing fp[%02d] buf_ring\n", fp->index); BXE_FP_TX_LOCK(fp); while ((m = buf_ring_dequeue_sc(fp->tx_br)) != NULL) { m_freem(m); } BXE_FP_TX_UNLOCK(fp); } } if_qflush(ifp); } #endif /* FreeBSD_version >= 800000 */ static uint16_t bxe_cid_ilt_lines(struct bxe_softc *sc) { if (IS_SRIOV(sc)) { return ((BXE_FIRST_VF_CID + BXE_VF_CIDS) / ILT_PAGE_CIDS); } return (L2_ILT_LINES(sc)); } static void bxe_ilt_set_info(struct bxe_softc *sc) { struct ilt_client_info *ilt_client; struct ecore_ilt *ilt = sc->ilt; uint16_t line = 0; ilt->start_line = FUNC_ILT_BASE(SC_FUNC(sc)); BLOGD(sc, DBG_LOAD, "ilt starts at line %d\n", ilt->start_line); /* CDU */ ilt_client = &ilt->clients[ILT_CLIENT_CDU]; ilt_client->client_num = ILT_CLIENT_CDU; ilt_client->page_size = CDU_ILT_PAGE_SZ; ilt_client->flags = ILT_CLIENT_SKIP_MEM; ilt_client->start = line; line += bxe_cid_ilt_lines(sc); if (CNIC_SUPPORT(sc)) { line += CNIC_ILT_LINES; } ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[CDU]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); /* QM */ if (QM_INIT(sc->qm_cid_count)) { ilt_client = &ilt->clients[ILT_CLIENT_QM]; ilt_client->client_num = ILT_CLIENT_QM; ilt_client->page_size = QM_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; /* 4 bytes for each cid */ line += DIV_ROUND_UP(sc->qm_cid_count * QM_QUEUES_PER_FUNC * 4, QM_ILT_PAGE_SZ); ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[QM]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); } if (CNIC_SUPPORT(sc)) { /* SRC */ ilt_client = &ilt->clients[ILT_CLIENT_SRC]; ilt_client->client_num = ILT_CLIENT_SRC; ilt_client->page_size = SRC_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; line += SRC_ILT_LINES; ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[SRC]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); /* TM */ ilt_client = &ilt->clients[ILT_CLIENT_TM]; ilt_client->client_num = ILT_CLIENT_TM; ilt_client->page_size = TM_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; line += TM_ILT_LINES; ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[TM]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); } KASSERT((line <= ILT_MAX_LINES), ("Invalid number of ILT lines!")); } static void bxe_set_fp_rx_buf_size(struct bxe_softc *sc) { int i; BLOGD(sc, DBG_LOAD, "mtu = %d\n", sc->mtu); for (i = 0; i < sc->num_queues; i++) { /* get the Rx buffer size for RX frames */ sc->fp[i].rx_buf_size = (IP_HEADER_ALIGNMENT_PADDING + ETH_OVERHEAD + sc->mtu); BLOGD(sc, DBG_LOAD, "rx_buf_size for fp[%02d] = %d\n", i, sc->fp[i].rx_buf_size); /* get the mbuf allocation size for RX frames */ if (sc->fp[i].rx_buf_size <= MCLBYTES) { sc->fp[i].mbuf_alloc_size = MCLBYTES; } else if (sc->fp[i].rx_buf_size <= BCM_PAGE_SIZE) { sc->fp[i].mbuf_alloc_size = PAGE_SIZE; } else { sc->fp[i].mbuf_alloc_size = MJUM9BYTES; } BLOGD(sc, DBG_LOAD, "mbuf_alloc_size for fp[%02d] = %d\n", i, sc->fp[i].mbuf_alloc_size); } } static int bxe_alloc_ilt_mem(struct bxe_softc *sc) { int rc = 0; if ((sc->ilt = (struct ecore_ilt *)malloc(sizeof(struct ecore_ilt), M_BXE_ILT, (M_NOWAIT | M_ZERO))) == NULL) { rc = 1; } return (rc); } static int bxe_alloc_ilt_lines_mem(struct bxe_softc *sc) { int rc = 0; if ((sc->ilt->lines = (struct ilt_line *)malloc((sizeof(struct ilt_line) * ILT_MAX_LINES), M_BXE_ILT, (M_NOWAIT | M_ZERO))) == NULL) { rc = 1; } return (rc); } static void bxe_free_ilt_mem(struct bxe_softc *sc) { if (sc->ilt != NULL) { free(sc->ilt, M_BXE_ILT); sc->ilt = NULL; } } static void bxe_free_ilt_lines_mem(struct bxe_softc *sc) { if (sc->ilt->lines != NULL) { free(sc->ilt->lines, M_BXE_ILT); sc->ilt->lines = NULL; } } static void bxe_free_mem(struct bxe_softc *sc) { int i; #if 0 if (!CONFIGURE_NIC_MODE(sc)) { /* free searcher T2 table */ bxe_dma_free(sc, &sc->t2); } #endif for (i = 0; i < L2_ILT_LINES(sc); i++) { bxe_dma_free(sc, &sc->context[i].vcxt_dma); sc->context[i].vcxt = NULL; sc->context[i].size = 0; } ecore_ilt_mem_op(sc, ILT_MEMOP_FREE); bxe_free_ilt_lines_mem(sc); #if 0 bxe_iov_free_mem(sc); #endif } static int bxe_alloc_mem(struct bxe_softc *sc) { int context_size; int allocated; int i; #if 0 if (!CONFIGURE_NIC_MODE(sc)) { /* allocate searcher T2 table */ if (bxe_dma_alloc(sc, SRC_T2_SZ, &sc->t2, "searcher t2 table") != 0) { return (-1); } } #endif /* * Allocate memory for CDU context: * This memory is allocated separately and not in the generic ILT * functions because CDU differs in few aspects: * 1. There can be multiple entities allocating memory for context - * regular L2, CNIC, and SRIOV drivers. Each separately controls * its own ILT lines. * 2. Since CDU page-size is not a single 4KB page (which is the case * for the other ILT clients), to be efficient we want to support * allocation of sub-page-size in the last entry. * 3. Context pointers are used by the driver to pass to FW / update * the context (for the other ILT clients the pointers are used just to * free the memory during unload). */ context_size = (sizeof(union cdu_context) * BXE_L2_CID_COUNT(sc)); for (i = 0, allocated = 0; allocated < context_size; i++) { sc->context[i].size = min(CDU_ILT_PAGE_SZ, (context_size - allocated)); if (bxe_dma_alloc(sc, sc->context[i].size, &sc->context[i].vcxt_dma, "cdu context") != 0) { bxe_free_mem(sc); return (-1); } sc->context[i].vcxt = (union cdu_context *)sc->context[i].vcxt_dma.vaddr; allocated += sc->context[i].size; } bxe_alloc_ilt_lines_mem(sc); BLOGD(sc, DBG_LOAD, "ilt=%p start_line=%u lines=%p\n", sc->ilt, sc->ilt->start_line, sc->ilt->lines); { for (i = 0; i < 4; i++) { BLOGD(sc, DBG_LOAD, "c%d page_size=%u start=%u end=%u num=%u flags=0x%x\n", i, sc->ilt->clients[i].page_size, sc->ilt->clients[i].start, sc->ilt->clients[i].end, sc->ilt->clients[i].client_num, sc->ilt->clients[i].flags); } } if (ecore_ilt_mem_op(sc, ILT_MEMOP_ALLOC)) { BLOGE(sc, "ecore_ilt_mem_op ILT_MEMOP_ALLOC failed\n"); bxe_free_mem(sc); return (-1); } #if 0 if (bxe_iov_alloc_mem(sc)) { BLOGE(sc, "Failed to allocate memory for SRIOV\n"); bxe_free_mem(sc); return (-1); } #endif return (0); } static void bxe_free_rx_bd_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i; sc = fp->sc; if (fp->rx_mbuf_tag == NULL) { return; } /* free all mbufs and unload all maps */ for (i = 0; i < RX_BD_TOTAL; i++) { if (fp->rx_mbuf_chain[i].m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_mbuf_chain[i].m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_chain[i].m_map); } if (fp->rx_mbuf_chain[i].m != NULL) { m_freem(fp->rx_mbuf_chain[i].m); fp->rx_mbuf_chain[i].m = NULL; fp->eth_q_stats.mbuf_alloc_rx--; } } } static void bxe_free_tpa_pool(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i, max_agg_queues; sc = fp->sc; if (fp->rx_mbuf_tag == NULL) { return; } max_agg_queues = MAX_AGG_QS(sc); /* release all mbufs and unload all DMA maps in the TPA pool */ for (i = 0; i < max_agg_queues; i++) { if (fp->rx_tpa_info[i].bd.m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_tpa_info[i].bd.m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info[i].bd.m_map); } if (fp->rx_tpa_info[i].bd.m != NULL) { m_freem(fp->rx_tpa_info[i].bd.m); fp->rx_tpa_info[i].bd.m = NULL; fp->eth_q_stats.mbuf_alloc_tpa--; } } } static void bxe_free_sge_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i; sc = fp->sc; if (fp->rx_sge_mbuf_tag == NULL) { return; } /* rree all mbufs and unload all maps */ for (i = 0; i < RX_SGE_TOTAL; i++) { if (fp->rx_sge_mbuf_chain[i].m_map != NULL) { bus_dmamap_sync(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[i].m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[i].m_map); } if (fp->rx_sge_mbuf_chain[i].m != NULL) { m_freem(fp->rx_sge_mbuf_chain[i].m); fp->rx_sge_mbuf_chain[i].m = NULL; fp->eth_q_stats.mbuf_alloc_sge--; } } } static void bxe_free_fp_buffers(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; #if __FreeBSD_version >= 800000 if (fp->tx_br != NULL) { struct mbuf *m; /* just in case bxe_mq_flush() wasn't called */ while ((m = buf_ring_dequeue_sc(fp->tx_br)) != NULL) { m_freem(m); } buf_ring_free(fp->tx_br, M_DEVBUF); fp->tx_br = NULL; } #endif /* free all RX buffers */ bxe_free_rx_bd_chain(fp); bxe_free_tpa_pool(fp); bxe_free_sge_chain(fp); if (fp->eth_q_stats.mbuf_alloc_rx != 0) { BLOGE(sc, "failed to claim all rx mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_rx); } if (fp->eth_q_stats.mbuf_alloc_sge != 0) { BLOGE(sc, "failed to claim all sge mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_sge); } if (fp->eth_q_stats.mbuf_alloc_tpa != 0) { BLOGE(sc, "failed to claim all sge mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_tpa); } if (fp->eth_q_stats.mbuf_alloc_tx != 0) { BLOGE(sc, "failed to release tx mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_tx); } /* XXX verify all mbufs were reclaimed */ if (mtx_initialized(&fp->tx_mtx)) { mtx_destroy(&fp->tx_mtx); } if (mtx_initialized(&fp->rx_mtx)) { mtx_destroy(&fp->rx_mtx); } } } static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t prev_index, uint16_t index) { struct bxe_sw_rx_bd *rx_buf; struct eth_rx_bd *rx_bd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs, rc; rc = 0; /* allocate the new RX BD mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, fp->mbuf_alloc_size); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_bd_alloc_failed++; return (ENOBUFS); } fp->eth_q_stats.mbuf_alloc_rx++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = fp->rx_buf_size; /* map the mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_bd_mapping_failed++; m_freem(m); fp->eth_q_stats.mbuf_alloc_rx--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); /* release any existing RX BD mbuf mappings */ if (prev_index != index) { rx_buf = &fp->rx_mbuf_chain[prev_index]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* * We only get here from bxe_rxeof() when the maximum number * of rx buffers is less than RX_BD_USABLE. bxe_rxeof() already * holds the mbuf in the prev_index so it's OK to NULL it out * here without concern of a memory leak. */ fp->rx_mbuf_chain[prev_index].m = NULL; } rx_buf = &fp->rx_mbuf_chain[index]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* save the mbuf and mapping info for a future packet */ map = (prev_index != index) ? fp->rx_mbuf_chain[prev_index].m_map : rx_buf->m_map; rx_buf->m_map = fp->rx_mbuf_spare_map; fp->rx_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_PREREAD); rx_buf->m = m; rx_bd = &fp->rx_chain[index]; rx_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); rx_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); return (rc); } static int bxe_alloc_rx_tpa_mbuf(struct bxe_fastpath *fp, int queue) { struct bxe_sw_tpa_info *tpa_info = &fp->rx_tpa_info[queue]; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs; int rc = 0; /* allocate the new TPA mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, fp->mbuf_alloc_size); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_tpa_alloc_failed++; return (ENOBUFS); } fp->eth_q_stats.mbuf_alloc_tpa++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = fp->rx_buf_size; /* map the mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_tpa_mapping_failed++; m_free(m); fp->eth_q_stats.mbuf_alloc_tpa--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); /* release any existing TPA mbuf mapping */ if (tpa_info->bd.m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, tpa_info->bd.m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, tpa_info->bd.m_map); } /* save the mbuf and mapping info for the TPA mbuf */ map = tpa_info->bd.m_map; tpa_info->bd.m_map = fp->rx_tpa_info_mbuf_spare_map; fp->rx_tpa_info_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_mbuf_tag, tpa_info->bd.m_map, BUS_DMASYNC_PREREAD); tpa_info->bd.m = m; tpa_info->seg = segs[0]; return (rc); } /* * Allocate an mbuf and assign it to the receive scatter gather chain. The * caller must take care to save a copy of the existing mbuf in the SG mbuf * chain. */ static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index) { struct bxe_sw_rx_bd *sge_buf; struct eth_rx_sge *sge; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs; int rc = 0; /* allocate a new SGE mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, SGE_PAGE_SIZE); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_sge_alloc_failed++; return (ENOMEM); } fp->eth_q_stats.mbuf_alloc_sge++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = SGE_PAGE_SIZE; /* map the SGE mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_sge_mapping_failed++; m_freem(m); fp->eth_q_stats.mbuf_alloc_sge--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); sge_buf = &fp->rx_sge_mbuf_chain[index]; /* release any existing SGE mbuf mapping */ if (sge_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_sge_mbuf_tag, sge_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_sge_mbuf_tag, sge_buf->m_map); } /* save the mbuf and mapping info for a future packet */ map = sge_buf->m_map; sge_buf->m_map = fp->rx_sge_mbuf_spare_map; fp->rx_sge_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_sge_mbuf_tag, sge_buf->m_map, BUS_DMASYNC_PREREAD); sge_buf->m = m; sge = &fp->rx_sge_chain[index]; sge->addr_hi = htole32(U64_HI(segs[0].ds_addr)); sge->addr_lo = htole32(U64_LO(segs[0].ds_addr)); return (rc); } static __noinline int bxe_alloc_fp_buffers(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i, j, rc = 0; int ring_prod, cqe_ring_prod; int max_agg_queues; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; #if __FreeBSD_version >= 800000 fp->tx_br = buf_ring_alloc(BXE_BR_SIZE, M_DEVBUF, M_NOWAIT, &fp->tx_mtx); if (fp->tx_br == NULL) { BLOGE(sc, "buf_ring alloc fail for fp[%02d]\n", i); goto bxe_alloc_fp_buffers_error; } #endif ring_prod = cqe_ring_prod = 0; fp->rx_bd_cons = 0; fp->rx_cq_cons = 0; /* allocate buffers for the RX BDs in RX BD chain */ for (j = 0; j < sc->max_rx_bufs; j++) { rc = bxe_alloc_rx_bd_mbuf(fp, ring_prod, ring_prod); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] rx chain (%d)\n", i, rc); goto bxe_alloc_fp_buffers_error; } ring_prod = RX_BD_NEXT(ring_prod); cqe_ring_prod = RCQ_NEXT(cqe_ring_prod); } fp->rx_bd_prod = ring_prod; fp->rx_cq_prod = cqe_ring_prod; fp->eth_q_stats.rx_calls = fp->eth_q_stats.rx_pkts = 0; if (sc->ifnet->if_capenable & IFCAP_LRO) { max_agg_queues = MAX_AGG_QS(sc); fp->tpa_enable = TRUE; /* fill the TPA pool */ for (j = 0; j < max_agg_queues; j++) { rc = bxe_alloc_rx_tpa_mbuf(fp, j); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] TPA queue %d\n", i, j); fp->tpa_enable = FALSE; goto bxe_alloc_fp_buffers_error; } fp->rx_tpa_info[j].state = BXE_TPA_STATE_STOP; } if (fp->tpa_enable) { /* fill the RX SGE chain */ ring_prod = 0; for (j = 0; j < RX_SGE_USABLE; j++) { rc = bxe_alloc_rx_sge_mbuf(fp, ring_prod); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] SGE %d\n", i, ring_prod); fp->tpa_enable = FALSE; ring_prod = 0; goto bxe_alloc_fp_buffers_error; } ring_prod = RX_SGE_NEXT(ring_prod); } fp->rx_sge_prod = ring_prod; } } } return (0); bxe_alloc_fp_buffers_error: /* unwind what was already allocated */ bxe_free_rx_bd_chain(fp); bxe_free_tpa_pool(fp); bxe_free_sge_chain(fp); return (ENOBUFS); } static void bxe_free_fw_stats_mem(struct bxe_softc *sc) { bxe_dma_free(sc, &sc->fw_stats_dma); sc->fw_stats_num = 0; sc->fw_stats_req_size = 0; sc->fw_stats_req = NULL; sc->fw_stats_req_mapping = 0; sc->fw_stats_data_size = 0; sc->fw_stats_data = NULL; sc->fw_stats_data_mapping = 0; } static int bxe_alloc_fw_stats_mem(struct bxe_softc *sc) { uint8_t num_queue_stats; int num_groups; /* number of queues for statistics is number of eth queues */ num_queue_stats = BXE_NUM_ETH_QUEUES(sc); /* * Total number of FW statistics requests = * 1 for port stats + 1 for PF stats + num of queues */ sc->fw_stats_num = (2 + num_queue_stats); /* * Request is built from stats_query_header and an array of * stats_query_cmd_group each of which contains STATS_QUERY_CMD_COUNT * rules. The real number or requests is configured in the * stats_query_header. */ num_groups = ((sc->fw_stats_num / STATS_QUERY_CMD_COUNT) + ((sc->fw_stats_num % STATS_QUERY_CMD_COUNT) ? 1 : 0)); BLOGD(sc, DBG_LOAD, "stats fw_stats_num %d num_groups %d\n", sc->fw_stats_num, num_groups); sc->fw_stats_req_size = (sizeof(struct stats_query_header) + (num_groups * sizeof(struct stats_query_cmd_group))); /* * Data for statistics requests + stats_counter. * stats_counter holds per-STORM counters that are incremented when * STORM has finished with the current request. Memory for FCoE * offloaded statistics are counted anyway, even if they will not be sent. * VF stats are not accounted for here as the data of VF stats is stored * in memory allocated by the VF, not here. */ sc->fw_stats_data_size = (sizeof(struct stats_counter) + sizeof(struct per_port_stats) + sizeof(struct per_pf_stats) + /* sizeof(struct fcoe_statistics_params) + */ (sizeof(struct per_queue_stats) * num_queue_stats)); if (bxe_dma_alloc(sc, (sc->fw_stats_req_size + sc->fw_stats_data_size), &sc->fw_stats_dma, "fw stats") != 0) { bxe_free_fw_stats_mem(sc); return (-1); } /* set up the shortcuts */ sc->fw_stats_req = (struct bxe_fw_stats_req *)sc->fw_stats_dma.vaddr; sc->fw_stats_req_mapping = sc->fw_stats_dma.paddr; sc->fw_stats_data = (struct bxe_fw_stats_data *)((uint8_t *)sc->fw_stats_dma.vaddr + sc->fw_stats_req_size); sc->fw_stats_data_mapping = (sc->fw_stats_dma.paddr + sc->fw_stats_req_size); BLOGD(sc, DBG_LOAD, "statistics request base address set to %#jx\n", (uintmax_t)sc->fw_stats_req_mapping); BLOGD(sc, DBG_LOAD, "statistics data base address set to %#jx\n", (uintmax_t)sc->fw_stats_data_mapping); return (0); } /* * Bits map: * 0-7 - Engine0 load counter. * 8-15 - Engine1 load counter. * 16 - Engine0 RESET_IN_PROGRESS bit. * 17 - Engine1 RESET_IN_PROGRESS bit. * 18 - Engine0 ONE_IS_LOADED. Set when there is at least one active * function on the engine * 19 - Engine1 ONE_IS_LOADED. * 20 - Chip reset flow bit. When set none-leader must wait for both engines * leader to complete (check for both RESET_IN_PROGRESS bits and not * for just the one belonging to its engine). */ #define BXE_RECOVERY_GLOB_REG MISC_REG_GENERIC_POR_1 #define BXE_PATH0_LOAD_CNT_MASK 0x000000ff #define BXE_PATH0_LOAD_CNT_SHIFT 0 #define BXE_PATH1_LOAD_CNT_MASK 0x0000ff00 #define BXE_PATH1_LOAD_CNT_SHIFT 8 #define BXE_PATH0_RST_IN_PROG_BIT 0x00010000 #define BXE_PATH1_RST_IN_PROG_BIT 0x00020000 #define BXE_GLOBAL_RESET_BIT 0x00040000 /* set the GLOBAL_RESET bit, should be run under rtnl lock */ static void bxe_set_reset_global(struct bxe_softc *sc) { uint32_t val; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val | BXE_GLOBAL_RESET_BIT); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* clear the GLOBAL_RESET bit, should be run under rtnl lock */ static void bxe_clear_reset_global(struct bxe_softc *sc) { uint32_t val; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val & (~BXE_GLOBAL_RESET_BIT)); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* checks the GLOBAL_RESET bit, should be run under rtnl lock */ static uint8_t bxe_reset_is_global(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "GLOB_REG=0x%08x\n", val); return (val & BXE_GLOBAL_RESET_BIT) ? TRUE : FALSE; } /* clear RESET_IN_PROGRESS bit for the engine, should be run under rtnl lock */ static void bxe_set_reset_done(struct bxe_softc *sc) { uint32_t val; uint32_t bit = SC_PATH(sc) ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); /* Clear the bit */ val &= ~bit; REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* set RESET_IN_PROGRESS for the engine, should be run under rtnl lock */ static void bxe_set_reset_in_progress(struct bxe_softc *sc) { uint32_t val; uint32_t bit = SC_PATH(sc) ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); /* Set the bit */ val |= bit; REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* check RESET_IN_PROGRESS bit for an engine, should be run under rtnl lock */ static uint8_t bxe_reset_is_done(struct bxe_softc *sc, int engine) { uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); uint32_t bit = engine ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; /* return false if bit is set */ return (val & bit) ? FALSE : TRUE; } /* get the load status for an engine, should be run under rtnl lock */ static uint8_t bxe_get_load_status(struct bxe_softc *sc, int engine) { uint32_t mask = engine ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = engine ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old value for GLOB_REG=0x%08x\n", val); val = ((val & mask) >> shift); BLOGD(sc, DBG_LOAD, "Load mask engine %d = 0x%08x\n", engine, val); return (val != 0); } /* set pf load mark */ /* XXX needs to be under rtnl lock */ static void bxe_set_pf_load(struct bxe_softc *sc) { uint32_t val; uint32_t val1; uint32_t mask = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old value for GLOB_REG=0x%08x\n", val); /* get the current counter value */ val1 = ((val & mask) >> shift); /* set bit of this PF */ val1 |= (1 << SC_ABS_FUNC(sc)); /* clear the old value */ val &= ~mask; /* set the new one */ val |= ((val1 << shift) & mask); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* clear pf load mark */ /* XXX needs to be under rtnl lock */ static uint8_t bxe_clear_pf_load(struct bxe_softc *sc) { uint32_t val1, val; uint32_t mask = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old GEN_REG_VAL=0x%08x\n", val); /* get the current counter value */ val1 = (val & mask) >> shift; /* clear bit of that PF */ val1 &= ~(1 << SC_ABS_FUNC(sc)); /* clear the old value */ val &= ~mask; /* set the new one */ val |= ((val1 << shift) & mask); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); return (val1 != 0); } /* send load requrest to mcp and analyze response */ static int bxe_nic_load_request(struct bxe_softc *sc, uint32_t *load_code) { /* init fw_seq */ sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); BLOGD(sc, DBG_LOAD, "initial fw_seq 0x%04x\n", sc->fw_seq); /* get the current FW pulse sequence */ sc->fw_drv_pulse_wr_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_pulse_mb) & DRV_PULSE_SEQ_MASK); BLOGD(sc, DBG_LOAD, "initial drv_pulse 0x%04x\n", sc->fw_drv_pulse_wr_seq); /* load request */ (*load_code) = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_REQ, DRV_MSG_CODE_LOAD_REQ_WITH_LFA); /* if the MCP fails to respond we must abort */ if (!(*load_code)) { BLOGE(sc, "MCP response failure!\n"); return (-1); } /* if MCP refused then must abort */ if ((*load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED) { BLOGE(sc, "MCP refused load request\n"); return (-1); } return (0); } /* * Check whether another PF has already loaded FW to chip. In virtualized * environments a pf from anoth VM may have already initialized the device * including loading FW. */ static int bxe_nic_load_analyze_req(struct bxe_softc *sc, uint32_t load_code) { uint32_t my_fw, loaded_fw; /* is another pf loaded on this engine? */ if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) && (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) { /* build my FW version dword */ my_fw = (BCM_5710_FW_MAJOR_VERSION + (BCM_5710_FW_MINOR_VERSION << 8 ) + (BCM_5710_FW_REVISION_VERSION << 16) + (BCM_5710_FW_ENGINEERING_VERSION << 24)); /* read loaded FW from chip */ loaded_fw = REG_RD(sc, XSEM_REG_PRAM); BLOGD(sc, DBG_LOAD, "loaded FW 0x%08x / my FW 0x%08x\n", loaded_fw, my_fw); /* abort nic load if version mismatch */ if (my_fw != loaded_fw) { BLOGE(sc, "FW 0x%08x already loaded (mine is 0x%08x)", loaded_fw, my_fw); return (-1); } } return (0); } /* mark PMF if applicable */ static void bxe_nic_load_pmf(struct bxe_softc *sc, uint32_t load_code) { uint32_t ncsi_oem_data_addr; if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) || (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) || (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) { /* * Barrier here for ordering between the writing to sc->port.pmf here * and reading it from the periodic task. */ sc->port.pmf = 1; mb(); } else { sc->port.pmf = 0; } BLOGD(sc, DBG_LOAD, "pmf %d\n", sc->port.pmf); /* XXX needed? */ if (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) { if (SHMEM2_HAS(sc, ncsi_oem_data_addr)) { ncsi_oem_data_addr = SHMEM2_RD(sc, ncsi_oem_data_addr); if (ncsi_oem_data_addr) { REG_WR(sc, (ncsi_oem_data_addr + offsetof(struct glob_ncsi_oem_data, driver_version)), 0); } } } } static void bxe_read_mf_cfg(struct bxe_softc *sc) { int n = (CHIP_IS_MODE_4_PORT(sc) ? 2 : 1); int abs_func; int vn; if (BXE_NOMCP(sc)) { return; /* what should be the default bvalue in this case */ } /* * The formula for computing the absolute function number is... * For 2 port configuration (4 functions per port): * abs_func = 2 * vn + SC_PORT + SC_PATH * For 4 port configuration (2 functions per port): * abs_func = 4 * vn + 2 * SC_PORT + SC_PATH */ for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { abs_func = (n * (2 * vn + SC_PORT(sc)) + SC_PATH(sc)); if (abs_func >= E1H_FUNC_MAX) { break; } sc->devinfo.mf_info.mf_config[vn] = MFCFG_RD(sc, func_mf_config[abs_func].config); } if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) { BLOGD(sc, DBG_LOAD, "mf_cfg function disabled\n"); sc->flags |= BXE_MF_FUNC_DIS; } else { BLOGD(sc, DBG_LOAD, "mf_cfg function enabled\n"); sc->flags &= ~BXE_MF_FUNC_DIS; } } /* acquire split MCP access lock register */ static int bxe_acquire_alr(struct bxe_softc *sc) { uint32_t j, val; for (j = 0; j < 1000; j++) { val = (1UL << 31); REG_WR(sc, GRCBASE_MCP + 0x9c, val); val = REG_RD(sc, GRCBASE_MCP + 0x9c); if (val & (1L << 31)) break; DELAY(5000); } if (!(val & (1L << 31))) { BLOGE(sc, "Cannot acquire MCP access lock register\n"); return (-1); } return (0); } /* release split MCP access lock register */ static void bxe_release_alr(struct bxe_softc *sc) { REG_WR(sc, GRCBASE_MCP + 0x9c, 0); } static void bxe_fan_failure(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t ext_phy_config; /* mark the failure */ ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); ext_phy_config &= ~PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK; ext_phy_config |= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE; SHMEM_WR(sc, dev_info.port_hw_config[port].external_phy_config, ext_phy_config); /* log the failure */ BLOGW(sc, "Fan Failure has caused the driver to shutdown " "the card to prevent permanent damage. " "Please contact OEM Support for assistance\n"); /* XXX */ #if 1 bxe_panic(sc, ("Schedule task to handle fan failure\n")); #else /* * Schedule device reset (unload) * This is due to some boards consuming sufficient power when driver is * up to overheat if fan fails. */ bxe_set_bit(BXE_SP_RTNL_FAN_FAILURE, &sc->sp_rtnl_state); schedule_delayed_work(&sc->sp_rtnl_task, 0); #endif } /* this function is called upon a link interrupt */ static void bxe_link_attn(struct bxe_softc *sc) { uint32_t pause_enabled = 0; struct host_port_stats *pstats; int cmng_fns; /* Make sure that we are synced with the current statistics */ bxe_stats_handle(sc, STATS_EVENT_STOP); elink_link_update(&sc->link_params, &sc->link_vars); if (sc->link_vars.link_up) { /* dropless flow control */ if (!CHIP_IS_E1(sc) && sc->dropless_fc) { pause_enabled = 0; if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) { pause_enabled = 1; } REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_ETH_PAUSE_ENABLED_OFFSET(SC_PORT(sc))), pause_enabled); } if (sc->link_vars.mac_type != ELINK_MAC_TYPE_EMAC) { pstats = BXE_SP(sc, port_stats); /* reset old mac stats */ memset(&(pstats->mac_stx[0]), 0, sizeof(struct mac_stx)); } if (sc->state == BXE_STATE_OPEN) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } } if (sc->link_vars.link_up && sc->link_vars.line_speed) { cmng_fns = bxe_get_cmng_fns_mode(sc); if (cmng_fns != CMNG_FNS_NONE) { bxe_cmng_fns_init(sc, FALSE, cmng_fns); storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } else { /* rate shaping and fairness are disabled */ BLOGD(sc, DBG_LOAD, "single function mode without fairness\n"); } } bxe_link_report_locked(sc); if (IS_MF(sc)) { ; // XXX bxe_link_sync_notify(sc); } } static void bxe_attn_int_asserted(struct bxe_softc *sc, uint32_t asserted) { int port = SC_PORT(sc); uint32_t aeu_addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; uint32_t nig_int_mask_addr = port ? NIG_REG_MASK_INTERRUPT_PORT1 : NIG_REG_MASK_INTERRUPT_PORT0; uint32_t aeu_mask; uint32_t nig_mask = 0; uint32_t reg_addr; uint32_t igu_acked; uint32_t cnt; if (sc->attn_state & asserted) { BLOGE(sc, "IGU ERROR attn=0x%08x\n", asserted); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); aeu_mask = REG_RD(sc, aeu_addr); BLOGD(sc, DBG_INTR, "aeu_mask 0x%08x newly asserted 0x%08x\n", aeu_mask, asserted); aeu_mask &= ~(asserted & 0x3ff); BLOGD(sc, DBG_INTR, "new mask 0x%08x\n", aeu_mask); REG_WR(sc, aeu_addr, aeu_mask); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); BLOGD(sc, DBG_INTR, "attn_state 0x%08x\n", sc->attn_state); sc->attn_state |= asserted; BLOGD(sc, DBG_INTR, "new state 0x%08x\n", sc->attn_state); if (asserted & ATTN_HARD_WIRED_MASK) { if (asserted & ATTN_NIG_FOR_FUNC) { BXE_PHY_LOCK(sc); /* save nig interrupt mask */ nig_mask = REG_RD(sc, nig_int_mask_addr); /* If nig_mask is not set, no need to call the update function */ if (nig_mask) { REG_WR(sc, nig_int_mask_addr, 0); bxe_link_attn(sc); } /* handle unicore attn? */ } if (asserted & ATTN_SW_TIMER_4_FUNC) { BLOGD(sc, DBG_INTR, "ATTN_SW_TIMER_4_FUNC!\n"); } if (asserted & GPIO_2_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_2_FUNC!\n"); } if (asserted & GPIO_3_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_3_FUNC!\n"); } if (asserted & GPIO_4_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_4_FUNC!\n"); } if (port == 0) { if (asserted & ATTN_GENERAL_ATTN_1) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_1!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_1, 0x0); } if (asserted & ATTN_GENERAL_ATTN_2) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_2!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_2, 0x0); } if (asserted & ATTN_GENERAL_ATTN_3) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_3!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_3, 0x0); } } else { if (asserted & ATTN_GENERAL_ATTN_4) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_4!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_4, 0x0); } if (asserted & ATTN_GENERAL_ATTN_5) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_5!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_5, 0x0); } if (asserted & ATTN_GENERAL_ATTN_6) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_6!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_6, 0x0); } } } /* hardwired */ if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_addr = (HC_REG_COMMAND_REG + port*32 + COMMAND_REG_ATTN_BITS_SET); } else { reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_SET_UPPER*8); } BLOGD(sc, DBG_INTR, "about to mask 0x%08x at %s addr 0x%08x\n", asserted, (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU", reg_addr); REG_WR(sc, reg_addr, asserted); /* now set back the mask */ if (asserted & ATTN_NIG_FOR_FUNC) { /* * Verify that IGU ack through BAR was written before restoring * NIG mask. This loop should exit after 2-3 iterations max. */ if (sc->devinfo.int_block != INT_BLOCK_HC) { cnt = 0; do { igu_acked = REG_RD(sc, IGU_REG_ATTENTION_ACK_BITS); } while (((igu_acked & ATTN_NIG_FOR_FUNC) == 0) && (++cnt < MAX_IGU_ATTN_ACK_TO)); if (!igu_acked) { BLOGE(sc, "Failed to verify IGU ack on time\n"); } mb(); } REG_WR(sc, nig_int_mask_addr, nig_mask); BXE_PHY_UNLOCK(sc); } } static void bxe_print_next_block(struct bxe_softc *sc, int idx, const char *blk) { BLOGI(sc, "%s%s", idx ? ", " : "", blk); } static int bxe_check_blocks_with_parity0(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "BRB"); break; case AEU_INPUTS_ATTN_BITS_PARSER_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PARSER"); break; case AEU_INPUTS_ATTN_BITS_TSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TSDM"); break; case AEU_INPUTS_ATTN_BITS_SEARCHER_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "SEARCHER"); break; case AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TCM"); break; case AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TSEMI"); break; case AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XPB"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity1(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t *global, uint8_t print) { int i = 0; uint32_t cur_bit = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_PBF_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PBF"); break; case AEU_INPUTS_ATTN_BITS_QM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "QM"); break; case AEU_INPUTS_ATTN_BITS_TIMERS_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TM"); break; case AEU_INPUTS_ATTN_BITS_XSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XSDM"); break; case AEU_INPUTS_ATTN_BITS_XCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XCM"); break; case AEU_INPUTS_ATTN_BITS_XSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XSEMI"); break; case AEU_INPUTS_ATTN_BITS_DOORBELLQ_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DOORBELLQ"); break; case AEU_INPUTS_ATTN_BITS_NIG_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "NIG"); break; case AEU_INPUTS_ATTN_BITS_VAUX_PCI_CORE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "VAUX PCI CORE"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_DEBUG_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DEBUG"); break; case AEU_INPUTS_ATTN_BITS_USDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "USDM"); break; case AEU_INPUTS_ATTN_BITS_UCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "UCM"); break; case AEU_INPUTS_ATTN_BITS_USEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "USEMI"); break; case AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "UPB"); break; case AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CSDM"); break; case AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CCM"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity2(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_CSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CSEMI"); break; case AEU_INPUTS_ATTN_BITS_PXP_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PXP"); break; case AEU_IN_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PXPPCICLOCKCLIENT"); break; case AEU_INPUTS_ATTN_BITS_CFC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CFC"); break; case AEU_INPUTS_ATTN_BITS_CDU_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CDU"); break; case AEU_INPUTS_ATTN_BITS_DMAE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DMAE"); break; case AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "IGU"); break; case AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "MISC"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity3(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t *global, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP ROM"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP UMP RX"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP UMP TX"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP SCPAD"); *global = TRUE; break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity4(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PGLUE_B"); break; case AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "ATC"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static uint8_t bxe_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print, uint32_t *sig) { int par_num = 0; if ((sig[0] & HW_PRTY_ASSERT_SET_0) || (sig[1] & HW_PRTY_ASSERT_SET_1) || (sig[2] & HW_PRTY_ASSERT_SET_2) || (sig[3] & HW_PRTY_ASSERT_SET_3) || (sig[4] & HW_PRTY_ASSERT_SET_4)) { BLOGE(sc, "Parity error: HW block parity attention:\n" "[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x\n", (uint32_t)(sig[0] & HW_PRTY_ASSERT_SET_0), (uint32_t)(sig[1] & HW_PRTY_ASSERT_SET_1), (uint32_t)(sig[2] & HW_PRTY_ASSERT_SET_2), (uint32_t)(sig[3] & HW_PRTY_ASSERT_SET_3), (uint32_t)(sig[4] & HW_PRTY_ASSERT_SET_4)); if (print) BLOGI(sc, "Parity errors detected in blocks: "); par_num = bxe_check_blocks_with_parity0(sc, sig[0] & HW_PRTY_ASSERT_SET_0, par_num, print); par_num = bxe_check_blocks_with_parity1(sc, sig[1] & HW_PRTY_ASSERT_SET_1, par_num, global, print); par_num = bxe_check_blocks_with_parity2(sc, sig[2] & HW_PRTY_ASSERT_SET_2, par_num, print); par_num = bxe_check_blocks_with_parity3(sc, sig[3] & HW_PRTY_ASSERT_SET_3, par_num, global, print); par_num = bxe_check_blocks_with_parity4(sc, sig[4] & HW_PRTY_ASSERT_SET_4, par_num, print); if (print) BLOGI(sc, "\n"); return (TRUE); } return (FALSE); } static uint8_t bxe_chk_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print) { struct attn_route attn = { {0} }; int port = SC_PORT(sc); attn.sig[0] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + port*4); attn.sig[1] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_2_FUNC_0 + port*4); attn.sig[2] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_3_FUNC_0 + port*4); attn.sig[3] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4); if (!CHIP_IS_E1x(sc)) attn.sig[4] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_5_FUNC_0 + port*4); return (bxe_parity_attn(sc, global, print, attn.sig)); } static void bxe_attn_int_deasserted4(struct bxe_softc *sc, uint32_t attn) { uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_PGLUE_HW_INTERRUPT) { val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS_CLR); BLOGE(sc, "PGLUE hw attention 0x%08x\n", val); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW\n"); } if (attn & AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT) { val = REG_RD(sc, ATC_REG_ATC_INT_STS_CLR); BLOGE(sc, "ATC hw attention 0x%08x\n", val); if (val & ATC_ATC_INT_STS_REG_ADDRESS_ERROR) BLOGE(sc, "ATC_ATC_INT_STS_REG_ADDRESS_ERROR\n"); if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND\n"); if (val & ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS\n"); if (val & ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT\n"); if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR\n"); if (val & ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU\n"); } if (attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)) { BLOGE(sc, "FATAL parity attention set4 0x%08x\n", (uint32_t)(attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR))); } } static void bxe_e1h_disable(struct bxe_softc *sc) { int port = SC_PORT(sc); bxe_tx_disable(sc); REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 0); } static void bxe_e1h_enable(struct bxe_softc *sc) { int port = SC_PORT(sc); REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 1); // XXX bxe_tx_enable(sc); } /* * called due to MCP event (on pmf): * reread new bandwidth configuration * configure FW * notify others function about the change */ static void bxe_config_mf_bw(struct bxe_softc *sc) { if (sc->link_vars.link_up) { bxe_cmng_fns_init(sc, TRUE, CMNG_FNS_MINMAX); // XXX bxe_link_sync_notify(sc); } storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } static void bxe_set_mf_bw(struct bxe_softc *sc) { bxe_config_mf_bw(sc); bxe_fw_command(sc, DRV_MSG_CODE_SET_MF_BW_ACK, 0); } static void bxe_handle_eee_event(struct bxe_softc *sc) { BLOGD(sc, DBG_INTR, "EEE - LLDP event\n"); bxe_fw_command(sc, DRV_MSG_CODE_EEE_RESULTS_ACK, 0); } #define DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED 3 static void bxe_drv_info_ether_stat(struct bxe_softc *sc) { struct eth_stats_info *ether_stat = &sc->sp->drv_info_to_mcp.ether_stat; strlcpy(ether_stat->version, BXE_DRIVER_VERSION, ETH_STAT_INFO_VERSION_LEN); /* XXX (+ MAC_PAD) taken from other driver... verify this is right */ sc->sp_objs[0].mac_obj.get_n_elements(sc, &sc->sp_objs[0].mac_obj, DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED, ether_stat->mac_local + MAC_PAD, MAC_PAD, ETH_ALEN); ether_stat->mtu_size = sc->mtu; ether_stat->feature_flags |= FEATURE_ETH_CHKSUM_OFFLOAD_MASK; if (sc->ifnet->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) { ether_stat->feature_flags |= FEATURE_ETH_LSO_MASK; } // XXX ether_stat->feature_flags |= ???; ether_stat->promiscuous_mode = 0; // (flags & PROMISC) ? 1 : 0; ether_stat->txq_size = sc->tx_ring_size; ether_stat->rxq_size = sc->rx_ring_size; } static void bxe_handle_drv_info_req(struct bxe_softc *sc) { enum drv_info_opcode op_code; uint32_t drv_info_ctl = SHMEM2_RD(sc, drv_info_control); /* if drv_info version supported by MFW doesn't match - send NACK */ if ((drv_info_ctl & DRV_INFO_CONTROL_VER_MASK) != DRV_INFO_CUR_VER) { bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_NACK, 0); return; } op_code = ((drv_info_ctl & DRV_INFO_CONTROL_OP_CODE_MASK) >> DRV_INFO_CONTROL_OP_CODE_SHIFT); memset(&sc->sp->drv_info_to_mcp, 0, sizeof(union drv_info_to_mcp)); switch (op_code) { case ETH_STATS_OPCODE: bxe_drv_info_ether_stat(sc); break; case FCOE_STATS_OPCODE: case ISCSI_STATS_OPCODE: default: /* if op code isn't supported - send NACK */ bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_NACK, 0); return; } /* * If we got drv_info attn from MFW then these fields are defined in * shmem2 for sure */ SHMEM2_WR(sc, drv_info_host_addr_lo, U64_LO(BXE_SP_MAPPING(sc, drv_info_to_mcp))); SHMEM2_WR(sc, drv_info_host_addr_hi, U64_HI(BXE_SP_MAPPING(sc, drv_info_to_mcp))); bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_ACK, 0); } static void bxe_dcc_event(struct bxe_softc *sc, uint32_t dcc_event) { BLOGD(sc, DBG_INTR, "dcc_event 0x%08x\n", dcc_event); if (dcc_event & DRV_STATUS_DCC_DISABLE_ENABLE_PF) { /* * This is the only place besides the function initialization * where the sc->flags can change so it is done without any * locks */ if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) { BLOGD(sc, DBG_INTR, "mf_cfg function disabled\n"); sc->flags |= BXE_MF_FUNC_DIS; bxe_e1h_disable(sc); } else { BLOGD(sc, DBG_INTR, "mf_cfg function enabled\n"); sc->flags &= ~BXE_MF_FUNC_DIS; bxe_e1h_enable(sc); } dcc_event &= ~DRV_STATUS_DCC_DISABLE_ENABLE_PF; } if (dcc_event & DRV_STATUS_DCC_BANDWIDTH_ALLOCATION) { bxe_config_mf_bw(sc); dcc_event &= ~DRV_STATUS_DCC_BANDWIDTH_ALLOCATION; } /* Report results to MCP */ if (dcc_event) bxe_fw_command(sc, DRV_MSG_CODE_DCC_FAILURE, 0); else bxe_fw_command(sc, DRV_MSG_CODE_DCC_OK, 0); } static void bxe_pmf_update(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t val; sc->port.pmf = 1; BLOGD(sc, DBG_INTR, "pmf %d\n", sc->port.pmf); /* * We need the mb() to ensure the ordering between the writing to * sc->port.pmf here and reading it from the bxe_periodic_task(). */ mb(); /* queue a periodic task */ // XXX schedule task... // XXX bxe_dcbx_pmf_update(sc); /* enable nig attention */ val = (0xff0f | (1 << (SC_VN(sc) + 4))); if (sc->devinfo.int_block == INT_BLOCK_HC) { REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, val); REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, val); } else if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, val); } bxe_stats_handle(sc, STATS_EVENT_PMF); } static int bxe_mc_assert(struct bxe_softc *sc) { char last_idx; int i, rc = 0; uint32_t row0, row1, row2, row3; /* XSTORM */ last_idx = REG_RD8(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) BLOGE(sc, "XSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "XSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* TSTORM */ last_idx = REG_RD8(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "TSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "TSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* CSTORM */ last_idx = REG_RD8(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "CSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "CSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* USTORM */ last_idx = REG_RD8(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "USTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "USTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } return (rc); } static void bxe_attn_int_deasserted3(struct bxe_softc *sc, uint32_t attn) { int func = SC_FUNC(sc); uint32_t val; if (attn & EVEREST_GEN_ATTN_IN_USE_MASK) { if (attn & BXE_PMF_LINK_ASSERT(sc)) { REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); bxe_read_mf_cfg(sc); sc->devinfo.mf_info.mf_config[SC_VN(sc)] = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); val = SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_status); if (val & DRV_STATUS_DCC_EVENT_MASK) bxe_dcc_event(sc, (val & DRV_STATUS_DCC_EVENT_MASK)); if (val & DRV_STATUS_SET_MF_BW) bxe_set_mf_bw(sc); if (val & DRV_STATUS_DRV_INFO_REQ) bxe_handle_drv_info_req(sc); #if 0 if (val & DRV_STATUS_VF_DISABLED) bxe_vf_handle_flr_event(sc); #endif if ((sc->port.pmf == 0) && (val & DRV_STATUS_PMF)) bxe_pmf_update(sc); #if 0 if (sc->port.pmf && (val & DRV_STATUS_DCBX_NEGOTIATION_RESULTS) && (sc->dcbx_enabled > 0)) /* start dcbx state machine */ bxe_dcbx_set_params(sc, BXE_DCBX_STATE_NEG_RECEIVED); #endif #if 0 if (val & DRV_STATUS_AFEX_EVENT_MASK) bxe_handle_afex_cmd(sc, val & DRV_STATUS_AFEX_EVENT_MASK); #endif if (val & DRV_STATUS_EEE_NEGOTIATION_RESULTS) bxe_handle_eee_event(sc); if (sc->link_vars.periodic_flags & ELINK_PERIODIC_FLAGS_LINK_EVENT) { /* sync with link */ BXE_PHY_LOCK(sc); sc->link_vars.periodic_flags &= ~ELINK_PERIODIC_FLAGS_LINK_EVENT; BXE_PHY_UNLOCK(sc); if (IS_MF(sc)) ; // XXX bxe_link_sync_notify(sc); bxe_link_report(sc); } /* * Always call it here: bxe_link_report() will * prevent the link indication duplication. */ bxe_link_status_update(sc); } else if (attn & BXE_MC_ASSERT_BITS) { BLOGE(sc, "MC assert!\n"); bxe_mc_assert(sc); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_10, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_9, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_8, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_7, 0); bxe_panic(sc, ("MC assert!\n")); } else if (attn & BXE_MCP_ASSERT) { BLOGE(sc, "MCP assert!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_11, 0); // XXX bxe_fw_dump(sc); } else { BLOGE(sc, "Unknown HW assert! (attn 0x%08x)\n", attn); } } if (attn & EVEREST_LATCHED_ATTN_IN_USE_MASK) { BLOGE(sc, "LATCHED attention 0x%08x (masked)\n", attn); if (attn & BXE_GRC_TIMEOUT) { val = CHIP_IS_E1(sc) ? 0 : REG_RD(sc, MISC_REG_GRC_TIMEOUT_ATTN); BLOGE(sc, "GRC time-out 0x%08x\n", val); } if (attn & BXE_GRC_RSV) { val = CHIP_IS_E1(sc) ? 0 : REG_RD(sc, MISC_REG_GRC_RSV_ATTN); BLOGE(sc, "GRC reserved 0x%08x\n", val); } REG_WR(sc, MISC_REG_AEU_CLR_LATCH_SIGNAL, 0x7ff); } } static void bxe_attn_int_deasserted2(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val0, mask0, val1, mask1; uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_CFC_HW_INTERRUPT) { val = REG_RD(sc, CFC_REG_CFC_INT_STS_CLR); BLOGE(sc, "CFC hw attention 0x%08x\n", val); /* CFC error attention */ if (val & 0x2) { BLOGE(sc, "FATAL error from CFC\n"); } } if (attn & AEU_INPUTS_ATTN_BITS_PXP_HW_INTERRUPT) { val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_0); BLOGE(sc, "PXP hw attention-0 0x%08x\n", val); /* RQ_USDMDP_FIFO_OVERFLOW */ if (val & 0x18000) { BLOGE(sc, "FATAL error from PXP\n"); } if (!CHIP_IS_E1x(sc)) { val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_1); BLOGE(sc, "PXP hw attention-1 0x%08x\n", val); } } #define PXP2_EOP_ERROR_BIT PXP2_PXP2_INT_STS_CLR_0_REG_WR_PGLUE_EOP_ERROR #define AEU_PXP2_HW_INT_BIT AEU_INPUTS_ATTN_BITS_PXPPCICLOCKCLIENT_HW_INTERRUPT if (attn & AEU_PXP2_HW_INT_BIT) { /* CQ47854 workaround do not panic on * PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR */ if (!CHIP_IS_E1x(sc)) { mask0 = REG_RD(sc, PXP2_REG_PXP2_INT_MASK_0); val1 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_1); mask1 = REG_RD(sc, PXP2_REG_PXP2_INT_MASK_1); val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_0); /* * If the olny PXP2_EOP_ERROR_BIT is set in * STS0 and STS1 - clear it * * probably we lose additional attentions between * STS0 and STS_CLR0, in this case user will not * be notified about them */ if (val0 & mask0 & PXP2_EOP_ERROR_BIT && !(val1 & mask1)) val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_CLR_0); /* print the register, since no one can restore it */ BLOGE(sc, "PXP2_REG_PXP2_INT_STS_CLR_0 0x%08x\n", val0); /* * if PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR * then notify */ if (val0 & PXP2_EOP_ERROR_BIT) { BLOGE(sc, "PXP2_WR_PGLUE_EOP_ERROR\n"); /* * if only PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR is * set then clear attention from PXP2 block without panic */ if (((val0 & mask0) == PXP2_EOP_ERROR_BIT) && ((val1 & mask1) == 0)) attn &= ~AEU_PXP2_HW_INT_BIT; } } } if (attn & HW_INTERRUT_ASSERT_SET_2) { reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_2 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2); val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_2); REG_WR(sc, reg_offset, val); BLOGE(sc, "FATAL HW block attention set2 0x%x\n", (uint32_t)(attn & HW_INTERRUT_ASSERT_SET_2)); bxe_panic(sc, ("HW block attention set2\n")); } } static void bxe_attn_int_deasserted1(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_DOORBELLQ_HW_INTERRUPT) { val = REG_RD(sc, DORQ_REG_DORQ_INT_STS_CLR); BLOGE(sc, "DB hw attention 0x%08x\n", val); /* DORQ discard attention */ if (val & 0x2) { BLOGE(sc, "FATAL error from DORQ\n"); } } if (attn & HW_INTERRUT_ASSERT_SET_1) { reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_1 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1); val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_1); REG_WR(sc, reg_offset, val); BLOGE(sc, "FATAL HW block attention set1 0x%08x\n", (uint32_t)(attn & HW_INTERRUT_ASSERT_SET_1)); bxe_panic(sc, ("HW block attention set1\n")); } } static void bxe_attn_int_deasserted0(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val; reg_offset = (port) ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0; if (attn & AEU_INPUTS_ATTN_BITS_SPIO5) { val = REG_RD(sc, reg_offset); val &= ~AEU_INPUTS_ATTN_BITS_SPIO5; REG_WR(sc, reg_offset, val); BLOGW(sc, "SPIO5 hw attention\n"); /* Fan failure attention */ elink_hw_reset_phy(&sc->link_params); bxe_fan_failure(sc); } if ((attn & sc->link_vars.aeu_int_mask) && sc->port.pmf) { BXE_PHY_LOCK(sc); elink_handle_module_detect_int(&sc->link_params); BXE_PHY_UNLOCK(sc); } if (attn & HW_INTERRUT_ASSERT_SET_0) { val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_0); REG_WR(sc, reg_offset, val); bxe_panic(sc, ("FATAL HW block attention set0 0x%lx\n", (attn & HW_INTERRUT_ASSERT_SET_0))); } } static void bxe_attn_int_deasserted(struct bxe_softc *sc, uint32_t deasserted) { struct attn_route attn; struct attn_route *group_mask; int port = SC_PORT(sc); int index; uint32_t reg_addr; uint32_t val; uint32_t aeu_mask; uint8_t global = FALSE; /* * Need to take HW lock because MCP or other port might also * try to handle this event. */ bxe_acquire_alr(sc); if (bxe_chk_parity_attn(sc, &global, TRUE)) { /* XXX * In case of parity errors don't handle attentions so that * other function would "see" parity errors. */ sc->recovery_state = BXE_RECOVERY_INIT; // XXX schedule a recovery task... /* disable HW interrupts */ bxe_int_disable(sc); bxe_release_alr(sc); return; } attn.sig[0] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + port*4); attn.sig[1] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_2_FUNC_0 + port*4); attn.sig[2] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_3_FUNC_0 + port*4); attn.sig[3] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4); if (!CHIP_IS_E1x(sc)) { attn.sig[4] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_5_FUNC_0 + port*4); } else { attn.sig[4] = 0; } BLOGD(sc, DBG_INTR, "attn: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", attn.sig[0], attn.sig[1], attn.sig[2], attn.sig[3], attn.sig[4]); for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { if (deasserted & (1 << index)) { group_mask = &sc->attn_group[index]; BLOGD(sc, DBG_INTR, "group[%d]: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", index, group_mask->sig[0], group_mask->sig[1], group_mask->sig[2], group_mask->sig[3], group_mask->sig[4]); bxe_attn_int_deasserted4(sc, attn.sig[4] & group_mask->sig[4]); bxe_attn_int_deasserted3(sc, attn.sig[3] & group_mask->sig[3]); bxe_attn_int_deasserted1(sc, attn.sig[1] & group_mask->sig[1]); bxe_attn_int_deasserted2(sc, attn.sig[2] & group_mask->sig[2]); bxe_attn_int_deasserted0(sc, attn.sig[0] & group_mask->sig[0]); } } bxe_release_alr(sc); if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_addr = (HC_REG_COMMAND_REG + port*32 + COMMAND_REG_ATTN_BITS_CLR); } else { reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_CLR_UPPER*8); } val = ~deasserted; BLOGD(sc, DBG_INTR, "about to mask 0x%08x at %s addr 0x%08x\n", val, (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU", reg_addr); REG_WR(sc, reg_addr, val); if (~sc->attn_state & deasserted) { BLOGE(sc, "IGU error\n"); } reg_addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); aeu_mask = REG_RD(sc, reg_addr); BLOGD(sc, DBG_INTR, "aeu_mask 0x%08x newly deasserted 0x%08x\n", aeu_mask, deasserted); aeu_mask |= (deasserted & 0x3ff); BLOGD(sc, DBG_INTR, "new mask 0x%08x\n", aeu_mask); REG_WR(sc, reg_addr, aeu_mask); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); BLOGD(sc, DBG_INTR, "attn_state 0x%08x\n", sc->attn_state); sc->attn_state &= ~deasserted; BLOGD(sc, DBG_INTR, "new state 0x%08x\n", sc->attn_state); } static void bxe_attn_int(struct bxe_softc *sc) { /* read local copy of bits */ uint32_t attn_bits = le32toh(sc->def_sb->atten_status_block.attn_bits); uint32_t attn_ack = le32toh(sc->def_sb->atten_status_block.attn_bits_ack); uint32_t attn_state = sc->attn_state; /* look for changed bits */ uint32_t asserted = attn_bits & ~attn_ack & ~attn_state; uint32_t deasserted = ~attn_bits & attn_ack & attn_state; BLOGD(sc, DBG_INTR, "attn_bits 0x%08x attn_ack 0x%08x asserted 0x%08x deasserted 0x%08x\n", attn_bits, attn_ack, asserted, deasserted); if (~(attn_bits ^ attn_ack) & (attn_bits ^ attn_state)) { BLOGE(sc, "BAD attention state\n"); } /* handle bits that were raised */ if (asserted) { bxe_attn_int_asserted(sc, asserted); } if (deasserted) { bxe_attn_int_deasserted(sc, deasserted); } } static uint16_t bxe_update_dsb_idx(struct bxe_softc *sc) { struct host_sp_status_block *def_sb = sc->def_sb; uint16_t rc = 0; mb(); /* status block is written to by the chip */ if (sc->def_att_idx != def_sb->atten_status_block.attn_bits_index) { sc->def_att_idx = def_sb->atten_status_block.attn_bits_index; rc |= BXE_DEF_SB_ATT_IDX; } if (sc->def_idx != def_sb->sp_sb.running_index) { sc->def_idx = def_sb->sp_sb.running_index; rc |= BXE_DEF_SB_IDX; } mb(); return (rc); } static inline struct ecore_queue_sp_obj * bxe_cid_to_q_obj(struct bxe_softc *sc, uint32_t cid) { BLOGD(sc, DBG_SP, "retrieving fp from cid %d\n", cid); return (&sc->sp_objs[CID_TO_FP(cid, sc)].q_obj); } static void bxe_handle_mcast_eqe(struct bxe_softc *sc) { struct ecore_mcast_ramrod_params rparam; int rc; memset(&rparam, 0, sizeof(rparam)); rparam.mcast_obj = &sc->mcast_obj; BXE_MCAST_LOCK(sc); /* clear pending state for the last command */ sc->mcast_obj.raw.clear_pending(&sc->mcast_obj.raw); /* if there are pending mcast commands - send them */ if (sc->mcast_obj.check_pending(&sc->mcast_obj)) { rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); if (rc < 0) { BLOGD(sc, DBG_SP, "ERROR: Failed to send pending mcast commands (%d)\n", rc); } } BXE_MCAST_UNLOCK(sc); } static void bxe_handle_classification_eqe(struct bxe_softc *sc, union event_ring_elem *elem) { unsigned long ramrod_flags = 0; int rc = 0; uint32_t cid = elem->message.data.eth_event.echo & BXE_SWCID_MASK; struct ecore_vlan_mac_obj *vlan_mac_obj; /* always push next commands out, don't wait here */ bit_set(&ramrod_flags, RAMROD_CONT); switch (le32toh(elem->message.data.eth_event.echo) >> BXE_SWCID_SHIFT) { case ECORE_FILTER_MAC_PENDING: BLOGD(sc, DBG_SP, "Got SETUP_MAC completions\n"); vlan_mac_obj = &sc->sp_objs[cid].mac_obj; break; case ECORE_FILTER_MCAST_PENDING: BLOGD(sc, DBG_SP, "Got SETUP_MCAST completions\n"); /* * This is only relevant for 57710 where multicast MACs are * configured as unicast MACs using the same ramrod. */ bxe_handle_mcast_eqe(sc); return; default: BLOGE(sc, "Unsupported classification command: %d\n", elem->message.data.eth_event.echo); return; } rc = vlan_mac_obj->complete(sc, vlan_mac_obj, elem, &ramrod_flags); if (rc < 0) { BLOGE(sc, "Failed to schedule new commands (%d)\n", rc); } else if (rc > 0) { BLOGD(sc, DBG_SP, "Scheduled next pending commands...\n"); } } static void bxe_handle_rx_mode_eqe(struct bxe_softc *sc, union event_ring_elem *elem) { bxe_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state); /* send rx_mode command again if was requested */ if (bxe_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) { bxe_set_storm_rx_mode(sc); } #if 0 else if (bxe_test_and_clear_bit(ECORE_FILTER_ISCSI_ETH_START_SCHED, &sc->sp_state)) { bxe_set_iscsi_eth_rx_mode(sc, TRUE); } else if (bxe_test_and_clear_bit(ECORE_FILTER_ISCSI_ETH_STOP_SCHED, &sc->sp_state)) { bxe_set_iscsi_eth_rx_mode(sc, FALSE); } #endif } static void bxe_update_eq_prod(struct bxe_softc *sc, uint16_t prod) { storm_memset_eq_prod(sc, prod, SC_FUNC(sc)); wmb(); /* keep prod updates ordered */ } static void bxe_eq_int(struct bxe_softc *sc) { uint16_t hw_cons, sw_cons, sw_prod; union event_ring_elem *elem; uint8_t echo; uint32_t cid; uint8_t opcode; int spqe_cnt = 0; struct ecore_queue_sp_obj *q_obj; struct ecore_func_sp_obj *f_obj = &sc->func_obj; struct ecore_raw_obj *rss_raw = &sc->rss_conf_obj.raw; hw_cons = le16toh(*sc->eq_cons_sb); /* * The hw_cons range is 1-255, 257 - the sw_cons range is 0-254, 256. * when we get to the next-page we need to adjust so the loop * condition below will be met. The next element is the size of a * regular element and hence incrementing by 1 */ if ((hw_cons & EQ_DESC_MAX_PAGE) == EQ_DESC_MAX_PAGE) { hw_cons++; } /* * This function may never run in parallel with itself for a * specific sc and no need for a read memory barrier here. */ sw_cons = sc->eq_cons; sw_prod = sc->eq_prod; BLOGD(sc, DBG_SP,"EQ: hw_cons=%u sw_cons=%u eq_spq_left=0x%lx\n", hw_cons, sw_cons, atomic_load_acq_long(&sc->eq_spq_left)); for (; sw_cons != hw_cons; sw_prod = NEXT_EQ_IDX(sw_prod), sw_cons = NEXT_EQ_IDX(sw_cons)) { elem = &sc->eq[EQ_DESC(sw_cons)]; #if 0 int rc; rc = bxe_iov_eq_sp_event(sc, elem); if (!rc) { BLOGE(sc, "bxe_iov_eq_sp_event returned %d\n", rc); goto next_spqe; } #endif /* elem CID originates from FW, actually LE */ cid = SW_CID(elem->message.data.cfc_del_event.cid); opcode = elem->message.opcode; /* handle eq element */ switch (opcode) { #if 0 case EVENT_RING_OPCODE_VF_PF_CHANNEL: BLOGD(sc, DBG_SP, "vf/pf channel element on eq\n"); bxe_vf_mbx(sc, &elem->message.data.vf_pf_event); continue; #endif case EVENT_RING_OPCODE_STAT_QUERY: BLOGD(sc, DBG_SP, "got statistics completion event %d\n", sc->stats_comp++); /* nothing to do with stats comp */ goto next_spqe; case EVENT_RING_OPCODE_CFC_DEL: /* handle according to cid range */ /* we may want to verify here that the sc state is HALTING */ BLOGD(sc, DBG_SP, "got delete ramrod for MULTI[%d]\n", cid); q_obj = bxe_cid_to_q_obj(sc, cid); if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_CFC_DEL)) { break; } goto next_spqe; case EVENT_RING_OPCODE_STOP_TRAFFIC: BLOGD(sc, DBG_SP, "got STOP TRAFFIC\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_STOP)) { break; } // XXX bxe_dcbx_set_params(sc, BXE_DCBX_STATE_TX_PAUSED); goto next_spqe; case EVENT_RING_OPCODE_START_TRAFFIC: BLOGD(sc, DBG_SP, "got START TRAFFIC\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_START)) { break; } // XXX bxe_dcbx_set_params(sc, BXE_DCBX_STATE_TX_RELEASED); goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_UPDATE: echo = elem->message.data.function_update_event.echo; if (echo == SWITCH_UPDATE) { BLOGD(sc, DBG_SP, "got FUNC_SWITCH_UPDATE ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_SWITCH_UPDATE)) { break; } } else { BLOGD(sc, DBG_SP, "AFEX: ramrod completed FUNCTION_UPDATE\n"); #if 0 f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_AFEX_UPDATE); /* * We will perform the queues update from the sp_core_task as * all queue SP operations should run with CORE_LOCK. */ bxe_set_bit(BXE_SP_CORE_AFEX_F_UPDATE, &sc->sp_core_state); taskqueue_enqueue(sc->sp_tq, &sc->sp_tq_task); #endif } goto next_spqe; #if 0 case EVENT_RING_OPCODE_AFEX_VIF_LISTS: f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_AFEX_VIFLISTS); bxe_after_afex_vif_lists(sc, elem); goto next_spqe; #endif case EVENT_RING_OPCODE_FORWARD_SETUP: q_obj = &bxe_fwd_sp_obj(sc, q_obj); if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_SETUP_TX_ONLY)) { break; } goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_START: BLOGD(sc, DBG_SP, "got FUNC_START ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_START)) { break; } goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_STOP: BLOGD(sc, DBG_SP, "got FUNC_STOP ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_STOP)) { break; } goto next_spqe; } switch (opcode | sc->state) { case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BXE_STATE_OPENING_WAITING_PORT): cid = elem->message.data.eth_event.echo & BXE_SWCID_MASK; BLOGD(sc, DBG_SP, "got RSS_UPDATE ramrod. CID %d\n", cid); rss_raw->clear_pending(rss_raw); break; case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_CLOSING_WAITING_HALT): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got (un)set mac ramrod\n"); bxe_handle_classification_eqe(sc, elem); break; case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got mcast ramrod\n"); bxe_handle_mcast_eqe(sc); break; case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got rx_mode ramrod\n"); bxe_handle_rx_mode_eqe(sc, elem); break; default: /* unknown event log error and continue */ BLOGE(sc, "Unknown EQ event %d, sc->state 0x%x\n", elem->message.opcode, sc->state); } next_spqe: spqe_cnt++; } /* for */ mb(); atomic_add_acq_long(&sc->eq_spq_left, spqe_cnt); sc->eq_cons = sw_cons; sc->eq_prod = sw_prod; /* make sure that above mem writes were issued towards the memory */ wmb(); /* update producer */ bxe_update_eq_prod(sc, sc->eq_prod); } static void bxe_handle_sp_tq(void *context, int pending) { struct bxe_softc *sc = (struct bxe_softc *)context; uint16_t status; BLOGD(sc, DBG_SP, "---> SP TASK <---\n"); /* what work needs to be performed? */ status = bxe_update_dsb_idx(sc); BLOGD(sc, DBG_SP, "dsb status 0x%04x\n", status); /* HW attentions */ if (status & BXE_DEF_SB_ATT_IDX) { BLOGD(sc, DBG_SP, "---> ATTN INTR <---\n"); bxe_attn_int(sc); status &= ~BXE_DEF_SB_ATT_IDX; } /* SP events: STAT_QUERY and others */ if (status & BXE_DEF_SB_IDX) { /* handle EQ completions */ BLOGD(sc, DBG_SP, "---> EQ INTR <---\n"); bxe_eq_int(sc); bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, le16toh(sc->def_idx), IGU_INT_NOP, 1); status &= ~BXE_DEF_SB_IDX; } /* if status is non zero then something went wrong */ if (__predict_false(status)) { BLOGE(sc, "Got an unknown SP interrupt! (0x%04x)\n", status); } /* ack status block only if something was actually handled */ bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, le16toh(sc->def_att_idx), IGU_INT_ENABLE, 1); /* * Must be called after the EQ processing (since eq leads to sriov * ramrod completion flows). * This flow may have been scheduled by the arrival of a ramrod * completion, or by the sriov code rescheduling itself. */ // XXX bxe_iov_sp_task(sc); #if 0 /* AFEX - poll to check if VIFSET_ACK should be sent to MFW */ if (bxe_test_and_clear_bit(ECORE_AFEX_PENDING_VIFSET_MCP_ACK, &sc->sp_state)) { bxe_link_report(sc); bxe_fw_command(sc, DRV_MSG_CODE_AFEX_VIFSET_ACK, 0); } #endif } static void bxe_handle_fp_tq(void *context, int pending) { struct bxe_fastpath *fp = (struct bxe_fastpath *)context; struct bxe_softc *sc = fp->sc; uint8_t more_tx = FALSE; uint8_t more_rx = FALSE; BLOGD(sc, DBG_INTR, "---> FP TASK QUEUE (%d) <---\n", fp->index); /* XXX * IFF_DRV_RUNNING state can't be checked here since we process * slowpath events on a client queue during setup. Instead * we need to add a "process/continue" flag here that the driver * can use to tell the task here not to do anything. */ #if 0 if (!(sc->ifnet->if_drv_flags & IFF_DRV_RUNNING)) { return; } #endif /* update the fastpath index */ bxe_update_fp_sb_idx(fp); /* XXX add loop here if ever support multiple tx CoS */ /* fp->txdata[cos] */ if (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); more_tx = bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); } if (bxe_has_rx_work(fp)) { more_rx = bxe_rxeof(sc, fp); } if (more_rx /*|| more_tx*/) { /* still more work to do */ taskqueue_enqueue_fast(fp->tq, &fp->tq_task); return; } bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1); } static void bxe_task_fp(struct bxe_fastpath *fp) { struct bxe_softc *sc = fp->sc; uint8_t more_tx = FALSE; uint8_t more_rx = FALSE; BLOGD(sc, DBG_INTR, "---> FP TASK ISR (%d) <---\n", fp->index); /* update the fastpath index */ bxe_update_fp_sb_idx(fp); /* XXX add loop here if ever support multiple tx CoS */ /* fp->txdata[cos] */ if (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); more_tx = bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); } if (bxe_has_rx_work(fp)) { more_rx = bxe_rxeof(sc, fp); } if (more_rx /*|| more_tx*/) { /* still more work to do, bail out if this ISR and process later */ taskqueue_enqueue_fast(fp->tq, &fp->tq_task); return; } /* * Here we write the fastpath index taken before doing any tx or rx work. * It is very well possible other hw events occurred up to this point and * they were actually processed accordingly above. Since we're going to * write an older fastpath index, an interrupt is coming which we might * not do any work in. */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1); } /* * Legacy interrupt entry point. * * Verifies that the controller generated the interrupt and * then calls a separate routine to handle the various * interrupt causes: link, RX, and TX. */ static void bxe_intr_legacy(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; struct bxe_fastpath *fp; uint16_t status, mask; int i; BLOGD(sc, DBG_INTR, "---> BXE INTx <---\n"); #if 0 /* Don't handle any interrupts if we're not ready. */ if (__predict_false(sc->intr_sem != 0)) { return; } #endif /* * 0 for ustorm, 1 for cstorm * the bits returned from ack_int() are 0-15 * bit 0 = attention status block * bit 1 = fast path status block * a mask of 0x2 or more = tx/rx event * a mask of 1 = slow path event */ status = bxe_ack_int(sc); /* the interrupt is not for us */ if (__predict_false(status == 0)) { BLOGD(sc, DBG_INTR, "Not our interrupt!\n"); return; } BLOGD(sc, DBG_INTR, "Interrupt status 0x%04x\n", status); FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; mask = (0x2 << (fp->index + CNIC_SUPPORT(sc))); if (status & mask) { /* acknowledge and disable further fastpath interrupts */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); bxe_task_fp(fp); status &= ~mask; } } #if 0 if (CNIC_SUPPORT(sc)) { mask = 0x2; if (status & (mask | 0x1)) { ... status &= ~mask; } } #endif if (__predict_false(status & 0x1)) { /* acknowledge and disable further slowpath interrupts */ bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); /* schedule slowpath handler */ taskqueue_enqueue_fast(sc->sp_tq, &sc->sp_tq_task); status &= ~0x1; } if (__predict_false(status)) { BLOGW(sc, "Unexpected fastpath status (0x%08x)!\n", status); } } /* slowpath interrupt entry point */ static void bxe_intr_sp(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; BLOGD(sc, (DBG_INTR | DBG_SP), "---> SP INTR <---\n"); /* acknowledge and disable further slowpath interrupts */ bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); /* schedule slowpath handler */ taskqueue_enqueue_fast(sc->sp_tq, &sc->sp_tq_task); } /* fastpath interrupt entry point */ static void bxe_intr_fp(void *xfp) { struct bxe_fastpath *fp = (struct bxe_fastpath *)xfp; struct bxe_softc *sc = fp->sc; BLOGD(sc, DBG_INTR, "---> FP INTR %d <---\n", fp->index); BLOGD(sc, DBG_INTR, "(cpu=%d) MSI-X fp=%d fw_sb=%d igu_sb=%d\n", curcpu, fp->index, fp->fw_sb_id, fp->igu_sb_id); #if 0 /* Don't handle any interrupts if we're not ready. */ if (__predict_false(sc->intr_sem != 0)) { return; } #endif /* acknowledge and disable further fastpath interrupts */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); bxe_task_fp(fp); } /* Release all interrupts allocated by the driver. */ static void bxe_interrupt_free(struct bxe_softc *sc) { int i; switch (sc->interrupt_mode) { case INTR_MODE_INTX: BLOGD(sc, DBG_LOAD, "Releasing legacy INTx vector\n"); if (sc->intr[0].resource != NULL) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[0].rid, sc->intr[0].resource); } break; case INTR_MODE_MSI: for (i = 0; i < sc->intr_count; i++) { BLOGD(sc, DBG_LOAD, "Releasing MSI vector %d\n", i); if (sc->intr[i].resource && sc->intr[i].rid) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[i].rid, sc->intr[i].resource); } } pci_release_msi(sc->dev); break; case INTR_MODE_MSIX: for (i = 0; i < sc->intr_count; i++) { BLOGD(sc, DBG_LOAD, "Releasing MSI-X vector %d\n", i); if (sc->intr[i].resource && sc->intr[i].rid) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[i].rid, sc->intr[i].resource); } } pci_release_msi(sc->dev); break; default: /* nothing to do as initial allocation failed */ break; } } /* * This function determines and allocates the appropriate * interrupt based on system capabilites and user request. * * The user may force a particular interrupt mode, specify * the number of receive queues, specify the method for * distribuitng received frames to receive queues, or use * the default settings which will automatically select the * best supported combination. In addition, the OS may or * may not support certain combinations of these settings. * This routine attempts to reconcile the settings requested * by the user with the capabilites available from the system * to select the optimal combination of features. * * Returns: * 0 = Success, !0 = Failure. */ static int bxe_interrupt_alloc(struct bxe_softc *sc) { int msix_count = 0; int msi_count = 0; int num_requested = 0; int num_allocated = 0; int rid, i, j; int rc; /* get the number of available MSI/MSI-X interrupts from the OS */ if (sc->interrupt_mode > 0) { if (sc->devinfo.pcie_cap_flags & BXE_MSIX_CAPABLE_FLAG) { msix_count = pci_msix_count(sc->dev); } if (sc->devinfo.pcie_cap_flags & BXE_MSI_CAPABLE_FLAG) { msi_count = pci_msi_count(sc->dev); } BLOGD(sc, DBG_LOAD, "%d MSI and %d MSI-X vectors available\n", msi_count, msix_count); } do { /* try allocating MSI-X interrupt resources (at least 2) */ if (sc->interrupt_mode != INTR_MODE_MSIX) { break; } if (((sc->devinfo.pcie_cap_flags & BXE_MSIX_CAPABLE_FLAG) == 0) || (msix_count < 2)) { sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */ break; } /* ask for the necessary number of MSI-X vectors */ num_requested = min((sc->num_queues + 1), msix_count); BLOGD(sc, DBG_LOAD, "Requesting %d MSI-X vectors\n", num_requested); num_allocated = num_requested; if ((rc = pci_alloc_msix(sc->dev, &num_allocated)) != 0) { BLOGE(sc, "MSI-X alloc failed! (%d)\n", rc); sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */ break; } if (num_allocated < 2) { /* possible? */ BLOGE(sc, "MSI-X allocation less than 2!\n"); sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */ pci_release_msi(sc->dev); break; } BLOGI(sc, "MSI-X vectors Requested %d and Allocated %d\n", num_requested, num_allocated); /* best effort so use the number of vectors allocated to us */ sc->intr_count = num_allocated; sc->num_queues = num_allocated - 1; rid = 1; /* initial resource identifier */ /* allocate the MSI-X vectors */ for (i = 0; i < num_allocated; i++) { sc->intr[i].rid = (rid + i); if ((sc->intr[i].resource = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->intr[i].rid, RF_ACTIVE)) == NULL) { BLOGE(sc, "Failed to map MSI-X[%d] (rid=%d)!\n", i, (rid + i)); for (j = (i - 1); j >= 0; j--) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[j].rid, sc->intr[j].resource); } sc->intr_count = 0; sc->num_queues = 0; sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */ pci_release_msi(sc->dev); break; } BLOGD(sc, DBG_LOAD, "Mapped MSI-X[%d] (rid=%d)\n", i, (rid + i)); } } while (0); do { /* try allocating MSI vector resources (at least 2) */ if (sc->interrupt_mode != INTR_MODE_MSI) { break; } if (((sc->devinfo.pcie_cap_flags & BXE_MSI_CAPABLE_FLAG) == 0) || (msi_count < 1)) { sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ break; } /* ask for a single MSI vector */ num_requested = 1; BLOGD(sc, DBG_LOAD, "Requesting %d MSI vectors\n", num_requested); num_allocated = num_requested; if ((rc = pci_alloc_msi(sc->dev, &num_allocated)) != 0) { BLOGE(sc, "MSI alloc failed (%d)!\n", rc); sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ break; } if (num_allocated != 1) { /* possible? */ BLOGE(sc, "MSI allocation is not 1!\n"); sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ pci_release_msi(sc->dev); break; } BLOGI(sc, "MSI vectors Requested %d and Allocated %d\n", num_requested, num_allocated); /* best effort so use the number of vectors allocated to us */ sc->intr_count = num_allocated; sc->num_queues = num_allocated; rid = 1; /* initial resource identifier */ sc->intr[0].rid = rid; if ((sc->intr[0].resource = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->intr[0].rid, RF_ACTIVE)) == NULL) { BLOGE(sc, "Failed to map MSI[0] (rid=%d)!\n", rid); sc->intr_count = 0; sc->num_queues = 0; sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ pci_release_msi(sc->dev); break; } BLOGD(sc, DBG_LOAD, "Mapped MSI[0] (rid=%d)\n", rid); } while (0); do { /* try allocating INTx vector resources */ if (sc->interrupt_mode != INTR_MODE_INTX) { break; } BLOGD(sc, DBG_LOAD, "Requesting legacy INTx interrupt\n"); /* only one vector for INTx */ sc->intr_count = 1; sc->num_queues = 1; rid = 0; /* initial resource identifier */ sc->intr[0].rid = rid; if ((sc->intr[0].resource = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->intr[0].rid, (RF_ACTIVE | RF_SHAREABLE))) == NULL) { BLOGE(sc, "Failed to map INTx (rid=%d)!\n", rid); sc->intr_count = 0; sc->num_queues = 0; sc->interrupt_mode = -1; /* Failed! */ break; } BLOGD(sc, DBG_LOAD, "Mapped INTx (rid=%d)\n", rid); } while (0); if (sc->interrupt_mode == -1) { BLOGE(sc, "Interrupt Allocation: FAILED!!!\n"); rc = 1; } else { BLOGD(sc, DBG_LOAD, "Interrupt Allocation: interrupt_mode=%d, num_queues=%d\n", sc->interrupt_mode, sc->num_queues); rc = 0; } return (rc); } static void bxe_interrupt_detach(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; /* release interrupt resources */ for (i = 0; i < sc->intr_count; i++) { if (sc->intr[i].resource && sc->intr[i].tag) { BLOGD(sc, DBG_LOAD, "Disabling interrupt vector %d\n", i); bus_teardown_intr(sc->dev, sc->intr[i].resource, sc->intr[i].tag); } } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->tq) { taskqueue_drain(fp->tq, &fp->tq_task); taskqueue_free(fp->tq); fp->tq = NULL; } } if (sc->rx_mode_tq) { taskqueue_drain(sc->rx_mode_tq, &sc->rx_mode_tq_task); taskqueue_free(sc->rx_mode_tq); sc->rx_mode_tq = NULL; } if (sc->sp_tq) { taskqueue_drain(sc->sp_tq, &sc->sp_tq_task); taskqueue_free(sc->sp_tq); sc->sp_tq = NULL; } } /* * Enables interrupts and attach to the ISR. * * When using multiple MSI/MSI-X vectors the first vector * is used for slowpath operations while all remaining * vectors are used for fastpath operations. If only a * single MSI/MSI-X vector is used (SINGLE_ISR) then the * ISR must look for both slowpath and fastpath completions. */ static int bxe_interrupt_attach(struct bxe_softc *sc) { struct bxe_fastpath *fp; int rc = 0; int i; snprintf(sc->sp_tq_name, sizeof(sc->sp_tq_name), "bxe%d_sp_tq", sc->unit); TASK_INIT(&sc->sp_tq_task, 0, bxe_handle_sp_tq, sc); sc->sp_tq = taskqueue_create_fast(sc->sp_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->sp_tq); taskqueue_start_threads(&sc->sp_tq, 1, PWAIT, /* lower priority */ "%s", sc->sp_tq_name); snprintf(sc->rx_mode_tq_name, sizeof(sc->rx_mode_tq_name), "bxe%d_rx_mode_tq", sc->unit); TASK_INIT(&sc->rx_mode_tq_task, 0, bxe_handle_rx_mode_tq, sc); sc->rx_mode_tq = taskqueue_create_fast(sc->rx_mode_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->rx_mode_tq); taskqueue_start_threads(&sc->rx_mode_tq, 1, PWAIT, /* lower priority */ "%s", sc->rx_mode_tq_name); for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; snprintf(fp->tq_name, sizeof(fp->tq_name), "bxe%d_fp%d_tq", sc->unit, i); TASK_INIT(&fp->tq_task, 0, bxe_handle_fp_tq, fp); fp->tq = taskqueue_create_fast(fp->tq_name, M_NOWAIT, taskqueue_thread_enqueue, &fp->tq); taskqueue_start_threads(&fp->tq, 1, PI_NET, /* higher priority */ "%s", fp->tq_name); } /* setup interrupt handlers */ if (sc->interrupt_mode == INTR_MODE_MSIX) { BLOGD(sc, DBG_LOAD, "Enabling slowpath MSI-X[0] vector\n"); /* * Setup the interrupt handler. Note that we pass the driver instance * to the interrupt handler for the slowpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_sp, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI-X[0] vector (%d)\n", rc); goto bxe_interrupt_attach_exit; } bus_describe_intr(sc->dev, sc->intr[0].resource, sc->intr[0].tag, "sp"); /* bus_bind_intr(sc->dev, sc->intr[0].resource, 0); */ /* initialize the fastpath vectors (note the first was used for sp) */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; BLOGD(sc, DBG_LOAD, "Enabling MSI-X[%d] vector\n", (i + 1)); /* * Setup the interrupt handler. Note that we pass the * fastpath context to the interrupt handler in this * case. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[i + 1].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_fp, fp, &sc->intr[i + 1].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI-X[%d] vector (%d)\n", (i + 1), rc); goto bxe_interrupt_attach_exit; } bus_describe_intr(sc->dev, sc->intr[i + 1].resource, sc->intr[i + 1].tag, "fp%02d", i); /* bind the fastpath instance to a cpu */ if (sc->num_queues > 1) { bus_bind_intr(sc->dev, sc->intr[i + 1].resource, i); } fp->state = BXE_FP_STATE_IRQ; } } else if (sc->interrupt_mode == INTR_MODE_MSI) { BLOGD(sc, DBG_LOAD, "Enabling MSI[0] vector\n"); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_legacy, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI[0] vector (%d)\n", rc); goto bxe_interrupt_attach_exit; } } else { /* (sc->interrupt_mode == INTR_MODE_INTX) */ BLOGD(sc, DBG_LOAD, "Enabling INTx interrupts\n"); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_legacy, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate INTx interrupt (%d)\n", rc); goto bxe_interrupt_attach_exit; } } bxe_interrupt_attach_exit: return (rc); } static int bxe_init_hw_common_chip(struct bxe_softc *sc); static int bxe_init_hw_common(struct bxe_softc *sc); static int bxe_init_hw_port(struct bxe_softc *sc); static int bxe_init_hw_func(struct bxe_softc *sc); static void bxe_reset_common(struct bxe_softc *sc); static void bxe_reset_port(struct bxe_softc *sc); static void bxe_reset_func(struct bxe_softc *sc); static int bxe_gunzip_init(struct bxe_softc *sc); static void bxe_gunzip_end(struct bxe_softc *sc); static int bxe_init_firmware(struct bxe_softc *sc); static void bxe_release_firmware(struct bxe_softc *sc); static struct ecore_func_sp_drv_ops bxe_func_sp_drv = { .init_hw_cmn_chip = bxe_init_hw_common_chip, .init_hw_cmn = bxe_init_hw_common, .init_hw_port = bxe_init_hw_port, .init_hw_func = bxe_init_hw_func, .reset_hw_cmn = bxe_reset_common, .reset_hw_port = bxe_reset_port, .reset_hw_func = bxe_reset_func, .gunzip_init = bxe_gunzip_init, .gunzip_end = bxe_gunzip_end, .init_fw = bxe_init_firmware, .release_fw = bxe_release_firmware, }; static void bxe_init_func_obj(struct bxe_softc *sc) { sc->dmae_ready = 0; ecore_init_func_obj(sc, &sc->func_obj, BXE_SP(sc, func_rdata), BXE_SP_MAPPING(sc, func_rdata), BXE_SP(sc, func_afex_rdata), BXE_SP_MAPPING(sc, func_afex_rdata), &bxe_func_sp_drv); } static int bxe_init_hw(struct bxe_softc *sc, uint32_t load_code) { struct ecore_func_state_params func_params = { NULL }; int rc; /* prepare the parameters for function state transitions */ bit_set(&func_params.ramrod_flags, RAMROD_COMP_WAIT); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_HW_INIT; func_params.params.hw_init.load_phase = load_code; /* * Via a plethora of function pointers, we will eventually reach * bxe_init_hw_common(), bxe_init_hw_port(), or bxe_init_hw_func(). */ rc = ecore_func_state_change(sc, &func_params); return (rc); } static void bxe_fill(struct bxe_softc *sc, uint32_t addr, int fill, uint32_t len) { uint32_t i; if (!(len % 4) && !(addr % 4)) { for (i = 0; i < len; i += 4) { REG_WR(sc, (addr + i), fill); } } else { for (i = 0; i < len; i++) { REG_WR8(sc, (addr + i), fill); } } } /* writes FP SP data to FW - data_size in dwords */ static void bxe_wr_fp_sb_data(struct bxe_softc *sc, int fw_sb_id, uint32_t *sb_data_p, uint32_t data_size) { int index; for (index = 0; index < data_size; index++) { REG_WR(sc, (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_OFFSET(fw_sb_id) + (sizeof(uint32_t) * index)), *(sb_data_p + index)); } } static void bxe_zero_fp_sb(struct bxe_softc *sc, int fw_sb_id) { struct hc_status_block_data_e2 sb_data_e2; struct hc_status_block_data_e1x sb_data_e1x; uint32_t *sb_data_p; uint32_t data_size = 0; if (!CHIP_IS_E1x(sc)) { memset(&sb_data_e2, 0, sizeof(struct hc_status_block_data_e2)); sb_data_e2.common.state = SB_DISABLED; sb_data_e2.common.p_func.vf_valid = FALSE; sb_data_p = (uint32_t *)&sb_data_e2; data_size = (sizeof(struct hc_status_block_data_e2) / sizeof(uint32_t)); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); sb_data_e1x.common.state = SB_DISABLED; sb_data_e1x.common.p_func.vf_valid = FALSE; sb_data_p = (uint32_t *)&sb_data_e1x; data_size = (sizeof(struct hc_status_block_data_e1x) / sizeof(uint32_t)); } bxe_wr_fp_sb_data(sc, fw_sb_id, sb_data_p, data_size); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_OFFSET(fw_sb_id)), 0, CSTORM_STATUS_BLOCK_SIZE); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SYNC_BLOCK_OFFSET(fw_sb_id)), 0, CSTORM_SYNC_BLOCK_SIZE); } static void bxe_wr_sp_sb_data(struct bxe_softc *sc, struct hc_sp_status_block_data *sp_sb_data) { int i; for (i = 0; i < (sizeof(struct hc_sp_status_block_data) / sizeof(uint32_t)); i++) { REG_WR(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_DATA_OFFSET(SC_FUNC(sc)) + (i * sizeof(uint32_t))), *((uint32_t *)sp_sb_data + i)); } } static void bxe_zero_sp_sb(struct bxe_softc *sc) { struct hc_sp_status_block_data sp_sb_data; memset(&sp_sb_data, 0, sizeof(struct hc_sp_status_block_data)); sp_sb_data.state = SB_DISABLED; sp_sb_data.p_func.vf_valid = FALSE; bxe_wr_sp_sb_data(sc, &sp_sb_data); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_OFFSET(SC_FUNC(sc))), 0, CSTORM_SP_STATUS_BLOCK_SIZE); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_SYNC_BLOCK_OFFSET(SC_FUNC(sc))), 0, CSTORM_SP_SYNC_BLOCK_SIZE); } static void bxe_setup_ndsb_state_machine(struct hc_status_block_sm *hc_sm, int igu_sb_id, int igu_seg_id) { hc_sm->igu_sb_id = igu_sb_id; hc_sm->igu_seg_id = igu_seg_id; hc_sm->timer_value = 0xFF; hc_sm->time_to_expire = 0xFFFFFFFF; } static void bxe_map_sb_state_machines(struct hc_index_data *index_data) { /* zero out state machine indices */ /* rx indices */ index_data[HC_INDEX_ETH_RX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; /* tx indices */ index_data[HC_INDEX_OOO_TX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags &= ~HC_INDEX_DATA_SM_ID; /* map indices */ /* rx indices */ index_data[HC_INDEX_ETH_RX_CQ_CONS].flags |= (SM_RX_ID << HC_INDEX_DATA_SM_ID_SHIFT); /* tx indices */ index_data[HC_INDEX_OOO_TX_CQ_CONS].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); } static void bxe_init_sb(struct bxe_softc *sc, bus_addr_t busaddr, int vfid, uint8_t vf_valid, int fw_sb_id, int igu_sb_id) { struct hc_status_block_data_e2 sb_data_e2; struct hc_status_block_data_e1x sb_data_e1x; struct hc_status_block_sm *hc_sm_p; uint32_t *sb_data_p; int igu_seg_id; int data_size; if (CHIP_INT_MODE_IS_BC(sc)) { igu_seg_id = HC_SEG_ACCESS_NORM; } else { igu_seg_id = IGU_SEG_ACCESS_NORM; } bxe_zero_fp_sb(sc, fw_sb_id); if (!CHIP_IS_E1x(sc)) { memset(&sb_data_e2, 0, sizeof(struct hc_status_block_data_e2)); sb_data_e2.common.state = SB_ENABLED; sb_data_e2.common.p_func.pf_id = SC_FUNC(sc); sb_data_e2.common.p_func.vf_id = vfid; sb_data_e2.common.p_func.vf_valid = vf_valid; sb_data_e2.common.p_func.vnic_id = SC_VN(sc); sb_data_e2.common.same_igu_sb_1b = TRUE; sb_data_e2.common.host_sb_addr.hi = U64_HI(busaddr); sb_data_e2.common.host_sb_addr.lo = U64_LO(busaddr); hc_sm_p = sb_data_e2.common.state_machine; sb_data_p = (uint32_t *)&sb_data_e2; data_size = (sizeof(struct hc_status_block_data_e2) / sizeof(uint32_t)); bxe_map_sb_state_machines(sb_data_e2.index_data); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); sb_data_e1x.common.state = SB_ENABLED; sb_data_e1x.common.p_func.pf_id = SC_FUNC(sc); sb_data_e1x.common.p_func.vf_id = 0xff; sb_data_e1x.common.p_func.vf_valid = FALSE; sb_data_e1x.common.p_func.vnic_id = SC_VN(sc); sb_data_e1x.common.same_igu_sb_1b = TRUE; sb_data_e1x.common.host_sb_addr.hi = U64_HI(busaddr); sb_data_e1x.common.host_sb_addr.lo = U64_LO(busaddr); hc_sm_p = sb_data_e1x.common.state_machine; sb_data_p = (uint32_t *)&sb_data_e1x; data_size = (sizeof(struct hc_status_block_data_e1x) / sizeof(uint32_t)); bxe_map_sb_state_machines(sb_data_e1x.index_data); } bxe_setup_ndsb_state_machine(&hc_sm_p[SM_RX_ID], igu_sb_id, igu_seg_id); bxe_setup_ndsb_state_machine(&hc_sm_p[SM_TX_ID], igu_sb_id, igu_seg_id); BLOGD(sc, DBG_LOAD, "Init FW SB %d\n", fw_sb_id); /* write indices to HW - PCI guarantees endianity of regpairs */ bxe_wr_fp_sb_data(sc, fw_sb_id, sb_data_p, data_size); } static inline uint8_t bxe_fp_qzone_id(struct bxe_fastpath *fp) { if (CHIP_IS_E1x(fp->sc)) { return (fp->cl_id + SC_PORT(fp->sc) * ETH_MAX_RX_CLIENTS_E1H); } else { return (fp->cl_id); } } static inline uint32_t bxe_rx_ustorm_prods_offset(struct bxe_softc *sc, struct bxe_fastpath *fp) { uint32_t offset = BAR_USTRORM_INTMEM; #if 0 if (IS_VF(sc)) { return (PXP_VF_ADDR_USDM_QUEUES_START + (sc->acquire_resp.resc.hw_qid[fp->index] * sizeof(struct ustorm_queue_zone_data))); } else #endif if (!CHIP_IS_E1x(sc)) { offset += USTORM_RX_PRODS_E2_OFFSET(fp->cl_qzone_id); } else { offset += USTORM_RX_PRODS_E1X_OFFSET(SC_PORT(sc), fp->cl_id); } return (offset); } static void bxe_init_eth_fp(struct bxe_softc *sc, int idx) { struct bxe_fastpath *fp = &sc->fp[idx]; uint32_t cids[ECORE_MULTI_TX_COS] = { 0 }; unsigned long q_type = 0; int cos; fp->sc = sc; fp->index = idx; snprintf(fp->tx_mtx_name, sizeof(fp->tx_mtx_name), "bxe%d_fp%d_tx_lock", sc->unit, idx); mtx_init(&fp->tx_mtx, fp->tx_mtx_name, NULL, MTX_DEF); snprintf(fp->rx_mtx_name, sizeof(fp->rx_mtx_name), "bxe%d_fp%d_rx_lock", sc->unit, idx); mtx_init(&fp->rx_mtx, fp->rx_mtx_name, NULL, MTX_DEF); fp->igu_sb_id = (sc->igu_base_sb + idx + CNIC_SUPPORT(sc)); fp->fw_sb_id = (sc->base_fw_ndsb + idx + CNIC_SUPPORT(sc)); fp->cl_id = (CHIP_IS_E1x(sc)) ? (SC_L_ID(sc) + idx) : /* want client ID same as IGU SB ID for non-E1 */ fp->igu_sb_id; fp->cl_qzone_id = bxe_fp_qzone_id(fp); /* setup sb indices */ if (!CHIP_IS_E1x(sc)) { fp->sb_index_values = fp->status_block.e2_sb->sb.index_values; fp->sb_running_index = fp->status_block.e2_sb->sb.running_index; } else { fp->sb_index_values = fp->status_block.e1x_sb->sb.index_values; fp->sb_running_index = fp->status_block.e1x_sb->sb.running_index; } /* init shortcut */ fp->ustorm_rx_prods_offset = bxe_rx_ustorm_prods_offset(sc, fp); fp->rx_cq_cons_sb = &fp->sb_index_values[HC_INDEX_ETH_RX_CQ_CONS]; /* * XXX If multiple CoS is ever supported then each fastpath structure * will need to maintain tx producer/consumer/dma/etc values *per* CoS. */ for (cos = 0; cos < sc->max_cos; cos++) { cids[cos] = idx; } fp->tx_cons_sb = &fp->sb_index_values[HC_INDEX_ETH_TX_CQ_CONS_COS0]; /* nothing more for a VF to do */ if (IS_VF(sc)) { return; } bxe_init_sb(sc, fp->sb_dma.paddr, BXE_VF_ID_INVALID, FALSE, fp->fw_sb_id, fp->igu_sb_id); bxe_update_fp_sb_idx(fp); /* Configure Queue State object */ bit_set(&q_type, ECORE_Q_TYPE_HAS_RX); bit_set(&q_type, ECORE_Q_TYPE_HAS_TX); ecore_init_queue_obj(sc, &sc->sp_objs[idx].q_obj, fp->cl_id, cids, sc->max_cos, SC_FUNC(sc), BXE_SP(sc, q_rdata), BXE_SP_MAPPING(sc, q_rdata), q_type); /* configure classification DBs */ ecore_init_mac_obj(sc, &sc->sp_objs[idx].mac_obj, fp->cl_id, idx, SC_FUNC(sc), BXE_SP(sc, mac_rdata), BXE_SP_MAPPING(sc, mac_rdata), ECORE_FILTER_MAC_PENDING, &sc->sp_state, ECORE_OBJ_TYPE_RX_TX, &sc->macs_pool); BLOGD(sc, DBG_LOAD, "fp[%d]: sb=%p cl_id=%d fw_sb=%d igu_sb=%d\n", idx, fp->status_block.e2_sb, fp->cl_id, fp->fw_sb_id, fp->igu_sb_id); } static inline void bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t rx_bd_prod, uint16_t rx_cq_prod, uint16_t rx_sge_prod) { struct ustorm_eth_rx_producers rx_prods = { 0 }; uint32_t i; /* update producers */ rx_prods.bd_prod = rx_bd_prod; rx_prods.cqe_prod = rx_cq_prod; rx_prods.sge_prod = rx_sge_prod; /* * Make sure that the BD and SGE data is updated before updating the * producers since FW might read the BD/SGE right after the producer * is updated. * This is only applicable for weak-ordered memory model archs such * as IA-64. The following barrier is also mandatory since FW will * assumes BDs must have buffers. */ wmb(); for (i = 0; i < (sizeof(rx_prods) / 4); i++) { REG_WR(sc, (fp->ustorm_rx_prods_offset + (i * 4)), ((uint32_t *)&rx_prods)[i]); } wmb(); /* keep prod updates ordered */ BLOGD(sc, DBG_RX, "RX fp[%d]: wrote prods bd_prod=%u cqe_prod=%u sge_prod=%u\n", fp->index, rx_bd_prod, rx_cq_prod, rx_sge_prod); } static void bxe_init_rx_rings(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->rx_bd_cons = 0; /* * Activate the BD ring... * Warning, this will generate an interrupt (to the TSTORM) * so this can only be done after the chip is initialized */ bxe_update_rx_prod(sc, fp, fp->rx_bd_prod, fp->rx_cq_prod, fp->rx_sge_prod); if (i != 0) { continue; } if (CHIP_IS_E1(sc)) { REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(SC_FUNC(sc))), U64_LO(fp->rcq_dma.paddr)); REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(SC_FUNC(sc)) + 4), U64_HI(fp->rcq_dma.paddr)); } } } static void bxe_init_tx_ring_one(struct bxe_fastpath *fp) { SET_FLAG(fp->tx_db.data.header.header, DOORBELL_HDR_DB_TYPE, 1); fp->tx_db.data.zero_fill1 = 0; fp->tx_db.data.prod = 0; fp->tx_pkt_prod = 0; fp->tx_pkt_cons = 0; fp->tx_bd_prod = 0; fp->tx_bd_cons = 0; fp->eth_q_stats.tx_pkts = 0; } static inline void bxe_init_tx_rings(struct bxe_softc *sc) { int i; for (i = 0; i < sc->num_queues; i++) { #if 0 uint8_t cos; for (cos = 0; cos < sc->max_cos; cos++) { bxe_init_tx_ring_one(&sc->fp[i].txdata[cos]); } #else bxe_init_tx_ring_one(&sc->fp[i]); #endif } } static void bxe_init_def_sb(struct bxe_softc *sc) { struct host_sp_status_block *def_sb = sc->def_sb; bus_addr_t mapping = sc->def_sb_dma.paddr; int igu_sp_sb_index; int igu_seg_id; int port = SC_PORT(sc); int func = SC_FUNC(sc); int reg_offset, reg_offset_en5; uint64_t section; int index, sindex; struct hc_sp_status_block_data sp_sb_data; memset(&sp_sb_data, 0, sizeof(struct hc_sp_status_block_data)); if (CHIP_INT_MODE_IS_BC(sc)) { igu_sp_sb_index = DEF_SB_IGU_ID; igu_seg_id = HC_SEG_ACCESS_DEF; } else { igu_sp_sb_index = sc->igu_dsb_id; igu_seg_id = IGU_SEG_ACCESS_DEF; } /* attentions */ section = ((uint64_t)mapping + offsetof(struct host_sp_status_block, atten_status_block)); def_sb->atten_status_block.status_block_id = igu_sp_sb_index; sc->attn_state = 0; reg_offset = (port) ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0; reg_offset_en5 = (port) ? MISC_REG_AEU_ENABLE5_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE5_FUNC_0_OUT_0; for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { /* take care of sig[0]..sig[4] */ for (sindex = 0; sindex < 4; sindex++) { sc->attn_group[index].sig[sindex] = REG_RD(sc, (reg_offset + (sindex * 0x4) + (0x10 * index))); } if (!CHIP_IS_E1x(sc)) { /* * enable5 is separate from the rest of the registers, * and the address skip is 4 and not 16 between the * different groups */ sc->attn_group[index].sig[4] = REG_RD(sc, (reg_offset_en5 + (0x4 * index))); } else { sc->attn_group[index].sig[4] = 0; } } if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_offset = (port) ? HC_REG_ATTN_MSG1_ADDR_L : HC_REG_ATTN_MSG0_ADDR_L; REG_WR(sc, reg_offset, U64_LO(section)); REG_WR(sc, (reg_offset + 4), U64_HI(section)); } else if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_ATTN_MSG_ADDR_L, U64_LO(section)); REG_WR(sc, IGU_REG_ATTN_MSG_ADDR_H, U64_HI(section)); } section = ((uint64_t)mapping + offsetof(struct host_sp_status_block, sp_sb)); bxe_zero_sp_sb(sc); /* PCI guarantees endianity of regpair */ sp_sb_data.state = SB_ENABLED; sp_sb_data.host_sb_addr.lo = U64_LO(section); sp_sb_data.host_sb_addr.hi = U64_HI(section); sp_sb_data.igu_sb_id = igu_sp_sb_index; sp_sb_data.igu_seg_id = igu_seg_id; sp_sb_data.p_func.pf_id = func; sp_sb_data.p_func.vnic_id = SC_VN(sc); sp_sb_data.p_func.vf_id = 0xff; bxe_wr_sp_sb_data(sc, &sp_sb_data); bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); } static void bxe_init_sp_ring(struct bxe_softc *sc) { atomic_store_rel_long(&sc->cq_spq_left, MAX_SPQ_PENDING); sc->spq_prod_idx = 0; sc->dsb_sp_prod = &sc->def_sb->sp_sb.index_values[HC_SP_INDEX_ETH_DEF_CONS]; sc->spq_prod_bd = sc->spq; sc->spq_last_bd = (sc->spq_prod_bd + MAX_SP_DESC_CNT); } static void bxe_init_eq_ring(struct bxe_softc *sc) { union event_ring_elem *elem; int i; for (i = 1; i <= NUM_EQ_PAGES; i++) { elem = &sc->eq[EQ_DESC_CNT_PAGE * i - 1]; elem->next_page.addr.hi = htole32(U64_HI(sc->eq_dma.paddr + BCM_PAGE_SIZE * (i % NUM_EQ_PAGES))); elem->next_page.addr.lo = htole32(U64_LO(sc->eq_dma.paddr + BCM_PAGE_SIZE * (i % NUM_EQ_PAGES))); } sc->eq_cons = 0; sc->eq_prod = NUM_EQ_DESC; sc->eq_cons_sb = &sc->def_sb->sp_sb.index_values[HC_SP_INDEX_EQ_CONS]; atomic_store_rel_long(&sc->eq_spq_left, (min((MAX_SP_DESC_CNT - MAX_SPQ_PENDING), NUM_EQ_DESC) - 1)); } static void bxe_init_internal_common(struct bxe_softc *sc) { int i; if (IS_MF_SI(sc)) { /* * In switch independent mode, the TSTORM needs to accept * packets that failed classification, since approximate match * mac addresses aren't written to NIG LLH. */ REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_ACCEPT_CLASSIFY_FAILED_OFFSET), 2); } else if (!CHIP_IS_E1(sc)) { /* 57710 doesn't support MF */ REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_ACCEPT_CLASSIFY_FAILED_OFFSET), 0); } /* * Zero this manually as its initialization is currently missing * in the initTool. */ for (i = 0; i < (USTORM_AGG_DATA_SIZE >> 2); i++) { REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_AGG_DATA_OFFSET + (i * 4)), 0); } if (!CHIP_IS_E1x(sc)) { REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_IGU_MODE_OFFSET), CHIP_INT_MODE_IS_BC(sc) ? HC_IGU_BC_MODE : HC_IGU_NBC_MODE); } } static void bxe_init_internal(struct bxe_softc *sc, uint32_t load_code) { switch (load_code) { case FW_MSG_CODE_DRV_LOAD_COMMON: case FW_MSG_CODE_DRV_LOAD_COMMON_CHIP: bxe_init_internal_common(sc); /* no break */ case FW_MSG_CODE_DRV_LOAD_PORT: /* nothing to do */ /* no break */ case FW_MSG_CODE_DRV_LOAD_FUNCTION: /* internal memory per function is initialized inside bxe_pf_init */ break; default: BLOGE(sc, "Unknown load_code (0x%x) from MCP\n", load_code); break; } } static void storm_memset_func_cfg(struct bxe_softc *sc, struct tstorm_eth_function_common_config *tcfg, uint16_t abs_fid) { uint32_t addr; size_t size; addr = (BAR_TSTRORM_INTMEM + TSTORM_FUNCTION_COMMON_CONFIG_OFFSET(abs_fid)); size = sizeof(struct tstorm_eth_function_common_config); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)tcfg); } static void bxe_func_init(struct bxe_softc *sc, struct bxe_func_init_params *p) { struct tstorm_eth_function_common_config tcfg = { 0 }; if (CHIP_IS_E1x(sc)) { storm_memset_func_cfg(sc, &tcfg, p->func_id); } /* Enable the function in the FW */ storm_memset_vf_to_pf(sc, p->func_id, p->pf_id); storm_memset_func_en(sc, p->func_id, 1); /* spq */ if (p->func_flgs & FUNC_FLG_SPQ) { storm_memset_spq_addr(sc, p->spq_map, p->func_id); REG_WR(sc, (XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PROD_OFFSET(p->func_id)), p->spq_prod); } } /* * Calculates the sum of vn_min_rates. * It's needed for further normalizing of the min_rates. * Returns: * sum of vn_min_rates. * or * 0 - if all the min_rates are 0. * In the later case fainess algorithm should be deactivated. * If all min rates are not zero then those that are zeroes will be set to 1. */ static void bxe_calc_vn_min(struct bxe_softc *sc, struct cmng_init_input *input) { uint32_t vn_cfg; uint32_t vn_min_rate; int all_zero = 1; int vn; for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { vn_cfg = sc->devinfo.mf_info.mf_config[vn]; vn_min_rate = (((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT) * 100); if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE) { /* skip hidden VNs */ vn_min_rate = 0; } else if (!vn_min_rate) { /* If min rate is zero - set it to 100 */ vn_min_rate = DEF_MIN_RATE; } else { all_zero = 0; } input->vnic_min_rate[vn] = vn_min_rate; } /* if ETS or all min rates are zeros - disable fairness */ if (BXE_IS_ETS_ENABLED(sc)) { input->flags.cmng_enables &= ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN; BLOGD(sc, DBG_LOAD, "Fairness disabled (ETS)\n"); } else if (all_zero) { input->flags.cmng_enables &= ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN; BLOGD(sc, DBG_LOAD, "Fariness disabled (all MIN values are zeroes)\n"); } else { input->flags.cmng_enables |= CMNG_FLAGS_PER_PORT_FAIRNESS_VN; } } static inline uint16_t bxe_extract_max_cfg(struct bxe_softc *sc, uint32_t mf_cfg) { uint16_t max_cfg = ((mf_cfg & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT); if (!max_cfg) { BLOGD(sc, DBG_LOAD, "Max BW configured to 0 - using 100 instead\n"); max_cfg = 100; } return (max_cfg); } static void bxe_calc_vn_max(struct bxe_softc *sc, int vn, struct cmng_init_input *input) { uint16_t vn_max_rate; uint32_t vn_cfg = sc->devinfo.mf_info.mf_config[vn]; uint32_t max_cfg; if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE) { vn_max_rate = 0; } else { max_cfg = bxe_extract_max_cfg(sc, vn_cfg); if (IS_MF_SI(sc)) { /* max_cfg in percents of linkspeed */ vn_max_rate = ((sc->link_vars.line_speed * max_cfg) / 100); } else { /* SD modes */ /* max_cfg is absolute in 100Mb units */ vn_max_rate = (max_cfg * 100); } } BLOGD(sc, DBG_LOAD, "vn %d: vn_max_rate %d\n", vn, vn_max_rate); input->vnic_max_rate[vn] = vn_max_rate; } static void bxe_cmng_fns_init(struct bxe_softc *sc, uint8_t read_cfg, uint8_t cmng_type) { struct cmng_init_input input; int vn; memset(&input, 0, sizeof(struct cmng_init_input)); input.port_rate = sc->link_vars.line_speed; if (cmng_type == CMNG_FNS_MINMAX) { /* read mf conf from shmem */ if (read_cfg) { bxe_read_mf_cfg(sc); } /* get VN min rate and enable fairness if not 0 */ bxe_calc_vn_min(sc, &input); /* get VN max rate */ if (sc->port.pmf) { for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { bxe_calc_vn_max(sc, vn, &input); } } /* always enable rate shaping and fairness */ input.flags.cmng_enables |= CMNG_FLAGS_PER_PORT_RATE_SHAPING_VN; ecore_init_cmng(&input, &sc->cmng); return; } /* rate shaping and fairness are disabled */ BLOGD(sc, DBG_LOAD, "rate shaping and fairness have been disabled\n"); } static int bxe_get_cmng_fns_mode(struct bxe_softc *sc) { if (CHIP_REV_IS_SLOW(sc)) { return (CMNG_FNS_NONE); } if (IS_MF(sc)) { return (CMNG_FNS_MINMAX); } return (CMNG_FNS_NONE); } static void storm_memset_cmng(struct bxe_softc *sc, struct cmng_init *cmng, uint8_t port) { int vn; int func; uint32_t addr; size_t size; addr = (BAR_XSTRORM_INTMEM + XSTORM_CMNG_PER_PORT_VARS_OFFSET(port)); size = sizeof(struct cmng_struct_per_port); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->port); for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { func = func_by_vn(sc, vn); addr = (BAR_XSTRORM_INTMEM + XSTORM_RATE_SHAPING_PER_VN_VARS_OFFSET(func)); size = sizeof(struct rate_shaping_vars_per_vn); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->vnic.vnic_max_rate[vn]); addr = (BAR_XSTRORM_INTMEM + XSTORM_FAIRNESS_PER_VN_VARS_OFFSET(func)); size = sizeof(struct fairness_vars_per_vn); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->vnic.vnic_min_rate[vn]); } } static void bxe_pf_init(struct bxe_softc *sc) { struct bxe_func_init_params func_init = { 0 }; struct event_ring_data eq_data = { { 0 } }; uint16_t flags; if (!CHIP_IS_E1x(sc)) { /* reset IGU PF statistics: MSIX + ATTN */ /* PF */ REG_WR(sc, (IGU_REG_STATISTIC_NUM_MESSAGE_SENT + (BXE_IGU_STAS_MSG_VF_CNT * 4) + ((CHIP_IS_MODE_4_PORT(sc) ? SC_FUNC(sc) : SC_VN(sc)) * 4)), 0); /* ATTN */ REG_WR(sc, (IGU_REG_STATISTIC_NUM_MESSAGE_SENT + (BXE_IGU_STAS_MSG_VF_CNT * 4) + (BXE_IGU_STAS_MSG_PF_CNT * 4) + ((CHIP_IS_MODE_4_PORT(sc) ? SC_FUNC(sc) : SC_VN(sc)) * 4)), 0); } /* function setup flags */ flags = (FUNC_FLG_STATS | FUNC_FLG_LEADING | FUNC_FLG_SPQ); /* * This flag is relevant for E1x only. * E2 doesn't have a TPA configuration in a function level. */ flags |= (sc->ifnet->if_capenable & IFCAP_LRO) ? FUNC_FLG_TPA : 0; func_init.func_flgs = flags; func_init.pf_id = SC_FUNC(sc); func_init.func_id = SC_FUNC(sc); func_init.spq_map = sc->spq_dma.paddr; func_init.spq_prod = sc->spq_prod_idx; bxe_func_init(sc, &func_init); memset(&sc->cmng, 0, sizeof(struct cmng_struct_per_port)); /* * Congestion management values depend on the link rate. * There is no active link so initial link rate is set to 10Gbps. * When the link comes up the congestion management values are * re-calculated according to the actual link rate. */ sc->link_vars.line_speed = SPEED_10000; bxe_cmng_fns_init(sc, TRUE, bxe_get_cmng_fns_mode(sc)); /* Only the PMF sets the HW */ if (sc->port.pmf) { storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } /* init Event Queue - PCI bus guarantees correct endainity */ eq_data.base_addr.hi = U64_HI(sc->eq_dma.paddr); eq_data.base_addr.lo = U64_LO(sc->eq_dma.paddr); eq_data.producer = sc->eq_prod; eq_data.index_id = HC_SP_INDEX_EQ_CONS; eq_data.sb_id = DEF_SB_ID; storm_memset_eq_data(sc, &eq_data, SC_FUNC(sc)); } static void bxe_hc_int_enable(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t addr = (port) ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0; uint32_t val = REG_RD(sc, addr); uint8_t msix = (sc->interrupt_mode == INTR_MODE_MSIX) ? TRUE : FALSE; uint8_t single_msix = ((sc->interrupt_mode == INTR_MODE_MSIX) && (sc->intr_count == 1)) ? TRUE : FALSE; uint8_t msi = (sc->interrupt_mode == INTR_MODE_MSI) ? TRUE : FALSE; if (msix) { val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0); val |= (HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); if (single_msix) { val |= HC_CONFIG_0_REG_SINGLE_ISR_EN_0; } } else if (msi) { val &= ~HC_CONFIG_0_REG_INT_LINE_EN_0; val |= (HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } else { val |= (HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); if (!CHIP_IS_E1(sc)) { BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x)\n", val, port, addr); REG_WR(sc, addr, val); val &= ~HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0; } } if (CHIP_IS_E1(sc)) { REG_WR(sc, (HC_REG_INT_MASK + port*4), 0x1FFFF); } BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x) mode %s\n", val, port, addr, ((msix) ? "MSI-X" : ((msi) ? "MSI" : "INTx"))); REG_WR(sc, addr, val); /* ensure that HC_CONFIG is written before leading/trailing edge config */ mb(); if (!CHIP_IS_E1(sc)) { /* init leading/trailing edge */ if (IS_MF(sc)) { val = (0xee0f | (1 << (SC_VN(sc) + 4))); if (sc->port.pmf) { /* enable nig and gpio3 attention */ val |= 0x1100; } } else { val = 0xffff; } REG_WR(sc, (HC_REG_TRAILING_EDGE_0 + port*8), val); REG_WR(sc, (HC_REG_LEADING_EDGE_0 + port*8), val); } /* make sure that interrupts are indeed enabled from here on */ mb(); } static void bxe_igu_int_enable(struct bxe_softc *sc) { uint32_t val; uint8_t msix = (sc->interrupt_mode == INTR_MODE_MSIX) ? TRUE : FALSE; uint8_t single_msix = ((sc->interrupt_mode == INTR_MODE_MSIX) && (sc->intr_count == 1)) ? TRUE : FALSE; uint8_t msi = (sc->interrupt_mode == INTR_MODE_MSI) ? TRUE : FALSE; val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); if (msix) { val &= ~(IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_SINGLE_ISR_EN); val |= (IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_ATTN_BIT_EN); if (single_msix) { val |= IGU_PF_CONF_SINGLE_ISR_EN; } } else if (msi) { val &= ~IGU_PF_CONF_INT_LINE_EN; val |= (IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_ATTN_BIT_EN | IGU_PF_CONF_SINGLE_ISR_EN); } else { val &= ~IGU_PF_CONF_MSI_MSIX_EN; val |= (IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN | IGU_PF_CONF_SINGLE_ISR_EN); } /* clean previous status - need to configure igu prior to ack*/ if ((!msix) || single_msix) { REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); bxe_ack_int(sc); } val |= IGU_PF_CONF_FUNC_EN; BLOGD(sc, DBG_INTR, "write 0x%x to IGU mode %s\n", val, ((msix) ? "MSI-X" : ((msi) ? "MSI" : "INTx"))); REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); mb(); /* init leading/trailing edge */ if (IS_MF(sc)) { val = (0xee0f | (1 << (SC_VN(sc) + 4))); if (sc->port.pmf) { /* enable nig and gpio3 attention */ val |= 0x1100; } } else { val = 0xffff; } REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, val); /* make sure that interrupts are indeed enabled from here on */ mb(); } static void bxe_int_enable(struct bxe_softc *sc) { if (sc->devinfo.int_block == INT_BLOCK_HC) { bxe_hc_int_enable(sc); } else { bxe_igu_int_enable(sc); } } static void bxe_hc_int_disable(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t addr = (port) ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0; uint32_t val = REG_RD(sc, addr); /* * In E1 we must use only PCI configuration space to disable MSI/MSIX * capablility. It's forbidden to disable IGU_PF_CONF_MSI_MSIX_EN in HC * block */ if (CHIP_IS_E1(sc)) { /* * Since IGU_PF_CONF_MSI_MSIX_EN still always on use mask register * to prevent from HC sending interrupts after we exit the function */ REG_WR(sc, (HC_REG_INT_MASK + port*4), 0); val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } else { val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x)\n", val, port, addr); /* flush all outstanding writes */ mb(); REG_WR(sc, addr, val); if (REG_RD(sc, addr) != val) { BLOGE(sc, "proper val not read from HC IGU!\n"); } } static void bxe_igu_int_disable(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); val &= ~(IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN); BLOGD(sc, DBG_INTR, "write %x to IGU\n", val); /* flush all outstanding writes */ mb(); REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); if (REG_RD(sc, IGU_REG_PF_CONFIGURATION) != val) { BLOGE(sc, "proper val not read from IGU!\n"); } } static void bxe_int_disable(struct bxe_softc *sc) { if (sc->devinfo.int_block == INT_BLOCK_HC) { bxe_hc_int_disable(sc); } else { bxe_igu_int_disable(sc); } } static void bxe_nic_init(struct bxe_softc *sc, int load_code) { int i; for (i = 0; i < sc->num_queues; i++) { bxe_init_eth_fp(sc, i); } rmb(); /* ensure status block indices were read */ bxe_init_rx_rings(sc); bxe_init_tx_rings(sc); if (IS_VF(sc)) { return; } /* initialize MOD_ABS interrupts */ elink_init_mod_abs_int(sc, &sc->link_vars, sc->devinfo.chip_id, sc->devinfo.shmem_base, sc->devinfo.shmem2_base, SC_PORT(sc)); bxe_init_def_sb(sc); bxe_update_dsb_idx(sc); bxe_init_sp_ring(sc); bxe_init_eq_ring(sc); bxe_init_internal(sc, load_code); bxe_pf_init(sc); bxe_stats_init(sc); /* flush all before enabling interrupts */ mb(); bxe_int_enable(sc); /* check for SPIO5 */ bxe_attn_int_deasserted0(sc, REG_RD(sc, (MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + SC_PORT(sc)*4)) & AEU_INPUTS_ATTN_BITS_SPIO5); } static inline void bxe_init_objs(struct bxe_softc *sc) { /* mcast rules must be added to tx if tx switching is enabled */ ecore_obj_type o_type = (sc->flags & BXE_TX_SWITCHING) ? ECORE_OBJ_TYPE_RX_TX : ECORE_OBJ_TYPE_RX; /* RX_MODE controlling object */ ecore_init_rx_mode_obj(sc, &sc->rx_mode_obj); /* multicast configuration controlling object */ ecore_init_mcast_obj(sc, &sc->mcast_obj, sc->fp[0].cl_id, sc->fp[0].index, SC_FUNC(sc), SC_FUNC(sc), BXE_SP(sc, mcast_rdata), BXE_SP_MAPPING(sc, mcast_rdata), ECORE_FILTER_MCAST_PENDING, &sc->sp_state, o_type); /* Setup CAM credit pools */ ecore_init_mac_credit_pool(sc, &sc->macs_pool, SC_FUNC(sc), CHIP_IS_E1x(sc) ? VNICS_PER_PORT(sc) : VNICS_PER_PATH(sc)); ecore_init_vlan_credit_pool(sc, &sc->vlans_pool, SC_ABS_FUNC(sc) >> 1, CHIP_IS_E1x(sc) ? VNICS_PER_PORT(sc) : VNICS_PER_PATH(sc)); /* RSS configuration object */ ecore_init_rss_config_obj(sc, &sc->rss_conf_obj, sc->fp[0].cl_id, sc->fp[0].index, SC_FUNC(sc), SC_FUNC(sc), BXE_SP(sc, rss_rdata), BXE_SP_MAPPING(sc, rss_rdata), ECORE_FILTER_RSS_CONF_PENDING, &sc->sp_state, ECORE_OBJ_TYPE_RX); } /* * Initialize the function. This must be called before sending CLIENT_SETUP * for the first client. */ static inline int bxe_func_start(struct bxe_softc *sc) { struct ecore_func_state_params func_params = { NULL }; struct ecore_func_start_params *start_params = &func_params.params.start; /* Prepare parameters for function state transitions */ bit_set(&func_params.ramrod_flags, RAMROD_COMP_WAIT); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_START; /* Function parameters */ start_params->mf_mode = sc->devinfo.mf_info.mf_mode; start_params->sd_vlan_tag = OVLAN(sc); if (CHIP_IS_E2(sc) || CHIP_IS_E3(sc)) { start_params->network_cos_mode = STATIC_COS; } else { /* CHIP_IS_E1X */ start_params->network_cos_mode = FW_WRR; } start_params->gre_tunnel_mode = 0; start_params->gre_tunnel_rss = 0; return (ecore_func_state_change(sc, &func_params)); } static int bxe_set_power_state(struct bxe_softc *sc, uint8_t state) { uint16_t pmcsr; /* If there is no power capability, silently succeed */ if (!(sc->devinfo.pcie_cap_flags & BXE_PM_CAPABLE_FLAG)) { BLOGW(sc, "No power capability\n"); return (0); } pmcsr = pci_read_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), 2); switch (state) { case PCI_PM_D0: pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), ((pmcsr & ~PCIM_PSTAT_DMASK) | PCIM_PSTAT_PME), 2); if (pmcsr & PCIM_PSTAT_DMASK) { /* delay required during transition out of D3hot */ DELAY(20000); } break; case PCI_PM_D3hot: /* XXX if there are other clients above don't shut down the power */ /* don't shut down the power for emulation and FPGA */ if (CHIP_REV_IS_SLOW(sc)) { return (0); } pmcsr &= ~PCIM_PSTAT_DMASK; pmcsr |= PCIM_PSTAT_D3; if (sc->wol) { pmcsr |= PCIM_PSTAT_PMEENABLE; } pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), pmcsr, 4); /* * No more memory access after this point until device is brought back * to D0 state. */ break; default: BLOGE(sc, "Can't support PCI power state = %d\n", state); return (-1); } return (0); } /* return true if succeeded to acquire the lock */ static uint8_t bxe_trylock_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; BLOGD(sc, DBG_LOAD, "Trying to take a resource lock 0x%x\n", resource); /* Validating that the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGD(sc, DBG_LOAD, "resource(0x%x) > HW_LOCK_MAX_RESOURCE_VALUE(0x%x)\n", resource, HW_LOCK_MAX_RESOURCE_VALUE); return (FALSE); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + func*8); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + (func - 6)*8); } /* try to acquire the lock */ REG_WR(sc, hw_lock_control_reg + 4, resource_bit); lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { return (TRUE); } BLOGE(sc, "Failed to get a resource lock 0x%x\n", resource); return (FALSE); } /* * Get the recovery leader resource id according to the engine this function * belongs to. Currently only only 2 engines is supported. */ static int bxe_get_leader_lock_resource(struct bxe_softc *sc) { if (SC_PATH(sc)) { return (HW_LOCK_RESOURCE_RECOVERY_LEADER_1); } else { return (HW_LOCK_RESOURCE_RECOVERY_LEADER_0); } } /* try to acquire a leader lock for current engine */ static uint8_t bxe_trylock_leader_lock(struct bxe_softc *sc) { return (bxe_trylock_hw_lock(sc, bxe_get_leader_lock_resource(sc))); } static int bxe_release_leader_lock(struct bxe_softc *sc) { return (bxe_release_hw_lock(sc, bxe_get_leader_lock_resource(sc))); } /* close gates #2, #3 and #4 */ static void bxe_set_234_gates(struct bxe_softc *sc, uint8_t close) { uint32_t val; /* gates #2 and #4a are closed/opened for "not E1" only */ if (!CHIP_IS_E1(sc)) { /* #4 */ REG_WR(sc, PXP_REG_HST_DISCARD_DOORBELLS, !!close); /* #2 */ REG_WR(sc, PXP_REG_HST_DISCARD_INTERNAL_WRITES, !!close); } /* #3 */ if (CHIP_IS_E1x(sc)) { /* prevent interrupts from HC on both ports */ val = REG_RD(sc, HC_REG_CONFIG_1); REG_WR(sc, HC_REG_CONFIG_1, (!close) ? (val | HC_CONFIG_1_REG_BLOCK_DISABLE_1) : (val & ~(uint32_t)HC_CONFIG_1_REG_BLOCK_DISABLE_1)); val = REG_RD(sc, HC_REG_CONFIG_0); REG_WR(sc, HC_REG_CONFIG_0, (!close) ? (val | HC_CONFIG_0_REG_BLOCK_DISABLE_0) : (val & ~(uint32_t)HC_CONFIG_0_REG_BLOCK_DISABLE_0)); } else { /* Prevent incomming interrupts in IGU */ val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION); REG_WR(sc, IGU_REG_BLOCK_CONFIGURATION, (!close) ? (val | IGU_BLOCK_CONFIGURATION_REG_BLOCK_ENABLE) : (val & ~(uint32_t)IGU_BLOCK_CONFIGURATION_REG_BLOCK_ENABLE)); } BLOGD(sc, DBG_LOAD, "%s gates #2, #3 and #4\n", close ? "closing" : "opening"); wmb(); } /* poll for pending writes bit, it should get cleared in no more than 1s */ static int bxe_er_poll_igu_vq(struct bxe_softc *sc) { uint32_t cnt = 1000; uint32_t pend_bits = 0; do { pend_bits = REG_RD(sc, IGU_REG_PENDING_BITS_STATUS); if (pend_bits == 0) { break; } DELAY(1000); } while (--cnt > 0); if (cnt == 0) { BLOGE(sc, "Still pending IGU requests bits=0x%08x!\n", pend_bits); return (-1); } return (0); } #define SHARED_MF_CLP_MAGIC 0x80000000 /* 'magic' bit */ static void bxe_clp_reset_prep(struct bxe_softc *sc, uint32_t *magic_val) { /* Do some magic... */ uint32_t val = MFCFG_RD(sc, shared_mf_config.clp_mb); *magic_val = val & SHARED_MF_CLP_MAGIC; MFCFG_WR(sc, shared_mf_config.clp_mb, val | SHARED_MF_CLP_MAGIC); } /* restore the value of the 'magic' bit */ static void bxe_clp_reset_done(struct bxe_softc *sc, uint32_t magic_val) { /* Restore the 'magic' bit value... */ uint32_t val = MFCFG_RD(sc, shared_mf_config.clp_mb); MFCFG_WR(sc, shared_mf_config.clp_mb, (val & (~SHARED_MF_CLP_MAGIC)) | magic_val); } /* prepare for MCP reset, takes care of CLP configurations */ static void bxe_reset_mcp_prep(struct bxe_softc *sc, uint32_t *magic_val) { uint32_t shmem; uint32_t validity_offset; /* set `magic' bit in order to save MF config */ if (!CHIP_IS_E1(sc)) { bxe_clp_reset_prep(sc, magic_val); } /* get shmem offset */ shmem = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); validity_offset = offsetof(struct shmem_region, validity_map[SC_PORT(sc)]); /* Clear validity map flags */ if (shmem > 0) { REG_WR(sc, shmem + validity_offset, 0); } } #define MCP_TIMEOUT 5000 /* 5 seconds (in ms) */ #define MCP_ONE_TIMEOUT 100 /* 100 ms */ static void bxe_mcp_wait_one(struct bxe_softc *sc) { /* special handling for emulation and FPGA (10 times longer) */ if (CHIP_REV_IS_SLOW(sc)) { DELAY((MCP_ONE_TIMEOUT*10) * 1000); } else { DELAY((MCP_ONE_TIMEOUT) * 1000); } } /* initialize shmem_base and waits for validity signature to appear */ static int bxe_init_shmem(struct bxe_softc *sc) { int cnt = 0; uint32_t val = 0; do { sc->devinfo.shmem_base = sc->link_params.shmem_base = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); if (sc->devinfo.shmem_base) { val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]); if (val & SHR_MEM_VALIDITY_MB) return (0); } bxe_mcp_wait_one(sc); } while (cnt++ < (MCP_TIMEOUT / MCP_ONE_TIMEOUT)); BLOGE(sc, "BAD MCP validity signature\n"); return (-1); } static int bxe_reset_mcp_comp(struct bxe_softc *sc, uint32_t magic_val) { int rc = bxe_init_shmem(sc); /* Restore the `magic' bit value */ if (!CHIP_IS_E1(sc)) { bxe_clp_reset_done(sc, magic_val); } return (rc); } static void bxe_pxp_prep(struct bxe_softc *sc) { if (!CHIP_IS_E1(sc)) { REG_WR(sc, PXP2_REG_RD_START_INIT, 0); REG_WR(sc, PXP2_REG_RQ_RBC_DONE, 0); wmb(); } } /* * Reset the whole chip except for: * - PCIE core * - PCI Glue, PSWHST, PXP/PXP2 RF (all controlled by one reset bit) * - IGU * - MISC (including AEU) * - GRC * - RBCN, RBCP */ static void bxe_process_kill_chip_reset(struct bxe_softc *sc, uint8_t global) { uint32_t not_reset_mask1, reset_mask1, not_reset_mask2, reset_mask2; uint32_t global_bits2, stay_reset2; /* * Bits that have to be set in reset_mask2 if we want to reset 'global' * (per chip) blocks. */ global_bits2 = MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_CMN_CPU | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_CMN_CORE; /* * Don't reset the following blocks. * Important: per port blocks (such as EMAC, BMAC, UMAC) can't be * reset, as in 4 port device they might still be owned * by the MCP (there is only one leader per path). */ not_reset_mask1 = MISC_REGISTERS_RESET_REG_1_RST_HC | MISC_REGISTERS_RESET_REG_1_RST_PXPV | MISC_REGISTERS_RESET_REG_1_RST_PXP; not_reset_mask2 = MISC_REGISTERS_RESET_REG_2_RST_PCI_MDIO | MISC_REGISTERS_RESET_REG_2_RST_EMAC0_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_EMAC1_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_MISC_CORE | MISC_REGISTERS_RESET_REG_2_RST_RBCN | MISC_REGISTERS_RESET_REG_2_RST_GRC | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_REG_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_HARD_CORE_RST_B | MISC_REGISTERS_RESET_REG_2_RST_ATC | MISC_REGISTERS_RESET_REG_2_PGLC | MISC_REGISTERS_RESET_REG_2_RST_BMAC0 | MISC_REGISTERS_RESET_REG_2_RST_BMAC1 | MISC_REGISTERS_RESET_REG_2_RST_EMAC0 | MISC_REGISTERS_RESET_REG_2_RST_EMAC1 | MISC_REGISTERS_RESET_REG_2_UMAC0 | MISC_REGISTERS_RESET_REG_2_UMAC1; /* * Keep the following blocks in reset: * - all xxMACs are handled by the elink code. */ stay_reset2 = MISC_REGISTERS_RESET_REG_2_XMAC | MISC_REGISTERS_RESET_REG_2_XMAC_SOFT; /* Full reset masks according to the chip */ reset_mask1 = 0xffffffff; if (CHIP_IS_E1(sc)) reset_mask2 = 0xffff; else if (CHIP_IS_E1H(sc)) reset_mask2 = 0x1ffff; else if (CHIP_IS_E2(sc)) reset_mask2 = 0xfffff; else /* CHIP_IS_E3 */ reset_mask2 = 0x3ffffff; /* Don't reset global blocks unless we need to */ if (!global) reset_mask2 &= ~global_bits2; /* * In case of attention in the QM, we need to reset PXP * (MISC_REGISTERS_RESET_REG_2_RST_PXP_RQ_RD_WR) before QM * because otherwise QM reset would release 'close the gates' shortly * before resetting the PXP, then the PSWRQ would send a write * request to PGLUE. Then when PXP is reset, PGLUE would try to * read the payload data from PSWWR, but PSWWR would not * respond. The write queue in PGLUE would stuck, dmae commands * would not return. Therefore it's important to reset the second * reset register (containing the * MISC_REGISTERS_RESET_REG_2_RST_PXP_RQ_RD_WR bit) before the * first one (containing the MISC_REGISTERS_RESET_REG_1_RST_QM * bit). */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, reset_mask2 & (~not_reset_mask2)); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, reset_mask1 & (~not_reset_mask1)); mb(); wmb(); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, reset_mask2 & (~stay_reset2)); mb(); wmb(); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, reset_mask1); wmb(); } static int bxe_process_kill(struct bxe_softc *sc, uint8_t global) { int cnt = 1000; uint32_t val = 0; uint32_t sr_cnt, blk_cnt, port_is_idle_0, port_is_idle_1, pgl_exp_rom2; uint32_t tags_63_32 = 0; /* Empty the Tetris buffer, wait for 1s */ do { sr_cnt = REG_RD(sc, PXP2_REG_RD_SR_CNT); blk_cnt = REG_RD(sc, PXP2_REG_RD_BLK_CNT); port_is_idle_0 = REG_RD(sc, PXP2_REG_RD_PORT_IS_IDLE_0); port_is_idle_1 = REG_RD(sc, PXP2_REG_RD_PORT_IS_IDLE_1); pgl_exp_rom2 = REG_RD(sc, PXP2_REG_PGL_EXP_ROM2); if (CHIP_IS_E3(sc)) { tags_63_32 = REG_RD(sc, PGLUE_B_REG_TAGS_63_32); } if ((sr_cnt == 0x7e) && (blk_cnt == 0xa0) && ((port_is_idle_0 & 0x1) == 0x1) && ((port_is_idle_1 & 0x1) == 0x1) && (pgl_exp_rom2 == 0xffffffff) && (!CHIP_IS_E3(sc) || (tags_63_32 == 0xffffffff))) break; DELAY(1000); } while (cnt-- > 0); if (cnt <= 0) { BLOGE(sc, "ERROR: Tetris buffer didn't get empty or there " "are still outstanding read requests after 1s! " "sr_cnt=0x%08x, blk_cnt=0x%08x, port_is_idle_0=0x%08x, " "port_is_idle_1=0x%08x, pgl_exp_rom2=0x%08x\n", sr_cnt, blk_cnt, port_is_idle_0, port_is_idle_1, pgl_exp_rom2); return (-1); } mb(); /* Close gates #2, #3 and #4 */ bxe_set_234_gates(sc, TRUE); /* Poll for IGU VQs for 57712 and newer chips */ if (!CHIP_IS_E1x(sc) && bxe_er_poll_igu_vq(sc)) { return (-1); } /* XXX indicate that "process kill" is in progress to MCP */ /* clear "unprepared" bit */ REG_WR(sc, MISC_REG_UNPREPARED, 0); mb(); /* Make sure all is written to the chip before the reset */ wmb(); /* * Wait for 1ms to empty GLUE and PCI-E core queues, * PSWHST, GRC and PSWRD Tetris buffer. */ DELAY(1000); /* Prepare to chip reset: */ /* MCP */ if (global) { bxe_reset_mcp_prep(sc, &val); } /* PXP */ bxe_pxp_prep(sc); mb(); /* reset the chip */ bxe_process_kill_chip_reset(sc, global); mb(); /* Recover after reset: */ /* MCP */ if (global && bxe_reset_mcp_comp(sc, val)) { return (-1); } /* XXX add resetting the NO_MCP mode DB here */ /* Open the gates #2, #3 and #4 */ bxe_set_234_gates(sc, FALSE); /* XXX * IGU/AEU preparation bring back the AEU/IGU to a reset state * re-enable attentions */ return (0); } static int bxe_leader_reset(struct bxe_softc *sc) { int rc = 0; uint8_t global = bxe_reset_is_global(sc); uint32_t load_code; /* * If not going to reset MCP, load "fake" driver to reset HW while * driver is owner of the HW. */ if (!global && !BXE_NOMCP(sc)) { load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_REQ, DRV_MSG_CODE_LOAD_REQ_WITH_LFA); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; goto exit_leader_reset; } if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) && (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) { BLOGE(sc, "MCP unexpected response, aborting\n"); rc = -1; goto exit_leader_reset2; } load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; goto exit_leader_reset2; } } /* try to recover after the failure */ if (bxe_process_kill(sc, global)) { BLOGE(sc, "Something bad occurred on engine %d!\n", SC_PATH(sc)); rc = -1; goto exit_leader_reset2; } /* * Clear the RESET_IN_PROGRESS and RESET_GLOBAL bits and update the driver * state. */ bxe_set_reset_done(sc); if (global) { bxe_clear_reset_global(sc); } exit_leader_reset2: /* unload "fake driver" if it was loaded */ if (!global && !BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, 0); } exit_leader_reset: sc->is_leader = 0; bxe_release_leader_lock(sc); mb(); return (rc); } /* * prepare INIT transition, parameters configured: * - HC configuration * - Queue's CDU context */ static void bxe_pf_q_prep_init(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_queue_init_params *init_params) { uint8_t cos; int cxt_index, cxt_offset; bxe_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags); bxe_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags); bxe_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags); bxe_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags); /* HC rate */ init_params->rx.hc_rate = sc->hc_rx_ticks ? (1000000 / sc->hc_rx_ticks) : 0; init_params->tx.hc_rate = sc->hc_tx_ticks ? (1000000 / sc->hc_tx_ticks) : 0; /* FW SB ID */ init_params->rx.fw_sb_id = init_params->tx.fw_sb_id = fp->fw_sb_id; /* CQ index among the SB indices */ init_params->rx.sb_cq_index = HC_INDEX_ETH_RX_CQ_CONS; init_params->tx.sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS; /* set maximum number of COSs supported by this queue */ init_params->max_cos = sc->max_cos; BLOGD(sc, DBG_LOAD, "fp %d setting queue params max cos to %d\n", fp->index, init_params->max_cos); /* set the context pointers queue object */ for (cos = FIRST_TX_COS_INDEX; cos < init_params->max_cos; cos++) { /* XXX change index/cid here if ever support multiple tx CoS */ /* fp->txdata[cos]->cid */ cxt_index = fp->index / ILT_PAGE_CIDS; cxt_offset = fp->index - (cxt_index * ILT_PAGE_CIDS); init_params->cxts[cos] = &sc->context[cxt_index].vcxt[cxt_offset].eth; } } /* set flags that are common for the Tx-only and not normal connections */ static unsigned long bxe_get_common_flags(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t zero_stats) { unsigned long flags = 0; /* PF driver will always initialize the Queue to an ACTIVE state */ bxe_set_bit(ECORE_Q_FLG_ACTIVE, &flags); /* * tx only connections collect statistics (on the same index as the * parent connection). The statistics are zeroed when the parent * connection is initialized. */ bxe_set_bit(ECORE_Q_FLG_STATS, &flags); if (zero_stats) { bxe_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags); } /* * tx only connections can support tx-switching, though their * CoS-ness doesn't survive the loopback */ if (sc->flags & BXE_TX_SWITCHING) { bxe_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags); } bxe_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags); return (flags); } static unsigned long bxe_get_q_flags(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t leading) { unsigned long flags = 0; if (IS_MF_SD(sc)) { bxe_set_bit(ECORE_Q_FLG_OV, &flags); } if (sc->ifnet->if_capenable & IFCAP_LRO) { bxe_set_bit(ECORE_Q_FLG_TPA, &flags); bxe_set_bit(ECORE_Q_FLG_TPA_IPV6, &flags); #if 0 if (fp->mode == TPA_MODE_GRO) __set_bit(ECORE_Q_FLG_TPA_GRO, &flags); #endif } if (leading) { bxe_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags); bxe_set_bit(ECORE_Q_FLG_MCAST, &flags); } bxe_set_bit(ECORE_Q_FLG_VLAN, &flags); #if 0 /* configure silent vlan removal */ if (IS_MF_AFEX(sc)) { bxe_set_bit(ECORE_Q_FLG_SILENT_VLAN_REM, &flags); } #endif /* merge with common flags */ return (flags | bxe_get_common_flags(sc, fp, TRUE)); } static void bxe_pf_q_prep_general(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_general_setup_params *gen_init, uint8_t cos) { gen_init->stat_id = bxe_stats_id(fp); gen_init->spcl_id = fp->cl_id; gen_init->mtu = sc->mtu; gen_init->cos = cos; } static void bxe_pf_rx_q_prep(struct bxe_softc *sc, struct bxe_fastpath *fp, struct rxq_pause_params *pause, struct ecore_rxq_setup_params *rxq_init) { uint8_t max_sge = 0; uint16_t sge_sz = 0; uint16_t tpa_agg_size = 0; if (sc->ifnet->if_capenable & IFCAP_LRO) { pause->sge_th_lo = SGE_TH_LO(sc); pause->sge_th_hi = SGE_TH_HI(sc); /* validate SGE ring has enough to cross high threshold */ if (sc->dropless_fc && (pause->sge_th_hi + FW_PREFETCH_CNT) > (RX_SGE_USABLE_PER_PAGE * RX_SGE_NUM_PAGES)) { BLOGW(sc, "sge ring threshold limit\n"); } /* minimum max_aggregation_size is 2*MTU (two full buffers) */ tpa_agg_size = (2 * sc->mtu); if (tpa_agg_size < sc->max_aggregation_size) { tpa_agg_size = sc->max_aggregation_size; } max_sge = SGE_PAGE_ALIGN(sc->mtu) >> SGE_PAGE_SHIFT; max_sge = ((max_sge + PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >> PAGES_PER_SGE_SHIFT; sge_sz = (uint16_t)min(SGE_PAGES, 0xffff); } /* pause - not for e1 */ if (!CHIP_IS_E1(sc)) { pause->bd_th_lo = BD_TH_LO(sc); pause->bd_th_hi = BD_TH_HI(sc); pause->rcq_th_lo = RCQ_TH_LO(sc); pause->rcq_th_hi = RCQ_TH_HI(sc); /* validate rings have enough entries to cross high thresholds */ if (sc->dropless_fc && pause->bd_th_hi + FW_PREFETCH_CNT > sc->rx_ring_size) { BLOGW(sc, "rx bd ring threshold limit\n"); } if (sc->dropless_fc && pause->rcq_th_hi + FW_PREFETCH_CNT > RCQ_NUM_PAGES * RCQ_USABLE_PER_PAGE) { BLOGW(sc, "rcq ring threshold limit\n"); } pause->pri_map = 1; } /* rxq setup */ rxq_init->dscr_map = fp->rx_dma.paddr; rxq_init->sge_map = fp->rx_sge_dma.paddr; rxq_init->rcq_map = fp->rcq_dma.paddr; rxq_init->rcq_np_map = (fp->rcq_dma.paddr + BCM_PAGE_SIZE); /* * This should be a maximum number of data bytes that may be * placed on the BD (not including paddings). */ rxq_init->buf_sz = (fp->rx_buf_size - IP_HEADER_ALIGNMENT_PADDING); rxq_init->cl_qzone_id = fp->cl_qzone_id; rxq_init->tpa_agg_sz = tpa_agg_size; rxq_init->sge_buf_sz = sge_sz; rxq_init->max_sges_pkt = max_sge; rxq_init->rss_engine_id = SC_FUNC(sc); rxq_init->mcast_engine_id = SC_FUNC(sc); /* * Maximum number or simultaneous TPA aggregation for this Queue. * For PF Clients it should be the maximum available number. * VF driver(s) may want to define it to a smaller value. */ rxq_init->max_tpa_queues = MAX_AGG_QS(sc); rxq_init->cache_line_log = BXE_RX_ALIGN_SHIFT; rxq_init->fw_sb_id = fp->fw_sb_id; rxq_init->sb_cq_index = HC_INDEX_ETH_RX_CQ_CONS; /* * configure silent vlan removal * if multi function mode is afex, then mask default vlan */ if (IS_MF_AFEX(sc)) { rxq_init->silent_removal_value = sc->devinfo.mf_info.afex_def_vlan_tag; rxq_init->silent_removal_mask = EVL_VLID_MASK; } } static void bxe_pf_tx_q_prep(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_txq_setup_params *txq_init, uint8_t cos) { /* * XXX If multiple CoS is ever supported then each fastpath structure * will need to maintain tx producer/consumer/dma/etc values *per* CoS. * fp->txdata[cos]->tx_dma.paddr; */ txq_init->dscr_map = fp->tx_dma.paddr; txq_init->sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS + cos; txq_init->traffic_type = LLFC_TRAFFIC_TYPE_NW; txq_init->fw_sb_id = fp->fw_sb_id; /* * set the TSS leading client id for TX classfication to the * leading RSS client id */ txq_init->tss_leading_cl_id = BXE_FP(sc, 0, cl_id); } /* * This function performs 2 steps in a queue state machine: * 1) RESET->INIT * 2) INIT->SETUP */ static int bxe_setup_queue(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t leading) { struct ecore_queue_state_params q_params = { NULL }; struct ecore_queue_setup_params *setup_params = &q_params.params.setup; #if 0 struct ecore_queue_setup_tx_only_params *tx_only_params = &q_params.params.tx_only; uint8_t tx_index; #endif int rc; BLOGD(sc, DBG_LOAD, "setting up queue %d\n", fp->index); bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); q_params.q_obj = &BXE_SP_OBJ(sc, fp).q_obj; /* we want to wait for completion in this context */ bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); /* prepare the INIT parameters */ bxe_pf_q_prep_init(sc, fp, &q_params.params.init); /* Set the command */ q_params.cmd = ECORE_Q_CMD_INIT; /* Change the state to INIT */ rc = ecore_queue_state_change(sc, &q_params); if (rc) { BLOGE(sc, "Queue(%d) INIT failed\n", fp->index); return (rc); } BLOGD(sc, DBG_LOAD, "init complete\n"); /* now move the Queue to the SETUP state */ memset(setup_params, 0, sizeof(*setup_params)); /* set Queue flags */ setup_params->flags = bxe_get_q_flags(sc, fp, leading); /* set general SETUP parameters */ bxe_pf_q_prep_general(sc, fp, &setup_params->gen_params, FIRST_TX_COS_INDEX); bxe_pf_rx_q_prep(sc, fp, &setup_params->pause_params, &setup_params->rxq_params); bxe_pf_tx_q_prep(sc, fp, &setup_params->txq_params, FIRST_TX_COS_INDEX); /* Set the command */ q_params.cmd = ECORE_Q_CMD_SETUP; /* change the state to SETUP */ rc = ecore_queue_state_change(sc, &q_params); if (rc) { BLOGE(sc, "Queue(%d) SETUP failed\n", fp->index); return (rc); } #if 0 /* loop through the relevant tx-only indices */ for (tx_index = FIRST_TX_ONLY_COS_INDEX; tx_index < sc->max_cos; tx_index++) { /* prepare and send tx-only ramrod*/ rc = bxe_setup_tx_only(sc, fp, &q_params, tx_only_params, tx_index, leading); if (rc) { BLOGE(sc, "Queue(%d.%d) TX_ONLY_SETUP failed\n", fp->index, tx_index); return (rc); } } #endif return (rc); } static int bxe_setup_leading(struct bxe_softc *sc) { return (bxe_setup_queue(sc, &sc->fp[0], TRUE)); } static int bxe_config_rss_pf(struct bxe_softc *sc, struct ecore_rss_config_obj *rss_obj, uint8_t config_hash) { struct ecore_config_rss_params params = { NULL }; int i; /* * Although RSS is meaningless when there is a single HW queue we * still need it enabled in order to have HW Rx hash generated. */ params.rss_obj = rss_obj; bxe_set_bit(RAMROD_COMP_WAIT, ¶ms.ramrod_flags); bxe_set_bit(ECORE_RSS_MODE_REGULAR, ¶ms.rss_flags); /* RSS configuration */ bxe_set_bit(ECORE_RSS_IPV4, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV4_TCP, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV6, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV6_TCP, ¶ms.rss_flags); if (rss_obj->udp_rss_v4) { bxe_set_bit(ECORE_RSS_IPV4_UDP, ¶ms.rss_flags); } if (rss_obj->udp_rss_v6) { bxe_set_bit(ECORE_RSS_IPV6_UDP, ¶ms.rss_flags); } /* Hash bits */ params.rss_result_mask = MULTI_MASK; memcpy(params.ind_table, rss_obj->ind_table, sizeof(params.ind_table)); if (config_hash) { /* RSS keys */ for (i = 0; i < sizeof(params.rss_key) / 4; i++) { params.rss_key[i] = arc4random(); } bxe_set_bit(ECORE_RSS_SET_SRCH, ¶ms.rss_flags); } return (ecore_config_rss(sc, ¶ms)); } static int bxe_config_rss_eth(struct bxe_softc *sc, uint8_t config_hash) { return (bxe_config_rss_pf(sc, &sc->rss_conf_obj, config_hash)); } static int bxe_init_rss_pf(struct bxe_softc *sc) { uint8_t num_eth_queues = BXE_NUM_ETH_QUEUES(sc); int i; /* * Prepare the initial contents of the indirection table if * RSS is enabled */ for (i = 0; i < sizeof(sc->rss_conf_obj.ind_table); i++) { sc->rss_conf_obj.ind_table[i] = (sc->fp->cl_id + (i % num_eth_queues)); } if (sc->udp_rss) { sc->rss_conf_obj.udp_rss_v4 = sc->rss_conf_obj.udp_rss_v6 = 1; } /* * For 57710 and 57711 SEARCHER configuration (rss_keys) is * per-port, so if explicit configuration is needed, do it only * for a PMF. * * For 57712 and newer it's a per-function configuration. */ return (bxe_config_rss_eth(sc, sc->port.pmf || !CHIP_IS_E1x(sc))); } static int bxe_set_mac_one(struct bxe_softc *sc, uint8_t *mac, struct ecore_vlan_mac_obj *obj, uint8_t set, int mac_type, unsigned long *ramrod_flags) { struct ecore_vlan_mac_ramrod_params ramrod_param; int rc; memset(&ramrod_param, 0, sizeof(ramrod_param)); /* fill in general parameters */ ramrod_param.vlan_mac_obj = obj; ramrod_param.ramrod_flags = *ramrod_flags; /* fill a user request section if needed */ if (!bxe_test_bit(RAMROD_CONT, ramrod_flags)) { memcpy(ramrod_param.user_req.u.mac.mac, mac, ETH_ALEN); bxe_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags); /* Set the command: ADD or DEL */ ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD : ECORE_VLAN_MAC_DEL; } rc = ecore_config_vlan_mac(sc, &ramrod_param); if (rc == ECORE_EXISTS) { BLOGD(sc, DBG_SP, "Failed to schedule ADD operations (EEXIST)\n"); /* do not treat adding same MAC as error */ rc = 0; } else if (rc < 0) { BLOGE(sc, "%s MAC failed (%d)\n", (set ? "Set" : "Delete"), rc); } return (rc); } static int bxe_set_eth_mac(struct bxe_softc *sc, uint8_t set) { unsigned long ramrod_flags = 0; BLOGD(sc, DBG_LOAD, "Adding Ethernet MAC\n"); bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Eth MAC is set on RSS leading client (fp[0]) */ return (bxe_set_mac_one(sc, sc->link_params.mac_addr, &sc->sp_objs->mac_obj, set, ECORE_ETH_MAC, &ramrod_flags)); } #if 0 static void bxe_update_max_mf_config(struct bxe_softc *sc, uint32_t value) { /* load old values */ uint32_t mf_cfg = sc->devinfo.mf_info.mf_config[SC_VN(sc)]; if (value != bxe_extract_max_cfg(sc, mf_cfg)) { /* leave all but MAX value */ mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK; /* set new MAX value */ mf_cfg |= ((value << FUNC_MF_CFG_MAX_BW_SHIFT) & FUNC_MF_CFG_MAX_BW_MASK); bxe_fw_command(sc, DRV_MSG_CODE_SET_MF_BW, mf_cfg); } } #endif static int bxe_get_cur_phy_idx(struct bxe_softc *sc) { uint32_t sel_phy_idx = 0; if (sc->link_params.num_phys <= 1) { return (ELINK_INT_PHY); } if (sc->link_vars.link_up) { sel_phy_idx = ELINK_EXT_PHY1; /* In case link is SERDES, check if the ELINK_EXT_PHY2 is the one */ if ((sc->link_vars.link_status & LINK_STATUS_SERDES_LINK) && (sc->link_params.phy[ELINK_EXT_PHY2].supported & ELINK_SUPPORTED_FIBRE)) sel_phy_idx = ELINK_EXT_PHY2; } else { switch (elink_phy_selection(&sc->link_params)) { case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY: sel_phy_idx = ELINK_EXT_PHY1; break; case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY: case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY: sel_phy_idx = ELINK_EXT_PHY2; break; } } return (sel_phy_idx); } static int bxe_get_link_cfg_idx(struct bxe_softc *sc) { uint32_t sel_phy_idx = bxe_get_cur_phy_idx(sc); /* * The selected activated PHY is always after swapping (in case PHY * swapping is enabled). So when swapping is enabled, we need to reverse * the configuration */ if (sc->link_params.multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED) { if (sel_phy_idx == ELINK_EXT_PHY1) sel_phy_idx = ELINK_EXT_PHY2; else if (sel_phy_idx == ELINK_EXT_PHY2) sel_phy_idx = ELINK_EXT_PHY1; } return (ELINK_LINK_CONFIG_IDX(sel_phy_idx)); } static void bxe_set_requested_fc(struct bxe_softc *sc) { /* * Initialize link parameters structure variables * It is recommended to turn off RX FC for jumbo frames * for better performance */ if (CHIP_IS_E1x(sc) && (sc->mtu > 5000)) { sc->link_params.req_fc_auto_adv = ELINK_FLOW_CTRL_TX; } else { sc->link_params.req_fc_auto_adv = ELINK_FLOW_CTRL_BOTH; } } static void bxe_calc_fc_adv(struct bxe_softc *sc) { uint8_t cfg_idx = bxe_get_link_cfg_idx(sc); switch (sc->link_vars.ieee_fc & MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) { case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_NONE: default: sc->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause | ADVERTISED_Pause); break; case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH: sc->port.advertising[cfg_idx] |= (ADVERTISED_Asym_Pause | ADVERTISED_Pause); break; case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC: sc->port.advertising[cfg_idx] |= ADVERTISED_Asym_Pause; break; } } static uint16_t bxe_get_mf_speed(struct bxe_softc *sc) { uint16_t line_speed = sc->link_vars.line_speed; if (IS_MF(sc)) { uint16_t maxCfg = bxe_extract_max_cfg(sc, sc->devinfo.mf_info.mf_config[SC_VN(sc)]); /* calculate the current MAX line speed limit for the MF devices */ if (IS_MF_SI(sc)) { line_speed = (line_speed * maxCfg) / 100; } else { /* SD mode */ uint16_t vn_max_rate = maxCfg * 100; if (vn_max_rate < line_speed) { line_speed = vn_max_rate; } } } return (line_speed); } static void bxe_fill_report_data(struct bxe_softc *sc, struct bxe_link_report_data *data) { uint16_t line_speed = bxe_get_mf_speed(sc); memset(data, 0, sizeof(*data)); /* fill the report data with the effective line speed */ data->line_speed = line_speed; /* Link is down */ if (!sc->link_vars.link_up || (sc->flags & BXE_MF_FUNC_DIS)) { bxe_set_bit(BXE_LINK_REPORT_LINK_DOWN, &data->link_report_flags); } /* Full DUPLEX */ if (sc->link_vars.duplex == DUPLEX_FULL) { bxe_set_bit(BXE_LINK_REPORT_FULL_DUPLEX, &data->link_report_flags); } /* Rx Flow Control is ON */ if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) { bxe_set_bit(BXE_LINK_REPORT_RX_FC_ON, &data->link_report_flags); } /* Tx Flow Control is ON */ if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) { bxe_set_bit(BXE_LINK_REPORT_TX_FC_ON, &data->link_report_flags); } } /* report link status to OS, should be called under phy_lock */ static void bxe_link_report_locked(struct bxe_softc *sc) { struct bxe_link_report_data cur_data; /* reread mf_cfg */ if (IS_PF(sc) && !CHIP_IS_E1(sc)) { bxe_read_mf_cfg(sc); } /* Read the current link report info */ bxe_fill_report_data(sc, &cur_data); /* Don't report link down or exactly the same link status twice */ if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) || (bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &sc->last_reported_link.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &cur_data.link_report_flags))) { return; } sc->link_cnt++; /* report new link params and remember the state for the next time */ memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data)); if (bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &cur_data.link_report_flags)) { if_link_state_change(sc->ifnet, LINK_STATE_DOWN); BLOGI(sc, "NIC Link is Down\n"); } else { const char *duplex; const char *flow; if (bxe_test_and_clear_bit(BXE_LINK_REPORT_FULL_DUPLEX, &cur_data.link_report_flags)) { duplex = "full"; } else { duplex = "half"; } /* * Handle the FC at the end so that only these flags would be * possibly set. This way we may easily check if there is no FC * enabled. */ if (cur_data.link_report_flags) { if (bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - receive & transmit"; } else if (bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && !bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - receive"; } else if (!bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - transmit"; } else { flow = "none"; /* possible? */ } } else { flow = "none"; } if_link_state_change(sc->ifnet, LINK_STATE_UP); BLOGI(sc, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n", cur_data.line_speed, duplex, flow); } } static void bxe_link_report(struct bxe_softc *sc) { BXE_PHY_LOCK(sc); bxe_link_report_locked(sc); BXE_PHY_UNLOCK(sc); } static void bxe_link_status_update(struct bxe_softc *sc) { if (sc->state != BXE_STATE_OPEN) { return; } #if 0 /* read updated dcb configuration */ if (IS_PF(sc)) bxe_dcbx_pmf_update(sc); #endif if (IS_PF(sc) && !CHIP_REV_IS_SLOW(sc)) { elink_link_status_update(&sc->link_params, &sc->link_vars); } else { sc->port.supported[0] |= (ELINK_SUPPORTED_10baseT_Half | ELINK_SUPPORTED_10baseT_Full | ELINK_SUPPORTED_100baseT_Half | ELINK_SUPPORTED_100baseT_Full | ELINK_SUPPORTED_1000baseT_Full | ELINK_SUPPORTED_2500baseX_Full | ELINK_SUPPORTED_10000baseT_Full | ELINK_SUPPORTED_TP | ELINK_SUPPORTED_FIBRE | ELINK_SUPPORTED_Autoneg | ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause); sc->port.advertising[0] = sc->port.supported[0]; sc->link_params.sc = sc; sc->link_params.port = SC_PORT(sc); sc->link_params.req_duplex[0] = DUPLEX_FULL; sc->link_params.req_flow_ctrl[0] = ELINK_FLOW_CTRL_NONE; sc->link_params.req_line_speed[0] = SPEED_10000; sc->link_params.speed_cap_mask[0] = 0x7f0000; sc->link_params.switch_cfg = ELINK_SWITCH_CFG_10G; if (CHIP_REV_IS_FPGA(sc)) { sc->link_vars.mac_type = ELINK_MAC_TYPE_EMAC; sc->link_vars.line_speed = ELINK_SPEED_1000; sc->link_vars.link_status = (LINK_STATUS_LINK_UP | LINK_STATUS_SPEED_AND_DUPLEX_1000TFD); } else { sc->link_vars.mac_type = ELINK_MAC_TYPE_BMAC; sc->link_vars.line_speed = ELINK_SPEED_10000; sc->link_vars.link_status = (LINK_STATUS_LINK_UP | LINK_STATUS_SPEED_AND_DUPLEX_10GTFD); } sc->link_vars.link_up = 1; sc->link_vars.duplex = DUPLEX_FULL; sc->link_vars.flow_ctrl = ELINK_FLOW_CTRL_NONE; if (IS_PF(sc)) { REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + sc->link_params.port*4, 0); bxe_stats_handle(sc, STATS_EVENT_LINK_UP); bxe_link_report(sc); } } if (IS_PF(sc)) { if (sc->link_vars.link_up) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } else { bxe_stats_handle(sc, STATS_EVENT_STOP); } bxe_link_report(sc); } else { bxe_link_report(sc); bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } } static int bxe_initial_phy_init(struct bxe_softc *sc, int load_mode) { int rc, cfg_idx = bxe_get_link_cfg_idx(sc); uint16_t req_line_speed = sc->link_params.req_line_speed[cfg_idx]; struct elink_params *lp = &sc->link_params; bxe_set_requested_fc(sc); if (CHIP_REV_IS_SLOW(sc)) { uint32_t bond = CHIP_BOND_ID(sc); uint32_t feat = 0; if (CHIP_IS_E2(sc) && CHIP_IS_MODE_4_PORT(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; } else if (bond & 0x4) { if (CHIP_IS_E3(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC; } else { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; } } else if (bond & 0x8) { if (CHIP_IS_E3(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC; } else { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; } } /* disable EMAC for E3 and above */ if (bond & 0x2) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; } sc->link_params.feature_config_flags |= feat; } BXE_PHY_LOCK(sc); if (load_mode == LOAD_DIAG) { lp->loopback_mode = ELINK_LOOPBACK_XGXS; /* Prefer doing PHY loopback at 10G speed, if possible */ if (lp->req_line_speed[cfg_idx] < ELINK_SPEED_10000) { if (lp->speed_cap_mask[cfg_idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) { lp->req_line_speed[cfg_idx] = ELINK_SPEED_10000; } else { lp->req_line_speed[cfg_idx] = ELINK_SPEED_1000; } } } if (load_mode == LOAD_LOOPBACK_EXT) { lp->loopback_mode = ELINK_LOOPBACK_EXT; } rc = elink_phy_init(&sc->link_params, &sc->link_vars); BXE_PHY_UNLOCK(sc); bxe_calc_fc_adv(sc); if (sc->link_vars.link_up) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); bxe_link_report(sc); } if (!CHIP_REV_IS_SLOW(sc)) { bxe_periodic_start(sc); } sc->link_params.req_line_speed[cfg_idx] = req_line_speed; return (rc); } /* must be called under IF_ADDR_LOCK */ static int bxe_init_mcast_macs_list(struct bxe_softc *sc, struct ecore_mcast_ramrod_params *p) { struct ifnet *ifp = sc->ifnet; int mc_count = 0; struct ifmultiaddr *ifma; struct ecore_mcast_list_elem *mc_mac; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) { continue; } mc_count++; } ECORE_LIST_INIT(&p->mcast_list); p->mcast_list_len = 0; if (!mc_count) { return (0); } mc_mac = malloc(sizeof(*mc_mac) * mc_count, M_DEVBUF, (M_NOWAIT | M_ZERO)); if (!mc_mac) { BLOGE(sc, "Failed to allocate temp mcast list\n"); return (-1); } TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) { continue; } mc_mac->mac = (uint8_t *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr); ECORE_LIST_PUSH_TAIL(&mc_mac->link, &p->mcast_list); BLOGD(sc, DBG_LOAD, "Setting MCAST %02X:%02X:%02X:%02X:%02X:%02X\n", mc_mac->mac[0], mc_mac->mac[1], mc_mac->mac[2], mc_mac->mac[3], mc_mac->mac[4], mc_mac->mac[5]); mc_mac++; } p->mcast_list_len = mc_count; return (0); } static void bxe_free_mcast_macs_list(struct ecore_mcast_ramrod_params *p) { struct ecore_mcast_list_elem *mc_mac = ECORE_LIST_FIRST_ENTRY(&p->mcast_list, struct ecore_mcast_list_elem, link); if (mc_mac) { /* only a single free as all mc_macs are in the same heap array */ free(mc_mac, M_DEVBUF); } } static int bxe_set_mc_list(struct bxe_softc *sc) { struct ecore_mcast_ramrod_params rparam = { NULL }; int rc = 0; rparam.mcast_obj = &sc->mcast_obj; BXE_MCAST_LOCK(sc); /* first, clear all configured multicast MACs */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to clear multicast configuration: %d\n", rc); return (rc); } /* configure a new MACs list */ rc = bxe_init_mcast_macs_list(sc, &rparam); if (rc) { BLOGE(sc, "Failed to create mcast MACs list (%d)\n", rc); BXE_MCAST_UNLOCK(sc); return (rc); } /* Now add the new MACs */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_ADD); if (rc < 0) { BLOGE(sc, "Failed to set new mcast config (%d)\n", rc); } bxe_free_mcast_macs_list(&rparam); BXE_MCAST_UNLOCK(sc); return (rc); } static int bxe_set_uc_list(struct bxe_softc *sc) { struct ifnet *ifp = sc->ifnet; struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj; struct ifaddr *ifa; unsigned long ramrod_flags = 0; int rc; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_addr_rlock(ifp); #endif /* first schedule a cleanup up of old configuration */ rc = bxe_del_all_macs(sc, mac_obj, ECORE_UC_LIST_MAC, FALSE); if (rc < 0) { BLOGE(sc, "Failed to schedule delete of all ETH MACs (%d)\n", rc); #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif return (rc); } ifa = ifp->if_addr; while (ifa) { if (ifa->ifa_addr->sa_family != AF_LINK) { ifa = TAILQ_NEXT(ifa, ifa_link); continue; } rc = bxe_set_mac_one(sc, (uint8_t *)LLADDR((struct sockaddr_dl *)ifa->ifa_addr), mac_obj, TRUE, ECORE_UC_LIST_MAC, &ramrod_flags); if (rc == -EEXIST) { BLOGD(sc, DBG_SP, "Failed to schedule ADD operations (EEXIST)\n"); /* do not treat adding same MAC as an error */ rc = 0; } else if (rc < 0) { BLOGE(sc, "Failed to schedule ADD operations (%d)\n", rc); #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif return (rc); } ifa = TAILQ_NEXT(ifa, ifa_link); } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif /* Execute the pending commands */ bit_set(&ramrod_flags, RAMROD_CONT); return (bxe_set_mac_one(sc, NULL, mac_obj, FALSE /* don't care */, ECORE_UC_LIST_MAC, &ramrod_flags)); } static void bxe_handle_rx_mode_tq(void *context, int pending) { struct bxe_softc *sc = (struct bxe_softc *)context; struct ifnet *ifp = sc->ifnet; uint32_t rx_mode = BXE_RX_MODE_NORMAL; BXE_CORE_LOCK(sc); if (sc->state != BXE_STATE_OPEN) { BLOGD(sc, DBG_SP, "state is %x, returning\n", sc->state); BXE_CORE_UNLOCK(sc); return; } BLOGD(sc, DBG_SP, "ifp->if_flags=0x%x\n", ifp->if_flags); if (ifp->if_flags & IFF_PROMISC) { rx_mode = BXE_RX_MODE_PROMISC; } else if ((ifp->if_flags & IFF_ALLMULTI) || ((ifp->if_amcount > BXE_MAX_MULTICAST) && CHIP_IS_E1(sc))) { rx_mode = BXE_RX_MODE_ALLMULTI; } else { if (IS_PF(sc)) { /* some multicasts */ if (bxe_set_mc_list(sc) < 0) { rx_mode = BXE_RX_MODE_ALLMULTI; } if (bxe_set_uc_list(sc) < 0) { rx_mode = BXE_RX_MODE_PROMISC; } } #if 0 else { /* * Configuring mcast to a VF involves sleeping (when we * wait for the PF's response). Since this function is * called from a non sleepable context we must schedule * a work item for this purpose */ bxe_set_bit(BXE_SP_RTNL_VFPF_MCAST, &sc->sp_rtnl_state); schedule_delayed_work(&sc->sp_rtnl_task, 0); } #endif } sc->rx_mode = rx_mode; /* schedule the rx_mode command */ if (bxe_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) { BLOGD(sc, DBG_LOAD, "Scheduled setting rx_mode with ECORE...\n"); bxe_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state); BXE_CORE_UNLOCK(sc); return; } if (IS_PF(sc)) { bxe_set_storm_rx_mode(sc); } #if 0 else { /* * Configuring mcast to a VF involves sleeping (when we * wait for the PF's response). Since this function is * called from a non sleepable context we must schedule * a work item for this purpose */ bxe_set_bit(BXE_SP_RTNL_VFPF_STORM_RX_MODE, &sc->sp_rtnl_state); schedule_delayed_work(&sc->sp_rtnl_task, 0); } #endif BXE_CORE_UNLOCK(sc); } static void bxe_set_rx_mode(struct bxe_softc *sc) { taskqueue_enqueue(sc->rx_mode_tq, &sc->rx_mode_tq_task); } /* update flags in shmem */ static void bxe_update_drv_flags(struct bxe_softc *sc, uint32_t flags, uint32_t set) { uint32_t drv_flags; if (SHMEM2_HAS(sc, drv_flags)) { bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_DRV_FLAGS); drv_flags = SHMEM2_RD(sc, drv_flags); if (set) { SET_FLAGS(drv_flags, flags); } else { RESET_FLAGS(drv_flags, flags); } SHMEM2_WR(sc, drv_flags, drv_flags); BLOGD(sc, DBG_LOAD, "drv_flags 0x%08x\n", drv_flags); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_DRV_FLAGS); } } /* periodic timer callout routine, only runs when the interface is up */ static void bxe_periodic_callout_func(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; int i; if (!BXE_CORE_TRYLOCK(sc)) { /* just bail and try again next time */ if ((sc->state == BXE_STATE_OPEN) && (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)) { /* schedule the next periodic callout */ callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } return; } if ((sc->state != BXE_STATE_OPEN) || (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_STOP)) { BLOGW(sc, "periodic callout exit (state=0x%x)\n", sc->state); BXE_CORE_UNLOCK(sc); return; } /* Check for TX timeouts on any fastpath. */ FOR_EACH_QUEUE(sc, i) { if (bxe_watchdog(sc, &sc->fp[i]) != 0) { /* Ruh-Roh, chip was reset! */ break; } } if (!CHIP_REV_IS_SLOW(sc)) { /* * This barrier is needed to ensure the ordering between the writing * to the sc->port.pmf in the bxe_nic_load() or bxe_pmf_update() and * the reading here. */ mb(); if (sc->port.pmf) { BXE_PHY_LOCK(sc); elink_period_func(&sc->link_params, &sc->link_vars); BXE_PHY_UNLOCK(sc); } } if (IS_PF(sc) && !BXE_NOMCP(sc)) { int mb_idx = SC_FW_MB_IDX(sc); uint32_t drv_pulse; uint32_t mcp_pulse; ++sc->fw_drv_pulse_wr_seq; sc->fw_drv_pulse_wr_seq &= DRV_PULSE_SEQ_MASK; drv_pulse = sc->fw_drv_pulse_wr_seq; bxe_drv_pulse(sc); mcp_pulse = (SHMEM_RD(sc, func_mb[mb_idx].mcp_pulse_mb) & MCP_PULSE_SEQ_MASK); /* * The delta between driver pulse and mcp response should * be 1 (before mcp response) or 0 (after mcp response). */ if ((drv_pulse != mcp_pulse) && (drv_pulse != ((mcp_pulse + 1) & MCP_PULSE_SEQ_MASK))) { /* someone lost a heartbeat... */ BLOGE(sc, "drv_pulse (0x%x) != mcp_pulse (0x%x)\n", drv_pulse, mcp_pulse); } } /* state is BXE_STATE_OPEN */ bxe_stats_handle(sc, STATS_EVENT_UPDATE); #if 0 /* sample VF bulletin board for new posts from PF */ if (IS_VF(sc)) { bxe_sample_bulletin(sc); } #endif BXE_CORE_UNLOCK(sc); if ((sc->state == BXE_STATE_OPEN) && (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)) { /* schedule the next periodic callout */ callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } } static void bxe_periodic_start(struct bxe_softc *sc) { atomic_store_rel_long(&sc->periodic_flags, PERIODIC_GO); callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } static void bxe_periodic_stop(struct bxe_softc *sc) { atomic_store_rel_long(&sc->periodic_flags, PERIODIC_STOP); callout_drain(&sc->periodic_callout); } /* start the controller */ static __noinline int bxe_nic_load(struct bxe_softc *sc, int load_mode) { uint32_t val; int load_code = 0; int i, rc = 0; BXE_CORE_LOCK_ASSERT(sc); BLOGD(sc, DBG_LOAD, "Starting NIC load...\n"); sc->state = BXE_STATE_OPENING_WAITING_LOAD; if (IS_PF(sc)) { /* must be called before memory allocation and HW init */ bxe_ilt_set_info(sc); } sc->last_reported_link_state = LINK_STATE_UNKNOWN; bxe_set_fp_rx_buf_size(sc); if (bxe_alloc_fp_buffers(sc) != 0) { BLOGE(sc, "Failed to allocate fastpath memory\n"); sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (bxe_alloc_mem(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (bxe_alloc_fw_stats_mem(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (IS_PF(sc)) { /* set pf load just before approaching the MCP */ bxe_set_pf_load(sc); /* if MCP exists send load request and analyze response */ if (!BXE_NOMCP(sc)) { /* attempt to load pf */ if (bxe_nic_load_request(sc, &load_code) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error1; } /* what did the MCP say? */ if (bxe_nic_load_analyze_req(sc, load_code) != 0) { bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } } else { BLOGI(sc, "Device has no MCP!\n"); load_code = bxe_nic_load_no_mcp(sc); } /* mark PMF if applicable */ bxe_nic_load_pmf(sc, load_code); /* Init Function state controlling object */ bxe_init_func_obj(sc); /* Initialize HW */ if (bxe_init_hw(sc, load_code) != 0) { BLOGE(sc, "HW init failed\n"); bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } } /* attach interrupts */ if (bxe_interrupt_attach(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } bxe_nic_init(sc, load_code); /* Init per-function objects */ if (IS_PF(sc)) { bxe_init_objs(sc); // XXX bxe_iov_nic_init(sc); /* set AFEX default VLAN tag to an invalid value */ sc->devinfo.mf_info.afex_def_vlan_tag = -1; // XXX bxe_nic_load_afex_dcc(sc, load_code); sc->state = BXE_STATE_OPENING_WAITING_PORT; rc = bxe_func_start(sc); if (rc) { BLOGE(sc, "Function start failed!\n"); bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } /* send LOAD_DONE command to MCP */ if (!BXE_NOMCP(sc)) { load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); sc->state = BXE_STATE_ERROR; rc = ENXIO; goto bxe_nic_load_error3; } } rc = bxe_setup_leading(sc); if (rc) { BLOGE(sc, "Setup leading failed!\n"); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } FOR_EACH_NONDEFAULT_ETH_QUEUE(sc, i) { rc = bxe_setup_queue(sc, &sc->fp[i], FALSE); if (rc) { BLOGE(sc, "Queue(%d) setup failed\n", i); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } rc = bxe_init_rss_pf(sc); if (rc) { BLOGE(sc, "PF RSS init failed\n"); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } /* XXX VF */ #if 0 else { /* VF */ FOR_EACH_ETH_QUEUE(sc, i) { rc = bxe_vfpf_setup_q(sc, i); if (rc) { BLOGE(sc, "Queue(%d) setup failed\n", i); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } } #endif /* now when Clients are configured we are ready to work */ sc->state = BXE_STATE_OPEN; /* Configure a ucast MAC */ if (IS_PF(sc)) { rc = bxe_set_eth_mac(sc, TRUE); } #if 0 else { /* IS_VF(sc) */ rc = bxe_vfpf_set_mac(sc); } #endif if (rc) { BLOGE(sc, "Setting Ethernet MAC failed\n"); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } #if 0 if (IS_PF(sc) && sc->pending_max) { /* for AFEX */ bxe_update_max_mf_config(sc, sc->pending_max); sc->pending_max = 0; } #endif if (sc->port.pmf) { rc = bxe_initial_phy_init(sc, /* XXX load_mode */LOAD_OPEN); if (rc) { sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_BOOT_FROM_SAN; /* start fast path */ /* Initialize Rx filter */ bxe_set_rx_mode(sc); /* start the Tx */ switch (/* XXX load_mode */LOAD_OPEN) { case LOAD_NORMAL: case LOAD_OPEN: break; case LOAD_DIAG: case LOAD_LOOPBACK_EXT: sc->state = BXE_STATE_DIAG; break; default: break; } if (sc->port.pmf) { bxe_update_drv_flags(sc, 1 << DRV_FLAGS_PORT_MASK, 0); } else { bxe_link_status_update(sc); } /* start the periodic timer callout */ bxe_periodic_start(sc); if (IS_PF(sc) && SHMEM2_HAS(sc, drv_capabilities_flag)) { /* mark driver is loaded in shmem2 */ val = SHMEM2_RD(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)]); SHMEM2_WR(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)], (val | DRV_FLAGS_CAPABILITIES_LOADED_SUPPORTED | DRV_FLAGS_CAPABILITIES_LOADED_L2)); } /* wait for all pending SP commands to complete */ if (IS_PF(sc) && !bxe_wait_sp_comp(sc, ~0x0UL)) { BLOGE(sc, "Timeout waiting for all SPs to complete!\n"); bxe_periodic_stop(sc); bxe_nic_unload(sc, UNLOAD_CLOSE, FALSE); return (ENXIO); } #if 0 /* If PMF - send ADMIN DCBX msg to MFW to initiate DCBX FSM */ if (sc->port.pmf && (sc->state != BXE_STATE_DIAG)) { bxe_dcbx_init(sc, FALSE); } #endif /* Tell the stack the driver is running! */ sc->ifnet->if_drv_flags = IFF_DRV_RUNNING; BLOGD(sc, DBG_LOAD, "NIC successfully loaded\n"); return (0); bxe_nic_load_error3: if (IS_PF(sc)) { bxe_int_disable_sync(sc, 1); /* clean out queued objects */ bxe_squeeze_objects(sc); } bxe_interrupt_detach(sc); bxe_nic_load_error2: if (IS_PF(sc) && !BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, 0); } sc->port.pmf = 0; bxe_nic_load_error1: /* clear pf_load status, as it was already set */ if (IS_PF(sc)) { bxe_clear_pf_load(sc); } bxe_nic_load_error0: bxe_free_fw_stats_mem(sc); bxe_free_fp_buffers(sc); bxe_free_mem(sc); return (rc); } static int bxe_init_locked(struct bxe_softc *sc) { int other_engine = SC_PATH(sc) ? 0 : 1; uint8_t other_load_status, load_status; uint8_t global = FALSE; int rc; BXE_CORE_LOCK_ASSERT(sc); /* check if the driver is already running */ if (sc->ifnet->if_drv_flags & IFF_DRV_RUNNING) { BLOGD(sc, DBG_LOAD, "Init called while driver is running!\n"); return (0); } bxe_set_power_state(sc, PCI_PM_D0); /* * If parity occurred during the unload, then attentions and/or * RECOVERY_IN_PROGRES may still be set. If so we want the first function * loaded on the current engine to complete the recovery. Parity recovery * is only relevant for PF driver. */ if (IS_PF(sc)) { other_load_status = bxe_get_load_status(sc, other_engine); load_status = bxe_get_load_status(sc, SC_PATH(sc)); if (!bxe_reset_is_done(sc, SC_PATH(sc)) || bxe_chk_parity_attn(sc, &global, TRUE)) { do { /* * If there are attentions and they are in global blocks, set * the GLOBAL_RESET bit regardless whether it will be this * function that will complete the recovery or not. */ if (global) { bxe_set_reset_global(sc); } /* * Only the first function on the current engine should try * to recover in open. In case of attentions in global blocks * only the first in the chip should try to recover. */ if ((!load_status && (!global || !other_load_status)) && bxe_trylock_leader_lock(sc) && !bxe_leader_reset(sc)) { BLOGI(sc, "Recovered during init\n"); break; } /* recovery has failed... */ bxe_set_power_state(sc, PCI_PM_D3hot); sc->recovery_state = BXE_RECOVERY_FAILED; BLOGE(sc, "Recovery flow hasn't properly " "completed yet, try again later. " "If you still see this message after a " "few retries then power cycle is required.\n"); rc = ENXIO; goto bxe_init_locked_done; } while (0); } } sc->recovery_state = BXE_RECOVERY_DONE; rc = bxe_nic_load(sc, LOAD_OPEN); bxe_init_locked_done: if (rc) { /* Tell the stack the driver is NOT running! */ BLOGE(sc, "Initialization failed, " "stack notified driver is NOT running!\n"); sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING; } return (rc); } static int bxe_stop_locked(struct bxe_softc *sc) { BXE_CORE_LOCK_ASSERT(sc); return (bxe_nic_unload(sc, UNLOAD_NORMAL, TRUE)); } /* * Handles controller initialization when called from an unlocked routine. * ifconfig calls this function. * * Returns: * void */ static void bxe_init(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; BXE_CORE_LOCK(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } static int bxe_init_ifnet(struct bxe_softc *sc) { struct ifnet *ifp; /* ifconfig entrypoint for media type/status reporting */ ifmedia_init(&sc->ifmedia, IFM_IMASK, bxe_ifmedia_update, bxe_ifmedia_status); /* set the default interface values */ ifmedia_add(&sc->ifmedia, (IFM_ETHER | IFM_FDX | sc->media), 0, NULL); ifmedia_add(&sc->ifmedia, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&sc->ifmedia, (IFM_ETHER | IFM_AUTO)); sc->ifmedia.ifm_media = sc->ifmedia.ifm_cur->ifm_media; /* XXX ? */ /* allocate the ifnet structure */ if ((ifp = if_alloc(IFT_ETHER)) == NULL) { BLOGE(sc, "Interface allocation failed!\n"); return (ENXIO); } ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ifp->if_ioctl = bxe_ioctl; ifp->if_start = bxe_tx_start; #if __FreeBSD_version >= 800000 ifp->if_transmit = bxe_tx_mq_start; ifp->if_qflush = bxe_mq_flush; #endif #ifdef FreeBSD8_0 ifp->if_timer = 0; #endif ifp->if_init = bxe_init; ifp->if_mtu = sc->mtu; ifp->if_hwassist = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6); ifp->if_capabilities = #if __FreeBSD_version < 700000 (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_LRO); #else (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWCSUM | IFCAP_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_LRO | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_WOL_MAGIC); #endif ifp->if_capenable = ifp->if_capabilities; ifp->if_capenable &= ~IFCAP_WOL_MAGIC; /* XXX not yet... */ -#if __FreeBSD_version < 1000025 - ifp->if_baudrate = 1000000000; -#else - if_initbaudrate(ifp, IF_Gbps(10)); -#endif + ifp->if_baudrate = IF_Gbps(10); ifp->if_snd.ifq_drv_maxlen = sc->tx_ring_size; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); sc->ifnet = ifp; /* attach to the Ethernet interface list */ ether_ifattach(ifp, sc->link_params.mac_addr); return (0); } static void bxe_deallocate_bars(struct bxe_softc *sc) { int i; for (i = 0; i < MAX_BARS; i++) { if (sc->bar[i].resource != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->bar[i].rid, sc->bar[i].resource); BLOGD(sc, DBG_LOAD, "Released PCI BAR%d [%02x] memory\n", i, PCIR_BAR(i)); } } } static int bxe_allocate_bars(struct bxe_softc *sc) { u_int flags; int i; memset(sc->bar, 0, sizeof(sc->bar)); for (i = 0; i < MAX_BARS; i++) { /* memory resources reside at BARs 0, 2, 4 */ /* Run `pciconf -lb` to see mappings */ if ((i != 0) && (i != 2) && (i != 4)) { continue; } sc->bar[i].rid = PCIR_BAR(i); flags = RF_ACTIVE; if (i == 0) { flags |= RF_SHAREABLE; } if ((sc->bar[i].resource = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->bar[i].rid, flags)) == NULL) { #if 0 /* BAR4 doesn't exist for E1 */ BLOGE(sc, "PCI BAR%d [%02x] memory allocation failed\n", i, PCIR_BAR(i)); #endif return (0); } sc->bar[i].tag = rman_get_bustag(sc->bar[i].resource); sc->bar[i].handle = rman_get_bushandle(sc->bar[i].resource); sc->bar[i].kva = (vm_offset_t)rman_get_virtual(sc->bar[i].resource); BLOGI(sc, "PCI BAR%d [%02x] memory allocated: %p-%p (%ld) -> %p\n", i, PCIR_BAR(i), (void *)rman_get_start(sc->bar[i].resource), (void *)rman_get_end(sc->bar[i].resource), rman_get_size(sc->bar[i].resource), (void *)sc->bar[i].kva); } return (0); } static void bxe_get_function_num(struct bxe_softc *sc) { uint32_t val = 0; /* * Read the ME register to get the function number. The ME register * holds the relative-function number and absolute-function number. The * absolute-function number appears only in E2 and above. Before that * these bits always contained zero, therefore we cannot blindly use them. */ val = REG_RD(sc, BAR_ME_REGISTER); sc->pfunc_rel = (uint8_t)((val & ME_REG_PF_NUM) >> ME_REG_PF_NUM_SHIFT); sc->path_id = (uint8_t)((val & ME_REG_ABS_PF_NUM) >> ME_REG_ABS_PF_NUM_SHIFT) & 1; if (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) { sc->pfunc_abs = ((sc->pfunc_rel << 1) | sc->path_id); } else { sc->pfunc_abs = (sc->pfunc_rel | sc->path_id); } BLOGD(sc, DBG_LOAD, "Relative function %d, Absolute function %d, Path %d\n", sc->pfunc_rel, sc->pfunc_abs, sc->path_id); } static uint32_t bxe_get_shmem_mf_cfg_base(struct bxe_softc *sc) { uint32_t shmem2_size; uint32_t offset; uint32_t mf_cfg_offset_value; /* Non 57712 */ offset = (SHMEM_RD(sc, func_mb) + (MAX_FUNC_NUM * sizeof(struct drv_func_mb))); /* 57712 plus */ if (sc->devinfo.shmem2_base != 0) { shmem2_size = SHMEM2_RD(sc, size); if (shmem2_size > offsetof(struct shmem2_region, mf_cfg_addr)) { mf_cfg_offset_value = SHMEM2_RD(sc, mf_cfg_addr); if (SHMEM_MF_CFG_ADDR_NONE != mf_cfg_offset_value) { offset = mf_cfg_offset_value; } } } return (offset); } static uint32_t bxe_pcie_capability_read(struct bxe_softc *sc, int reg, int width) { int pcie_reg; /* ensure PCIe capability is enabled */ if (pci_find_cap(sc->dev, PCIY_EXPRESS, &pcie_reg) == 0) { if (pcie_reg != 0) { BLOGD(sc, DBG_LOAD, "PCIe capability at 0x%04x\n", pcie_reg); return (pci_read_config(sc->dev, (pcie_reg + reg), width)); } } BLOGE(sc, "PCIe capability NOT FOUND!!!\n"); return (0); } static uint8_t bxe_is_pcie_pending(struct bxe_softc *sc) { return (bxe_pcie_capability_read(sc, PCIR_EXPRESS_DEVICE_STA, 2) & PCIM_EXP_STA_TRANSACTION_PND); } /* * Walk the PCI capabiites list for the device to find what features are * supported. These capabilites may be enabled/disabled by firmware so it's * best to walk the list rather than make assumptions. */ static void bxe_probe_pci_caps(struct bxe_softc *sc) { uint16_t link_status; int reg; /* check if PCI Power Management is enabled */ if (pci_find_cap(sc->dev, PCIY_PMG, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found PM capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_PM_CAPABLE_FLAG; sc->devinfo.pcie_pm_cap_reg = (uint16_t)reg; } } link_status = bxe_pcie_capability_read(sc, PCIR_EXPRESS_LINK_STA, 2); /* handle PCIe 2.0 workarounds for 57710 */ if (CHIP_IS_E1(sc)) { /* workaround for 57710 errata E4_57710_27462 */ sc->devinfo.pcie_link_speed = (REG_RD(sc, 0x3d04) & (1 << 24)) ? 2 : 1; /* workaround for 57710 errata E4_57710_27488 */ sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4); if (sc->devinfo.pcie_link_speed > 1) { sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4) >> 1; } } else { sc->devinfo.pcie_link_speed = (link_status & PCIM_LINK_STA_SPEED); sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4); } BLOGD(sc, DBG_LOAD, "PCIe link speed=%d width=%d\n", sc->devinfo.pcie_link_speed, sc->devinfo.pcie_link_width); sc->devinfo.pcie_cap_flags |= BXE_PCIE_CAPABLE_FLAG; sc->devinfo.pcie_pcie_cap_reg = (uint16_t)reg; /* check if MSI capability is enabled */ if (pci_find_cap(sc->dev, PCIY_MSI, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found MSI capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_MSI_CAPABLE_FLAG; sc->devinfo.pcie_msi_cap_reg = (uint16_t)reg; } } /* check if MSI-X capability is enabled */ if (pci_find_cap(sc->dev, PCIY_MSIX, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found MSI-X capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_MSIX_CAPABLE_FLAG; sc->devinfo.pcie_msix_cap_reg = (uint16_t)reg; } } } static int bxe_get_shmem_mf_cfg_info_sd(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val; /* get the outer vlan if we're in switch-dependent mode */ val = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); mf_info->ext_id = (uint16_t)val; mf_info->multi_vnics_mode = 1; if (!VALID_OVLAN(mf_info->ext_id)) { BLOGE(sc, "Invalid VLAN (%d)\n", mf_info->ext_id); return (1); } /* get the capabilities */ if ((mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_PROTOCOL_MASK) == FUNC_MF_CFG_PROTOCOL_ISCSI) { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_ISCSI; } else if ((mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_PROTOCOL_MASK) == FUNC_MF_CFG_PROTOCOL_FCOE) { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_FCOE; } else { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_ETHERNET; } mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static uint32_t bxe_get_shmem_ext_proto_support_flags(struct bxe_softc *sc) { uint32_t retval = 0; uint32_t val; val = MFCFG_RD(sc, func_ext_config[SC_ABS_FUNC(sc)].func_cfg); if (val & MACP_FUNC_CFG_FLAGS_ENABLED) { if (val & MACP_FUNC_CFG_FLAGS_ETHERNET) { retval |= MF_PROTO_SUPPORT_ETHERNET; } if (val & MACP_FUNC_CFG_FLAGS_ISCSI_OFFLOAD) { retval |= MF_PROTO_SUPPORT_ISCSI; } if (val & MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD) { retval |= MF_PROTO_SUPPORT_FCOE; } } return (retval); } static int bxe_get_shmem_mf_cfg_info_si(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val; /* * There is no outer vlan if we're in switch-independent mode. * If the mac is valid then assume multi-function. */ val = MFCFG_RD(sc, func_ext_config[SC_ABS_FUNC(sc)].func_cfg); mf_info->multi_vnics_mode = ((val & MACP_FUNC_CFG_FLAGS_MASK) != 0); mf_info->mf_protos_supported = bxe_get_shmem_ext_proto_support_flags(sc); mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static int bxe_get_shmem_mf_cfg_info_niv(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t e1hov_tag; uint32_t func_config; uint32_t niv_config; mf_info->multi_vnics_mode = 1; e1hov_tag = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); func_config = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); niv_config = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].afex_config); mf_info->ext_id = (uint16_t)((e1hov_tag & FUNC_MF_CFG_E1HOV_TAG_MASK) >> FUNC_MF_CFG_E1HOV_TAG_SHIFT); mf_info->default_vlan = (uint16_t)((e1hov_tag & FUNC_MF_CFG_AFEX_VLAN_MASK) >> FUNC_MF_CFG_AFEX_VLAN_SHIFT); mf_info->niv_allowed_priorities = (uint8_t)((niv_config & FUNC_MF_CFG_AFEX_COS_FILTER_MASK) >> FUNC_MF_CFG_AFEX_COS_FILTER_SHIFT); mf_info->niv_default_cos = (uint8_t)((func_config & FUNC_MF_CFG_TRANSMIT_PRIORITY_MASK) >> FUNC_MF_CFG_TRANSMIT_PRIORITY_SHIFT); mf_info->afex_vlan_mode = ((niv_config & FUNC_MF_CFG_AFEX_VLAN_MODE_MASK) >> FUNC_MF_CFG_AFEX_VLAN_MODE_SHIFT); mf_info->niv_mba_enabled = ((niv_config & FUNC_MF_CFG_AFEX_MBA_ENABLED_MASK) >> FUNC_MF_CFG_AFEX_MBA_ENABLED_SHIFT); mf_info->mf_protos_supported = bxe_get_shmem_ext_proto_support_flags(sc); mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static int bxe_check_valid_mf_cfg(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t mf_cfg1; uint32_t mf_cfg2; uint32_t ovlan1; uint32_t ovlan2; uint8_t i, j; BLOGD(sc, DBG_LOAD, "MF config parameters for function %d\n", SC_PORT(sc)); BLOGD(sc, DBG_LOAD, "\tmf_config=0x%x\n", mf_info->mf_config[SC_VN(sc)]); BLOGD(sc, DBG_LOAD, "\tmulti_vnics_mode=%d\n", mf_info->multi_vnics_mode); BLOGD(sc, DBG_LOAD, "\tvnics_per_port=%d\n", mf_info->vnics_per_port); BLOGD(sc, DBG_LOAD, "\tovlan/vifid=%d\n", mf_info->ext_id); BLOGD(sc, DBG_LOAD, "\tmin_bw=%d/%d/%d/%d\n", mf_info->min_bw[0], mf_info->min_bw[1], mf_info->min_bw[2], mf_info->min_bw[3]); BLOGD(sc, DBG_LOAD, "\tmax_bw=%d/%d/%d/%d\n", mf_info->max_bw[0], mf_info->max_bw[1], mf_info->max_bw[2], mf_info->max_bw[3]); BLOGD(sc, DBG_LOAD, "\tmac_addr: %s\n", sc->mac_addr_str); /* various MF mode sanity checks... */ if (mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_HIDE) { BLOGE(sc, "Enumerated function %d is marked as hidden\n", SC_PORT(sc)); return (1); } if ((mf_info->vnics_per_port > 1) && !mf_info->multi_vnics_mode) { BLOGE(sc, "vnics_per_port=%d multi_vnics_mode=%d\n", mf_info->vnics_per_port, mf_info->multi_vnics_mode); return (1); } if (mf_info->mf_mode == MULTI_FUNCTION_SD) { /* vnic id > 0 must have valid ovlan in switch-dependent mode */ if ((SC_VN(sc) > 0) && !VALID_OVLAN(OVLAN(sc))) { BLOGE(sc, "mf_mode=SD vnic_id=%d ovlan=%d\n", SC_VN(sc), OVLAN(sc)); return (1); } if (!VALID_OVLAN(OVLAN(sc)) && mf_info->multi_vnics_mode) { BLOGE(sc, "mf_mode=SD multi_vnics_mode=%d ovlan=%d\n", mf_info->multi_vnics_mode, OVLAN(sc)); return (1); } /* * Verify all functions are either MF or SF mode. If MF, make sure * sure that all non-hidden functions have a valid ovlan. If SF, * make sure that all non-hidden functions have an invalid ovlan. */ FOREACH_ABS_FUNC_IN_PORT(sc, i) { mf_cfg1 = MFCFG_RD(sc, func_mf_config[i].config); ovlan1 = MFCFG_RD(sc, func_mf_config[i].e1hov_tag); if (!(mf_cfg1 & FUNC_MF_CFG_FUNC_HIDE) && (((mf_info->multi_vnics_mode) && !VALID_OVLAN(ovlan1)) || ((!mf_info->multi_vnics_mode) && VALID_OVLAN(ovlan1)))) { BLOGE(sc, "mf_mode=SD function %d MF config " "mismatch, multi_vnics_mode=%d ovlan=%d\n", i, mf_info->multi_vnics_mode, ovlan1); return (1); } } /* Verify all funcs on the same port each have a different ovlan. */ FOREACH_ABS_FUNC_IN_PORT(sc, i) { mf_cfg1 = MFCFG_RD(sc, func_mf_config[i].config); ovlan1 = MFCFG_RD(sc, func_mf_config[i].e1hov_tag); /* iterate from the next function on the port to the max func */ for (j = i + 2; j < MAX_FUNC_NUM; j += 2) { mf_cfg2 = MFCFG_RD(sc, func_mf_config[j].config); ovlan2 = MFCFG_RD(sc, func_mf_config[j].e1hov_tag); if (!(mf_cfg1 & FUNC_MF_CFG_FUNC_HIDE) && VALID_OVLAN(ovlan1) && !(mf_cfg2 & FUNC_MF_CFG_FUNC_HIDE) && VALID_OVLAN(ovlan2) && (ovlan1 == ovlan2)) { BLOGE(sc, "mf_mode=SD functions %d and %d " "have the same ovlan (%d)\n", i, j, ovlan1); return (1); } } } } /* MULTI_FUNCTION_SD */ return (0); } static int bxe_get_mf_cfg_info(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val, mac_upper; uint8_t i, vnic; /* initialize mf_info defaults */ mf_info->vnics_per_port = 1; mf_info->multi_vnics_mode = FALSE; mf_info->path_has_ovlan = FALSE; mf_info->mf_mode = SINGLE_FUNCTION; if (!CHIP_IS_MF_CAP(sc)) { return (0); } if (sc->devinfo.mf_cfg_base == SHMEM_MF_CFG_ADDR_NONE) { BLOGE(sc, "Invalid mf_cfg_base!\n"); return (1); } /* get the MF mode (switch dependent / independent / single-function) */ val = SHMEM_RD(sc, dev_info.shared_feature_config.config); switch (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK) { case SHARED_FEAT_CFG_FORCE_SF_MODE_SWITCH_INDEPT: mac_upper = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); /* check for legal upper mac bytes */ if (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT) { mf_info->mf_mode = MULTI_FUNCTION_SI; } else { BLOGE(sc, "Invalid config for Switch Independent mode\n"); } break; case SHARED_FEAT_CFG_FORCE_SF_MODE_MF_ALLOWED: case SHARED_FEAT_CFG_FORCE_SF_MODE_SPIO4: /* get outer vlan configuration */ val = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); if ((val & FUNC_MF_CFG_E1HOV_TAG_MASK) != FUNC_MF_CFG_E1HOV_TAG_DEFAULT) { mf_info->mf_mode = MULTI_FUNCTION_SD; } else { BLOGE(sc, "Invalid config for Switch Dependent mode\n"); } break; case SHARED_FEAT_CFG_FORCE_SF_MODE_FORCED_SF: /* not in MF mode, vnics_per_port=1 and multi_vnics_mode=FALSE */ return (0); case SHARED_FEAT_CFG_FORCE_SF_MODE_AFEX_MODE: /* * Mark MF mode as NIV if MCP version includes NPAR-SD support * and the MAC address is valid. */ mac_upper = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); if ((SHMEM2_HAS(sc, afex_driver_support)) && (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT)) { mf_info->mf_mode = MULTI_FUNCTION_AFEX; } else { BLOGE(sc, "Invalid config for AFEX mode\n"); } break; default: BLOGE(sc, "Unknown MF mode (0x%08x)\n", (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK)); return (1); } /* set path mf_mode (which could be different than function mf_mode) */ if (mf_info->mf_mode == MULTI_FUNCTION_SD) { mf_info->path_has_ovlan = TRUE; } else if (mf_info->mf_mode == SINGLE_FUNCTION) { /* * Decide on path multi vnics mode. If we're not in MF mode and in * 4-port mode, this is good enough to check vnic-0 of the other port * on the same path */ if (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) { uint8_t other_port = !(PORT_ID(sc) & 1); uint8_t abs_func_other_port = (SC_PATH(sc) + (2 * other_port)); val = MFCFG_RD(sc, func_mf_config[abs_func_other_port].e1hov_tag); mf_info->path_has_ovlan = VALID_OVLAN((uint16_t)val) ? 1 : 0; } } if (mf_info->mf_mode == SINGLE_FUNCTION) { /* invalid MF config */ if (SC_VN(sc) >= 1) { BLOGE(sc, "VNIC ID >= 1 in SF mode\n"); return (1); } return (0); } /* get the MF configuration */ mf_info->mf_config[SC_VN(sc)] = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); switch(mf_info->mf_mode) { case MULTI_FUNCTION_SD: bxe_get_shmem_mf_cfg_info_sd(sc); break; case MULTI_FUNCTION_SI: bxe_get_shmem_mf_cfg_info_si(sc); break; case MULTI_FUNCTION_AFEX: bxe_get_shmem_mf_cfg_info_niv(sc); break; default: BLOGE(sc, "Get MF config failed (mf_mode=0x%08x)\n", mf_info->mf_mode); return (1); } /* get the congestion management parameters */ vnic = 0; FOREACH_ABS_FUNC_IN_PORT(sc, i) { /* get min/max bw */ val = MFCFG_RD(sc, func_mf_config[i].config); mf_info->min_bw[vnic] = ((val & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT); mf_info->max_bw[vnic] = ((val & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT); vnic++; } return (bxe_check_valid_mf_cfg(sc)); } static int bxe_get_shmem_info(struct bxe_softc *sc) { int port; uint32_t mac_hi, mac_lo, val; port = SC_PORT(sc); mac_hi = mac_lo = 0; sc->link_params.sc = sc; sc->link_params.port = port; /* get the hardware config info */ sc->devinfo.hw_config = SHMEM_RD(sc, dev_info.shared_hw_config.config); sc->devinfo.hw_config2 = SHMEM_RD(sc, dev_info.shared_hw_config.config2); sc->link_params.hw_led_mode = ((sc->devinfo.hw_config & SHARED_HW_CFG_LED_MODE_MASK) >> SHARED_HW_CFG_LED_MODE_SHIFT); /* get the port feature config */ sc->port.config = SHMEM_RD(sc, dev_info.port_feature_config[port].config), /* get the link params */ sc->link_params.speed_cap_mask[0] = SHMEM_RD(sc, dev_info.port_hw_config[port].speed_capability_mask); sc->link_params.speed_cap_mask[1] = SHMEM_RD(sc, dev_info.port_hw_config[port].speed_capability_mask2); /* get the lane config */ sc->link_params.lane_config = SHMEM_RD(sc, dev_info.port_hw_config[port].lane_config); /* get the link config */ val = SHMEM_RD(sc, dev_info.port_feature_config[port].link_config); sc->port.link_config[ELINK_INT_PHY] = val; sc->link_params.switch_cfg = (val & PORT_FEATURE_CONNECTED_SWITCH_MASK); sc->port.link_config[ELINK_EXT_PHY1] = SHMEM_RD(sc, dev_info.port_feature_config[port].link_config2); /* get the override preemphasis flag and enable it or turn it off */ val = SHMEM_RD(sc, dev_info.shared_feature_config.config); if (val & SHARED_FEAT_CFG_OVERRIDE_PREEMPHASIS_CFG_ENABLED) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED; } else { sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED; } /* get the initial value of the link params */ sc->link_params.multi_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].multi_phy_config); /* get external phy info */ sc->port.ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); /* get the multifunction configuration */ bxe_get_mf_cfg_info(sc); /* get the mac address */ if (IS_MF(sc)) { mac_hi = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); mac_lo = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_lower); } else { mac_hi = SHMEM_RD(sc, dev_info.port_hw_config[port].mac_upper); mac_lo = SHMEM_RD(sc, dev_info.port_hw_config[port].mac_lower); } if ((mac_lo == 0) && (mac_hi == 0)) { *sc->mac_addr_str = 0; BLOGE(sc, "No Ethernet address programmed!\n"); } else { sc->link_params.mac_addr[0] = (uint8_t)(mac_hi >> 8); sc->link_params.mac_addr[1] = (uint8_t)(mac_hi); sc->link_params.mac_addr[2] = (uint8_t)(mac_lo >> 24); sc->link_params.mac_addr[3] = (uint8_t)(mac_lo >> 16); sc->link_params.mac_addr[4] = (uint8_t)(mac_lo >> 8); sc->link_params.mac_addr[5] = (uint8_t)(mac_lo); snprintf(sc->mac_addr_str, sizeof(sc->mac_addr_str), "%02x:%02x:%02x:%02x:%02x:%02x", sc->link_params.mac_addr[0], sc->link_params.mac_addr[1], sc->link_params.mac_addr[2], sc->link_params.mac_addr[3], sc->link_params.mac_addr[4], sc->link_params.mac_addr[5]); BLOGD(sc, DBG_LOAD, "Ethernet address: %s\n", sc->mac_addr_str); } #if 0 if (!IS_MF(sc) && ((sc->port.config & PORT_FEAT_CFG_STORAGE_PERSONALITY_MASK) == PORT_FEAT_CFG_STORAGE_PERSONALITY_FCOE)) { sc->flags |= BXE_NO_ISCSI; } if (!IS_MF(sc) && ((sc->port.config & PORT_FEAT_CFG_STORAGE_PERSONALITY_MASK) == PORT_FEAT_CFG_STORAGE_PERSONALITY_ISCSI)) { sc->flags |= BXE_NO_FCOE_FLAG; } #endif return (0); } static void bxe_get_tunable_params(struct bxe_softc *sc) { /* sanity checks */ if ((bxe_interrupt_mode != INTR_MODE_INTX) && (bxe_interrupt_mode != INTR_MODE_MSI) && (bxe_interrupt_mode != INTR_MODE_MSIX)) { BLOGW(sc, "invalid interrupt_mode value (%d)\n", bxe_interrupt_mode); bxe_interrupt_mode = INTR_MODE_MSIX; } if ((bxe_queue_count < 0) || (bxe_queue_count > MAX_RSS_CHAINS)) { BLOGW(sc, "invalid queue_count value (%d)\n", bxe_queue_count); bxe_queue_count = 0; } if ((bxe_max_rx_bufs < 1) || (bxe_max_rx_bufs > RX_BD_USABLE)) { if (bxe_max_rx_bufs == 0) { bxe_max_rx_bufs = RX_BD_USABLE; } else { BLOGW(sc, "invalid max_rx_bufs (%d)\n", bxe_max_rx_bufs); bxe_max_rx_bufs = 2048; } } if ((bxe_hc_rx_ticks < 1) || (bxe_hc_rx_ticks > 100)) { BLOGW(sc, "invalid hc_rx_ticks (%d)\n", bxe_hc_rx_ticks); bxe_hc_rx_ticks = 25; } if ((bxe_hc_tx_ticks < 1) || (bxe_hc_tx_ticks > 100)) { BLOGW(sc, "invalid hc_tx_ticks (%d)\n", bxe_hc_tx_ticks); bxe_hc_tx_ticks = 50; } if (bxe_max_aggregation_size == 0) { bxe_max_aggregation_size = TPA_AGG_SIZE; } if (bxe_max_aggregation_size > 0xffff) { BLOGW(sc, "invalid max_aggregation_size (%d)\n", bxe_max_aggregation_size); bxe_max_aggregation_size = TPA_AGG_SIZE; } if ((bxe_mrrs < -1) || (bxe_mrrs > 3)) { BLOGW(sc, "invalid mrrs (%d)\n", bxe_mrrs); bxe_mrrs = -1; } if ((bxe_autogreeen < 0) || (bxe_autogreeen > 2)) { BLOGW(sc, "invalid autogreeen (%d)\n", bxe_autogreeen); bxe_autogreeen = 0; } if ((bxe_udp_rss < 0) || (bxe_udp_rss > 1)) { BLOGW(sc, "invalid udp_rss (%d)\n", bxe_udp_rss); bxe_udp_rss = 0; } /* pull in user settings */ sc->interrupt_mode = bxe_interrupt_mode; sc->max_rx_bufs = bxe_max_rx_bufs; sc->hc_rx_ticks = bxe_hc_rx_ticks; sc->hc_tx_ticks = bxe_hc_tx_ticks; sc->max_aggregation_size = bxe_max_aggregation_size; sc->mrrs = bxe_mrrs; sc->autogreeen = bxe_autogreeen; sc->udp_rss = bxe_udp_rss; if (bxe_interrupt_mode == INTR_MODE_INTX) { sc->num_queues = 1; } else { /* INTR_MODE_MSI or INTR_MODE_MSIX */ sc->num_queues = min((bxe_queue_count ? bxe_queue_count : mp_ncpus), MAX_RSS_CHAINS); if (sc->num_queues > mp_ncpus) { sc->num_queues = mp_ncpus; } } BLOGD(sc, DBG_LOAD, "User Config: " "debug=0x%lx " "interrupt_mode=%d " "queue_count=%d " "hc_rx_ticks=%d " "hc_tx_ticks=%d " "rx_budget=%d " "max_aggregation_size=%d " "mrrs=%d " "autogreeen=%d " "udp_rss=%d\n", bxe_debug, sc->interrupt_mode, sc->num_queues, sc->hc_rx_ticks, sc->hc_tx_ticks, bxe_rx_budget, sc->max_aggregation_size, sc->mrrs, sc->autogreeen, sc->udp_rss); } static void bxe_media_detect(struct bxe_softc *sc) { uint32_t phy_idx = bxe_get_cur_phy_idx(sc); switch (sc->link_params.phy[phy_idx].media_type) { case ELINK_ETH_PHY_SFPP_10G_FIBER: case ELINK_ETH_PHY_XFP_FIBER: BLOGI(sc, "Found 10Gb Fiber media.\n"); sc->media = IFM_10G_SR; break; case ELINK_ETH_PHY_SFP_1G_FIBER: BLOGI(sc, "Found 1Gb Fiber media.\n"); sc->media = IFM_1000_SX; break; case ELINK_ETH_PHY_KR: case ELINK_ETH_PHY_CX4: BLOGI(sc, "Found 10GBase-CX4 media.\n"); sc->media = IFM_10G_CX4; break; case ELINK_ETH_PHY_DA_TWINAX: BLOGI(sc, "Found 10Gb Twinax media.\n"); sc->media = IFM_10G_TWINAX; break; case ELINK_ETH_PHY_BASE_T: if (sc->link_params.speed_cap_mask[0] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) { BLOGI(sc, "Found 10GBase-T media.\n"); sc->media = IFM_10G_T; } else { BLOGI(sc, "Found 1000Base-T media.\n"); sc->media = IFM_1000_T; } break; case ELINK_ETH_PHY_NOT_PRESENT: BLOGI(sc, "Media not present.\n"); sc->media = 0; break; case ELINK_ETH_PHY_UNSPECIFIED: default: BLOGI(sc, "Unknown media!\n"); sc->media = 0; break; } } #define GET_FIELD(value, fname) \ (((value) & (fname##_MASK)) >> (fname##_SHIFT)) #define IGU_FID(val) GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID) #define IGU_VEC(val) GET_FIELD((val), IGU_REG_MAPPING_MEMORY_VECTOR) static int bxe_get_igu_cam_info(struct bxe_softc *sc) { int pfid = SC_FUNC(sc); int igu_sb_id; uint32_t val; uint8_t fid, igu_sb_cnt = 0; sc->igu_base_sb = 0xff; if (CHIP_INT_MODE_IS_BC(sc)) { int vn = SC_VN(sc); igu_sb_cnt = sc->igu_sb_cnt; sc->igu_base_sb = ((CHIP_IS_MODE_4_PORT(sc) ? pfid : vn) * FP_SB_MAX_E1x); sc->igu_dsb_id = (E1HVN_MAX * FP_SB_MAX_E1x + (CHIP_IS_MODE_4_PORT(sc) ? pfid : vn)); return (0); } /* IGU in normal mode - read CAM */ for (igu_sb_id = 0; igu_sb_id < IGU_REG_MAPPING_MEMORY_SIZE; igu_sb_id++) { val = REG_RD(sc, IGU_REG_MAPPING_MEMORY + igu_sb_id * 4); if (!(val & IGU_REG_MAPPING_MEMORY_VALID)) { continue; } fid = IGU_FID(val); if ((fid & IGU_FID_ENCODE_IS_PF)) { if ((fid & IGU_FID_PF_NUM_MASK) != pfid) { continue; } if (IGU_VEC(val) == 0) { /* default status block */ sc->igu_dsb_id = igu_sb_id; } else { if (sc->igu_base_sb == 0xff) { sc->igu_base_sb = igu_sb_id; } igu_sb_cnt++; } } } /* * Due to new PF resource allocation by MFW T7.4 and above, it's optional * that number of CAM entries will not be equal to the value advertised in * PCI. Driver should use the minimal value of both as the actual status * block count */ sc->igu_sb_cnt = min(sc->igu_sb_cnt, igu_sb_cnt); if (igu_sb_cnt == 0) { BLOGE(sc, "CAM configuration error\n"); return (-1); } return (0); } /* * Gather various information from the device config space, the device itself, * shmem, and the user input. */ static int bxe_get_device_info(struct bxe_softc *sc) { uint32_t val; int rc; /* Get the data for the device */ sc->devinfo.vendor_id = pci_get_vendor(sc->dev); sc->devinfo.device_id = pci_get_device(sc->dev); sc->devinfo.subvendor_id = pci_get_subvendor(sc->dev); sc->devinfo.subdevice_id = pci_get_subdevice(sc->dev); /* get the chip revision (chip metal comes from pci config space) */ sc->devinfo.chip_id = sc->link_params.chip_id = (((REG_RD(sc, MISC_REG_CHIP_NUM) & 0xffff) << 16) | ((REG_RD(sc, MISC_REG_CHIP_REV) & 0xf) << 12) | (((REG_RD(sc, PCICFG_OFFSET + PCI_ID_VAL3) >> 24) & 0xf) << 4) | ((REG_RD(sc, MISC_REG_BOND_ID) & 0xf) << 0)); /* force 57811 according to MISC register */ if (REG_RD(sc, MISC_REG_CHIP_TYPE) & MISC_REG_CHIP_TYPE_57811_MASK) { if (CHIP_IS_57810(sc)) { sc->devinfo.chip_id = ((CHIP_NUM_57811 << 16) | (sc->devinfo.chip_id & 0x0000ffff)); } else if (CHIP_IS_57810_MF(sc)) { sc->devinfo.chip_id = ((CHIP_NUM_57811_MF << 16) | (sc->devinfo.chip_id & 0x0000ffff)); } sc->devinfo.chip_id |= 0x1; } BLOGD(sc, DBG_LOAD, "chip_id=0x%08x (num=0x%04x rev=0x%01x metal=0x%02x bond=0x%01x)\n", sc->devinfo.chip_id, ((sc->devinfo.chip_id >> 16) & 0xffff), ((sc->devinfo.chip_id >> 12) & 0xf), ((sc->devinfo.chip_id >> 4) & 0xff), ((sc->devinfo.chip_id >> 0) & 0xf)); val = (REG_RD(sc, 0x2874) & 0x55); if ((sc->devinfo.chip_id & 0x1) || (CHIP_IS_E1(sc) && val) || (CHIP_IS_E1H(sc) && (val == 0x55))) { sc->flags |= BXE_ONE_PORT_FLAG; BLOGD(sc, DBG_LOAD, "single port device\n"); } /* set the doorbell size */ sc->doorbell_size = (1 << BXE_DB_SHIFT); /* determine whether the device is in 2 port or 4 port mode */ sc->devinfo.chip_port_mode = CHIP_PORT_MODE_NONE; /* E1 & E1h*/ if (CHIP_IS_E2E3(sc)) { /* * Read port4mode_en_ovwr[0]: * If 1, four port mode is in port4mode_en_ovwr[1]. * If 0, four port mode is in port4mode_en[0]. */ val = REG_RD(sc, MISC_REG_PORT4MODE_EN_OVWR); if (val & 1) { val = ((val >> 1) & 1); } else { val = REG_RD(sc, MISC_REG_PORT4MODE_EN); } sc->devinfo.chip_port_mode = (val) ? CHIP_4_PORT_MODE : CHIP_2_PORT_MODE; BLOGD(sc, DBG_LOAD, "Port mode = %s\n", (val) ? "4" : "2"); } /* get the function and path info for the device */ bxe_get_function_num(sc); /* get the shared memory base address */ sc->devinfo.shmem_base = sc->link_params.shmem_base = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); sc->devinfo.shmem2_base = REG_RD(sc, (SC_PATH(sc) ? MISC_REG_GENERIC_CR_1 : MISC_REG_GENERIC_CR_0)); BLOGD(sc, DBG_LOAD, "shmem_base=0x%08x, shmem2_base=0x%08x\n", sc->devinfo.shmem_base, sc->devinfo.shmem2_base); if (!sc->devinfo.shmem_base) { /* this should ONLY prevent upcoming shmem reads */ BLOGI(sc, "MCP not active\n"); sc->flags |= BXE_NO_MCP_FLAG; return (0); } /* make sure the shared memory contents are valid */ val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]); if ((val & (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) != (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) { BLOGE(sc, "Invalid SHMEM validity signature: 0x%08x\n", val); return (0); } BLOGD(sc, DBG_LOAD, "Valid SHMEM validity signature: 0x%08x\n", val); /* get the bootcode version */ sc->devinfo.bc_ver = SHMEM_RD(sc, dev_info.bc_rev); snprintf(sc->devinfo.bc_ver_str, sizeof(sc->devinfo.bc_ver_str), "%d.%d.%d", ((sc->devinfo.bc_ver >> 24) & 0xff), ((sc->devinfo.bc_ver >> 16) & 0xff), ((sc->devinfo.bc_ver >> 8) & 0xff)); BLOGD(sc, DBG_LOAD, "Bootcode version: %s\n", sc->devinfo.bc_ver_str); /* get the bootcode shmem address */ sc->devinfo.mf_cfg_base = bxe_get_shmem_mf_cfg_base(sc); BLOGD(sc, DBG_LOAD, "mf_cfg_base=0x08%x \n", sc->devinfo.mf_cfg_base); /* clean indirect addresses as they're not used */ pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); if (IS_PF(sc)) { REG_WR(sc, PXP2_REG_PGL_ADDR_88_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_8C_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_90_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_94_F0, 0); if (CHIP_IS_E1x(sc)) { REG_WR(sc, PXP2_REG_PGL_ADDR_88_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_8C_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_90_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_94_F1, 0); } /* * Enable internal target-read (in case we are probed after PF * FLR). Must be done prior to any BAR read access. Only for * 57712 and up */ if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); } } /* get the nvram size */ val = REG_RD(sc, MCP_REG_MCPR_NVM_CFG4); sc->devinfo.flash_size = (NVRAM_1MB_SIZE << (val & MCPR_NVM_CFG4_FLASH_SIZE)); BLOGD(sc, DBG_LOAD, "nvram flash size: %d\n", sc->devinfo.flash_size); /* get PCI capabilites */ bxe_probe_pci_caps(sc); bxe_set_power_state(sc, PCI_PM_D0); /* get various configuration parameters from shmem */ bxe_get_shmem_info(sc); if (sc->devinfo.pcie_msix_cap_reg != 0) { val = pci_read_config(sc->dev, (sc->devinfo.pcie_msix_cap_reg + PCIR_MSIX_CTRL), 2); sc->igu_sb_cnt = (val & PCIM_MSIXCTRL_TABLE_SIZE); } else { sc->igu_sb_cnt = 1; } sc->igu_base_addr = BAR_IGU_INTMEM; /* initialize IGU parameters */ if (CHIP_IS_E1x(sc)) { sc->devinfo.int_block = INT_BLOCK_HC; sc->igu_dsb_id = DEF_SB_IGU_ID; sc->igu_base_sb = 0; } else { sc->devinfo.int_block = INT_BLOCK_IGU; /* do not allow device reset during IGU info preocessing */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET); val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION); if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) { int tout = 5000; BLOGD(sc, DBG_LOAD, "FORCING IGU Normal Mode\n"); val &= ~(IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN); REG_WR(sc, IGU_REG_BLOCK_CONFIGURATION, val); REG_WR(sc, IGU_REG_RESET_MEMORIES, 0x7f); while (tout && REG_RD(sc, IGU_REG_RESET_MEMORIES)) { tout--; DELAY(1000); } if (REG_RD(sc, IGU_REG_RESET_MEMORIES)) { BLOGD(sc, DBG_LOAD, "FORCING IGU Normal Mode failed!!!\n"); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); return (-1); } } if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) { BLOGD(sc, DBG_LOAD, "IGU Backward Compatible Mode\n"); sc->devinfo.int_block |= INT_BLOCK_MODE_BW_COMP; } else { BLOGD(sc, DBG_LOAD, "IGU Normal Mode\n"); } rc = bxe_get_igu_cam_info(sc); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); if (rc) { return (rc); } } /* * Get base FW non-default (fast path) status block ID. This value is * used to initialize the fw_sb_id saved on the fp/queue structure to * determine the id used by the FW. */ if (CHIP_IS_E1x(sc)) { sc->base_fw_ndsb = ((SC_PORT(sc) * FP_SB_MAX_E1x) + SC_L_ID(sc)); } else { /* * 57712+ - We currently use one FW SB per IGU SB (Rx and Tx of * the same queue are indicated on the same IGU SB). So we prefer * FW and IGU SBs to be the same value. */ sc->base_fw_ndsb = sc->igu_base_sb; } BLOGD(sc, DBG_LOAD, "igu_dsb_id=%d igu_base_sb=%d igu_sb_cnt=%d base_fw_ndsb=%d\n", sc->igu_dsb_id, sc->igu_base_sb, sc->igu_sb_cnt, sc->base_fw_ndsb); elink_phy_probe(&sc->link_params); return (0); } static void bxe_link_settings_supported(struct bxe_softc *sc, uint32_t switch_cfg) { uint32_t cfg_size = 0; uint32_t idx; uint8_t port = SC_PORT(sc); /* aggregation of supported attributes of all external phys */ sc->port.supported[0] = 0; sc->port.supported[1] = 0; switch (sc->link_params.num_phys) { case 1: sc->port.supported[0] = sc->link_params.phy[ELINK_INT_PHY].supported; cfg_size = 1; break; case 2: sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY1].supported; cfg_size = 1; break; case 3: if (sc->link_params.multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED) { sc->port.supported[1] = sc->link_params.phy[ELINK_EXT_PHY1].supported; sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY2].supported; } else { sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY1].supported; sc->port.supported[1] = sc->link_params.phy[ELINK_EXT_PHY2].supported; } cfg_size = 2; break; } if (!(sc->port.supported[0] || sc->port.supported[1])) { BLOGE(sc, "Invalid phy config in NVRAM (PHY1=0x%08x PHY2=0x%08x)\n", SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config), SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config2)); return; } if (CHIP_IS_E3(sc)) sc->port.phy_addr = REG_RD(sc, MISC_REG_WC0_CTRL_PHY_ADDR); else { switch (switch_cfg) { case ELINK_SWITCH_CFG_1G: sc->port.phy_addr = REG_RD(sc, NIG_REG_SERDES0_CTRL_PHY_ADDR + port*0x10); break; case ELINK_SWITCH_CFG_10G: sc->port.phy_addr = REG_RD(sc, NIG_REG_XGXS0_CTRL_PHY_ADDR + port*0x18); break; default: BLOGE(sc, "Invalid switch config in link_config=0x%08x\n", sc->port.link_config[0]); return; } } BLOGD(sc, DBG_LOAD, "PHY addr 0x%08x\n", sc->port.phy_addr); /* mask what we support according to speed_cap_mask per configuration */ for (idx = 0; idx < cfg_size; idx++) { if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10baseT_Half; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_100baseT_Half; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_100baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_1000baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_2500baseX_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10000baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_20000baseKR2_Full; } } BLOGD(sc, DBG_LOAD, "PHY supported 0=0x%08x 1=0x%08x\n", sc->port.supported[0], sc->port.supported[1]); } static void bxe_link_settings_requested(struct bxe_softc *sc) { uint32_t link_config; uint32_t idx; uint32_t cfg_size = 0; sc->port.advertising[0] = 0; sc->port.advertising[1] = 0; switch (sc->link_params.num_phys) { case 1: case 2: cfg_size = 1; break; case 3: cfg_size = 2; break; } for (idx = 0; idx < cfg_size; idx++) { sc->link_params.req_duplex[idx] = DUPLEX_FULL; link_config = sc->port.link_config[idx]; switch (link_config & PORT_FEATURE_LINK_SPEED_MASK) { case PORT_FEATURE_LINK_SPEED_AUTO: if (sc->port.supported[idx] & ELINK_SUPPORTED_Autoneg) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_AUTO_NEG; sc->port.advertising[idx] |= sc->port.supported[idx]; if (sc->link_params.phy[ELINK_EXT_PHY1].type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) sc->port.advertising[idx] |= (ELINK_SUPPORTED_100baseT_Half | ELINK_SUPPORTED_100baseT_Full); } else { /* force 10G, no AN */ sc->link_params.req_line_speed[idx] = ELINK_SPEED_10000; sc->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); continue; } break; case PORT_FEATURE_LINK_SPEED_10M_FULL: if (sc->port.supported[idx] & ELINK_SUPPORTED_10baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10; sc->port.advertising[idx] |= (ADVERTISED_10baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_10M_HALF: if (sc->port.supported[idx] & ELINK_SUPPORTED_10baseT_Half) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10; sc->link_params.req_duplex[idx] = DUPLEX_HALF; sc->port.advertising[idx] |= (ADVERTISED_10baseT_Half | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_100M_FULL: if (sc->port.supported[idx] & ELINK_SUPPORTED_100baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_100; sc->port.advertising[idx] |= (ADVERTISED_100baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_100M_HALF: if (sc->port.supported[idx] & ELINK_SUPPORTED_100baseT_Half) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_100; sc->link_params.req_duplex[idx] = DUPLEX_HALF; sc->port.advertising[idx] |= (ADVERTISED_100baseT_Half | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_1G: if (sc->port.supported[idx] & ELINK_SUPPORTED_1000baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_1000; sc->port.advertising[idx] |= (ADVERTISED_1000baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_2_5G: if (sc->port.supported[idx] & ELINK_SUPPORTED_2500baseX_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_2500; sc->port.advertising[idx] |= (ADVERTISED_2500baseX_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_10G_CX4: if (sc->port.supported[idx] & ELINK_SUPPORTED_10000baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10000; sc->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_20G: sc->link_params.req_line_speed[idx] = ELINK_SPEED_20000; break; default: BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); sc->link_params.req_line_speed[idx] = ELINK_SPEED_AUTO_NEG; sc->port.advertising[idx] = sc->port.supported[idx]; break; } sc->link_params.req_flow_ctrl[idx] = (link_config & PORT_FEATURE_FLOW_CONTROL_MASK); if (sc->link_params.req_flow_ctrl[idx] == ELINK_FLOW_CTRL_AUTO) { if (!(sc->port.supported[idx] & ELINK_SUPPORTED_Autoneg)) { sc->link_params.req_flow_ctrl[idx] = ELINK_FLOW_CTRL_NONE; } else { bxe_set_requested_fc(sc); } } BLOGD(sc, DBG_LOAD, "req_line_speed=%d req_duplex=%d " "req_flow_ctrl=0x%x advertising=0x%x\n", sc->link_params.req_line_speed[idx], sc->link_params.req_duplex[idx], sc->link_params.req_flow_ctrl[idx], sc->port.advertising[idx]); } } static void bxe_get_phy_info(struct bxe_softc *sc) { uint8_t port = SC_PORT(sc); uint32_t config = sc->port.config; uint32_t eee_mode; /* shmem data already read in bxe_get_shmem_info() */ BLOGD(sc, DBG_LOAD, "lane_config=0x%08x speed_cap_mask0=0x%08x " "link_config0=0x%08x\n", sc->link_params.lane_config, sc->link_params.speed_cap_mask[0], sc->port.link_config[0]); bxe_link_settings_supported(sc, sc->link_params.switch_cfg); bxe_link_settings_requested(sc); if (sc->autogreeen == AUTO_GREEN_FORCE_ON) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } else if (sc->autogreeen == AUTO_GREEN_FORCE_OFF) { sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } else if (config & PORT_FEAT_CFG_AUTOGREEEN_ENABLED) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } /* configure link feature according to nvram value */ eee_mode = (((SHMEM_RD(sc, dev_info.port_feature_config[port].eee_power_mode)) & PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >> PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT); if (eee_mode != PORT_FEAT_CFG_EEE_POWER_MODE_DISABLED) { sc->link_params.eee_mode = (ELINK_EEE_MODE_ADV_LPI | ELINK_EEE_MODE_ENABLE_LPI | ELINK_EEE_MODE_OUTPUT_TIME); } else { sc->link_params.eee_mode = 0; } /* get the media type */ bxe_media_detect(sc); } static void bxe_get_params(struct bxe_softc *sc) { /* get user tunable params */ bxe_get_tunable_params(sc); /* select the RX and TX ring sizes */ sc->tx_ring_size = TX_BD_USABLE; sc->rx_ring_size = RX_BD_USABLE; /* XXX disable WoL */ sc->wol = 0; } static void bxe_set_modes_bitmap(struct bxe_softc *sc) { uint32_t flags = 0; if (CHIP_REV_IS_FPGA(sc)) { SET_FLAGS(flags, MODE_FPGA); } else if (CHIP_REV_IS_EMUL(sc)) { SET_FLAGS(flags, MODE_EMUL); } else { SET_FLAGS(flags, MODE_ASIC); } if (CHIP_IS_MODE_4_PORT(sc)) { SET_FLAGS(flags, MODE_PORT4); } else { SET_FLAGS(flags, MODE_PORT2); } if (CHIP_IS_E2(sc)) { SET_FLAGS(flags, MODE_E2); } else if (CHIP_IS_E3(sc)) { SET_FLAGS(flags, MODE_E3); if (CHIP_REV(sc) == CHIP_REV_Ax) { SET_FLAGS(flags, MODE_E3_A0); } else /*if (CHIP_REV(sc) == CHIP_REV_Bx)*/ { SET_FLAGS(flags, MODE_E3_B0 | MODE_COS3); } } if (IS_MF(sc)) { SET_FLAGS(flags, MODE_MF); switch (sc->devinfo.mf_info.mf_mode) { case MULTI_FUNCTION_SD: SET_FLAGS(flags, MODE_MF_SD); break; case MULTI_FUNCTION_SI: SET_FLAGS(flags, MODE_MF_SI); break; case MULTI_FUNCTION_AFEX: SET_FLAGS(flags, MODE_MF_AFEX); break; } } else { SET_FLAGS(flags, MODE_SF); } #if defined(__LITTLE_ENDIAN) SET_FLAGS(flags, MODE_LITTLE_ENDIAN); #else /* __BIG_ENDIAN */ SET_FLAGS(flags, MODE_BIG_ENDIAN); #endif INIT_MODE_FLAGS(sc) = flags; } static int bxe_alloc_hsi_mem(struct bxe_softc *sc) { struct bxe_fastpath *fp; bus_addr_t busaddr; int max_agg_queues; int max_segments; bus_size_t max_size; bus_size_t max_seg_size; char buf[32]; int rc; int i, j; /* XXX zero out all vars here and call bxe_alloc_hsi_mem on error */ /* allocate the parent bus DMA tag */ rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ BUS_SPACE_MAXSIZE_32BIT, /* max map size */ BUS_SPACE_UNRESTRICTED, /* num discontinuous */ BUS_SPACE_MAXSIZE_32BIT, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &sc->parent_dma_tag); /* returned dma tag */ if (rc != 0) { BLOGE(sc, "Failed to alloc parent DMA tag (%d)!\n", rc); return (1); } /************************/ /* DEFAULT STATUS BLOCK */ /************************/ if (bxe_dma_alloc(sc, sizeof(struct host_sp_status_block), &sc->def_sb_dma, "default status block") != 0) { /* XXX */ bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->def_sb = (struct host_sp_status_block *)sc->def_sb_dma.vaddr; /***************/ /* EVENT QUEUE */ /***************/ if (bxe_dma_alloc(sc, BCM_PAGE_SIZE, &sc->eq_dma, "event queue") != 0) { /* XXX */ bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->eq = (union event_ring_elem * )sc->eq_dma.vaddr; /*************/ /* SLOW PATH */ /*************/ if (bxe_dma_alloc(sc, sizeof(struct bxe_slowpath), &sc->sp_dma, "slow path") != 0) { /* XXX */ bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->sp = (struct bxe_slowpath *)sc->sp_dma.vaddr; /*******************/ /* SLOW PATH QUEUE */ /*******************/ if (bxe_dma_alloc(sc, BCM_PAGE_SIZE, &sc->spq_dma, "slow path queue") != 0) { /* XXX */ bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->spq = (struct eth_spe *)sc->spq_dma.vaddr; /***************************/ /* FW DECOMPRESSION BUFFER */ /***************************/ if (bxe_dma_alloc(sc, FW_BUF_SIZE, &sc->gz_buf_dma, "fw decompression buffer") != 0) { /* XXX */ bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->gz_buf = (void *)sc->gz_buf_dma.vaddr; if ((sc->gz_strm = malloc(sizeof(*sc->gz_strm), M_DEVBUF, M_NOWAIT)) == NULL) { /* XXX */ bxe_dma_free(sc, &sc->gz_buf_dma); sc->gz_buf = NULL; bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } /*************/ /* FASTPATHS */ /*************/ /* allocate DMA memory for each fastpath structure */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->sc = sc; fp->index = i; /*******************/ /* FP STATUS BLOCK */ /*******************/ snprintf(buf, sizeof(buf), "fp %d status block", i); if (bxe_dma_alloc(sc, sizeof(union bxe_host_hc_status_block), &fp->sb_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { if (CHIP_IS_E2E3(sc)) { fp->status_block.e2_sb = (struct host_hc_status_block_e2 *)fp->sb_dma.vaddr; } else { fp->status_block.e1x_sb = (struct host_hc_status_block_e1x *)fp->sb_dma.vaddr; } } /******************/ /* FP TX BD CHAIN */ /******************/ snprintf(buf, sizeof(buf), "fp %d tx bd chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * TX_BD_NUM_PAGES), &fp->tx_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->tx_chain = (union eth_tx_bd_types *)fp->tx_dma.vaddr; } /* link together the tx bd chain pages */ for (j = 1; j <= TX_BD_NUM_PAGES; j++) { /* index into the tx bd chain array to last entry per page */ struct eth_tx_next_bd *tx_next_bd = &fp->tx_chain[TX_BD_TOTAL_PER_PAGE * j - 1].next_bd; /* point to the next page and wrap from last page */ busaddr = (fp->tx_dma.paddr + (BCM_PAGE_SIZE * (j % TX_BD_NUM_PAGES))); tx_next_bd->addr_hi = htole32(U64_HI(busaddr)); tx_next_bd->addr_lo = htole32(U64_LO(busaddr)); } /******************/ /* FP RX BD CHAIN */ /******************/ snprintf(buf, sizeof(buf), "fp %d rx bd chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RX_BD_NUM_PAGES), &fp->rx_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rx_chain = (struct eth_rx_bd *)fp->rx_dma.vaddr; } /* link together the rx bd chain pages */ for (j = 1; j <= RX_BD_NUM_PAGES; j++) { /* index into the rx bd chain array to last entry per page */ struct eth_rx_bd *rx_bd = &fp->rx_chain[RX_BD_TOTAL_PER_PAGE * j - 2]; /* point to the next page and wrap from last page */ busaddr = (fp->rx_dma.paddr + (BCM_PAGE_SIZE * (j % RX_BD_NUM_PAGES))); rx_bd->addr_hi = htole32(U64_HI(busaddr)); rx_bd->addr_lo = htole32(U64_LO(busaddr)); } /*******************/ /* FP RX RCQ CHAIN */ /*******************/ snprintf(buf, sizeof(buf), "fp %d rcq chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RCQ_NUM_PAGES), &fp->rcq_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rcq_chain = (union eth_rx_cqe *)fp->rcq_dma.vaddr; } /* link together the rcq chain pages */ for (j = 1; j <= RCQ_NUM_PAGES; j++) { /* index into the rcq chain array to last entry per page */ struct eth_rx_cqe_next_page *rx_cqe_next = (struct eth_rx_cqe_next_page *) &fp->rcq_chain[RCQ_TOTAL_PER_PAGE * j - 1]; /* point to the next page and wrap from last page */ busaddr = (fp->rcq_dma.paddr + (BCM_PAGE_SIZE * (j % RCQ_NUM_PAGES))); rx_cqe_next->addr_hi = htole32(U64_HI(busaddr)); rx_cqe_next->addr_lo = htole32(U64_LO(busaddr)); } /*******************/ /* FP RX SGE CHAIN */ /*******************/ snprintf(buf, sizeof(buf), "fp %d sge chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RX_SGE_NUM_PAGES), &fp->rx_sge_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rx_sge_chain = (struct eth_rx_sge *)fp->rx_sge_dma.vaddr; } /* link together the sge chain pages */ for (j = 1; j <= RX_SGE_NUM_PAGES; j++) { /* index into the rcq chain array to last entry per page */ struct eth_rx_sge *rx_sge = &fp->rx_sge_chain[RX_SGE_TOTAL_PER_PAGE * j - 2]; /* point to the next page and wrap from last page */ busaddr = (fp->rx_sge_dma.paddr + (BCM_PAGE_SIZE * (j % RX_SGE_NUM_PAGES))); rx_sge->addr_hi = htole32(U64_HI(busaddr)); rx_sge->addr_lo = htole32(U64_LO(busaddr)); } /***********************/ /* FP TX MBUF DMA MAPS */ /***********************/ /* set required sizes before mapping to conserve resources */ if (sc->ifnet->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) { max_size = BXE_TSO_MAX_SIZE; max_segments = BXE_TSO_MAX_SEGMENTS; max_seg_size = BXE_TSO_MAX_SEG_SIZE; } else { max_size = (MCLBYTES * BXE_MAX_SEGMENTS); max_segments = BXE_MAX_SEGMENTS; max_seg_size = MCLBYTES; } /* create a dma tag for the tx mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ max_size, /* max map size */ max_segments, /* num discontinuous */ max_seg_size, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->tx_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d tx mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for each of the tx mbuf clusters */ for (j = 0; j < TX_BD_TOTAL; j++) { if (bus_dmamap_create(fp->tx_mbuf_tag, BUS_DMA_NOWAIT, &fp->tx_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d tx mbuf %d' (%d)\n", i, j, rc); return (1); } } /***********************/ /* FP RX MBUF DMA MAPS */ /***********************/ /* create a dma tag for the rx mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ MJUM9BYTES, /* max map size */ 1, /* num discontinuous */ MJUM9BYTES, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->rx_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d rx mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for each of the rx mbuf clusters */ for (j = 0; j < RX_BD_TOTAL; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx mbuf cluster */ if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx mbuf' (%d)\n", i, rc); return (1); } /***************************/ /* FP RX SGE MBUF DMA MAPS */ /***************************/ /* create a dma tag for the rx sge mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ BCM_PAGE_SIZE, /* max map size */ 1, /* num discontinuous */ BCM_PAGE_SIZE, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->rx_sge_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d rx sge mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for the rx sge mbuf clusters */ for (j = 0; j < RX_SGE_TOTAL; j++) { if (bus_dmamap_create(fp->rx_sge_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_sge_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx sge mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx sge mbuf cluster */ if (bus_dmamap_create(fp->rx_sge_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_sge_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx sge mbuf' (%d)\n", i, rc); return (1); } /***************************/ /* FP RX TPA MBUF DMA MAPS */ /***************************/ /* create dma maps for the rx tpa mbuf clusters */ max_agg_queues = MAX_AGG_QS(sc); for (j = 0; j < max_agg_queues; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_tpa_info[j].bd.m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx tpa mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx tpa mbuf cluster */ if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_tpa_info_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx tpa mbuf' (%d)\n", i, rc); return (1); } bxe_init_sge_ring_bit_mask(fp); } return (0); } static void bxe_free_hsi_mem(struct bxe_softc *sc) { struct bxe_fastpath *fp; int max_agg_queues; int i, j; if (sc->parent_dma_tag == NULL) { return; /* assume nothing was allocated */ } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; /*******************/ /* FP STATUS BLOCK */ /*******************/ bxe_dma_free(sc, &fp->sb_dma); memset(&fp->status_block, 0, sizeof(fp->status_block)); /******************/ /* FP TX BD CHAIN */ /******************/ bxe_dma_free(sc, &fp->tx_dma); fp->tx_chain = NULL; /******************/ /* FP RX BD CHAIN */ /******************/ bxe_dma_free(sc, &fp->rx_dma); fp->rx_chain = NULL; /*******************/ /* FP RX RCQ CHAIN */ /*******************/ bxe_dma_free(sc, &fp->rcq_dma); fp->rcq_chain = NULL; /*******************/ /* FP RX SGE CHAIN */ /*******************/ bxe_dma_free(sc, &fp->rx_sge_dma); fp->rx_sge_chain = NULL; /***********************/ /* FP TX MBUF DMA MAPS */ /***********************/ if (fp->tx_mbuf_tag != NULL) { for (j = 0; j < TX_BD_TOTAL; j++) { if (fp->tx_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->tx_mbuf_tag, fp->tx_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->tx_mbuf_tag, fp->tx_mbuf_chain[j].m_map); } } bus_dma_tag_destroy(fp->tx_mbuf_tag); fp->tx_mbuf_tag = NULL; } /***********************/ /* FP RX MBUF DMA MAPS */ /***********************/ if (fp->rx_mbuf_tag != NULL) { for (j = 0; j < RX_BD_TOTAL; j++) { if (fp->rx_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_mbuf_chain[j].m_map); } } if (fp->rx_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map); } /***************************/ /* FP RX TPA MBUF DMA MAPS */ /***************************/ max_agg_queues = MAX_AGG_QS(sc); for (j = 0; j < max_agg_queues; j++) { if (fp->rx_tpa_info[j].bd.m_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info[j].bd.m_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_tpa_info[j].bd.m_map); } } if (fp->rx_tpa_info_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map); } bus_dma_tag_destroy(fp->rx_mbuf_tag); fp->rx_mbuf_tag = NULL; } /***************************/ /* FP RX SGE MBUF DMA MAPS */ /***************************/ if (fp->rx_sge_mbuf_tag != NULL) { for (j = 0; j < RX_SGE_TOTAL; j++) { if (fp->rx_sge_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[j].m_map); } } if (fp->rx_sge_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map); bus_dmamap_destroy(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map); } bus_dma_tag_destroy(fp->rx_sge_mbuf_tag); fp->rx_sge_mbuf_tag = NULL; } } /***************************/ /* FW DECOMPRESSION BUFFER */ /***************************/ bxe_dma_free(sc, &sc->gz_buf_dma); sc->gz_buf = NULL; free(sc->gz_strm, M_DEVBUF); sc->gz_strm = NULL; /*******************/ /* SLOW PATH QUEUE */ /*******************/ bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; /*************/ /* SLOW PATH */ /*************/ bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; /***************/ /* EVENT QUEUE */ /***************/ bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; /************************/ /* DEFAULT STATUS BLOCK */ /************************/ bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); sc->parent_dma_tag = NULL; } /* * Previous driver DMAE transaction may have occurred when pre-boot stage * ended and boot began. This would invalidate the addresses of the * transaction, resulting in was-error bit set in the PCI causing all * hw-to-host PCIe transactions to timeout. If this happened we want to clear * the interrupt which detected this from the pglueb and the was-done bit */ static void bxe_prev_interrupted_dmae(struct bxe_softc *sc) { uint32_t val; if (!CHIP_IS_E1x(sc)) { val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) { BLOGD(sc, DBG_LOAD, "Clearing 'was-error' bit that was set in pglueb"); REG_WR(sc, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, 1 << SC_FUNC(sc)); } } } static int bxe_prev_mcp_done(struct bxe_softc *sc) { uint32_t rc = bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET); if (!rc) { BLOGE(sc, "MCP response failure, aborting\n"); return (-1); } return (0); } static struct bxe_prev_list_node * bxe_prev_path_get_entry(struct bxe_softc *sc) { struct bxe_prev_list_node *tmp; LIST_FOREACH(tmp, &bxe_prev_list, node) { if ((sc->pcie_bus == tmp->bus) && (sc->pcie_device == tmp->slot) && (SC_PATH(sc) == tmp->path)) { return (tmp); } } return (NULL); } static uint8_t bxe_prev_is_path_marked(struct bxe_softc *sc) { struct bxe_prev_list_node *tmp; int rc = FALSE; mtx_lock(&bxe_prev_mtx); tmp = bxe_prev_path_get_entry(sc); if (tmp) { if (tmp->aer) { BLOGD(sc, DBG_LOAD, "Path %d/%d/%d was marked by AER\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } else { rc = TRUE; BLOGD(sc, DBG_LOAD, "Path %d/%d/%d was already cleaned from previous drivers\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } } mtx_unlock(&bxe_prev_mtx); return (rc); } static int bxe_prev_mark_path(struct bxe_softc *sc, uint8_t after_undi) { struct bxe_prev_list_node *tmp; mtx_lock(&bxe_prev_mtx); /* Check whether the entry for this path already exists */ tmp = bxe_prev_path_get_entry(sc); if (tmp) { if (!tmp->aer) { BLOGD(sc, DBG_LOAD, "Re-marking AER in path %d/%d/%d\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } else { BLOGD(sc, DBG_LOAD, "Removing AER indication from path %d/%d/%d\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); tmp->aer = 0; } mtx_unlock(&bxe_prev_mtx); return (0); } mtx_unlock(&bxe_prev_mtx); /* Create an entry for this path and add it */ tmp = malloc(sizeof(struct bxe_prev_list_node), M_DEVBUF, (M_NOWAIT | M_ZERO)); if (!tmp) { BLOGE(sc, "Failed to allocate 'bxe_prev_list_node'\n"); return (-1); } tmp->bus = sc->pcie_bus; tmp->slot = sc->pcie_device; tmp->path = SC_PATH(sc); tmp->aer = 0; tmp->undi = after_undi ? (1 << SC_PORT(sc)) : 0; mtx_lock(&bxe_prev_mtx); BLOGD(sc, DBG_LOAD, "Marked path %d/%d/%d - finished previous unload\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); LIST_INSERT_HEAD(&bxe_prev_list, tmp, node); mtx_unlock(&bxe_prev_mtx); return (0); } static int bxe_do_flr(struct bxe_softc *sc) { int i; /* only E2 and onwards support FLR */ if (CHIP_IS_E1x(sc)) { BLOGD(sc, DBG_LOAD, "FLR not supported in E1/E1H\n"); return (-1); } /* only bootcode REQ_BC_VER_4_INITIATE_FLR and onwards support flr */ if (sc->devinfo.bc_ver < REQ_BC_VER_4_INITIATE_FLR) { BLOGD(sc, DBG_LOAD, "FLR not supported by BC_VER: 0x%08x\n", sc->devinfo.bc_ver); return (-1); } /* Wait for Transaction Pending bit clean */ for (i = 0; i < 4; i++) { if (i) { DELAY(((1 << (i - 1)) * 100) * 1000); } if (!bxe_is_pcie_pending(sc)) { goto clear; } } BLOGE(sc, "PCIE transaction is not cleared, " "proceeding with reset anyway\n"); clear: BLOGD(sc, DBG_LOAD, "Initiating FLR\n"); bxe_fw_command(sc, DRV_MSG_CODE_INITIATE_FLR, 0); return (0); } struct bxe_mac_vals { uint32_t xmac_addr; uint32_t xmac_val; uint32_t emac_addr; uint32_t emac_val; uint32_t umac_addr; uint32_t umac_val; uint32_t bmac_addr; uint32_t bmac_val[2]; }; static void bxe_prev_unload_close_mac(struct bxe_softc *sc, struct bxe_mac_vals *vals) { uint32_t val, base_addr, offset, mask, reset_reg; uint8_t mac_stopped = FALSE; uint8_t port = SC_PORT(sc); uint32_t wb_data[2]; /* reset addresses as they also mark which values were changed */ vals->bmac_addr = 0; vals->umac_addr = 0; vals->xmac_addr = 0; vals->emac_addr = 0; reset_reg = REG_RD(sc, MISC_REG_RESET_REG_2); if (!CHIP_IS_E3(sc)) { val = REG_RD(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4); mask = MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port; if ((mask & reset_reg) && val) { BLOGD(sc, DBG_LOAD, "Disable BMAC Rx\n"); base_addr = SC_PORT(sc) ? NIG_REG_INGRESS_BMAC1_MEM : NIG_REG_INGRESS_BMAC0_MEM; offset = CHIP_IS_E2(sc) ? BIGMAC2_REGISTER_BMAC_CONTROL : BIGMAC_REGISTER_BMAC_CONTROL; /* * use rd/wr since we cannot use dmae. This is safe * since MCP won't access the bus due to the request * to unload, and no function on the path can be * loaded at this time. */ wb_data[0] = REG_RD(sc, base_addr + offset); wb_data[1] = REG_RD(sc, base_addr + offset + 0x4); vals->bmac_addr = base_addr + offset; vals->bmac_val[0] = wb_data[0]; vals->bmac_val[1] = wb_data[1]; wb_data[0] &= ~ELINK_BMAC_CONTROL_RX_ENABLE; REG_WR(sc, vals->bmac_addr, wb_data[0]); REG_WR(sc, vals->bmac_addr + 0x4, wb_data[1]); } BLOGD(sc, DBG_LOAD, "Disable EMAC Rx\n"); vals->emac_addr = NIG_REG_NIG_EMAC0_EN + SC_PORT(sc)*4; vals->emac_val = REG_RD(sc, vals->emac_addr); REG_WR(sc, vals->emac_addr, 0); mac_stopped = TRUE; } else { if (reset_reg & MISC_REGISTERS_RESET_REG_2_XMAC) { BLOGD(sc, DBG_LOAD, "Disable XMAC Rx\n"); base_addr = SC_PORT(sc) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; val = REG_RD(sc, base_addr + XMAC_REG_PFC_CTRL_HI); REG_WR(sc, base_addr + XMAC_REG_PFC_CTRL_HI, val & ~(1 << 1)); REG_WR(sc, base_addr + XMAC_REG_PFC_CTRL_HI, val | (1 << 1)); vals->xmac_addr = base_addr + XMAC_REG_CTRL; vals->xmac_val = REG_RD(sc, vals->xmac_addr); REG_WR(sc, vals->xmac_addr, 0); mac_stopped = TRUE; } mask = MISC_REGISTERS_RESET_REG_2_UMAC0 << port; if (mask & reset_reg) { BLOGD(sc, DBG_LOAD, "Disable UMAC Rx\n"); base_addr = SC_PORT(sc) ? GRCBASE_UMAC1 : GRCBASE_UMAC0; vals->umac_addr = base_addr + UMAC_REG_COMMAND_CONFIG; vals->umac_val = REG_RD(sc, vals->umac_addr); REG_WR(sc, vals->umac_addr, 0); mac_stopped = TRUE; } } if (mac_stopped) { DELAY(20000); } } #define BXE_PREV_UNDI_PROD_ADDR(p) (BAR_TSTRORM_INTMEM + 0x1508 + ((p) << 4)) #define BXE_PREV_UNDI_RCQ(val) ((val) & 0xffff) #define BXE_PREV_UNDI_BD(val) ((val) >> 16 & 0xffff) #define BXE_PREV_UNDI_PROD(rcq, bd) ((bd) << 16 | (rcq)) static void bxe_prev_unload_undi_inc(struct bxe_softc *sc, uint8_t port, uint8_t inc) { uint16_t rcq, bd; uint32_t tmp_reg = REG_RD(sc, BXE_PREV_UNDI_PROD_ADDR(port)); rcq = BXE_PREV_UNDI_RCQ(tmp_reg) + inc; bd = BXE_PREV_UNDI_BD(tmp_reg) + inc; tmp_reg = BXE_PREV_UNDI_PROD(rcq, bd); REG_WR(sc, BXE_PREV_UNDI_PROD_ADDR(port), tmp_reg); BLOGD(sc, DBG_LOAD, "UNDI producer [%d] rings bd -> 0x%04x, rcq -> 0x%04x\n", port, bd, rcq); } static int bxe_prev_unload_common(struct bxe_softc *sc) { uint32_t reset_reg, tmp_reg = 0, rc; uint8_t prev_undi = FALSE; struct bxe_mac_vals mac_vals; uint32_t timer_count = 1000; uint32_t prev_brb; /* * It is possible a previous function received 'common' answer, * but hasn't loaded yet, therefore creating a scenario of * multiple functions receiving 'common' on the same path. */ BLOGD(sc, DBG_LOAD, "Common unload Flow\n"); memset(&mac_vals, 0, sizeof(mac_vals)); if (bxe_prev_is_path_marked(sc)) { return (bxe_prev_mcp_done(sc)); } reset_reg = REG_RD(sc, MISC_REG_RESET_REG_1); /* Reset should be performed after BRB is emptied */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_BRB1) { /* Close the MAC Rx to prevent BRB from filling up */ bxe_prev_unload_close_mac(sc, &mac_vals); /* close LLH filters towards the BRB */ elink_set_rx_filter(&sc->link_params, 0); /* * Check if the UNDI driver was previously loaded. * UNDI driver initializes CID offset for normal bell to 0x7 */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_DORQ) { tmp_reg = REG_RD(sc, DORQ_REG_NORM_CID_OFST); if (tmp_reg == 0x7) { BLOGD(sc, DBG_LOAD, "UNDI previously loaded\n"); prev_undi = TRUE; /* clear the UNDI indication */ REG_WR(sc, DORQ_REG_NORM_CID_OFST, 0); /* clear possible idle check errors */ REG_RD(sc, NIG_REG_NIG_INT_STS_CLR_0); } } /* wait until BRB is empty */ tmp_reg = REG_RD(sc, BRB1_REG_NUM_OF_FULL_BLOCKS); while (timer_count) { prev_brb = tmp_reg; tmp_reg = REG_RD(sc, BRB1_REG_NUM_OF_FULL_BLOCKS); if (!tmp_reg) { break; } BLOGD(sc, DBG_LOAD, "BRB still has 0x%08x\n", tmp_reg); /* reset timer as long as BRB actually gets emptied */ if (prev_brb > tmp_reg) { timer_count = 1000; } else { timer_count--; } /* If UNDI resides in memory, manually increment it */ if (prev_undi) { bxe_prev_unload_undi_inc(sc, SC_PORT(sc), 1); } DELAY(10); } if (!timer_count) { BLOGE(sc, "Failed to empty BRB\n"); } } /* No packets are in the pipeline, path is ready for reset */ bxe_reset_common(sc); if (mac_vals.xmac_addr) { REG_WR(sc, mac_vals.xmac_addr, mac_vals.xmac_val); } if (mac_vals.umac_addr) { REG_WR(sc, mac_vals.umac_addr, mac_vals.umac_val); } if (mac_vals.emac_addr) { REG_WR(sc, mac_vals.emac_addr, mac_vals.emac_val); } if (mac_vals.bmac_addr) { REG_WR(sc, mac_vals.bmac_addr, mac_vals.bmac_val[0]); REG_WR(sc, mac_vals.bmac_addr + 4, mac_vals.bmac_val[1]); } rc = bxe_prev_mark_path(sc, prev_undi); if (rc) { bxe_prev_mcp_done(sc); return (rc); } return (bxe_prev_mcp_done(sc)); } static int bxe_prev_unload_uncommon(struct bxe_softc *sc) { int rc; BLOGD(sc, DBG_LOAD, "Uncommon unload Flow\n"); /* Test if previous unload process was already finished for this path */ if (bxe_prev_is_path_marked(sc)) { return (bxe_prev_mcp_done(sc)); } BLOGD(sc, DBG_LOAD, "Path is unmarked\n"); /* * If function has FLR capabilities, and existing FW version matches * the one required, then FLR will be sufficient to clean any residue * left by previous driver */ rc = bxe_nic_load_analyze_req(sc, FW_MSG_CODE_DRV_LOAD_FUNCTION); if (!rc) { /* fw version is good */ BLOGD(sc, DBG_LOAD, "FW version matches our own, attempting FLR\n"); rc = bxe_do_flr(sc); } if (!rc) { /* FLR was performed */ BLOGD(sc, DBG_LOAD, "FLR successful\n"); return (0); } BLOGD(sc, DBG_LOAD, "Could not FLR\n"); /* Close the MCP request, return failure*/ rc = bxe_prev_mcp_done(sc); if (!rc) { rc = BXE_PREV_WAIT_NEEDED; } return (rc); } static int bxe_prev_unload(struct bxe_softc *sc) { int time_counter = 10; uint32_t fw, hw_lock_reg, hw_lock_val; uint32_t rc = 0; /* * Clear HW from errors which may have resulted from an interrupted * DMAE transaction. */ bxe_prev_interrupted_dmae(sc); /* Release previously held locks */ hw_lock_reg = (SC_FUNC(sc) <= 5) ? (MISC_REG_DRIVER_CONTROL_1 + SC_FUNC(sc) * 8) : (MISC_REG_DRIVER_CONTROL_7 + (SC_FUNC(sc) - 6) * 8); hw_lock_val = (REG_RD(sc, hw_lock_reg)); if (hw_lock_val) { if (hw_lock_val & HW_LOCK_RESOURCE_NVRAM) { BLOGD(sc, DBG_LOAD, "Releasing previously held NVRAM lock\n"); REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << SC_PORT(sc))); } BLOGD(sc, DBG_LOAD, "Releasing previously held HW lock\n"); REG_WR(sc, hw_lock_reg, 0xffffffff); } else { BLOGD(sc, DBG_LOAD, "No need to release HW/NVRAM locks\n"); } if (MCPR_ACCESS_LOCK_LOCK & REG_RD(sc, MCP_REG_MCPR_ACCESS_LOCK)) { BLOGD(sc, DBG_LOAD, "Releasing previously held ALR\n"); REG_WR(sc, MCP_REG_MCPR_ACCESS_LOCK, 0); } do { /* Lock MCP using an unload request */ fw = bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS, 0); if (!fw) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; break; } if (fw == FW_MSG_CODE_DRV_UNLOAD_COMMON) { rc = bxe_prev_unload_common(sc); break; } /* non-common reply from MCP night require looping */ rc = bxe_prev_unload_uncommon(sc); if (rc != BXE_PREV_WAIT_NEEDED) { break; } DELAY(20000); } while (--time_counter); if (!time_counter || rc) { BLOGE(sc, "Failed to unload previous driver!\n"); rc = -1; } return (rc); } void bxe_dcbx_set_state(struct bxe_softc *sc, uint8_t dcb_on, uint32_t dcbx_enabled) { if (!CHIP_IS_E1x(sc)) { sc->dcb_state = dcb_on; sc->dcbx_enabled = dcbx_enabled; } else { sc->dcb_state = FALSE; sc->dcbx_enabled = BXE_DCBX_ENABLED_INVALID; } BLOGD(sc, DBG_LOAD, "DCB state [%s:%s]\n", dcb_on ? "ON" : "OFF", (dcbx_enabled == BXE_DCBX_ENABLED_OFF) ? "user-mode" : (dcbx_enabled == BXE_DCBX_ENABLED_ON_NEG_OFF) ? "on-chip static" : (dcbx_enabled == BXE_DCBX_ENABLED_ON_NEG_ON) ? "on-chip with negotiation" : "invalid"); } /* must be called after sriov-enable */ static int bxe_set_qm_cid_count(struct bxe_softc *sc) { int cid_count = BXE_L2_MAX_CID(sc); if (IS_SRIOV(sc)) { cid_count += BXE_VF_CIDS; } if (CNIC_SUPPORT(sc)) { cid_count += CNIC_CID_MAX; } return (roundup(cid_count, QM_CID_ROUND)); } static void bxe_init_multi_cos(struct bxe_softc *sc) { int pri, cos; uint32_t pri_map = 0; /* XXX change to user config */ for (pri = 0; pri < BXE_MAX_PRIORITY; pri++) { cos = ((pri_map & (0xf << (pri * 4))) >> (pri * 4)); if (cos < sc->max_cos) { sc->prio_to_cos[pri] = cos; } else { BLOGW(sc, "Invalid COS %d for priority %d " "(max COS is %d), setting to 0\n", cos, pri, (sc->max_cos - 1)); sc->prio_to_cos[pri] = 0; } } } static int bxe_sysctl_state(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc; int error, result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) { return (error); } if (result == 1) { sc = (struct bxe_softc *)arg1; BLOGI(sc, "... dumping driver state ...\n"); /* XXX */ } return (error); } static int bxe_sysctl_eth_stat(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t *eth_stats = (uint32_t *)&sc->eth_stats; uint32_t *offset; uint64_t value = 0; int index = (int)arg2; if (index >= BXE_NUM_ETH_STATS) { BLOGE(sc, "bxe_eth_stats index out of range (%d)\n", index); return (-1); } offset = (eth_stats + bxe_eth_stats_arr[index].offset); switch (bxe_eth_stats_arr[index].size) { case 4: value = (uint64_t)*offset; break; case 8: value = HILO_U64(*offset, *(offset + 1)); break; default: BLOGE(sc, "Invalid bxe_eth_stats size (index=%d size=%d)\n", index, bxe_eth_stats_arr[index].size); return (-1); } return (sysctl_handle_64(oidp, &value, 0, req)); } static int bxe_sysctl_eth_q_stat(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t *eth_stats; uint32_t *offset; uint64_t value = 0; uint32_t q_stat = (uint32_t)arg2; uint32_t fp_index = ((q_stat >> 16) & 0xffff); uint32_t index = (q_stat & 0xffff); eth_stats = (uint32_t *)&sc->fp[fp_index].eth_q_stats; if (index >= BXE_NUM_ETH_Q_STATS) { BLOGE(sc, "bxe_eth_q_stats index out of range (%d)\n", index); return (-1); } offset = (eth_stats + bxe_eth_q_stats_arr[index].offset); switch (bxe_eth_q_stats_arr[index].size) { case 4: value = (uint64_t)*offset; break; case 8: value = HILO_U64(*offset, *(offset + 1)); break; default: BLOGE(sc, "Invalid bxe_eth_q_stats size (index=%d size=%d)\n", index, bxe_eth_q_stats_arr[index].size); return (-1); } return (sysctl_handle_64(oidp, &value, 0, req)); } static void bxe_add_sysctls(struct bxe_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *queue_top, *queue; struct sysctl_oid_list *queue_top_children, *queue_children; char queue_num_buf[32]; uint32_t q_stat; int i, j; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "version", CTLFLAG_RD, BXE_DRIVER_VERSION, 0, "version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bc_version", CTLFLAG_RD, &sc->devinfo.bc_ver_str, 0, "bootcode version"); snprintf(sc->fw_ver_str, sizeof(sc->fw_ver_str), "%d.%d.%d.%d", BCM_5710_FW_MAJOR_VERSION, BCM_5710_FW_MINOR_VERSION, BCM_5710_FW_REVISION_VERSION, BCM_5710_FW_ENGINEERING_VERSION); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "fw_version", CTLFLAG_RD, &sc->fw_ver_str, 0, "firmware version"); snprintf(sc->mf_mode_str, sizeof(sc->mf_mode_str), "%s", ((sc->devinfo.mf_info.mf_mode == SINGLE_FUNCTION) ? "Single" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SD) ? "MF-SD" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SI) ? "MF-SI" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_AFEX) ? "MF-AFEX" : "Unknown")); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mf_mode", CTLFLAG_RD, &sc->mf_mode_str, 0, "multifunction mode"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "mf_vnics", CTLFLAG_RD, &sc->devinfo.mf_info.vnics_per_port, 0, "multifunction vnics per port"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mac_addr", CTLFLAG_RD, &sc->mac_addr_str, 0, "mac address"); snprintf(sc->pci_link_str, sizeof(sc->pci_link_str), "%s x%d", ((sc->devinfo.pcie_link_speed == 1) ? "2.5GT/s" : (sc->devinfo.pcie_link_speed == 2) ? "5.0GT/s" : (sc->devinfo.pcie_link_speed == 4) ? "8.0GT/s" : "???GT/s"), sc->devinfo.pcie_link_width); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pci_link", CTLFLAG_RD, &sc->pci_link_str, 0, "pci link status"); sc->debug = bxe_debug; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "debug", CTLFLAG_RW, &sc->debug, 0, "debug logging mode"); sc->rx_budget = bxe_rx_budget; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_budget", CTLFLAG_RW, &sc->rx_budget, 0, "rx processing budget"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "state", CTLTYPE_UINT | CTLFLAG_RW, sc, 0, bxe_sysctl_state, "IU", "dump driver state"); for (i = 0; i < BXE_NUM_ETH_STATS; i++) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, bxe_eth_stats_arr[i].string, CTLTYPE_U64 | CTLFLAG_RD, sc, i, bxe_sysctl_eth_stat, "LU", bxe_eth_stats_arr[i].string); } /* add a new parent node for all queues "dev.bxe.#.queue" */ queue_top = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "queue", CTLFLAG_RD, NULL, "queue"); queue_top_children = SYSCTL_CHILDREN(queue_top); for (i = 0; i < sc->num_queues; i++) { /* add a new parent node for a single queue "dev.bxe.#.queue.#" */ snprintf(queue_num_buf, sizeof(queue_num_buf), "%d", i); queue = SYSCTL_ADD_NODE(ctx, queue_top_children, OID_AUTO, queue_num_buf, CTLFLAG_RD, NULL, "single queue"); queue_children = SYSCTL_CHILDREN(queue); for (j = 0; j < BXE_NUM_ETH_Q_STATS; j++) { q_stat = ((i << 16) | j); SYSCTL_ADD_PROC(ctx, queue_children, OID_AUTO, bxe_eth_q_stats_arr[j].string, CTLTYPE_U64 | CTLFLAG_RD, sc, q_stat, bxe_sysctl_eth_q_stat, "LU", bxe_eth_q_stats_arr[j].string); } } } /* * Device attach function. * * Allocates device resources, performs secondary chip identification, and * initializes driver instance variables. This function is called from driver * load after a successful probe. * * Returns: * 0 = Success, >0 = Failure */ static int bxe_attach(device_t dev) { struct bxe_softc *sc; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting attach...\n"); sc->state = BXE_STATE_CLOSED; sc->dev = dev; sc->unit = device_get_unit(dev); BLOGD(sc, DBG_LOAD, "softc = %p\n", sc); sc->pcie_bus = pci_get_bus(dev); sc->pcie_device = pci_get_slot(dev); sc->pcie_func = pci_get_function(dev); /* enable bus master capability */ pci_enable_busmaster(dev); /* get the BARs */ if (bxe_allocate_bars(sc) != 0) { return (ENXIO); } /* initialize the mutexes */ bxe_init_mutexes(sc); /* prepare the periodic callout */ callout_init(&sc->periodic_callout, 0); /* prepare the chip taskqueue */ sc->chip_tq_flags = CHIP_TQ_NONE; snprintf(sc->chip_tq_name, sizeof(sc->chip_tq_name), "bxe%d_chip_tq", sc->unit); TASK_INIT(&sc->chip_tq_task, 0, bxe_handle_chip_tq, sc); sc->chip_tq = taskqueue_create(sc->chip_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->chip_tq); taskqueue_start_threads(&sc->chip_tq, 1, PWAIT, /* lower priority */ "%s", sc->chip_tq_name); /* get device info and set params */ if (bxe_get_device_info(sc) != 0) { BLOGE(sc, "getting device info\n"); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* get final misc params */ bxe_get_params(sc); /* set the default MTU (changed via ifconfig) */ sc->mtu = ETHERMTU; bxe_set_modes_bitmap(sc); /* XXX * If in AFEX mode and the function is configured for FCoE * then bail... no L2 allowed. */ /* get phy settings from shmem and 'and' against admin settings */ bxe_get_phy_info(sc); /* initialize the FreeBSD ifnet interface */ if (bxe_init_ifnet(sc) != 0) { bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate device interrupts */ if (bxe_interrupt_alloc(sc) != 0) { if (sc->ifnet != NULL) { ether_ifdetach(sc->ifnet); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate ilt */ if (bxe_alloc_ilt_mem(sc) != 0) { bxe_interrupt_free(sc); if (sc->ifnet != NULL) { ether_ifdetach(sc->ifnet); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate the host hardware/software hsi structures */ if (bxe_alloc_hsi_mem(sc) != 0) { bxe_free_ilt_mem(sc); bxe_interrupt_free(sc); if (sc->ifnet != NULL) { ether_ifdetach(sc->ifnet); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* need to reset chip if UNDI was active */ if (IS_PF(sc) && !BXE_NOMCP(sc)) { /* init fw_seq */ sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); BLOGD(sc, DBG_LOAD, "prev unload fw_seq 0x%04x\n", sc->fw_seq); bxe_prev_unload(sc); } #if 1 /* XXX */ bxe_dcbx_set_state(sc, FALSE, BXE_DCBX_ENABLED_OFF); #else if (SHMEM2_HAS(sc, dcbx_lldp_params_offset) && SHMEM2_HAS(sc, dcbx_lldp_dcbx_stat_offset) && SHMEM2_RD(sc, dcbx_lldp_params_offset) && SHMEM2_RD(sc, dcbx_lldp_dcbx_stat_offset)) { bxe_dcbx_set_state(sc, TRUE, BXE_DCBX_ENABLED_ON_NEG_ON); bxe_dcbx_init_params(sc); } else { bxe_dcbx_set_state(sc, FALSE, BXE_DCBX_ENABLED_OFF); } #endif /* calculate qm_cid_count */ sc->qm_cid_count = bxe_set_qm_cid_count(sc); BLOGD(sc, DBG_LOAD, "qm_cid_count=%d\n", sc->qm_cid_count); sc->max_cos = 1; bxe_init_multi_cos(sc); bxe_add_sysctls(sc); return (0); } /* * Device detach function. * * Stops the controller, resets the controller, and releases resources. * * Returns: * 0 = Success, >0 = Failure */ static int bxe_detach(device_t dev) { struct bxe_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting detach...\n"); ifp = sc->ifnet; if (ifp != NULL && ifp->if_vlantrunk != NULL) { BLOGE(sc, "Cannot detach while VLANs are in use.\n"); return(EBUSY); } /* stop the periodic callout */ bxe_periodic_stop(sc); /* stop the chip taskqueue */ atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_NONE); if (sc->chip_tq) { taskqueue_drain(sc->chip_tq, &sc->chip_tq_task); taskqueue_free(sc->chip_tq); sc->chip_tq = NULL; } /* stop and reset the controller if it was open */ if (sc->state != BXE_STATE_CLOSED) { BXE_CORE_LOCK(sc); bxe_nic_unload(sc, UNLOAD_CLOSE, TRUE); BXE_CORE_UNLOCK(sc); } /* release the network interface */ if (ifp != NULL) { ether_ifdetach(ifp); } ifmedia_removeall(&sc->ifmedia); /* XXX do the following based on driver state... */ /* free the host hardware/software hsi structures */ bxe_free_hsi_mem(sc); /* free ilt */ bxe_free_ilt_mem(sc); /* release the interrupts */ bxe_interrupt_free(sc); /* Release the mutexes*/ bxe_release_mutexes(sc); /* Release the PCIe BAR mapped memory */ bxe_deallocate_bars(sc); /* Release the FreeBSD interface. */ if (sc->ifnet != NULL) { if_free(sc->ifnet); } pci_disable_busmaster(dev); return (0); } /* * Device shutdown function. * * Stops and resets the controller. * * Returns: * Nothing */ static int bxe_shutdown(device_t dev) { struct bxe_softc *sc; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting shutdown...\n"); /* stop the periodic callout */ bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_nic_unload(sc, UNLOAD_NORMAL, FALSE); BXE_CORE_UNLOCK(sc); return (0); } void bxe_igu_ack_sb(struct bxe_softc *sc, uint8_t igu_sb_id, uint8_t segment, uint16_t index, uint8_t op, uint8_t update) { uint32_t igu_addr = sc->igu_base_addr; igu_addr += (IGU_CMD_INT_ACK_BASE + igu_sb_id)*8; bxe_igu_ack_sb_gen(sc, igu_sb_id, segment, index, op, update, igu_addr); } static void bxe_igu_clear_sb_gen(struct bxe_softc *sc, uint8_t func, uint8_t idu_sb_id, uint8_t is_pf) { uint32_t data, ctl, cnt = 100; uint32_t igu_addr_data = IGU_REG_COMMAND_REG_32LSB_DATA; uint32_t igu_addr_ctl = IGU_REG_COMMAND_REG_CTRL; uint32_t igu_addr_ack = IGU_REG_CSTORM_TYPE_0_SB_CLEANUP + (idu_sb_id/32)*4; uint32_t sb_bit = 1 << (idu_sb_id%32); uint32_t func_encode = func | (is_pf ? 1 : 0) << IGU_FID_ENCODE_IS_PF_SHIFT; uint32_t addr_encode = IGU_CMD_E2_PROD_UPD_BASE + idu_sb_id; /* Not supported in BC mode */ if (CHIP_INT_MODE_IS_BC(sc)) { return; } data = ((IGU_USE_REGISTER_cstorm_type_0_sb_cleanup << IGU_REGULAR_CLEANUP_TYPE_SHIFT) | IGU_REGULAR_CLEANUP_SET | IGU_REGULAR_BCLEANUP); ctl = ((addr_encode << IGU_CTRL_REG_ADDRESS_SHIFT) | (func_encode << IGU_CTRL_REG_FID_SHIFT) | (IGU_CTRL_CMD_TYPE_WR << IGU_CTRL_REG_TYPE_SHIFT)); BLOGD(sc, DBG_LOAD, "write 0x%08x to IGU(via GRC) addr 0x%x\n", data, igu_addr_data); REG_WR(sc, igu_addr_data, data); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); BLOGD(sc, DBG_LOAD, "write 0x%08x to IGU(via GRC) addr 0x%x\n", ctl, igu_addr_ctl); REG_WR(sc, igu_addr_ctl, ctl); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); /* wait for clean up to finish */ while (!(REG_RD(sc, igu_addr_ack) & sb_bit) && --cnt) { DELAY(20000); } if (!(REG_RD(sc, igu_addr_ack) & sb_bit)) { BLOGD(sc, DBG_LOAD, "Unable to finish IGU cleanup: " "idu_sb_id %d offset %d bit %d (cnt %d)\n", idu_sb_id, idu_sb_id/32, idu_sb_id%32, cnt); } } static void bxe_igu_clear_sb(struct bxe_softc *sc, uint8_t idu_sb_id) { bxe_igu_clear_sb_gen(sc, SC_FUNC(sc), idu_sb_id, TRUE /*PF*/); } /*******************/ /* ECORE CALLBACKS */ /*******************/ static void bxe_reset_common(struct bxe_softc *sc) { uint32_t val = 0x1400; /* reset_common */ REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR), 0xd3ffff7f); if (CHIP_IS_E3(sc)) { val |= MISC_REGISTERS_RESET_REG_2_MSTAT0; val |= MISC_REGISTERS_RESET_REG_2_MSTAT1; } REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR), val); } static void bxe_common_init_phy(struct bxe_softc *sc) { uint32_t shmem_base[2]; uint32_t shmem2_base[2]; /* Avoid common init in case MFW supports LFA */ if (SHMEM2_RD(sc, size) > (uint32_t)offsetof(struct shmem2_region, lfa_host_addr[SC_PORT(sc)])) { return; } shmem_base[0] = sc->devinfo.shmem_base; shmem2_base[0] = sc->devinfo.shmem2_base; if (!CHIP_IS_E1x(sc)) { shmem_base[1] = SHMEM2_RD(sc, other_shmem_base_addr); shmem2_base[1] = SHMEM2_RD(sc, other_shmem2_base_addr); } BXE_PHY_LOCK(sc); elink_common_init_phy(sc, shmem_base, shmem2_base, sc->devinfo.chip_id, 0); BXE_PHY_UNLOCK(sc); } static void bxe_pf_disable(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); val &= ~IGU_PF_CONF_FUNC_EN; REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); REG_WR(sc, CFC_REG_WEAK_ENABLE_PF, 0); } static void bxe_init_pxp(struct bxe_softc *sc) { uint16_t devctl; int r_order, w_order; devctl = bxe_pcie_capability_read(sc, PCIR_EXPRESS_DEVICE_CTL, 2); BLOGD(sc, DBG_LOAD, "read 0x%08x from devctl\n", devctl); w_order = ((devctl & PCIM_EXP_CTL_MAX_PAYLOAD) >> 5); if (sc->mrrs == -1) { r_order = ((devctl & PCIM_EXP_CTL_MAX_READ_REQUEST) >> 12); } else { BLOGD(sc, DBG_LOAD, "forcing read order to %d\n", sc->mrrs); r_order = sc->mrrs; } ecore_init_pxp_arb(sc, r_order, w_order); } static uint32_t bxe_get_pretend_reg(struct bxe_softc *sc) { uint32_t base = PXP2_REG_PGL_PRETEND_FUNC_F0; uint32_t stride = (PXP2_REG_PGL_PRETEND_FUNC_F1 - base); return (base + (SC_ABS_FUNC(sc)) * stride); } /* * Called only on E1H or E2. * When pretending to be PF, the pretend value is the function number 0..7. * When pretending to be VF, the pretend val is the PF-num:VF-valid:ABS-VFID * combination. */ static int bxe_pretend_func(struct bxe_softc *sc, uint16_t pretend_func_val) { uint32_t pretend_reg; if (CHIP_IS_E1H(sc) && (pretend_func_val > E1H_FUNC_MAX)) { return (-1); } /* get my own pretend register */ pretend_reg = bxe_get_pretend_reg(sc); REG_WR(sc, pretend_reg, pretend_func_val); REG_RD(sc, pretend_reg); return (0); } static void bxe_iov_init_dmae(struct bxe_softc *sc) { return; #if 0 BLOGD(sc, DBG_LOAD, "SRIOV is %s\n", IS_SRIOV(sc) ? "ON" : "OFF"); if (!IS_SRIOV(sc)) { return; } REG_WR(sc, DMAE_REG_BACKWARD_COMP_EN, 0); #endif } #if 0 static int bxe_iov_init_ilt(struct bxe_softc *sc, uint16_t line) { return (line); #if 0 int i; struct ecore_ilt* ilt = sc->ilt; if (!IS_SRIOV(sc)) { return (line); } /* set vfs ilt lines */ for (i = 0; i < BXE_VF_CIDS/ILT_PAGE_CIDS ; i++) { struct hw_dma *hw_cxt = SC_VF_CXT_PAGE(sc,i); ilt->lines[line+i].page = hw_cxt->addr; ilt->lines[line+i].page_mapping = hw_cxt->mapping; ilt->lines[line+i].size = hw_cxt->size; /* doesn't matter */ } return (line+i); #endif } #endif static void bxe_iov_init_dq(struct bxe_softc *sc) { return; #if 0 if (!IS_SRIOV(sc)) { return; } /* Set the DQ such that the CID reflect the abs_vfid */ REG_WR(sc, DORQ_REG_VF_NORM_VF_BASE, 0); REG_WR(sc, DORQ_REG_MAX_RVFID_SIZE, ilog2(BNX2X_MAX_NUM_OF_VFS)); /* * Set VFs starting CID. If its > 0 the preceding CIDs are belong to * the PF L2 queues */ REG_WR(sc, DORQ_REG_VF_NORM_CID_BASE, BNX2X_FIRST_VF_CID); /* The VF window size is the log2 of the max number of CIDs per VF */ REG_WR(sc, DORQ_REG_VF_NORM_CID_WND_SIZE, BNX2X_VF_CID_WND); /* * The VF doorbell size 0 - *B, 4 - 128B. We set it here to match * the Pf doorbell size although the 2 are independent. */ REG_WR(sc, DORQ_REG_VF_NORM_CID_OFST, BNX2X_DB_SHIFT - BNX2X_DB_MIN_SHIFT); /* * No security checks for now - * configure single rule (out of 16) mask = 0x1, value = 0x0, * CID range 0 - 0x1ffff */ REG_WR(sc, DORQ_REG_VF_TYPE_MASK_0, 1); REG_WR(sc, DORQ_REG_VF_TYPE_VALUE_0, 0); REG_WR(sc, DORQ_REG_VF_TYPE_MIN_MCID_0, 0); REG_WR(sc, DORQ_REG_VF_TYPE_MAX_MCID_0, 0x1ffff); /* set the number of VF alllowed doorbells to the full DQ range */ REG_WR(sc, DORQ_REG_VF_NORM_MAX_CID_COUNT, 0x20000); /* set the VF doorbell threshold */ REG_WR(sc, DORQ_REG_VF_USAGE_CT_LIMIT, 4); #endif } /* send a NIG loopback debug packet */ static void bxe_lb_pckt(struct bxe_softc *sc) { uint32_t wb_write[3]; /* Ethernet source and destination addresses */ wb_write[0] = 0x55555555; wb_write[1] = 0x55555555; wb_write[2] = 0x20; /* SOP */ REG_WR_DMAE(sc, NIG_REG_DEBUG_PACKET_LB, wb_write, 3); /* NON-IP protocol */ wb_write[0] = 0x09000000; wb_write[1] = 0x55555555; wb_write[2] = 0x10; /* EOP, eop_bvalid = 0 */ REG_WR_DMAE(sc, NIG_REG_DEBUG_PACKET_LB, wb_write, 3); } /* * Some of the internal memories are not directly readable from the driver. * To test them we send debug packets. */ static int bxe_int_mem_test(struct bxe_softc *sc) { int factor; int count, i; uint32_t val = 0; if (CHIP_REV_IS_FPGA(sc)) { factor = 120; } else if (CHIP_REV_IS_EMUL(sc)) { factor = 200; } else { factor = 1; } /* disable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x0); REG_WR(sc, TCM_REG_PRS_IFEN, 0x0); REG_WR(sc, CFC_REG_DEBUG0, 0x1); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x0); /* write 0 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x0); /* send Ethernet packet */ bxe_lb_pckt(sc); /* TODO do i reset NIG statistic? */ /* Wait until NIG register shows 1 packet of size 0x10 */ count = 1000 * factor; while (count) { bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); if (val == 0x10) { break; } DELAY(10000); count--; } if (val != 0x10) { BLOGE(sc, "NIG timeout val=0x%x\n", val); return (-1); } /* wait until PRS register shows 1 packet */ count = (1000 * factor); while (count) { val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val == 1) { break; } DELAY(10000); count--; } if (val != 0x1) { BLOGE(sc, "PRS timeout val=0x%x\n", val); return (-2); } /* Reset and init BRB, PRS */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x03); DELAY(50000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03); DELAY(50000); ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); /* Disable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x0); REG_WR(sc, TCM_REG_PRS_IFEN, 0x0); REG_WR(sc, CFC_REG_DEBUG0, 0x1); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x0); /* Write 0 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x0); /* send 10 Ethernet packets */ for (i = 0; i < 10; i++) { bxe_lb_pckt(sc); } /* Wait until NIG register shows 10+1 packets of size 11*0x10 = 0xb0 */ count = (1000 * factor); while (count) { bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); if (val == 0xb0) { break; } DELAY(10000); count--; } if (val != 0xb0) { BLOGE(sc, "NIG timeout val=0x%x\n", val); return (-3); } /* Wait until PRS register shows 2 packets */ val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val != 2) { BLOGE(sc, "PRS timeout val=0x%x\n", val); } /* Write 1 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x1); /* Wait until PRS register shows 3 packets */ DELAY(10000 * factor); /* Wait until NIG register shows 1 packet of size 0x10 */ val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val != 3) { BLOGE(sc, "PRS timeout val=0x%x\n", val); } /* clear NIG EOP FIFO */ for (i = 0; i < 11; i++) { REG_RD(sc, NIG_REG_INGRESS_EOP_LB_FIFO); } val = REG_RD(sc, NIG_REG_INGRESS_EOP_LB_EMPTY); if (val != 1) { BLOGE(sc, "clear of NIG failed\n"); return (-4); } /* Reset and init BRB, PRS, NIG */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x03); DELAY(50000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03); DELAY(50000); ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); if (!CNIC_SUPPORT(sc)) { /* set NIC mode */ REG_WR(sc, PRS_REG_NIC_MODE, 1); } /* Enable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x7fffffff); REG_WR(sc, TCM_REG_PRS_IFEN, 0x1); REG_WR(sc, CFC_REG_DEBUG0, 0x0); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x1); return (0); } static void bxe_setup_fan_failure_detection(struct bxe_softc *sc) { int is_required; uint32_t val; int port; is_required = 0; val = (SHMEM_RD(sc, dev_info.shared_hw_config.config2) & SHARED_HW_CFG_FAN_FAILURE_MASK); if (val == SHARED_HW_CFG_FAN_FAILURE_ENABLED) { is_required = 1; } /* * The fan failure mechanism is usually related to the PHY type since * the power consumption of the board is affected by the PHY. Currently, * fan is required for most designs with SFX7101, BCM8727 and BCM8481. */ else if (val == SHARED_HW_CFG_FAN_FAILURE_PHY_TYPE) { for (port = PORT_0; port < PORT_MAX; port++) { is_required |= elink_fan_failure_det_req(sc, sc->devinfo.shmem_base, sc->devinfo.shmem2_base, port); } } BLOGD(sc, DBG_LOAD, "fan detection setting: %d\n", is_required); if (is_required == 0) { return; } /* Fan failure is indicated by SPIO 5 */ bxe_set_spio(sc, MISC_SPIO_SPIO5, MISC_SPIO_INPUT_HI_Z); /* set to active low mode */ val = REG_RD(sc, MISC_REG_SPIO_INT); val |= (MISC_SPIO_SPIO5 << MISC_SPIO_INT_OLD_SET_POS); REG_WR(sc, MISC_REG_SPIO_INT, val); /* enable interrupt to signal the IGU */ val = REG_RD(sc, MISC_REG_SPIO_EVENT_EN); val |= MISC_SPIO_SPIO5; REG_WR(sc, MISC_REG_SPIO_EVENT_EN, val); } static void bxe_enable_blocks_attention(struct bxe_softc *sc) { uint32_t val; REG_WR(sc, PXP_REG_PXP_INT_MASK_0, 0); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PXP_REG_PXP_INT_MASK_1, 0x40); } else { REG_WR(sc, PXP_REG_PXP_INT_MASK_1, 0); } REG_WR(sc, DORQ_REG_DORQ_INT_MASK, 0); REG_WR(sc, CFC_REG_CFC_INT_MASK, 0); /* * mask read length error interrupts in brb for parser * (parsing unit and 'checksum and crc' unit) * these errors are legal (PU reads fixed length and CAC can cause * read length error on truncated packets) */ REG_WR(sc, BRB1_REG_BRB1_INT_MASK, 0xFC00); REG_WR(sc, QM_REG_QM_INT_MASK, 0); REG_WR(sc, TM_REG_TM_INT_MASK, 0); REG_WR(sc, XSDM_REG_XSDM_INT_MASK_0, 0); REG_WR(sc, XSDM_REG_XSDM_INT_MASK_1, 0); REG_WR(sc, XCM_REG_XCM_INT_MASK, 0); /* REG_WR(sc, XSEM_REG_XSEM_INT_MASK_0, 0); */ /* REG_WR(sc, XSEM_REG_XSEM_INT_MASK_1, 0); */ REG_WR(sc, USDM_REG_USDM_INT_MASK_0, 0); REG_WR(sc, USDM_REG_USDM_INT_MASK_1, 0); REG_WR(sc, UCM_REG_UCM_INT_MASK, 0); /* REG_WR(sc, USEM_REG_USEM_INT_MASK_0, 0); */ /* REG_WR(sc, USEM_REG_USEM_INT_MASK_1, 0); */ REG_WR(sc, GRCBASE_UPB + PB_REG_PB_INT_MASK, 0); REG_WR(sc, CSDM_REG_CSDM_INT_MASK_0, 0); REG_WR(sc, CSDM_REG_CSDM_INT_MASK_1, 0); REG_WR(sc, CCM_REG_CCM_INT_MASK, 0); /* REG_WR(sc, CSEM_REG_CSEM_INT_MASK_0, 0); */ /* REG_WR(sc, CSEM_REG_CSEM_INT_MASK_1, 0); */ val = (PXP2_PXP2_INT_MASK_0_REG_PGL_CPL_AFT | PXP2_PXP2_INT_MASK_0_REG_PGL_CPL_OF | PXP2_PXP2_INT_MASK_0_REG_PGL_PCIE_ATTN); if (!CHIP_IS_E1x(sc)) { val |= (PXP2_PXP2_INT_MASK_0_REG_PGL_READ_BLOCKED | PXP2_PXP2_INT_MASK_0_REG_PGL_WRITE_BLOCKED); } REG_WR(sc, PXP2_REG_PXP2_INT_MASK_0, val); REG_WR(sc, TSDM_REG_TSDM_INT_MASK_0, 0); REG_WR(sc, TSDM_REG_TSDM_INT_MASK_1, 0); REG_WR(sc, TCM_REG_TCM_INT_MASK, 0); /* REG_WR(sc, TSEM_REG_TSEM_INT_MASK_0, 0); */ if (!CHIP_IS_E1x(sc)) { /* enable VFC attentions: bits 11 and 12, bits 31:13 reserved */ REG_WR(sc, TSEM_REG_TSEM_INT_MASK_1, 0x07ff); } REG_WR(sc, CDU_REG_CDU_INT_MASK, 0); REG_WR(sc, DMAE_REG_DMAE_INT_MASK, 0); /* REG_WR(sc, MISC_REG_MISC_INT_MASK, 0); */ REG_WR(sc, PBF_REG_PBF_INT_MASK, 0x18); /* bit 3,4 masked */ } /** * bxe_init_hw_common - initialize the HW at the COMMON phase. * * @sc: driver handle */ static int bxe_init_hw_common(struct bxe_softc *sc) { uint8_t abs_func_id; uint32_t val; BLOGD(sc, DBG_LOAD, "starting common init for func %d\n", SC_ABS_FUNC(sc)); /* * take the RESET lock to protect undi_unload flow from accessing * registers while we are resetting the chip */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET); bxe_reset_common(sc); REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET), 0xffffffff); val = 0xfffc; if (CHIP_IS_E3(sc)) { val |= MISC_REGISTERS_RESET_REG_2_MSTAT0; val |= MISC_REGISTERS_RESET_REG_2_MSTAT1; } REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET), val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); ecore_init_block(sc, BLOCK_MISC, PHASE_COMMON); BLOGD(sc, DBG_LOAD, "after misc block init\n"); if (!CHIP_IS_E1x(sc)) { /* * 4-port mode or 2-port mode we need to turn off master-enable for * everyone. After that we turn it back on for self. So, we disregard * multi-function, and always disable all functions on the given path, * this means 0,2,4,6 for path 0 and 1,3,5,7 for path 1 */ for (abs_func_id = SC_PATH(sc); abs_func_id < (E2_FUNC_MAX * 2); abs_func_id += 2) { if (abs_func_id == SC_ABS_FUNC(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); continue; } bxe_pretend_func(sc, abs_func_id); /* clear pf enable */ bxe_pf_disable(sc); bxe_pretend_func(sc, SC_ABS_FUNC(sc)); } } BLOGD(sc, DBG_LOAD, "after pf disable\n"); ecore_init_block(sc, BLOCK_PXP, PHASE_COMMON); if (CHIP_IS_E1(sc)) { /* * enable HW interrupt from PXP on USDM overflow * bit 16 on INT_MASK_0 */ REG_WR(sc, PXP_REG_PXP_INT_MASK_0, 0); } ecore_init_block(sc, BLOCK_PXP2, PHASE_COMMON); bxe_init_pxp(sc); #ifdef __BIG_ENDIAN REG_WR(sc, PXP2_REG_RQ_QM_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_TM_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_SRC_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_CDU_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_DBG_ENDIAN_M, 1); /* make sure this value is 0 */ REG_WR(sc, PXP2_REG_RQ_HC_ENDIAN_M, 0); //REG_WR(sc, PXP2_REG_RD_PBF_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_QM_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_TM_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_SRC_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_CDURD_SWAP_MODE, 1); #endif ecore_ilt_init_page_size(sc, INITOP_SET); if (CHIP_REV_IS_FPGA(sc) && CHIP_IS_E1H(sc)) { REG_WR(sc, PXP2_REG_PGL_TAGS_LIMIT, 0x1); } /* let the HW do it's magic... */ DELAY(100000); /* finish PXP init */ val = REG_RD(sc, PXP2_REG_RQ_CFG_DONE); if (val != 1) { BLOGE(sc, "PXP2 CFG failed\n"); return (-1); } val = REG_RD(sc, PXP2_REG_RD_INIT_DONE); if (val != 1) { BLOGE(sc, "PXP2 RD_INIT failed\n"); return (-1); } BLOGD(sc, DBG_LOAD, "after pxp init\n"); /* * Timer bug workaround for E2 only. We need to set the entire ILT to have * entries with value "0" and valid bit on. This needs to be done by the * first PF that is loaded in a path (i.e. common phase) */ if (!CHIP_IS_E1x(sc)) { /* * In E2 there is a bug in the timers block that can cause function 6 / 7 * (i.e. vnic3) to start even if it is marked as "scan-off". * This occurs when a different function (func2,3) is being marked * as "scan-off". Real-life scenario for example: if a driver is being * load-unloaded while func6,7 are down. This will cause the timer to access * the ilt, translate to a logical address and send a request to read/write. * Since the ilt for the function that is down is not valid, this will cause * a translation error which is unrecoverable. * The Workaround is intended to make sure that when this happens nothing * fatal will occur. The workaround: * 1. First PF driver which loads on a path will: * a. After taking the chip out of reset, by using pretend, * it will write "0" to the following registers of * the other vnics. * REG_WR(pdev, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); * REG_WR(pdev, CFC_REG_WEAK_ENABLE_PF,0); * REG_WR(pdev, CFC_REG_STRONG_ENABLE_PF,0); * And for itself it will write '1' to * PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER to enable * dmae-operations (writing to pram for example.) * note: can be done for only function 6,7 but cleaner this * way. * b. Write zero+valid to the entire ILT. * c. Init the first_timers_ilt_entry, last_timers_ilt_entry of * VNIC3 (of that port). The range allocated will be the * entire ILT. This is needed to prevent ILT range error. * 2. Any PF driver load flow: * a. ILT update with the physical addresses of the allocated * logical pages. * b. Wait 20msec. - note that this timeout is needed to make * sure there are no requests in one of the PXP internal * queues with "old" ILT addresses. * c. PF enable in the PGLC. * d. Clear the was_error of the PF in the PGLC. (could have * occurred while driver was down) * e. PF enable in the CFC (WEAK + STRONG) * f. Timers scan enable * 3. PF driver unload flow: * a. Clear the Timers scan_en. * b. Polling for scan_on=0 for that PF. * c. Clear the PF enable bit in the PXP. * d. Clear the PF enable in the CFC (WEAK + STRONG) * e. Write zero+valid to all ILT entries (The valid bit must * stay set) * f. If this is VNIC 3 of a port then also init * first_timers_ilt_entry to zero and last_timers_ilt_entry * to the last enrty in the ILT. * * Notes: * Currently the PF error in the PGLC is non recoverable. * In the future the there will be a recovery routine for this error. * Currently attention is masked. * Having an MCP lock on the load/unload process does not guarantee that * there is no Timer disable during Func6/7 enable. This is because the * Timers scan is currently being cleared by the MCP on FLR. * Step 2.d can be done only for PF6/7 and the driver can also check if * there is error before clearing it. But the flow above is simpler and * more general. * All ILT entries are written by zero+valid and not just PF6/7 * ILT entries since in the future the ILT entries allocation for * PF-s might be dynamic. */ struct ilt_client_info ilt_cli; struct ecore_ilt ilt; memset(&ilt_cli, 0, sizeof(struct ilt_client_info)); memset(&ilt, 0, sizeof(struct ecore_ilt)); /* initialize dummy TM client */ ilt_cli.start = 0; ilt_cli.end = ILT_NUM_PAGE_ENTRIES - 1; ilt_cli.client_num = ILT_CLIENT_TM; /* * Step 1: set zeroes to all ilt page entries with valid bit on * Step 2: set the timers first/last ilt entry to point * to the entire range to prevent ILT range error for 3rd/4th * vnic (this code assumes existence of the vnic) * * both steps performed by call to ecore_ilt_client_init_op() * with dummy TM client * * we must use pretend since PXP2_REG_RQ_##blk##_FIRST_ILT * and his brother are split registers */ bxe_pretend_func(sc, (SC_PATH(sc) + 6)); ecore_ilt_client_init_op_ilt(sc, &ilt, &ilt_cli, INITOP_CLEAR); bxe_pretend_func(sc, SC_ABS_FUNC(sc)); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN, BXE_PXP_DRAM_ALIGN); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN_RD, BXE_PXP_DRAM_ALIGN); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN_SEL, 1); } REG_WR(sc, PXP2_REG_RQ_DISABLE_INPUTS, 0); REG_WR(sc, PXP2_REG_RD_DISABLE_INPUTS, 0); if (!CHIP_IS_E1x(sc)) { int factor = CHIP_REV_IS_EMUL(sc) ? 1000 : (CHIP_REV_IS_FPGA(sc) ? 400 : 0); ecore_init_block(sc, BLOCK_PGLUE_B, PHASE_COMMON); ecore_init_block(sc, BLOCK_ATC, PHASE_COMMON); /* let the HW do it's magic... */ do { DELAY(200000); val = REG_RD(sc, ATC_REG_ATC_INIT_DONE); } while (factor-- && (val != 1)); if (val != 1) { BLOGE(sc, "ATC_INIT failed\n"); return (-1); } } BLOGD(sc, DBG_LOAD, "after pglue and atc init\n"); ecore_init_block(sc, BLOCK_DMAE, PHASE_COMMON); bxe_iov_init_dmae(sc); /* clean the DMAE memory */ sc->dmae_ready = 1; ecore_init_fill(sc, TSEM_REG_PRAM, 0, 8, 1); ecore_init_block(sc, BLOCK_TCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_UCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XCM, PHASE_COMMON); bxe_read_dmae(sc, XSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, CSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, TSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, USEM_REG_PASSIVE_BUFFER, 3); ecore_init_block(sc, BLOCK_QM, PHASE_COMMON); /* QM queues pointers table */ ecore_qm_init_ptr_table(sc, sc->qm_cid_count, INITOP_SET); /* soft reset pulse */ REG_WR(sc, QM_REG_SOFT_RESET, 1); REG_WR(sc, QM_REG_SOFT_RESET, 0); if (CNIC_SUPPORT(sc)) ecore_init_block(sc, BLOCK_TM, PHASE_COMMON); ecore_init_block(sc, BLOCK_DORQ, PHASE_COMMON); REG_WR(sc, DORQ_REG_DPM_CID_OFST, BXE_DB_SHIFT); if (!CHIP_REV_IS_SLOW(sc)) { /* enable hw interrupt from doorbell Q */ REG_WR(sc, DORQ_REG_DORQ_INT_MASK, 0); } ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); REG_WR(sc, PRS_REG_A_PRSU_20, 0xf); if (!CHIP_IS_E1(sc)) { REG_WR(sc, PRS_REG_E1HOV_MODE, sc->devinfo.mf_info.path_has_ovlan); } if (!CHIP_IS_E1x(sc) && !CHIP_IS_E3B0(sc)) { if (IS_MF_AFEX(sc)) { /* * configure that AFEX and VLAN headers must be * received in AFEX mode */ REG_WR(sc, PRS_REG_HDRS_AFTER_BASIC, 0xE); REG_WR(sc, PRS_REG_MUST_HAVE_HDRS, 0xA); REG_WR(sc, PRS_REG_HDRS_AFTER_TAG_0, 0x6); REG_WR(sc, PRS_REG_TAG_ETHERTYPE_0, 0x8926); REG_WR(sc, PRS_REG_TAG_LEN_0, 0x4); } else { /* * Bit-map indicating which L2 hdrs may appear * after the basic Ethernet header */ REG_WR(sc, PRS_REG_HDRS_AFTER_BASIC, sc->devinfo.mf_info.path_has_ovlan ? 7 : 6); } } ecore_init_block(sc, BLOCK_TSDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CSDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_USDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XSDM, PHASE_COMMON); if (!CHIP_IS_E1x(sc)) { /* reset VFC memories */ REG_WR(sc, TSEM_REG_FAST_MEMORY + VFC_REG_MEMORIES_RST, VFC_MEMORIES_RST_REG_CAM_RST | VFC_MEMORIES_RST_REG_RAM_RST); REG_WR(sc, XSEM_REG_FAST_MEMORY + VFC_REG_MEMORIES_RST, VFC_MEMORIES_RST_REG_CAM_RST | VFC_MEMORIES_RST_REG_RAM_RST); DELAY(20000); } ecore_init_block(sc, BLOCK_TSEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_USEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CSEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XSEM, PHASE_COMMON); /* sync semi rtc */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x80000000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x80000000); ecore_init_block(sc, BLOCK_UPB, PHASE_COMMON); ecore_init_block(sc, BLOCK_XPB, PHASE_COMMON); ecore_init_block(sc, BLOCK_PBF, PHASE_COMMON); if (!CHIP_IS_E1x(sc)) { if (IS_MF_AFEX(sc)) { /* * configure that AFEX and VLAN headers must be * sent in AFEX mode */ REG_WR(sc, PBF_REG_HDRS_AFTER_BASIC, 0xE); REG_WR(sc, PBF_REG_MUST_HAVE_HDRS, 0xA); REG_WR(sc, PBF_REG_HDRS_AFTER_TAG_0, 0x6); REG_WR(sc, PBF_REG_TAG_ETHERTYPE_0, 0x8926); REG_WR(sc, PBF_REG_TAG_LEN_0, 0x4); } else { REG_WR(sc, PBF_REG_HDRS_AFTER_BASIC, sc->devinfo.mf_info.path_has_ovlan ? 7 : 6); } } REG_WR(sc, SRC_REG_SOFT_RST, 1); ecore_init_block(sc, BLOCK_SRC, PHASE_COMMON); if (CNIC_SUPPORT(sc)) { REG_WR(sc, SRC_REG_KEYSEARCH_0, 0x63285672); REG_WR(sc, SRC_REG_KEYSEARCH_1, 0x24b8f2cc); REG_WR(sc, SRC_REG_KEYSEARCH_2, 0x223aef9b); REG_WR(sc, SRC_REG_KEYSEARCH_3, 0x26001e3a); REG_WR(sc, SRC_REG_KEYSEARCH_4, 0x7ae91116); REG_WR(sc, SRC_REG_KEYSEARCH_5, 0x5ce5230b); REG_WR(sc, SRC_REG_KEYSEARCH_6, 0x298d8adf); REG_WR(sc, SRC_REG_KEYSEARCH_7, 0x6eb0ff09); REG_WR(sc, SRC_REG_KEYSEARCH_8, 0x1830f82f); REG_WR(sc, SRC_REG_KEYSEARCH_9, 0x01e46be7); } REG_WR(sc, SRC_REG_SOFT_RST, 0); if (sizeof(union cdu_context) != 1024) { /* we currently assume that a context is 1024 bytes */ BLOGE(sc, "please adjust the size of cdu_context(%ld)\n", (long)sizeof(union cdu_context)); } ecore_init_block(sc, BLOCK_CDU, PHASE_COMMON); val = (4 << 24) + (0 << 12) + 1024; REG_WR(sc, CDU_REG_CDU_GLOBAL_PARAMS, val); ecore_init_block(sc, BLOCK_CFC, PHASE_COMMON); REG_WR(sc, CFC_REG_INIT_REG, 0x7FF); /* enable context validation interrupt from CFC */ REG_WR(sc, CFC_REG_CFC_INT_MASK, 0); /* set the thresholds to prevent CFC/CDU race */ REG_WR(sc, CFC_REG_DEBUG0, 0x20020000); ecore_init_block(sc, BLOCK_HC, PHASE_COMMON); if (!CHIP_IS_E1x(sc) && BXE_NOMCP(sc)) { REG_WR(sc, IGU_REG_RESET_MEMORIES, 0x36); } ecore_init_block(sc, BLOCK_IGU, PHASE_COMMON); ecore_init_block(sc, BLOCK_MISC_AEU, PHASE_COMMON); /* Reset PCIE errors for debug */ REG_WR(sc, 0x2814, 0xffffffff); REG_WR(sc, 0x3820, 0xffffffff); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_CONTROL_5, (PXPCS_TL_CONTROL_5_ERR_UNSPPORT1 | PXPCS_TL_CONTROL_5_ERR_UNSPPORT)); REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_FUNC345_STAT, (PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT4 | PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT3 | PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT2)); REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_FUNC678_STAT, (PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT7 | PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT6 | PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT5)); } ecore_init_block(sc, BLOCK_NIG, PHASE_COMMON); if (!CHIP_IS_E1(sc)) { /* in E3 this done in per-port section */ if (!CHIP_IS_E3(sc)) REG_WR(sc, NIG_REG_LLH_MF_MODE, IS_MF(sc)); } if (CHIP_IS_E1H(sc)) { /* not applicable for E2 (and above ...) */ REG_WR(sc, NIG_REG_LLH_E1HOV_MODE, IS_MF_SD(sc)); } if (CHIP_REV_IS_SLOW(sc)) { DELAY(200000); } /* finish CFC init */ val = reg_poll(sc, CFC_REG_LL_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC LL_INIT failed\n"); return (-1); } val = reg_poll(sc, CFC_REG_AC_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC AC_INIT failed\n"); return (-1); } val = reg_poll(sc, CFC_REG_CAM_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC CAM_INIT failed\n"); return (-1); } REG_WR(sc, CFC_REG_DEBUG0, 0); if (CHIP_IS_E1(sc)) { /* read NIG statistic to see if this is our first up since powerup */ bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); /* do internal memory self test */ if ((val == 0) && bxe_int_mem_test(sc)) { BLOGE(sc, "internal mem self test failed\n"); return (-1); } } bxe_setup_fan_failure_detection(sc); /* clear PXP2 attentions */ REG_RD(sc, PXP2_REG_PXP2_INT_STS_CLR_0); bxe_enable_blocks_attention(sc); if (!CHIP_REV_IS_SLOW(sc)) { ecore_enable_blocks_parity(sc); } if (!BXE_NOMCP(sc)) { if (CHIP_IS_E1x(sc)) { bxe_common_init_phy(sc); } } return (0); } /** * bxe_init_hw_common_chip - init HW at the COMMON_CHIP phase. * * @sc: driver handle */ static int bxe_init_hw_common_chip(struct bxe_softc *sc) { int rc = bxe_init_hw_common(sc); if (rc) { return (rc); } /* In E2 2-PORT mode, same ext phy is used for the two paths */ if (!BXE_NOMCP(sc)) { bxe_common_init_phy(sc); } return (0); } static int bxe_init_hw_port(struct bxe_softc *sc) { int port = SC_PORT(sc); int init_phase = port ? PHASE_PORT1 : PHASE_PORT0; uint32_t low, high; uint32_t val; BLOGD(sc, DBG_LOAD, "starting port init for port %d\n", port); REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0); ecore_init_block(sc, BLOCK_MISC, init_phase); ecore_init_block(sc, BLOCK_PXP, init_phase); ecore_init_block(sc, BLOCK_PXP2, init_phase); /* * Timers bug workaround: disables the pf_master bit in pglue at * common phase, we need to enable it here before any dmae access are * attempted. Therefore we manually added the enable-master to the * port phase (it also happens in the function phase) */ if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); } ecore_init_block(sc, BLOCK_ATC, init_phase); ecore_init_block(sc, BLOCK_DMAE, init_phase); ecore_init_block(sc, BLOCK_PGLUE_B, init_phase); ecore_init_block(sc, BLOCK_QM, init_phase); ecore_init_block(sc, BLOCK_TCM, init_phase); ecore_init_block(sc, BLOCK_UCM, init_phase); ecore_init_block(sc, BLOCK_CCM, init_phase); ecore_init_block(sc, BLOCK_XCM, init_phase); /* QM cid (connection) count */ ecore_qm_init_cid_count(sc, sc->qm_cid_count, INITOP_SET); if (CNIC_SUPPORT(sc)) { ecore_init_block(sc, BLOCK_TM, init_phase); REG_WR(sc, TM_REG_LIN0_SCAN_TIME + port*4, 20); REG_WR(sc, TM_REG_LIN0_MAX_ACTIVE_CID + port*4, 31); } ecore_init_block(sc, BLOCK_DORQ, init_phase); ecore_init_block(sc, BLOCK_BRB1, init_phase); if (CHIP_IS_E1(sc) || CHIP_IS_E1H(sc)) { if (IS_MF(sc)) { low = (BXE_ONE_PORT(sc) ? 160 : 246); } else if (sc->mtu > 4096) { if (BXE_ONE_PORT(sc)) { low = 160; } else { val = sc->mtu; /* (24*1024 + val*4)/256 */ low = (96 + (val / 64) + ((val % 64) ? 1 : 0)); } } else { low = (BXE_ONE_PORT(sc) ? 80 : 160); } high = (low + 56); /* 14*1024/256 */ REG_WR(sc, BRB1_REG_PAUSE_LOW_THRESHOLD_0 + port*4, low); REG_WR(sc, BRB1_REG_PAUSE_HIGH_THRESHOLD_0 + port*4, high); } if (CHIP_IS_MODE_4_PORT(sc)) { REG_WR(sc, SC_PORT(sc) ? BRB1_REG_MAC_GUARANTIED_1 : BRB1_REG_MAC_GUARANTIED_0, 40); } ecore_init_block(sc, BLOCK_PRS, init_phase); if (CHIP_IS_E3B0(sc)) { if (IS_MF_AFEX(sc)) { /* configure headers for AFEX mode */ REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_BASIC_PORT_1 : PRS_REG_HDRS_AFTER_BASIC_PORT_0, 0xE); REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_TAG_0_PORT_1 : PRS_REG_HDRS_AFTER_TAG_0_PORT_0, 0x6); REG_WR(sc, SC_PORT(sc) ? PRS_REG_MUST_HAVE_HDRS_PORT_1 : PRS_REG_MUST_HAVE_HDRS_PORT_0, 0xA); } else { /* Ovlan exists only if we are in multi-function + * switch-dependent mode, in switch-independent there * is no ovlan headers */ REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_BASIC_PORT_1 : PRS_REG_HDRS_AFTER_BASIC_PORT_0, (sc->devinfo.mf_info.path_has_ovlan ? 7 : 6)); } } ecore_init_block(sc, BLOCK_TSDM, init_phase); ecore_init_block(sc, BLOCK_CSDM, init_phase); ecore_init_block(sc, BLOCK_USDM, init_phase); ecore_init_block(sc, BLOCK_XSDM, init_phase); ecore_init_block(sc, BLOCK_TSEM, init_phase); ecore_init_block(sc, BLOCK_USEM, init_phase); ecore_init_block(sc, BLOCK_CSEM, init_phase); ecore_init_block(sc, BLOCK_XSEM, init_phase); ecore_init_block(sc, BLOCK_UPB, init_phase); ecore_init_block(sc, BLOCK_XPB, init_phase); ecore_init_block(sc, BLOCK_PBF, init_phase); if (CHIP_IS_E1x(sc)) { /* configure PBF to work without PAUSE mtu 9000 */ REG_WR(sc, PBF_REG_P0_PAUSE_ENABLE + port*4, 0); /* update threshold */ REG_WR(sc, PBF_REG_P0_ARB_THRSH + port*4, (9040/16)); /* update init credit */ REG_WR(sc, PBF_REG_P0_INIT_CRD + port*4, (9040/16) + 553 - 22); /* probe changes */ REG_WR(sc, PBF_REG_INIT_P0 + port*4, 1); DELAY(50); REG_WR(sc, PBF_REG_INIT_P0 + port*4, 0); } if (CNIC_SUPPORT(sc)) { ecore_init_block(sc, BLOCK_SRC, init_phase); } ecore_init_block(sc, BLOCK_CDU, init_phase); ecore_init_block(sc, BLOCK_CFC, init_phase); if (CHIP_IS_E1(sc)) { REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } ecore_init_block(sc, BLOCK_HC, init_phase); ecore_init_block(sc, BLOCK_IGU, init_phase); ecore_init_block(sc, BLOCK_MISC_AEU, init_phase); /* init aeu_mask_attn_func_0/1: * - SF mode: bits 3-7 are masked. only bits 0-2 are in use * - MF mode: bit 3 is masked. bits 0-2 are in use as in SF * bits 4-7 are used for "per vn group attention" */ val = IS_MF(sc) ? 0xF7 : 0x7; /* Enable DCBX attention for all but E1 */ val |= CHIP_IS_E1(sc) ? 0 : 0x10; REG_WR(sc, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4, val); ecore_init_block(sc, BLOCK_NIG, init_phase); if (!CHIP_IS_E1x(sc)) { /* Bit-map indicating which L2 hdrs may appear after the * basic Ethernet header */ if (IS_MF_AFEX(sc)) { REG_WR(sc, SC_PORT(sc) ? NIG_REG_P1_HDRS_AFTER_BASIC : NIG_REG_P0_HDRS_AFTER_BASIC, 0xE); } else { REG_WR(sc, SC_PORT(sc) ? NIG_REG_P1_HDRS_AFTER_BASIC : NIG_REG_P0_HDRS_AFTER_BASIC, IS_MF_SD(sc) ? 7 : 6); } if (CHIP_IS_E3(sc)) { REG_WR(sc, SC_PORT(sc) ? NIG_REG_LLH1_MF_MODE : NIG_REG_LLH_MF_MODE, IS_MF(sc)); } } if (!CHIP_IS_E3(sc)) { REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port*4, 1); } if (!CHIP_IS_E1(sc)) { /* 0x2 disable mf_ov, 0x1 enable */ REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK_MF + port*4, (IS_MF_SD(sc) ? 0x1 : 0x2)); if (!CHIP_IS_E1x(sc)) { val = 0; switch (sc->devinfo.mf_info.mf_mode) { case MULTI_FUNCTION_SD: val = 1; break; case MULTI_FUNCTION_SI: case MULTI_FUNCTION_AFEX: val = 2; break; } REG_WR(sc, (SC_PORT(sc) ? NIG_REG_LLH1_CLS_TYPE : NIG_REG_LLH0_CLS_TYPE), val); } REG_WR(sc, NIG_REG_LLFC_ENABLE_0 + port*4, 0); REG_WR(sc, NIG_REG_LLFC_OUT_EN_0 + port*4, 0); REG_WR(sc, NIG_REG_PAUSE_ENABLE_0 + port*4, 1); } /* If SPIO5 is set to generate interrupts, enable it for this port */ val = REG_RD(sc, MISC_REG_SPIO_EVENT_EN); if (val & MISC_SPIO_SPIO5) { uint32_t reg_addr = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0); val = REG_RD(sc, reg_addr); val |= AEU_INPUTS_ATTN_BITS_SPIO5; REG_WR(sc, reg_addr, val); } return (0); } static uint32_t bxe_flr_clnup_reg_poll(struct bxe_softc *sc, uint32_t reg, uint32_t expected, uint32_t poll_count) { uint32_t cur_cnt = poll_count; uint32_t val; while ((val = REG_RD(sc, reg)) != expected && cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); } return (val); } static int bxe_flr_clnup_poll_hw_counter(struct bxe_softc *sc, uint32_t reg, char *msg, uint32_t poll_cnt) { uint32_t val = bxe_flr_clnup_reg_poll(sc, reg, 0, poll_cnt); if (val != 0) { BLOGE(sc, "%s usage count=%d\n", msg, val); return (1); } return (0); } /* Common routines with VF FLR cleanup */ static uint32_t bxe_flr_clnup_poll_count(struct bxe_softc *sc) { /* adjust polling timeout */ if (CHIP_REV_IS_EMUL(sc)) { return (FLR_POLL_CNT * 2000); } if (CHIP_REV_IS_FPGA(sc)) { return (FLR_POLL_CNT * 120); } return (FLR_POLL_CNT); } static int bxe_poll_hw_usage_counters(struct bxe_softc *sc, uint32_t poll_cnt) { /* wait for CFC PF usage-counter to zero (includes all the VFs) */ if (bxe_flr_clnup_poll_hw_counter(sc, CFC_REG_NUM_LCIDS_INSIDE_PF, "CFC PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for DQ PF usage-counter to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, DORQ_REG_PF_USAGE_CNT, "DQ PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for QM PF usage-counter to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, QM_REG_PF_USG_CNT_0 + 4*SC_FUNC(sc), "QM PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for Timer PF usage-counters to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, TM_REG_LIN0_VNIC_UC + 4*SC_PORT(sc), "Timers VNIC usage counter timed out", poll_cnt)) { return (1); } if (bxe_flr_clnup_poll_hw_counter(sc, TM_REG_LIN0_NUM_SCANS + 4*SC_PORT(sc), "Timers NUM_SCANS usage counter timed out", poll_cnt)) { return (1); } /* Wait DMAE PF usage counter to zero */ if (bxe_flr_clnup_poll_hw_counter(sc, dmae_reg_go_c[INIT_DMAE_C(sc)], "DMAE dommand register timed out", poll_cnt)) { return (1); } return (0); } #define OP_GEN_PARAM(param) \ (((param) << SDM_OP_GEN_COMP_PARAM_SHIFT) & SDM_OP_GEN_COMP_PARAM) #define OP_GEN_TYPE(type) \ (((type) << SDM_OP_GEN_COMP_TYPE_SHIFT) & SDM_OP_GEN_COMP_TYPE) #define OP_GEN_AGG_VECT(index) \ (((index) << SDM_OP_GEN_AGG_VECT_IDX_SHIFT) & SDM_OP_GEN_AGG_VECT_IDX) static int bxe_send_final_clnup(struct bxe_softc *sc, uint8_t clnup_func, uint32_t poll_cnt) { uint32_t op_gen_command = 0; uint32_t comp_addr = (BAR_CSTRORM_INTMEM + CSTORM_FINAL_CLEANUP_COMPLETE_OFFSET(clnup_func)); int ret = 0; if (REG_RD(sc, comp_addr)) { BLOGE(sc, "Cleanup complete was not 0 before sending\n"); return (1); } op_gen_command |= OP_GEN_PARAM(XSTORM_AGG_INT_FINAL_CLEANUP_INDEX); op_gen_command |= OP_GEN_TYPE(XSTORM_AGG_INT_FINAL_CLEANUP_COMP_TYPE); op_gen_command |= OP_GEN_AGG_VECT(clnup_func); op_gen_command |= 1 << SDM_OP_GEN_AGG_VECT_IDX_VALID_SHIFT; BLOGD(sc, DBG_LOAD, "sending FW Final cleanup\n"); REG_WR(sc, XSDM_REG_OPERATION_GEN, op_gen_command); if (bxe_flr_clnup_reg_poll(sc, comp_addr, 1, poll_cnt) != 1) { BLOGE(sc, "FW final cleanup did not succeed\n"); BLOGD(sc, DBG_LOAD, "At timeout completion address contained %x\n", (REG_RD(sc, comp_addr))); bxe_panic(sc, ("FLR cleanup failed\n")); return (1); } /* Zero completion for nxt FLR */ REG_WR(sc, comp_addr, 0); return (ret); } static void bxe_pbf_pN_buf_flushed(struct bxe_softc *sc, struct pbf_pN_buf_regs *regs, uint32_t poll_count) { uint32_t init_crd, crd, crd_start, crd_freed, crd_freed_start; uint32_t cur_cnt = poll_count; crd_freed = crd_freed_start = REG_RD(sc, regs->crd_freed); crd = crd_start = REG_RD(sc, regs->crd); init_crd = REG_RD(sc, regs->init_crd); BLOGD(sc, DBG_LOAD, "INIT CREDIT[%d] : %x\n", regs->pN, init_crd); BLOGD(sc, DBG_LOAD, "CREDIT[%d] : s:%x\n", regs->pN, crd); BLOGD(sc, DBG_LOAD, "CREDIT_FREED[%d]: s:%x\n", regs->pN, crd_freed); while ((crd != init_crd) && ((uint32_t)((int32_t)crd_freed - (int32_t)crd_freed_start) < (init_crd - crd_start))) { if (cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); crd = REG_RD(sc, regs->crd); crd_freed = REG_RD(sc, regs->crd_freed); } else { BLOGD(sc, DBG_LOAD, "PBF tx buffer[%d] timed out\n", regs->pN); BLOGD(sc, DBG_LOAD, "CREDIT[%d] : c:%x\n", regs->pN, crd); BLOGD(sc, DBG_LOAD, "CREDIT_FREED[%d]: c:%x\n", regs->pN, crd_freed); break; } } BLOGD(sc, DBG_LOAD, "Waited %d*%d usec for PBF tx buffer[%d]\n", poll_count-cur_cnt, FLR_WAIT_INTERVAL, regs->pN); } static void bxe_pbf_pN_cmd_flushed(struct bxe_softc *sc, struct pbf_pN_cmd_regs *regs, uint32_t poll_count) { uint32_t occup, to_free, freed, freed_start; uint32_t cur_cnt = poll_count; occup = to_free = REG_RD(sc, regs->lines_occup); freed = freed_start = REG_RD(sc, regs->lines_freed); BLOGD(sc, DBG_LOAD, "OCCUPANCY[%d] : s:%x\n", regs->pN, occup); BLOGD(sc, DBG_LOAD, "LINES_FREED[%d] : s:%x\n", regs->pN, freed); while (occup && ((uint32_t)((int32_t)freed - (int32_t)freed_start) < to_free)) { if (cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); occup = REG_RD(sc, regs->lines_occup); freed = REG_RD(sc, regs->lines_freed); } else { BLOGD(sc, DBG_LOAD, "PBF cmd queue[%d] timed out\n", regs->pN); BLOGD(sc, DBG_LOAD, "OCCUPANCY[%d] : s:%x\n", regs->pN, occup); BLOGD(sc, DBG_LOAD, "LINES_FREED[%d] : s:%x\n", regs->pN, freed); break; } } BLOGD(sc, DBG_LOAD, "Waited %d*%d usec for PBF cmd queue[%d]\n", poll_count - cur_cnt, FLR_WAIT_INTERVAL, regs->pN); } static void bxe_tx_hw_flushed(struct bxe_softc *sc, uint32_t poll_count) { struct pbf_pN_cmd_regs cmd_regs[] = { {0, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_Q0 : PBF_REG_P0_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_Q0 : PBF_REG_P0_TQ_LINES_FREED_CNT}, {1, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_Q1 : PBF_REG_P1_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_Q1 : PBF_REG_P1_TQ_LINES_FREED_CNT}, {4, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_LB_Q : PBF_REG_P4_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_LB_Q : PBF_REG_P4_TQ_LINES_FREED_CNT} }; struct pbf_pN_buf_regs buf_regs[] = { {0, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_Q0 : PBF_REG_P0_INIT_CRD , (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_Q0 : PBF_REG_P0_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_Q0 : PBF_REG_P0_INTERNAL_CRD_FREED_CNT}, {1, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_Q1 : PBF_REG_P1_INIT_CRD, (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_Q1 : PBF_REG_P1_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_Q1 : PBF_REG_P1_INTERNAL_CRD_FREED_CNT}, {4, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_LB_Q : PBF_REG_P4_INIT_CRD, (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_LB_Q : PBF_REG_P4_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_LB_Q : PBF_REG_P4_INTERNAL_CRD_FREED_CNT}, }; int i; /* Verify the command queues are flushed P0, P1, P4 */ for (i = 0; i < ARRAY_SIZE(cmd_regs); i++) { bxe_pbf_pN_cmd_flushed(sc, &cmd_regs[i], poll_count); } /* Verify the transmission buffers are flushed P0, P1, P4 */ for (i = 0; i < ARRAY_SIZE(buf_regs); i++) { bxe_pbf_pN_buf_flushed(sc, &buf_regs[i], poll_count); } } static void bxe_hw_enable_status(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, CFC_REG_WEAK_ENABLE_PF); BLOGD(sc, DBG_LOAD, "CFC_REG_WEAK_ENABLE_PF is 0x%x\n", val); val = REG_RD(sc, PBF_REG_DISABLE_PF); BLOGD(sc, DBG_LOAD, "PBF_REG_DISABLE_PF is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSI_EN); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSI_EN is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_EN); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSIX_EN is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_FUNC_MASK); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSIX_FUNC_MASK is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER is 0x%x\n", val); } static int bxe_pf_flr_clnup(struct bxe_softc *sc) { uint32_t poll_cnt = bxe_flr_clnup_poll_count(sc); BLOGD(sc, DBG_LOAD, "Cleanup after FLR PF[%d]\n", SC_ABS_FUNC(sc)); /* Re-enable PF target read access */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); /* Poll HW usage counters */ BLOGD(sc, DBG_LOAD, "Polling usage counters\n"); if (bxe_poll_hw_usage_counters(sc, poll_cnt)) { return (-1); } /* Zero the igu 'trailing edge' and 'leading edge' */ /* Send the FW cleanup command */ if (bxe_send_final_clnup(sc, (uint8_t)SC_FUNC(sc), poll_cnt)) { return (-1); } /* ATC cleanup */ /* Verify TX hw is flushed */ bxe_tx_hw_flushed(sc, poll_cnt); /* Wait 100ms (not adjusted according to platform) */ DELAY(100000); /* Verify no pending pci transactions */ if (bxe_is_pcie_pending(sc)) { BLOGE(sc, "PCIE Transactions still pending\n"); } /* Debug */ bxe_hw_enable_status(sc); /* * Master enable - Due to WB DMAE writes performed before this * register is re-initialized as part of the regular function init */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); return (0); } #if 0 static void bxe_init_searcher(struct bxe_softc *sc) { int port = SC_PORT(sc); ecore_src_init_t2(sc, sc->t2, sc->t2_mapping, SRC_CONN_NUM); /* T1 hash bits value determines the T1 number of entries */ REG_WR(sc, SRC_REG_NUMBER_HASH_BITS0 + port*4, SRC_HASH_BITS); } #endif static int bxe_init_hw_func(struct bxe_softc *sc) { int port = SC_PORT(sc); int func = SC_FUNC(sc); int init_phase = PHASE_PF0 + func; struct ecore_ilt *ilt = sc->ilt; uint16_t cdu_ilt_start; uint32_t addr, val; uint32_t main_mem_base, main_mem_size, main_mem_prty_clr; int i, main_mem_width, rc; BLOGD(sc, DBG_LOAD, "starting func init for func %d\n", func); /* FLR cleanup */ if (!CHIP_IS_E1x(sc)) { rc = bxe_pf_flr_clnup(sc); if (rc) { BLOGE(sc, "FLR cleanup failed!\n"); // XXX bxe_fw_dump(sc); // XXX bxe_idle_chk(sc); return (rc); } } /* set MSI reconfigure capability */ if (sc->devinfo.int_block == INT_BLOCK_HC) { addr = (port ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0); val = REG_RD(sc, addr); val |= HC_CONFIG_0_REG_MSI_ATTN_EN_0; REG_WR(sc, addr, val); } ecore_init_block(sc, BLOCK_PXP, init_phase); ecore_init_block(sc, BLOCK_PXP2, init_phase); ilt = sc->ilt; cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start; #if 0 if (IS_SRIOV(sc)) { cdu_ilt_start += BXE_FIRST_VF_CID/ILT_PAGE_CIDS; } cdu_ilt_start = bxe_iov_init_ilt(sc, cdu_ilt_start); #if (BXE_FIRST_VF_CID > 0) /* * If BXE_FIRST_VF_CID > 0 then the PF L2 cids precedes * those of the VFs, so start line should be reset */ cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start; #endif #endif for (i = 0; i < L2_ILT_LINES(sc); i++) { ilt->lines[cdu_ilt_start + i].page = sc->context[i].vcxt; ilt->lines[cdu_ilt_start + i].page_mapping = sc->context[i].vcxt_dma.paddr; ilt->lines[cdu_ilt_start + i].size = sc->context[i].size; } ecore_ilt_init_op(sc, INITOP_SET); #if 0 if (!CONFIGURE_NIC_MODE(sc)) { bxe_init_searcher(sc); REG_WR(sc, PRS_REG_NIC_MODE, 0); BLOGD(sc, DBG_LOAD, "NIC MODE disabled\n"); } else #endif { /* Set NIC mode */ REG_WR(sc, PRS_REG_NIC_MODE, 1); BLOGD(sc, DBG_LOAD, "NIC MODE configured\n"); } if (!CHIP_IS_E1x(sc)) { uint32_t pf_conf = IGU_PF_CONF_FUNC_EN; /* Turn on a single ISR mode in IGU if driver is going to use * INT#x or MSI */ if (sc->interrupt_mode != INTR_MODE_MSIX) { pf_conf |= IGU_PF_CONF_SINGLE_ISR_EN; } /* * Timers workaround bug: function init part. * Need to wait 20msec after initializing ILT, * needed to make sure there are no requests in * one of the PXP internal queues with "old" ILT addresses */ DELAY(20000); /* * Master enable - Due to WB DMAE writes performed before this * register is re-initialized as part of the regular function * init */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); /* Enable the function in IGU */ REG_WR(sc, IGU_REG_PF_CONFIGURATION, pf_conf); } sc->dmae_ready = 1; ecore_init_block(sc, BLOCK_PGLUE_B, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, func); ecore_init_block(sc, BLOCK_ATC, init_phase); ecore_init_block(sc, BLOCK_DMAE, init_phase); ecore_init_block(sc, BLOCK_NIG, init_phase); ecore_init_block(sc, BLOCK_SRC, init_phase); ecore_init_block(sc, BLOCK_MISC, init_phase); ecore_init_block(sc, BLOCK_TCM, init_phase); ecore_init_block(sc, BLOCK_UCM, init_phase); ecore_init_block(sc, BLOCK_CCM, init_phase); ecore_init_block(sc, BLOCK_XCM, init_phase); ecore_init_block(sc, BLOCK_TSEM, init_phase); ecore_init_block(sc, BLOCK_USEM, init_phase); ecore_init_block(sc, BLOCK_CSEM, init_phase); ecore_init_block(sc, BLOCK_XSEM, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, QM_REG_PF_EN, 1); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, TSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, USEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, CSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, XSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); } ecore_init_block(sc, BLOCK_QM, init_phase); ecore_init_block(sc, BLOCK_TM, init_phase); ecore_init_block(sc, BLOCK_DORQ, init_phase); bxe_iov_init_dq(sc); ecore_init_block(sc, BLOCK_BRB1, init_phase); ecore_init_block(sc, BLOCK_PRS, init_phase); ecore_init_block(sc, BLOCK_TSDM, init_phase); ecore_init_block(sc, BLOCK_CSDM, init_phase); ecore_init_block(sc, BLOCK_USDM, init_phase); ecore_init_block(sc, BLOCK_XSDM, init_phase); ecore_init_block(sc, BLOCK_UPB, init_phase); ecore_init_block(sc, BLOCK_XPB, init_phase); ecore_init_block(sc, BLOCK_PBF, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, PBF_REG_DISABLE_PF, 0); ecore_init_block(sc, BLOCK_CDU, init_phase); ecore_init_block(sc, BLOCK_CFC, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, CFC_REG_WEAK_ENABLE_PF, 1); if (IS_MF(sc)) { REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 1); REG_WR(sc, NIG_REG_LLH0_FUNC_VLAN_ID + port*8, OVLAN(sc)); } ecore_init_block(sc, BLOCK_MISC_AEU, init_phase); /* HC init per function */ if (sc->devinfo.int_block == INT_BLOCK_HC) { if (CHIP_IS_E1H(sc)) { REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } ecore_init_block(sc, BLOCK_HC, init_phase); } else { int num_segs, sb_idx, prod_offset; REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, 0); REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, 0); } ecore_init_block(sc, BLOCK_IGU, init_phase); if (!CHIP_IS_E1x(sc)) { int dsb_idx = 0; /** * Producer memory: * E2 mode: address 0-135 match to the mapping memory; * 136 - PF0 default prod; 137 - PF1 default prod; * 138 - PF2 default prod; 139 - PF3 default prod; * 140 - PF0 attn prod; 141 - PF1 attn prod; * 142 - PF2 attn prod; 143 - PF3 attn prod; * 144-147 reserved. * * E1.5 mode - In backward compatible mode; * for non default SB; each even line in the memory * holds the U producer and each odd line hold * the C producer. The first 128 producers are for * NDSB (PF0 - 0-31; PF1 - 32-63 and so on). The last 20 * producers are for the DSB for each PF. * Each PF has five segments: (the order inside each * segment is PF0; PF1; PF2; PF3) - 128-131 U prods; * 132-135 C prods; 136-139 X prods; 140-143 T prods; * 144-147 attn prods; */ /* non-default-status-blocks */ num_segs = CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_NDSB_NUM_SEGS : IGU_NORM_NDSB_NUM_SEGS; for (sb_idx = 0; sb_idx < sc->igu_sb_cnt; sb_idx++) { prod_offset = (sc->igu_base_sb + sb_idx) * num_segs; for (i = 0; i < num_segs; i++) { addr = IGU_REG_PROD_CONS_MEMORY + (prod_offset + i) * 4; REG_WR(sc, addr, 0); } /* send consumer update with value 0 */ bxe_ack_sb(sc, sc->igu_base_sb + sb_idx, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_igu_clear_sb(sc, sc->igu_base_sb + sb_idx); } /* default-status-blocks */ num_segs = CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_DSB_NUM_SEGS : IGU_NORM_DSB_NUM_SEGS; if (CHIP_IS_MODE_4_PORT(sc)) dsb_idx = SC_FUNC(sc); else dsb_idx = SC_VN(sc); prod_offset = (CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_BASE_DSB_PROD + dsb_idx : IGU_NORM_BASE_DSB_PROD + dsb_idx); /* * igu prods come in chunks of E1HVN_MAX (4) - * does not matters what is the current chip mode */ for (i = 0; i < (num_segs * E1HVN_MAX); i += E1HVN_MAX) { addr = IGU_REG_PROD_CONS_MEMORY + (prod_offset + i)*4; REG_WR(sc, addr, 0); } /* send consumer update with 0 */ if (CHIP_INT_MODE_IS_BC(sc)) { bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, CSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, XSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, TSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, 0, IGU_INT_NOP, 1); } else { bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, 0, IGU_INT_NOP, 1); } bxe_igu_clear_sb(sc, sc->igu_dsb_id); /* !!! these should become driver const once rf-tool supports split-68 const */ REG_WR(sc, IGU_REG_SB_INT_BEFORE_MASK_LSB, 0); REG_WR(sc, IGU_REG_SB_INT_BEFORE_MASK_MSB, 0); REG_WR(sc, IGU_REG_SB_MASK_LSB, 0); REG_WR(sc, IGU_REG_SB_MASK_MSB, 0); REG_WR(sc, IGU_REG_PBA_STATUS_LSB, 0); REG_WR(sc, IGU_REG_PBA_STATUS_MSB, 0); } } /* Reset PCIE errors for debug */ REG_WR(sc, 0x2114, 0xffffffff); REG_WR(sc, 0x2120, 0xffffffff); if (CHIP_IS_E1x(sc)) { main_mem_size = HC_REG_MAIN_MEMORY_SIZE / 2; /*dwords*/ main_mem_base = HC_REG_MAIN_MEMORY + SC_PORT(sc) * (main_mem_size * 4); main_mem_prty_clr = HC_REG_HC_PRTY_STS_CLR; main_mem_width = 8; val = REG_RD(sc, main_mem_prty_clr); if (val) { BLOGD(sc, DBG_LOAD, "Parity errors in HC block during function init (0x%x)!\n", val); } /* Clear "false" parity errors in MSI-X table */ for (i = main_mem_base; i < main_mem_base + main_mem_size * 4; i += main_mem_width) { bxe_read_dmae(sc, i, main_mem_width / 4); bxe_write_dmae(sc, BXE_SP_MAPPING(sc, wb_data), i, main_mem_width / 4); } /* Clear HC parity attention */ REG_RD(sc, main_mem_prty_clr); } #if 1 /* Enable STORMs SP logging */ REG_WR8(sc, BAR_USTRORM_INTMEM + USTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_TSTRORM_INTMEM + TSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_XSTRORM_INTMEM + XSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); #endif elink_phy_probe(&sc->link_params); return (0); } static void bxe_link_reset(struct bxe_softc *sc) { if (!BXE_NOMCP(sc)) { BXE_PHY_LOCK(sc); elink_lfa_reset(&sc->link_params, &sc->link_vars); BXE_PHY_UNLOCK(sc); } else { if (!CHIP_REV_IS_SLOW(sc)) { BLOGW(sc, "Bootcode is missing - cannot reset link\n"); } } } static void bxe_reset_port(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t val; /* reset physical Link */ bxe_link_reset(sc); REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0); /* Do not rcv packets to BRB */ REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK + port*4, 0x0); /* Do not direct rcv packets that are not for MCP to the BRB */ REG_WR(sc, (port ? NIG_REG_LLH1_BRB1_NOT_MCP : NIG_REG_LLH0_BRB1_NOT_MCP), 0x0); /* Configure AEU */ REG_WR(sc, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4, 0); DELAY(100000); /* Check for BRB port occupancy */ val = REG_RD(sc, BRB1_REG_PORT_NUM_OCC_BLOCKS_0 + port*4); if (val) { BLOGD(sc, DBG_LOAD, "BRB1 is not empty, %d blocks are occupied\n", val); } /* TODO: Close Doorbell port? */ } static void bxe_ilt_wr(struct bxe_softc *sc, uint32_t index, bus_addr_t addr) { int reg; uint32_t wb_write[2]; if (CHIP_IS_E1(sc)) { reg = PXP2_REG_RQ_ONCHIP_AT + index*8; } else { reg = PXP2_REG_RQ_ONCHIP_AT_B0 + index*8; } wb_write[0] = ONCHIP_ADDR1(addr); wb_write[1] = ONCHIP_ADDR2(addr); REG_WR_DMAE(sc, reg, wb_write, 2); } static void bxe_clear_func_ilt(struct bxe_softc *sc, uint32_t func) { uint32_t i, base = FUNC_ILT_BASE(func); for (i = base; i < base + ILT_PER_FUNC; i++) { bxe_ilt_wr(sc, i, 0); } } static void bxe_reset_func(struct bxe_softc *sc) { struct bxe_fastpath *fp; int port = SC_PORT(sc); int func = SC_FUNC(sc); int i; /* Disable the function in the FW */ REG_WR8(sc, BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(func), 0); /* FP SBs */ FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_STATE_OFFSET(fp->fw_sb_id), SB_DISABLED); } #if 0 if (CNIC_LOADED(sc)) { /* CNIC SB */ REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_STATE_OFFSET (bxe_cnic_fw_sb_id(sc)), SB_DISABLED); } #endif /* SP SB */ REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_DATA_STATE_OFFSET(func), SB_DISABLED); for (i = 0; i < XSTORM_SPQ_DATA_SIZE / 4; i++) { REG_WR(sc, BAR_XSTRORM_INTMEM + XSTORM_SPQ_DATA_OFFSET(func), 0); } /* Configure IGU */ if (sc->devinfo.int_block == INT_BLOCK_HC) { REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } else { REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, 0); REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, 0); } if (CNIC_LOADED(sc)) { /* Disable Timer scan */ REG_WR(sc, TM_REG_EN_LINEAR0_TIMER + port*4, 0); /* * Wait for at least 10ms and up to 2 second for the timers * scan to complete */ for (i = 0; i < 200; i++) { DELAY(10000); if (!REG_RD(sc, TM_REG_LIN0_SCAN_ON + port*4)) break; } } /* Clear ILT */ bxe_clear_func_ilt(sc, func); /* * Timers workaround bug for E2: if this is vnic-3, * we need to set the entire ilt range for this timers. */ if (!CHIP_IS_E1x(sc) && SC_VN(sc) == 3) { struct ilt_client_info ilt_cli; /* use dummy TM client */ memset(&ilt_cli, 0, sizeof(struct ilt_client_info)); ilt_cli.start = 0; ilt_cli.end = ILT_NUM_PAGE_ENTRIES - 1; ilt_cli.client_num = ILT_CLIENT_TM; ecore_ilt_boundry_init_op(sc, &ilt_cli, 0, INITOP_CLEAR); } /* this assumes that reset_port() called before reset_func()*/ if (!CHIP_IS_E1x(sc)) { bxe_pf_disable(sc); } sc->dmae_ready = 0; } static int bxe_gunzip_init(struct bxe_softc *sc) { return (0); } static void bxe_gunzip_end(struct bxe_softc *sc) { return; } static int bxe_init_firmware(struct bxe_softc *sc) { if (CHIP_IS_E1(sc)) { ecore_init_e1_firmware(sc); sc->iro_array = e1_iro_arr; } else if (CHIP_IS_E1H(sc)) { ecore_init_e1h_firmware(sc); sc->iro_array = e1h_iro_arr; } else if (!CHIP_IS_E1x(sc)) { ecore_init_e2_firmware(sc); sc->iro_array = e2_iro_arr; } else { BLOGE(sc, "Unsupported chip revision\n"); return (-1); } return (0); } static void bxe_release_firmware(struct bxe_softc *sc) { /* Do nothing */ return; } static int ecore_gunzip(struct bxe_softc *sc, const uint8_t *zbuf, int len) { /* XXX : Implement... */ BLOGD(sc, DBG_LOAD, "ECORE_GUNZIP NOT IMPLEMENTED\n"); return (FALSE); } static void ecore_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val) { bxe_reg_wr_ind(sc, addr, val); } static void ecore_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { bxe_write_dmae_phys_len(sc, phys_addr, addr, len); } void ecore_storm_memset_struct(struct bxe_softc *sc, uint32_t addr, size_t size, uint32_t *data) { uint8_t i; for (i = 0; i < size/4; i++) { REG_WR(sc, addr + (i * 4), data[i]); } } Index: head/sys/dev/ixgbe/ixgbe.c =================================================================== --- head/sys/dev/ixgbe/ixgbe.c (revision 263101) +++ head/sys/dev/ixgbe/ixgbe.c (revision 263102) @@ -1,5831 +1,5827 @@ /****************************************************************************** Copyright (c) 2001-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "ixgbe.h" /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int ixgbe_display_debug_stats = 0; /********************************************************************* * Driver version *********************************************************************/ char ixgbe_driver_version[] = "2.5.15"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixgbe_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixgbe_vendor_info_t ixgbe_vendor_info_array[] = { {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings *********************************************************************/ static char *ixgbe_strings[] = { "Intel(R) PRO/10GbE PCI-Express Network Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixgbe_probe(device_t); static int ixgbe_attach(device_t); static int ixgbe_detach(device_t); static int ixgbe_shutdown(device_t); #ifdef IXGBE_LEGACY_TX static void ixgbe_start(struct ifnet *); static void ixgbe_start_locked(struct tx_ring *, struct ifnet *); #else /* ! IXGBE_LEGACY_TX */ static int ixgbe_mq_start(struct ifnet *, struct mbuf *); static int ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *); static void ixgbe_qflush(struct ifnet *); static void ixgbe_deferred_mq_start(void *, int); #endif /* IXGBE_LEGACY_TX */ static int ixgbe_ioctl(struct ifnet *, u_long, caddr_t); static void ixgbe_init(void *); static void ixgbe_init_locked(struct adapter *); static void ixgbe_stop(void *); static void ixgbe_media_status(struct ifnet *, struct ifmediareq *); static int ixgbe_media_change(struct ifnet *); static void ixgbe_identify_hardware(struct adapter *); static int ixgbe_allocate_pci_resources(struct adapter *); static void ixgbe_get_slot_info(struct ixgbe_hw *); static int ixgbe_allocate_msix(struct adapter *); static int ixgbe_allocate_legacy(struct adapter *); static int ixgbe_allocate_queues(struct adapter *); static int ixgbe_setup_msix(struct adapter *); static void ixgbe_free_pci_resources(struct adapter *); static void ixgbe_local_timer(void *); static int ixgbe_setup_interface(device_t, struct adapter *); static void ixgbe_config_link(struct adapter *); static int ixgbe_allocate_transmit_buffers(struct tx_ring *); static int ixgbe_setup_transmit_structures(struct adapter *); static void ixgbe_setup_transmit_ring(struct tx_ring *); static void ixgbe_initialize_transmit_units(struct adapter *); static void ixgbe_free_transmit_structures(struct adapter *); static void ixgbe_free_transmit_buffers(struct tx_ring *); static int ixgbe_allocate_receive_buffers(struct rx_ring *); static int ixgbe_setup_receive_structures(struct adapter *); static int ixgbe_setup_receive_ring(struct rx_ring *); static void ixgbe_initialize_receive_units(struct adapter *); static void ixgbe_free_receive_structures(struct adapter *); static void ixgbe_free_receive_buffers(struct rx_ring *); static void ixgbe_setup_hw_rsc(struct rx_ring *); static void ixgbe_enable_intr(struct adapter *); static void ixgbe_disable_intr(struct adapter *); static void ixgbe_update_stats_counters(struct adapter *); static void ixgbe_txeof(struct tx_ring *); static bool ixgbe_rxeof(struct ix_queue *); static void ixgbe_rx_checksum(u32, struct mbuf *, u32); static void ixgbe_set_promisc(struct adapter *); static void ixgbe_set_multi(struct adapter *); static void ixgbe_update_link_status(struct adapter *); static void ixgbe_refresh_mbufs(struct rx_ring *, int); static int ixgbe_xmit(struct tx_ring *, struct mbuf **); static int ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS); static int ixgbe_set_advertise(SYSCTL_HANDLER_ARGS); static int ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS); static int ixgbe_dma_malloc(struct adapter *, bus_size_t, struct ixgbe_dma_alloc *, int); static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *); static int ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static int ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static void ixgbe_set_ivar(struct adapter *, u8, u8, s8); static void ixgbe_configure_ivars(struct adapter *); static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); static void ixgbe_setup_vlan_hw_support(struct adapter *); static void ixgbe_register_vlan(void *, struct ifnet *, u16); static void ixgbe_unregister_vlan(void *, struct ifnet *, u16); static void ixgbe_add_hw_stats(struct adapter *adapter); static __inline void ixgbe_rx_discard(struct rx_ring *, int); static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, struct mbuf *, u32); static void ixgbe_enable_rx_drop(struct adapter *); static void ixgbe_disable_rx_drop(struct adapter *); /* Support for pluggable optic modules */ static bool ixgbe_sfp_probe(struct adapter *); static void ixgbe_setup_optics(struct adapter *); /* Legacy (single vector interrupt handler */ static void ixgbe_legacy_irq(void *); /* The MSI/X Interrupt handlers */ static void ixgbe_msix_que(void *); static void ixgbe_msix_link(void *); /* Deferred interrupt tasklets */ static void ixgbe_handle_que(void *, int); static void ixgbe_handle_link(void *, int); static void ixgbe_handle_msf(void *, int); static void ixgbe_handle_mod(void *, int); #ifdef IXGBE_FDIR static void ixgbe_atr(struct tx_ring *, struct mbuf *); static void ixgbe_reinit_fdir(void *, int); #endif /* Missing shared code prototype */ extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixgbe_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixgbe_probe), DEVMETHOD(device_attach, ixgbe_attach), DEVMETHOD(device_detach, ixgbe_detach), DEVMETHOD(device_shutdown, ixgbe_shutdown), DEVMETHOD_END }; static driver_t ixgbe_driver = { "ix", ixgbe_methods, sizeof(struct adapter), }; devclass_t ixgbe_devclass; DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0); MODULE_DEPEND(ixgbe, pci, 1, 1, 1); MODULE_DEPEND(ixgbe, ether, 1, 1, 1); /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0, "IXGBE driver parameters"); /* ** AIM: Adaptive Interrupt Moderation ** which means that the interrupt rate ** is varied over time based on the ** traffic for that interrupt vector */ static int ixgbe_enable_aim = TRUE; TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim); SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RW, &ixgbe_enable_aim, 0, "Enable adaptive interrupt moderation"); static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate); SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); /* How many packets rxeof tries to clean at a time */ static int ixgbe_rx_process_limit = 256; TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit); SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &ixgbe_rx_process_limit, 0, "Maximum number of received packets to process at a time," "-1 means unlimited"); /* How many packets txeof tries to clean at a time */ static int ixgbe_tx_process_limit = 256; TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit); SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, &ixgbe_tx_process_limit, 0, "Maximum number of sent packets to process at a time," "-1 means unlimited"); /* ** Smart speed setting, default to on ** this only works as a compile option ** right now as its during attach, set ** this to 'ixgbe_smart_speed_off' to ** disable. */ static int ixgbe_smart_speed = ixgbe_smart_speed_on; /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. */ static int ixgbe_enable_msix = 1; TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix); SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, "Enable MSI-X interrupts"); /* * Number of Queues, can be set to 0, * it then autoconfigures based on the * number of cpus with a max of 8. This * can be overriden manually here. */ static int ixgbe_num_queues = 0; TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues); SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0, "Number of queues to configure, 0 indicates autoconfigure"); /* ** Number of TX descriptors per ring, ** setting higher than RX as this seems ** the better performing choice. */ static int ixgbe_txd = PERFORM_TXD; TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd); SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0, "Number of receive descriptors per queue"); /* Number of RX descriptors per ring */ static int ixgbe_rxd = PERFORM_RXD; TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd); SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0, "Number of receive descriptors per queue"); /* ** Defining this on will allow the use ** of unsupported SFP+ modules, note that ** doing so you are on your own :) */ static int allow_unsupported_sfp = FALSE; TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp); /* ** HW RSC control: ** this feature only works with ** IPv4, and only on 82599 and later. ** Also this will cause IP forwarding to ** fail and that can't be controlled by ** the stack as LRO can. For all these ** reasons I've deemed it best to leave ** this off and not bother with a tuneable ** interface, this would need to be compiled ** to enable. */ static bool ixgbe_rsc_enable = FALSE; /* Keep running tab on them for sanity check */ static int ixgbe_total_ports; #ifdef IXGBE_FDIR /* ** For Flow Director: this is the ** number of TX packets we sample ** for the filter pool, this means ** every 20th packet will be probed. ** ** This feature can be disabled by ** setting this to 0. */ static int atr_sample_rate = 20; /* ** Flow Director actually 'steals' ** part of the packet buffer as its ** filter pool, this variable controls ** how much it uses: ** 0 = 64K, 1 = 128K, 2 = 256K */ static int fdir_pballoc = 1; #endif #ifdef DEV_NETMAP /* * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to * be a reference on how to implement netmap support in a driver. * Additional comments are in ixgbe_netmap.h . * * contains functions for netmap support * that extend the standard driver. */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * ixgbe_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixgbe_probe(device_t dev) { ixgbe_vendor_info_t *ent; u16 pci_vendor_id = 0; u16 pci_device_id = 0; u16 pci_subvendor_id = 0; u16 pci_subdevice_id = 0; char adapter_name[256]; INIT_DEBUGOUT("ixgbe_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixgbe_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(adapter_name, "%s, Version - %s", ixgbe_strings[ent->index], ixgbe_driver_version); device_set_desc_copy(dev, adapter_name); ++ixgbe_total_ports; return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int ixgbe_attach(device_t dev) { struct adapter *adapter; struct ixgbe_hw *hw; int error = 0; u16 csum; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_attach: begin"); /* Allocate, clear, and link in our adapter structure */ adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; hw = &adapter->hw; /* Core Lock Init*/ IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL APIs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW, &ixgbe_enable_aim, 1, "Interrupt Moderation"); /* ** Allow a kind of speed control by forcing the autoneg ** advertised speed list to only a certain value, this ** supports 1G on 82599 devices, and 100Mb on x540. */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_set_advertise, "I", "Link Speed"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_set_thermal_test, "I", "Thermal Test"); /* Set up the timer callout */ callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware revision */ ixgbe_identify_hardware(adapter); /* Do base PCI setup - map BAR0 */ if (ixgbe_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_out; } /* Do descriptor calc and sanity checks */ if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) { device_printf(dev, "TXD config issue, using default!\n"); adapter->num_tx_desc = DEFAULT_TXD; } else adapter->num_tx_desc = ixgbe_txd; /* ** With many RX rings it is easy to exceed the ** system mbuf allocation. Tuning nmbclusters ** can alleviate this. */ if (nmbclusters > 0 ) { int s; s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports; if (s > nmbclusters) { device_printf(dev, "RX Descriptors exceed " "system mbuf max, using default instead!\n"); ixgbe_rxd = DEFAULT_RXD; } } if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || ixgbe_rxd < MIN_TXD || ixgbe_rxd > MAX_TXD) { device_printf(dev, "RXD config issue, using default!\n"); adapter->num_rx_desc = DEFAULT_RXD; } else adapter->num_rx_desc = ixgbe_rxd; /* Allocate our TX/RX Queues */ if (ixgbe_allocate_queues(adapter)) { error = ENOMEM; goto err_out; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Initialize the shared code */ hw->allow_unsupported_sfp = allow_unsupported_sfp; error = ixgbe_init_shared_code(hw); if (error == IXGBE_ERR_SFP_NOT_PRESENT) { /* ** No optics in this port, set up ** so the timer routine will probe ** for later insertion. */ adapter->sfp_probe = TRUE; error = 0; } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev,"Unsupported SFP+ module detected!\n"); error = EIO; goto err_late; } else if (error) { device_printf(dev,"Unable to initialize the shared code\n"); error = EIO; goto err_late; } /* Make sure we have a good EEPROM before we read from it */ if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) { device_printf(dev,"The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } error = ixgbe_init_hw(hw); switch (error) { case IXGBE_ERR_EEPROM_VERSION: device_printf(dev, "This device is a pre-production adapter/" "LOM. Please be aware there may be issues associated " "with your hardware.\n If you are experiencing problems " "please contact your Intel or hardware representative " "who provided you with this hardware.\n"); break; case IXGBE_ERR_SFP_NOT_SUPPORTED: device_printf(dev,"Unsupported SFP+ Module\n"); error = EIO; goto err_late; case IXGBE_ERR_SFP_NOT_PRESENT: device_printf(dev,"No SFP+ Module found\n"); /* falls thru */ default: break; } /* Detect and set physical type */ ixgbe_setup_optics(adapter); if ((adapter->msix > 1) && (ixgbe_enable_msix)) error = ixgbe_allocate_msix(adapter); else error = ixgbe_allocate_legacy(adapter); if (error) goto err_late; /* Setup OS specific network interface */ if (ixgbe_setup_interface(dev, adapter) != 0) goto err_late; /* Initialize statistics */ ixgbe_update_stats_counters(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); /* ** Check PCIE slot type/speed/width */ ixgbe_get_slot_info(hw); /* Set an initial default flow control value */ adapter->fc = ixgbe_fc_full; /* let hardware know driver is loaded */ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); ixgbe_add_hw_stats(adapter); #ifdef DEV_NETMAP ixgbe_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("ixgbe_attach: end"); return (0); err_late: ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); err_out: if (adapter->ifp != NULL) if_free(adapter->ifp); ixgbe_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int ixgbe_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } IXGBE_CORE_LOCK(adapter); ixgbe_stop(adapter); IXGBE_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if (que->tq) { #ifndef IXGBE_LEGACY_TX taskqueue_drain(que->tq, &txr->txq_task); #endif taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } /* Drain the Link queue */ if (adapter->tq) { taskqueue_drain(adapter->tq, &adapter->link_task); taskqueue_drain(adapter->tq, &adapter->mod_task); taskqueue_drain(adapter->tq, &adapter->msf_task); #ifdef IXGBE_FDIR taskqueue_drain(adapter->tq, &adapter->fdir_task); #endif taskqueue_free(adapter->tq); } /* let hardware know driver is unloading */ ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(adapter->ifp); #endif /* DEV_NETMAP */ ixgbe_free_pci_resources(adapter); bus_generic_detach(dev); if_free(adapter->ifp); ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); free(adapter->mta, M_DEVBUF); IXGBE_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixgbe_shutdown(device_t dev) { struct adapter *adapter = device_get_softc(dev); IXGBE_CORE_LOCK(adapter); ixgbe_stop(adapter); IXGBE_CORE_UNLOCK(adapter); return (0); } #ifdef IXGBE_LEGACY_TX /********************************************************************* * Transmit entry point * * ixgbe_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * In case resources are not available stack is notified and * the packet is requeued. **********************************************************************/ static void ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) { struct mbuf *m_head; struct adapter *adapter = txr->adapter; IXGBE_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; if (!adapter->link_active) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (ixgbe_xmit(txr, &m_head)) { if (m_head != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); /* Set watchdog on */ txr->watchdog_time = ticks; txr->queue_status = IXGBE_QUEUE_WORKING; } return; } /* * Legacy TX start - called by the stack, this * always uses the first tx ring, and should * not be used with multiqueue tx enabled. */ static void ixgbe_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_TX_LOCK(txr); ixgbe_start_locked(txr, ifp); IXGBE_TX_UNLOCK(txr); } return; } #else /* ! IXGBE_LEGACY_TX */ /* ** Multiqueue Transmit driver ** */ static int ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct ix_queue *que; struct tx_ring *txr; int i, err = 0; /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; else i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; err = drbr_enqueue(ifp, txr->br, m); if (err) return (err); if (IXGBE_TX_TRYLOCK(txr)) { err = ixgbe_mq_start_locked(ifp, txr); IXGBE_TX_UNLOCK(txr); } else taskqueue_enqueue(que->tq, &txr->txq_task); return (err); } static int ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int enqueued = 0, err = 0; if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) return (ENETDOWN); /* Process the queue */ #if __FreeBSD_version < 901504 next = drbr_dequeue(ifp, txr->br); while (next != NULL) { if ((err = ixgbe_xmit(txr, &next)) != 0) { if (next != NULL) err = drbr_enqueue(ifp, txr->br, next); #else while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = ixgbe_xmit(txr, &next)) != 0) { if (next == NULL) { drbr_advance(ifp, txr->br); } else { drbr_putback(ifp, txr->br, next); } #endif break; } #if __FreeBSD_version >= 901504 drbr_advance(ifp, txr->br); #endif enqueued++; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; #if __FreeBSD_version < 901504 next = drbr_dequeue(ifp, txr->br); #endif } if (enqueued > 0) { /* Set watchdog on */ txr->queue_status = IXGBE_QUEUE_WORKING; txr->watchdog_time = ticks; } if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) ixgbe_txeof(txr); return (err); } /* * Called from a taskqueue to drain queued transmit packets. */ static void ixgbe_deferred_mq_start(void *arg, int pending) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; IXGBE_TX_LOCK(txr); if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); IXGBE_TX_UNLOCK(txr); } /* ** Flush all ring buffers */ static void ixgbe_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { IXGBE_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IXGBE_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* IXGBE_LEGACY_TX */ /********************************************************************* * Ioctl entry point * * ixgbe_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ixgbe_hw *hw = &adapter->hw; struct ifreq *ifr = (struct ifreq *) data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixgbe_init(adapter); if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); } else error = ether_ioctl(ifp, command, data); #endif break; case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) { error = EINVAL; } else { IXGBE_CORE_LOCK(adapter); ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXGBE_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixgbe_set_promisc(adapter); } } else ixgbe_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_stop(adapter); adapter->if_flags = ifp->if_flags; IXGBE_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_disable_intr(adapter); ixgbe_set_multi(adapter); ixgbe_enable_intr(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } VLAN_CAPABILITIES(ifp); break; } case SIOCGI2C: { struct ixgbe_i2c_req i2c; IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (error) break; if ((i2c.dev_addr != 0xA0) || (i2c.dev_addr != 0xA2)){ error = EINVAL; break; } hw->phy.ops.read_i2c_byte(hw, i2c.offset, i2c.dev_addr, i2c.data); error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } default: IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ #define IXGBE_MHADD_MFS_SHIFT 16 static void ixgbe_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; u32 k, txdctl, mhadd, gpie; u32 rxdctl, rxctrl; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixgbe_init_locked: begin"); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); hw->addr_ctrl.rar_used_count = 1; /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 if (hw->mac.type != ixgbe_mac_82598EB) ifp->if_hwassist |= CSUM_SCTP; #endif } /* Prepare transmit descriptors and buffers */ if (ixgbe_setup_transmit_structures(adapter)) { device_printf(dev,"Could not setup transmit structures\n"); ixgbe_stop(adapter); return; } ixgbe_init_hw(hw); ixgbe_initialize_transmit_units(adapter); /* Setup Multicast table */ ixgbe_set_multi(adapter); /* ** Determine the correct mbuf pool ** for doing jumbo frames */ if (adapter->max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else if (adapter->max_frame_size <= 9216) adapter->rx_mbuf_sz = MJUM9BYTES; else adapter->rx_mbuf_sz = MJUM16BYTES; /* Prepare receive descriptors and buffers */ if (ixgbe_setup_receive_structures(adapter)) { device_printf(dev,"Could not setup receive structures\n"); ixgbe_stop(adapter); return; } /* Configure RX settings */ ixgbe_initialize_receive_units(adapter); gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE); /* Enable Fan Failure Interrupt */ gpie |= IXGBE_SDP1_GPIEN; /* Add for Module detection */ if (hw->mac.type == ixgbe_mac_82599EB) gpie |= IXGBE_SDP2_GPIEN; /* Thermal Failure Detection */ if (hw->mac.type == ixgbe_mac_X540) gpie |= IXGBE_SDP0_GPIEN; if (adapter->msix > 1) { /* Enable Enhanced MSIX mode */ gpie |= IXGBE_GPIE_MSIX_MODE; gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD; } IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); /* Set MTU size */ if (ifp->if_mtu > ETHERMTU) { mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } /* Now enable all the queues */ for (int i = 0; i < adapter->num_queues; i++) { txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ txdctl |= (8 << 16); /* * When the internal queue falls below PTHRESH (32), * start prefetching as long as there are at least * HTHRESH (1) buffers ready. The values are taken * from the Intel linux driver 3.8.21. * Prefetching enables tx line rate even with 1 queue. */ txdctl |= (32 << 0) | (1 << 8); IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl); } for (int i = 0; i < adapter->num_queues; i++) { rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); if (hw->mac.type == ixgbe_mac_82598EB) { /* ** PTHRESH = 21 ** HTHRESH = 4 ** WTHRESH = 8 */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; } rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl); for (k = 0; k < 10; k++) { if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) & IXGBE_RXDCTL_ENABLE) break; else msec_delay(1); } wmb(); #ifdef DEV_NETMAP /* * In netmap mode, we must preserve the buffers made * available to userspace before the if_init() * (this is true by default on the TX side, because * init makes all buffers available to userspace). * * netmap_reset() and the device specific routines * (e.g. ixgbe_setup_receive_rings()) map these * buffers at the end of the NIC ring, so here we * must set the RDT (tail) register to make sure * they are not overwritten. * * In this driver the NIC ring starts at RDH = 0, * RDT points to the last slot available for reception (?), * so RDT = num_rx_desc - 1 means the whole ring is available. */ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t); } else #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1); } /* Enable Receive engine */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); if (hw->mac.type == ixgbe_mac_82598EB) rxctrl |= IXGBE_RXCTRL_DMBYPS; rxctrl |= IXGBE_RXCTRL_RXEN; ixgbe_enable_rx_dma(hw, rxctrl); callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); /* Set up MSI/X routing */ if (ixgbe_enable_msix) { ixgbe_configure_ivars(adapter); /* Set up auto-mask */ if (hw->mac.type == ixgbe_mac_82598EB) IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); else { IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); } } else { /* Simple settings for Legacy/MSI */ ixgbe_set_ivar(adapter, 0, 0, 0); ixgbe_set_ivar(adapter, 0, 0, 1); IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); } #ifdef IXGBE_FDIR /* Init Flow director */ if (hw->mac.type != ixgbe_mac_82598EB) { u32 hdrm = 32 << fdir_pballoc; hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL); ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc); } #endif /* ** Check on any SFP devices that ** need to be kick-started */ if (hw->phy.type == ixgbe_phy_none) { int err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } } /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR); /* Config/Enable Link */ ixgbe_config_link(adapter); /* Hardware Packet Buffer & Flow Control setup */ { u32 rxpb, frame, size, tmp; frame = adapter->max_frame_size; /* Calculate High Water */ if (hw->mac.type == ixgbe_mac_X540) tmp = IXGBE_DV_X540(frame, frame); else tmp = IXGBE_DV(frame, frame); size = IXGBE_BT2KB(tmp); rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; hw->fc.high_water[0] = rxpb - size; /* Now calculate Low Water */ if (hw->mac.type == ixgbe_mac_X540) tmp = IXGBE_LOW_DV_X540(frame); else tmp = IXGBE_LOW_DV(frame); hw->fc.low_water[0] = IXGBE_BT2KB(tmp); hw->fc.requested_mode = adapter->fc; hw->fc.pause_time = IXGBE_FC_PAUSE; hw->fc.send_xon = TRUE; } /* Initialize the FC settings */ ixgbe_start_hw(hw); /* Set up VLAN support and filter */ ixgbe_setup_vlan_hw_support(adapter); /* And now turn on interrupts */ ixgbe_enable_intr(adapter); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; return; } static void ixgbe_init(void *arg) { struct adapter *adapter = arg; IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); return; } /* ** ** MSIX Interrupt Handlers and Tasklets ** */ static inline void ixgbe_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } } static inline void ixgbe_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); } } static void ixgbe_handle_que(void *context, int pending) { struct ix_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifndef IXGBE_LEGACY_TX if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ixgbe_start_locked(txr, ifp); #endif IXGBE_TX_UNLOCK(txr); } /* Reenable this interrupt */ if (que->res != NULL) ixgbe_enable_queue(adapter, que->msix); else ixgbe_enable_intr(adapter); return; } /********************************************************************* * * Legacy Interrupt Service routine * **********************************************************************/ static void ixgbe_legacy_irq(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; bool more; u32 reg_eicr; reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR); ++que->irqs; if (reg_eicr == 0) { ixgbe_enable_intr(adapter); return; } more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifdef IXGBE_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ixgbe_start_locked(txr, ifp); #else if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #endif IXGBE_TX_UNLOCK(txr); /* Check for fan failure */ if ((hw->phy.media_type == ixgbe_media_type_copper) && (reg_eicr & IXGBE_EICR_GPI_SDP1)) { device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! " "REPLACE IMMEDIATELY!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1); } /* Link status change */ if (reg_eicr & IXGBE_EICR_LSC) taskqueue_enqueue(adapter->tq, &adapter->link_task); if (more) taskqueue_enqueue(que->tq, &que->que_task); else ixgbe_enable_intr(adapter); return; } /********************************************************************* * * MSIX Queue Interrupt Service routine * **********************************************************************/ void ixgbe_msix_que(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; bool more; u32 newitr = 0; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; ixgbe_disable_queue(adapter, que->msix); ++que->irqs; more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifdef IXGBE_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(ifp->if_snd)) ixgbe_start_locked(txr, ifp); #else if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #endif IXGBE_TX_UNLOCK(txr); /* Do AIM now? */ if (ixgbe_enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. */ if (que->eitr_setting) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); if (adapter->hw.mac.type == ixgbe_mac_82598EB) newitr |= newitr << 16; else newitr |= IXGBE_EITR_CNT_WDIS; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: if (more) taskqueue_enqueue(que->tq, &que->que_task); else ixgbe_enable_queue(adapter, que->msix); return; } static void ixgbe_msix_link(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 reg_eicr; ++adapter->link_irq; /* First get the cause */ reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS); /* Be sure the queue bits are not cleared */ reg_eicr &= ~IXGBE_EICR_RTX_QUEUE; /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr); /* Link status change */ if (reg_eicr & IXGBE_EICR_LSC) taskqueue_enqueue(adapter->tq, &adapter->link_task); if (adapter->hw.mac.type != ixgbe_mac_82598EB) { #ifdef IXGBE_FDIR if (reg_eicr & IXGBE_EICR_FLOW_DIR) { /* This is probably overkill :) */ if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) return; /* Disable the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); taskqueue_enqueue(adapter->tq, &adapter->fdir_task); } else #endif if (reg_eicr & IXGBE_EICR_ECC) { device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! " "Please Reboot!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } else if (reg_eicr & IXGBE_EICR_GPI_SDP1) { /* Clear the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); taskqueue_enqueue(adapter->tq, &adapter->msf_task); } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) { /* Clear the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2); taskqueue_enqueue(adapter->tq, &adapter->mod_task); } } /* Check for fan failure */ if ((hw->device_id == IXGBE_DEV_ID_82598AT) && (reg_eicr & IXGBE_EICR_GPI_SDP1)) { device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! " "REPLACE IMMEDIATELY!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); } /* Check for over temp condition */ if ((hw->mac.type == ixgbe_mac_X540) && (reg_eicr & IXGBE_EICR_TS)) { device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! " "PHY IS SHUT DOWN!!\n"); device_printf(adapter->dev, "System shutdown required\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); } IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct adapter *adapter = ifp->if_softc; INIT_DEBUGOUT("ixgbe_media_status: begin"); IXGBE_CORE_LOCK(adapter); ixgbe_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IXGBE_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= adapter->optics | IFM_FDX; break; } IXGBE_CORE_UNLOCK(adapter); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int ixgbe_media_change(struct ifnet * ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("ixgbe_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.phy.autoneg_advertised = IXGBE_LINK_SPEED_100_FULL | IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_10GB_FULL; break; default: device_printf(adapter->dev, "Only auto media type\n"); return (EINVAL); } return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors, allowing the * TX engine to transmit the packets. * - return 0 on success, positive on failure * **********************************************************************/ static int ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; u32 olinfo_status = 0, cmd_type_len; int i, j, error, nsegs; int first; bool remap = TRUE; struct mbuf *m_head; bus_dma_segment_t segs[adapter->num_segs]; bus_dmamap_t map; struct ixgbe_tx_buf *txbuf; union ixgbe_adv_tx_desc *txd = NULL; m_head = *m_headp; /* Basic descriptor defines */ cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); if (m_head->m_flags & M_VLANTAG) cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; /* * Important to capture the first descriptor * used because it will contain the index of * the one we tell the hardware to report back */ first = txr->next_avail_desc; txbuf = &txr->tx_buffers[first]; map = txbuf->map; /* * Map the packet for DMA. */ retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(error)) { struct mbuf *m; switch (error) { case EFBIG: /* Try it again? - one try */ if (remap == TRUE) { remap = FALSE; m = m_defrag(*m_headp, M_NOWAIT); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; goto retry; } else return (error); case ENOMEM: txr->no_tx_dma_setup++; return (error); default: txr->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } /* Make certain there are enough descriptors */ if (nsegs > txr->tx_avail - 2) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* ** Set up the appropriate offload context ** this will consume the first descriptor */ error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); if (__predict_false(error)) { if (error == ENOBUFS) *m_headp = NULL; return (error); } #ifdef IXGBE_FDIR /* Do the flow director magic */ if ((txr->atr_sample) && (!adapter->fdir_reinit)) { ++txr->atr_count; if (txr->atr_count >= atr_sample_rate) { ixgbe_atr(txr, m_head); txr->atr_count = 0; } } #endif i = txr->next_avail_desc; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txbuf = &txr->tx_buffers[i]; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(txr->txd_cmd | cmd_type_len |seglen); txd->read.olinfo_status = htole32(olinfo_status); if (++i == txr->num_desc) i = 0; } txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); txr->tx_avail -= nsegs; txr->next_avail_desc = i; txbuf->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. */ txr->tx_buffers[first].map = txbuf->map; txbuf->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* Set the EOP descriptor that will be marked done */ txbuf = &txr->tx_buffers[first]; txbuf->eop = txd; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. */ ++txr->total_packets; IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i); return (0); } static void ixgbe_set_promisc(struct adapter *adapter) { u_int32_t reg_rctl; struct ifnet *ifp = adapter->ifp; int mcnt = 0; reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); reg_rctl &= (~IXGBE_FCTRL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= IXGBE_FCTRL_MPE; reg_rctl &= ~IXGBE_FCTRL_UPE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); } return; } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ #define IXGBE_RAR_ENTRIES 16 static void ixgbe_set_multi(struct adapter *adapter) { u32 fctrl; u8 *mta; u8 *update_ptr; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = adapter->ifp; IOCTL_DEBUGOUT("ixgbe_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS * MAX_NUM_MULTICAST_ADDRESSES); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS], IXGBE_ETH_LENGTH_OF_ADDRESS); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); if (ifp->if_flags & IFF_PROMISC) fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES || ifp->if_flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; fctrl &= ~IXGBE_FCTRL_UPE; } else fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) { update_ptr = mta; ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixgbe_mc_array_itr, TRUE); } return; } /* * This is an iterator function now needed by the multicast * shared code. It simply feeds the shared code routine the * addresses in the array of ixgbe_set_multi() one by one. */ static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { u8 *addr = *update_ptr; u8 *newptr; *vmdq = 0; newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; *update_ptr = newptr; return addr; } /********************************************************************* * Timer routine * * This routine checks for link status,updates statistics, * and runs the watchdog check. * **********************************************************************/ static void ixgbe_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int hung = 0, paused = 0; mtx_assert(&adapter->core_mtx, MA_OWNED); /* Check for pluggable optics */ if (adapter->sfp_probe) if (!ixgbe_sfp_probe(adapter)) goto out; /* Nothing to do */ ixgbe_update_link_status(adapter); ixgbe_update_stats_counters(adapter); /* * If the interface has been paused * then don't do the watchdog check */ if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) paused = 1; /* ** Check the TX queues status ** - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if ((txr->queue_status == IXGBE_QUEUE_HUNG) && (paused == 0)) ++hung; else if (txr->queue_status == IXGBE_QUEUE_WORKING) taskqueue_enqueue(que->tq, &txr->txq_task); } /* Only truely watchdog if all queues show hung */ if (hung == adapter->num_queues) goto watchdog; out: callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); return; watchdog: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)), IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me))); device_printf(dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; ixgbe_init_locked(adapter); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ static void ixgbe_update_link_status(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; if (adapter->link_up){ if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev,"Link is up %d Gbps %s \n", ((adapter->link_speed == 128)? 10:1), "Full Duplex"); adapter->link_active = TRUE; /* Update any Flow Control changes */ ixgbe_fc_enable(&adapter->hw); if_link_state_change(ifp, LINK_STATE_UP); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev,"Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); adapter->link_active = FALSE; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void ixgbe_stop(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; ifp = adapter->ifp; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixgbe_stop: begin\n"); ixgbe_disable_intr(adapter); callout_stop(&adapter->timer); /* Let the stack know...*/ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ixgbe_reset_hw(hw); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); if (hw->mac.type == ixgbe_mac_82599EB) ixgbe_stop_mac_link_on_d3_82599(hw); /* Turn off the laser - noop with no optics */ ixgbe_disable_tx_laser(hw); /* Update the stack */ adapter->link_up = FALSE; ixgbe_update_link_status(adapter); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); return; } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void ixgbe_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; /* Save off the information about this board */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* We need this here to set the num_segs below */ ixgbe_set_mac_type(hw); /* Pick up the 82599 and VF settings */ if (hw->mac.type != ixgbe_mac_82598EB) { hw->phy.smart_speed = ixgbe_smart_speed; adapter->num_segs = IXGBE_82599_SCATTER; } else adapter->num_segs = IXGBE_82598_SCATTER; return; } /********************************************************************* * * Determine optic type * **********************************************************************/ static void ixgbe_setup_optics(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int layer; layer = ixgbe_get_supported_physical_layer(hw); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { adapter->optics = IFM_10G_T; return; } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) { adapter->optics = IFM_1000_T; return; } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) { adapter->optics = IFM_1000_SX; return; } if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR | IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) { adapter->optics = IFM_10G_LR; return; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { adapter->optics = IFM_10G_SR; return; } if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) { adapter->optics = IFM_10G_TWINAX; return; } if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 | IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) { adapter->optics = IFM_10G_CX4; return; } /* If we get here just set the default */ adapter->optics = IFM_ETHER | IFM_AUTO; return; } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ static int ixgbe_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; #ifndef IXGBE_LEGACY_TX struct tx_ring *txr = adapter->tx_rings; #endif int error, rid = 0; /* MSI RID at 1 */ if (adapter->msix == 1) rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Try allocating a fast interrupt and the associated deferred * processing contexts. */ #ifndef IXGBE_LEGACY_TX TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); #endif TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq", device_get_nameunit(adapter->dev)); /* Tasklets for Link, SFP and Multispeed Fiber */ TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); #ifdef IXGBE_FDIR TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); #endif adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq, que, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(que->tq); taskqueue_free(adapter->tq); que->tq = NULL; adapter->tq = NULL; return (error); } /* For simplicity in the handlers */ adapter->que_mask = IXGBE_EIMS_ENABLE_MASK; return (0); } /********************************************************************* * * Setup MSIX Interrupt resources and handlers * **********************************************************************/ static int ixgbe_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int error, rid, vector = 0; for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { rid = vector + 1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register QUE handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; adapter->que_mask |= (u64)(1 << que->msix); /* ** Bind the msix vector, and thus the ** ring to the corresponding cpu. */ if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, i); #ifndef IXGBE_LEGACY_TX TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); #endif TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* and Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate" " bus resource: Link interrupt [%d]\n", rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; /* Tasklets for Link, SFP and Multispeed Fiber */ TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); #ifdef IXGBE_FDIR TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); #endif adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", device_get_nameunit(adapter->dev)); return (0); } /* * Setup Either MSI/X or MSI */ static int ixgbe_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int rid, want, queues, msgs; /* Override by tuneable */ if (ixgbe_enable_msix == 0) goto msi; /* First try MSI/X */ msgs = pci_msix_count(dev); if (msgs == 0) goto msi; rid = PCIR_BAR(MSIX_82598_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { rid += 4; /* 82599 maps in higher BAR */ adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); } if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; if (ixgbe_num_queues != 0) queues = ixgbe_num_queues; /* Set max queues to 8 when autoconfiguring */ else if ((ixgbe_num_queues == 0) && (queues > 8)) queues = 8; /* reflect correct sysctl value */ ixgbe_num_queues = queues; /* ** Want one vector (RX/TX pair) per queue ** plus an additional for Link. */ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSIX Configuration Problem, " "%d vectors but %d queues wanted!\n", msgs, want); goto msi; } if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); adapter->num_queues = queues; return (msgs); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rid, adapter->msix_mem); adapter->msix_mem = NULL; } msgs = 1; if (pci_alloc_msi(dev, &msgs) == 0) { device_printf(adapter->dev,"Using an MSI interrupt\n"); return (msgs); } device_printf(adapter->dev,"Using a Legacy interrupt\n"); return (0); } static int ixgbe_allocate_pci_resources(struct adapter *adapter) { int rid; device_t dev = adapter->dev; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev,"Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle; /* Legacy defaults */ adapter->num_queues = 1; adapter->hw.back = &adapter->osdep; /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ adapter->msix = ixgbe_setup_msix(adapter); return (0); } static void ixgbe_free_pci_resources(struct adapter * adapter) { struct ix_queue *que = adapter->queues; device_t dev = adapter->dev; int rid, memrid; if (adapter->hw.mac.type == ixgbe_mac_82598EB) memrid = PCIR_BAR(MSIX_82598_BAR); else memrid = PCIR_BAR(MSIX_82599_BAR); /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* ** Release all msix queue resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int ixgbe_setup_interface(device_t dev, struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp; INIT_DEBUGOUT("ixgbe_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); -#if __FreeBSD_version < 1000025 - ifp->if_baudrate = 1000000000; -#else - if_initbaudrate(ifp, IF_Gbps(10)); -#endif + ifp->if_baudrate = IF_Gbps(10); ifp->if_init = ixgbe_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixgbe_ioctl; #ifndef IXGBE_LEGACY_TX ifp->if_transmit = ixgbe_mq_start; ifp->if_qflush = ixgbe_qflush; #else ifp->if_start = ixgbe_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2; IFQ_SET_READY(&ifp->if_snd); #endif ether_ifattach(ifp, adapter->hw.mac.addr); adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_HWSTATS; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixgbe driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change, ixgbe_media_status); ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics); if (hw->device_id == IXGBE_DEV_ID_82598AT) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static void ixgbe_config_link(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; bool sfp, negotiate; sfp = ixgbe_is_sfp(hw); if (sfp) { if (hw->phy.multispeed_fiber) { hw->mac.ops.setup_sfp(hw); ixgbe_enable_tx_laser(hw); taskqueue_enqueue(adapter->tq, &adapter->msf_task); } else taskqueue_enqueue(adapter->tq, &adapter->mod_task); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); if (err) goto out; autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) err = hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (err) goto out; if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } out: return; } /******************************************************************** * Manage DMA'able memory. *******************************************************************/ static void ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs->ds_addr; return; } static int ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size, struct ixgbe_dma_alloc *dma, int mapflags) { device_t dev = adapter->dev; int r; r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &dma->dma_tag); if (r != 0) { device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; " "error %u\n", r); goto fail_0; } r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) { device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; " "error %u\n", r); goto fail_1; } r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) { device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; " "error %u\n", r); goto fail_2; } dma->dma_size = size; return (0); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); fail_1: bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (r); } static void ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); } /********************************************************************* * * Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int ixgbe_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que; struct tx_ring *txr; struct rx_ring *rxr; int rsize, tsize, error = IXGBE_SUCCESS; int txconf = 0, rxconf = 0; /* First allocate the top level queue structs */ if (!(adapter->queues = (struct ix_queue *) malloc(sizeof(struct ix_queue) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } /* First allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto tx_fail; } /* Next allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } /* For the ring itself */ tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; txr->num_desc = adapter->num_tx_desc; /* Initialize the TX side lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); /* Now allocate transmit buffers for the ring */ if (ixgbe_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #ifndef IXGBE_LEGACY_TX /* Allocate a buf ring */ txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, M_WAITOK, &txr->tx_mtx); if (txr->br == NULL) { device_printf(dev, "Critical Failure setting up buf ring\n"); error = ENOMEM; goto err_tx_desc; } #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; /* Set up some basics */ rxr->adapter = adapter; rxr->me = i; rxr->num_desc = adapter->num_rx_desc; /* Initialize the RX side lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), rxr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (ixgbe_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } /* ** Finally set up the queue holding structs */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; que->adapter = adapter; que->txr = &adapter->tx_rings[i]; que->rxr = &adapter->rx_rings[i]; } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) ixgbe_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) ixgbe_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: free(adapter->tx_rings, M_DEVBUF); tx_fail: free(adapter->queues, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int ixgbe_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct ixgbe_tx_buf *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create( bus_get_dma_tag(adapter->dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ IXGBE_TSO_SIZE, /* maxsize */ adapter->num_segs, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ ixgbe_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void ixgbe_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct ixgbe_tx_buf *txbuf; int i; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ /* Clear the old ring contents */ IXGBE_TX_LOCK(txr); #ifdef DEV_NETMAP /* * (under lock): if in netmap mode, do some consistency * checks and set slot to entry 0 of the netmap ring. */ slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < txr->num_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. * Slots in the netmap ring (indexed by "si") are * kring->nkr_hwofs positions "ahead" wrt the * corresponding slot in the NIC ring. In some drivers * (not here) nkr_hwofs can be negative. Function * netmap_idx_n2k() handles wraparounds properly. */ if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si)); } #endif /* DEV_NETMAP */ /* Clear the EOP descriptor pointer */ txbuf->eop = NULL; } #ifdef IXGBE_FDIR /* Set the rate at which we sample packets */ if (adapter->hw.mac.type != ixgbe_mac_82598EB) txr->atr_sample = atr_sample_rate; #endif /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IXGBE_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static int ixgbe_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) ixgbe_setup_transmit_ring(txr); return (0); } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void ixgbe_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct ixgbe_hw *hw = &adapter->hw; /* Setup the Base and Length of the Tx Descriptor Ring */ for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 tdba = txr->txdma.dma_paddr; u32 txctrl; IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i), adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0); IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0); /* Setup Transmit Descriptor Cmd Settings */ txr->txd_cmd = IXGBE_TXD_CMD_IFCS; txr->queue_status = IXGBE_QUEUE_IDLE; /* Set the processing limit */ txr->process_limit = ixgbe_tx_process_limit; /* Disable Head Writeback */ switch (hw->mac.type) { case ixgbe_mac_82598EB: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: default: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i)); break; } txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; switch (hw->mac.type) { case ixgbe_mac_82598EB: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: default: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl); break; } } if (hw->mac.type != ixgbe_mac_82598EB) { u32 dmatxctl, rttdcs; dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); dmatxctl |= IXGBE_DMATXCTL_TE; IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl); /* Disable arbiter to set MTQC */ rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS); rttdcs |= IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB); rttdcs &= ~IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } return; } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void ixgbe_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { IXGBE_TX_LOCK(txr); ixgbe_free_transmit_buffers(txr); ixgbe_dma_free(adapter, &txr->txdma); IXGBE_TX_UNLOCK(txr); IXGBE_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void ixgbe_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct ixgbe_tx_buf *tx_buffer; int i; INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; tx_buffer = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; if (tx_buffer->map != NULL) { bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } else if (tx_buffer->map != NULL) { bus_dmamap_unload(txr->txtag, tx_buffer->map); bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } #ifdef IXGBE_LEGACY_TX if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct ixgbe_adv_tx_context_desc *TXD; struct ether_vlan_header *eh; struct ip *ip; struct ip6_hdr *ip6; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; int ehdrlen, ip_hlen = 0; u16 etype; u8 ipproto = 0; int offload = TRUE; int ctxd = txr->next_avail_desc; u16 vtag = 0; /* First check if TSO is to be used */ if (mp->m_pkthdr.csum_flags & CSUM_TSO) return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); } else if (offload == FALSE) /* ... no offload to do */ return (0); /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } /* Set the ether header length */ vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; ipproto = ip->ip_p; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); /* XXX-BZ this will go badly in case of ext hdrs. */ ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; default: offload = FALSE; break; } vlan_macip_lens |= ip_hlen; type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; switch (ipproto) { case IPPROTO_TCP: if (mp->m_pkthdr.csum_flags & CSUM_TCP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: if (mp->m_pkthdr.csum_flags & CSUM_UDP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: if (mp->m_pkthdr.csum_flags & CSUM_SCTP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; break; #endif default: offload = FALSE; break; } if (offload) /* For the TX descriptor setup */ *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(0); /* We've consumed the first desc, adjust counters */ if (++ctxd == txr->num_desc) ctxd = 0; txr->next_avail_desc = ctxd; --txr->tx_avail; return (0); } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct ixgbe_adv_tx_context_desc *TXD; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0, paylen; u16 vtag = 0, eh_type; int ctxd, ehdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif struct tcphdr *th; /* * Determine where frame payload starts. * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; eh_type = eh->evl_proto; } else { ehdrlen = ETHER_HDR_LEN; eh_type = eh->evl_encap_proto; } switch (ntohs(eh_type)) { #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); /* XXX-BZ For now we do not pretend to support ext. hdrs. */ if (ip6->ip6_nxt != IPPROTO_TCP) return (ENXIO); ip_hlen = sizeof(struct ip6_hdr); ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; #endif #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return (ENXIO); ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; break; #endif default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(eh_type)); break; } ctxd = txr->next_avail_desc; TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; tcp_hlen = th->th_off << 2; /* This is used in the transmit desc in encap */ paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); if (++ctxd == txr->num_desc) ctxd = 0; txr->tx_avail--; txr->next_avail_desc = ctxd; *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; ++txr->tso_tx; return (0); } #ifdef IXGBE_FDIR /* ** This routine parses packet headers so that Flow ** Director can make a hashed filter table entry ** allowing traffic flows to be identified and kept ** on the same cpu. This would be a performance ** hit, but we only do it at IXGBE_FDIR_RATE of ** packets. */ static void ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) { struct adapter *adapter = txr->adapter; struct ix_queue *que; struct ip *ip; struct tcphdr *th; struct udphdr *uh; struct ether_vlan_header *eh; union ixgbe_atr_hash_dword input = {.dword = 0}; union ixgbe_atr_hash_dword common = {.dword = 0}; int ehdrlen, ip_hlen; u16 etype; eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; etype = eh->evl_proto; } else { ehdrlen = ETHER_HDR_LEN; etype = eh->evl_encap_proto; } /* Only handling IPv4 */ if (etype != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; /* check if we're UDP or TCP */ switch (ip->ip_p) { case IPPROTO_TCP: th = (struct tcphdr *)((caddr_t)ip + ip_hlen); /* src and dst are inverted */ common.port.dst ^= th->th_sport; common.port.src ^= th->th_dport; input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4; break; case IPPROTO_UDP: uh = (struct udphdr *)((caddr_t)ip + ip_hlen); /* src and dst are inverted */ common.port.dst ^= uh->uh_sport; common.port.src ^= uh->uh_dport; input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4; break; default: return; } input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag); if (mp->m_pkthdr.ether_vtag) common.flex_bytes ^= htons(ETHERTYPE_VLAN); else common.flex_bytes ^= etype; common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr; que = &adapter->queues[txr->me]; /* ** This assumes the Rx queue and Tx ** queue are bound to the same CPU */ ixgbe_fdir_add_signature_filter_82599(&adapter->hw, input, common, que->msix); } #endif /* IXGBE_FDIR */ /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void ixgbe_txeof(struct tx_ring *txr) { #ifdef DEV_NETMAP struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; #endif u32 work, processed = 0; u16 limit = txr->process_limit; struct ixgbe_tx_buf *buf; union ixgbe_adv_tx_desc *txd; mtx_assert(&txr->tx_mtx, MA_OWNED); #ifdef DEV_NETMAP if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->tx_rings[txr->me]; txd = txr->tx_base; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); /* * In netmap mode, all the work is done in the context * of the client thread. Interrupt handlers only wake up * clients, which may be sleeping on individual rings * or on a global resource for all rings. * To implement tx interrupt mitigation, we wake up the client * thread roughly every half ring, even if the NIC interrupts * more frequently. This is implemented as follows: * - ixgbe_txsync() sets kring->nr_kflags with the index of * the slot that should wake up the thread (nkr_num_slots * means the user thread should not be woken up); * - the driver ignores tx interrupts unless netmap_mitigate=0 * or the slot has the DD bit set. */ if (!netmap_mitigate || (kring->nr_kflags < kring->nkr_num_slots && txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { netmap_tx_irq(ifp, txr->me); } return; } #endif /* DEV_NETMAP */ if (txr->tx_avail == txr->num_desc) { txr->queue_status = IXGBE_QUEUE_IDLE; return; } /* Get work starting point */ work = txr->next_to_clean; buf = &txr->tx_buffers[work]; txd = &txr->tx_base[work]; work -= txr->num_desc; /* The distance to ring end */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); do { union ixgbe_adv_tx_desc *eop= buf->eop; if (eop == NULL) /* No work */ break; if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0) break; /* I/O not complete */ if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; buf->map = NULL; } buf->eop = NULL; ++txr->tx_avail; /* We clean the range if multi segment */ while (txd != eop) { ++txd; ++buf; ++work; /* wrap the ring? */ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; buf->map = NULL; } ++txr->tx_avail; buf->eop = NULL; } ++txr->packets; ++processed; txr->watchdog_time = ticks; /* Try the next packet */ ++txd; ++buf; ++work; /* reset with a wrap */ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } prefetch(txd); } while (__predict_true(--limit)); bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); work += txr->num_desc; txr->next_to_clean = work; /* ** Watchdog calculation, we know there's ** work outstanding or the first return ** would have been taken, so none processed ** for too long indicates a hang. */ if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG)) txr->queue_status = IXGBE_QUEUE_HUNG; if (txr->tx_avail == txr->num_desc) txr->queue_status = IXGBE_QUEUE_IDLE; return; } /********************************************************************* * * Refresh mbuf buffers for RX descriptor rings * - now keeps its own state so discards due to resource * exhaustion are unnecessary, if an mbuf cannot be obtained * it just returns, keeping its placeholder, thus it can simply * be recalled to try again. * **********************************************************************/ static void ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; bus_dma_segment_t seg[1]; struct ixgbe_rx_buf *rxbuf; struct mbuf *mp; int i, j, nsegs, error; bool refreshed = FALSE; i = j = rxr->next_to_refresh; /* Control the loop with one beyond */ if (++j == rxr->num_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->buf == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rxr->mbuf_sz); if (mp == NULL) goto update; if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) m_adj(mp, ETHER_ALIGN); } else mp = rxbuf->buf; mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; /* If we're dealing with an mbuf that was copied rather * than replaced, there's no need to go through busdma. */ if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: payload dmamap load" " failure - %d\n", error); m_free(mp); rxbuf->buf = NULL; goto update; } rxbuf->buf = mp; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); rxbuf->addr = rxr->rx_base[i].read.pkt_addr = htole64(seg[0].ds_addr); } else { rxr->rx_base[i].read.pkt_addr = rxbuf->addr; rxbuf->flags &= ~IXGBE_RX_COPY; } refreshed = TRUE; /* Next is precalculated */ i = j; rxr->next_to_refresh = i; if (++j == rxr->num_desc) j = 0; } update: if (refreshed) /* Update hardware tail index */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int ixgbe_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct ixgbe_rx_buf *rxbuf; int i, bsize, error; bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; if (!(rxr->rx_buffers = (struct ixgbe_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM16BYTES, /* maxsize */ 1, /* nsegments */ MJUM16BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } for (i = 0; i < rxr->num_desc; i++, rxbuf++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->ptag, BUS_DMA_NOWAIT, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to create RX dma map\n"); goto fail; } } return (0); fail: /* Frees all, but can handle partial completion */ ixgbe_free_receive_structures(adapter); return (error); } /* ** Used to detect a descriptor that has ** been merged by Hardware RSC. */ static inline u32 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) { return (le32toh(rx->wb.lower.lo_dword.data) & IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; } /********************************************************************* * * Initialize Hardware RSC (LRO) feature on 82599 * for an RX ring, this is toggled by the LRO capability * even though it is transparent to the stack. * * NOTE: since this HW feature only works with IPV4 and * our testing has shown soft LRO to be as effective * I have decided to disable this by default. * **********************************************************************/ static void ixgbe_setup_hw_rsc(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct ixgbe_hw *hw = &adapter->hw; u32 rscctrl, rdrxctl; /* If turning LRO/RSC off we need to disable it */ if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); rscctrl &= ~IXGBE_RSCCTL_RSCEN; return; } rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; #ifdef DEV_NETMAP /* crcstrip is optional in netmap */ if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) #endif /* DEV_NETMAP */ rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; rdrxctl |= IXGBE_RDRXCTL_RSCACKC; IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); rscctrl |= IXGBE_RSCCTL_RSCEN; /* ** Limit the total number of descriptors that ** can be combined, so it does not exceed 64K */ if (rxr->mbuf_sz == MCLBYTES) rscctrl |= IXGBE_RSCCTL_MAXDESC_16; else if (rxr->mbuf_sz == MJUMPAGESIZE) rscctrl |= IXGBE_RSCCTL_MAXDESC_8; else if (rxr->mbuf_sz == MJUM9BYTES) rscctrl |= IXGBE_RSCCTL_MAXDESC_4; else /* Using 16K cluster */ rscctrl |= IXGBE_RSCCTL_MAXDESC_1; IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); /* Enable TCP header recognition */ IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR)); /* Disable RSC for ACK packets */ IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); rxr->hw_rsc = TRUE; } static void ixgbe_free_receive_ring(struct rx_ring *rxr) { struct ixgbe_rx_buf *rxbuf; int i; for (i = 0; i < rxr->num_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->buf != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->buf->m_flags |= M_PKTHDR; m_freem(rxbuf->buf); rxbuf->buf = NULL; rxbuf->flags = 0; } } } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int ixgbe_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct ifnet *ifp; device_t dev; struct ixgbe_rx_buf *rxbuf; bus_dma_segment_t seg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(rxr->adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ adapter = rxr->adapter; ifp = adapter->ifp; dev = adapter->dev; /* Clear the ring contents */ IXGBE_RX_LOCK(rxr); #ifdef DEV_NETMAP /* same as in ixgbe_setup_transmit_ring() */ slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* DEV_NETMAP */ rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* Cache the size */ rxr->mbuf_sz = adapter->rx_mbuf_sz; /* Free current RX buffer structs and their mbufs */ ixgbe_free_receive_ring(rxr); /* Now replenish the mbufs */ for (int j = 0; j != rxr->num_desc; ++j) { struct mbuf *mp; rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP /* * In netmap mode, fill the map and set the buffer * address in the NIC ring, considering the offset * between the netmap and NIC rings (see comment in * ixgbe_setup_transmit_ring() ). No need to allocate * an mbuf, so end the block with a continue; */ if (slot) { int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(slot + sj, &paddr); netmap_load_map(rxr->ptag, rxbuf->pmap, addr); /* Update descriptor and the cached value */ rxr->rx_base[j].read.pkt_addr = htole64(paddr); rxbuf->addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ rxbuf->flags = 0; rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->buf == NULL) { error = ENOBUFS; goto fail; } mp = rxbuf->buf; mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); /* Update the descriptor and the cached value */ rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr); rxbuf->addr = htole64(seg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = 0; rxr->lro_enabled = FALSE; rxr->rx_copies = 0; rxr->rx_bytes = 0; rxr->discard = FALSE; rxr->vtag_strip = FALSE; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* ** Now set up the LRO interface: */ if (ixgbe_rsc_enable) ixgbe_setup_hw_rsc(rxr); else if (ifp->if_capenable & IFCAP_LRO) { int err = tcp_lro_init(lro); if (err) { device_printf(dev, "LRO Initialization failed!\n"); goto fail; } INIT_DEBUGOUT("RX Soft LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; } IXGBE_RX_UNLOCK(rxr); return (0); fail: ixgbe_free_receive_ring(rxr); IXGBE_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int ixgbe_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int j; for (j = 0; j < adapter->num_queues; j++, rxr++) if (ixgbe_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'j' failed, so its the terminus. */ for (int i = 0; i < j; ++i) { rxr = &adapter->rx_rings[i]; ixgbe_free_receive_ring(rxr); } return (ENOBUFS); } /********************************************************************* * * Setup receive registers and features. * **********************************************************************/ #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 #define BSIZEPKT_ROUNDUP ((1<rx_rings; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 bufsz, rxctrl, fctrl, srrctl, rxcsum; u32 reta, mrqc = 0, hlreg, random[10]; /* * Make sure receives are disabled while * setting up the descriptor ring */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN); /* Enable broadcasts */ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; fctrl |= IXGBE_FCTRL_DPF; fctrl |= IXGBE_FCTRL_PMCF; IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); /* Set for Jumbo Frames? */ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); if (ifp->if_mtu > ETHERMTU) hlreg |= IXGBE_HLREG0_JUMBOEN; else hlreg &= ~IXGBE_HLREG0_JUMBOEN; #ifdef DEV_NETMAP /* crcstrip is conditional in netmap (in RDRXCTL too ?) */ if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) hlreg &= ~IXGBE_HLREG0_RXCRCSTRP; else hlreg |= IXGBE_HLREG0_RXCRCSTRP; #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); bufsz = (adapter->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 rdba = rxr->rxdma.dma_paddr; /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i), adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); /* Set up the SRRCTL register */ srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; srrctl |= bufsz; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0); IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0); /* Set the processing limit */ rxr->process_limit = ixgbe_rx_process_limit; } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { u32 psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); } rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); /* Setup RSS */ if (adapter->num_queues > 1) { int i, j; reta = 0; /* set up random bits */ arc4rand(&random, sizeof(random), 0); /* Set up the redirection table */ for (i = 0, j = 0; i < 128; i++, j++) { if (j == adapter->num_queues) j = 0; reta = (reta << 8) | (j * 0x11); if ((i & 3) == 3) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); } /* Now fill our hash function seeds */ for (int i = 0; i < 10; i++) IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]); /* Perform hash on these packet types */ mrqc = IXGBE_MRQC_RSSEN | IXGBE_MRQC_RSS_FIELD_IPV4 | IXGBE_MRQC_RSS_FIELD_IPV4_TCP | IXGBE_MRQC_RSS_FIELD_IPV4_UDP | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | IXGBE_MRQC_RSS_FIELD_IPV6_EX | IXGBE_MRQC_RSS_FIELD_IPV6 | IXGBE_MRQC_RSS_FIELD_IPV6_TCP | IXGBE_MRQC_RSS_FIELD_IPV6_UDP | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); /* RSS and RX IPP Checksum are mutually exclusive */ rxcsum |= IXGBE_RXCSUM_PCSD; } if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= IXGBE_RXCSUM_PCSD; if (!(rxcsum & IXGBE_RXCSUM_PCSD)) rxcsum |= IXGBE_RXCSUM_IPPCSE; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); return; } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void ixgbe_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; ixgbe_free_receive_buffers(rxr); /* Free LRO memory */ tcp_lro_free(lro); /* Free the ring memory as well */ ixgbe_dma_free(adapter, &rxr->rxdma); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void ixgbe_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct ixgbe_rx_buf *rxbuf; INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); /* Cleanup any existing buffers */ if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->buf != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->buf->m_flags |= M_PKTHDR; m_freem(rxbuf->buf); } rxbuf->buf = NULL; if (rxbuf->pmap != NULL) { bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); rxbuf->pmap = NULL; } } if (rxr->rx_buffers != NULL) { free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; } } if (rxr->ptag != NULL) { bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; } return; } static __inline void ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) { /* * ATM LRO is only for IP/TCP packets and TCP checksum of the packet * should be computed by hardware. Also it should not have VLAN tag in * ethernet header. In case of IPv6 we do not yet support ext. hdrs. */ if (rxr->lro_enabled && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) || (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) == (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* * Send to the stack if: ** - LRO not enabled, or ** - no LRO resources, or ** - lro enqueue fails */ if (rxr->lro.lro_cnt != 0) if (tcp_lro_rx(&rxr->lro, m, 0) == 0) return; } IXGBE_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, m); IXGBE_RX_LOCK(rxr); } static __inline void ixgbe_rx_discard(struct rx_ring *rxr, int i) { struct ixgbe_rx_buf *rbuf; rbuf = &rxr->rx_buffers[i]; if (rbuf->fmp != NULL) {/* Partial chain ? */ rbuf->fmp->m_flags |= M_PKTHDR; m_freem(rbuf->fmp); rbuf->fmp = NULL; } /* ** With advanced descriptors the writeback ** clobbers the buffer addrs, so its easier ** to just free the existing mbufs and take ** the normal refresh path to get new buffers ** and mapping. */ if (rbuf->buf) { m_free(rbuf->buf); rbuf->buf = NULL; } rbuf->flags = 0; return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * Return TRUE for more work, FALSE for all clean. *********************************************************************/ static bool ixgbe_rxeof(struct ix_queue *que) { struct adapter *adapter = que->adapter; struct rx_ring *rxr = que->rxr; struct ifnet *ifp = adapter->ifp; struct lro_ctrl *lro = &rxr->lro; struct lro_entry *queued; int i, nextp, processed = 0; u32 staterr = 0; u16 count = rxr->process_limit; union ixgbe_adv_rx_desc *cur; struct ixgbe_rx_buf *rbuf, *nbuf; IXGBE_RX_LOCK(rxr); #ifdef DEV_NETMAP /* Same as the txeof routine: wakeup clients on intr. */ if (netmap_rx_irq(ifp, rxr->me, &processed)) { IXGBE_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mp; u32 rsc, ptype; u16 len; u16 vtag = 0; bool eop; /* Sync the ring. */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); cur = &rxr->rx_base[i]; staterr = le32toh(cur->wb.upper.status_error); if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; count--; sendmp = NULL; nbuf = NULL; rsc = 0; cur->wb.upper.status_error = 0; rbuf = &rxr->rx_buffers[i]; mp = rbuf->buf; len = le16toh(cur->wb.upper.length); ptype = le32toh(cur->wb.lower.lo_dword.data) & IXGBE_RXDADV_PKTTYPE_MASK; eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); /* Make sure bad packets are discarded */ if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) || (rxr->discard)) { rxr->rx_discarded++; if (eop) rxr->discard = FALSE; else rxr->discard = TRUE; ixgbe_rx_discard(rxr, i); goto next_desc; } /* ** On 82599 which supports a hardware ** LRO (called HW RSC), packets need ** not be fragmented across sequential ** descriptors, rather the next descriptor ** is indicated in bits of the descriptor. ** This also means that we might proceses ** more than one packet at a time, something ** that has never been true before, it ** required eliminating global chain pointers ** in favor of what we are doing here. -jfv */ if (!eop) { /* ** Figure out the next descriptor ** of this frame. */ if (rxr->hw_rsc == TRUE) { rsc = ixgbe_rsc_count(cur); rxr->rsc_num += (rsc - 1); } if (rsc) { /* Get hardware index */ nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >> IXGBE_RXDADV_NEXTP_SHIFT); } else { /* Just sequential */ nextp = i + 1; if (nextp == adapter->num_rx_desc) nextp = 0; } nbuf = &rxr->rx_buffers[nextp]; prefetch(nbuf); } /* ** Rather than using the fmp/lmp global pointers ** we now keep the head of a packet chain in the ** buffer struct and pass this along from one ** descriptor to the next, until we get EOP. */ mp->m_len = len; /* ** See if there is a stored head ** that determines what we are */ sendmp = rbuf->fmp; if (sendmp != NULL) { /* secondary frag */ rbuf->buf = rbuf->fmp = NULL; mp->m_flags &= ~M_PKTHDR; sendmp->m_pkthdr.len += mp->m_len; } else { /* * Optimize. This might be a small packet, * maybe just a TCP ACK. Do a fast copy that * is cache aligned into a new mbuf, and * leave the old mbuf+cluster for re-use. */ if (eop && len <= IXGBE_RX_COPY_LEN) { sendmp = m_gethdr(M_NOWAIT, MT_DATA); if (sendmp != NULL) { sendmp->m_data += IXGBE_RX_COPY_ALIGN; ixgbe_bcopy(mp->m_data, sendmp->m_data, len); sendmp->m_len = len; rxr->rx_copies++; rbuf->flags |= IXGBE_RX_COPY; } } if (sendmp == NULL) { rbuf->buf = rbuf->fmp = NULL; sendmp = mp; } /* first desc of a non-ps chain */ sendmp->m_flags |= M_PKTHDR; sendmp->m_pkthdr.len = mp->m_len; } ++processed; /* Pass the head pointer on */ if (eop == 0) { nbuf->fmp = sendmp; sendmp = NULL; mp->m_next = nbuf->buf; } else { /* Sending this frame */ sendmp->m_pkthdr.rcvif = ifp; rxr->rx_packets++; /* capture data for AIM */ rxr->bytes += sendmp->m_pkthdr.len; rxr->rx_bytes += sendmp->m_pkthdr.len; /* Process vlan info */ if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP)) vtag = le16toh(cur->wb.upper.vlan); if (vtag) { sendmp->m_pkthdr.ether_vtag = vtag; sendmp->m_flags |= M_VLANTAG; } if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) ixgbe_rx_checksum(staterr, sendmp, ptype); #if __FreeBSD_version >= 800000 sendmp->m_pkthdr.flowid = que->msix; sendmp->m_flags |= M_FLOWID; #endif } next_desc: bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Advance our pointers to the next descriptor. */ if (++i == rxr->num_desc) i = 0; /* Now send to the stack or do LRO */ if (sendmp != NULL) { rxr->next_to_check = i; ixgbe_rx_input(rxr, ifp, sendmp, ptype); i = rxr->next_to_check; } /* Every 8 descriptors we go to refresh mbufs */ if (processed == 8) { ixgbe_refresh_mbufs(rxr, i); processed = 0; } } /* Refresh any remaining buf structs */ if (ixgbe_rx_unrefreshed(rxr)) ixgbe_refresh_mbufs(rxr, i); rxr->next_to_check = i; /* * Flush any outstanding LRO work */ while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } IXGBE_RX_UNLOCK(rxr); /* ** Still have cleaning to do? */ if ((staterr & IXGBE_RXD_STAT_DD) != 0) return (TRUE); else return (FALSE); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype) { u16 status = (u16) staterr; u8 errors = (u8) (staterr >> 24); bool sctp = FALSE; if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) sctp = TRUE; if (status & IXGBE_RXD_STAT_IPCS) { if (!(errors & IXGBE_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else mp->m_pkthdr.csum_flags = 0; } if (status & IXGBE_RXD_STAT_L4CS) { u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) type = CSUM_SCTP_VALID; #endif if (!(errors & IXGBE_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= type; if (!sctp) mp->m_pkthdr.csum_data = htons(0xffff); } } return; } /* ** This routine is run via an vlan config EVENT, ** it enables us to use the HW Filter table since ** we can get the vlan id. This just creates the ** entry in the soft version of the VFTA, init will ** repopulate the real table. */ static void ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; ixgbe_setup_vlan_hw_support(adapter); IXGBE_CORE_UNLOCK(adapter); } /* ** This routine is run via an vlan ** unconfig EVENT, remove our entry ** in the soft vfta. */ static void ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ ixgbe_setup_vlan_hw_support(adapter); IXGBE_CORE_UNLOCK(adapter); } static void ixgbe_setup_vlan_hw_support(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 ctrl; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* Setup the queues for vlans */ for (int i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; /* On 82599 the VLAN enable is per/queue in RXDCTL */ if (hw->mac.type != ixgbe_mac_82598EB) { ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl); } rxr->vtag_strip = TRUE; } if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < IXGBE_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), adapter->shadow_vfta[i]); ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* Enable the Filter Table if enabled */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { ctrl &= ~IXGBE_VLNCTRL_CFIEN; ctrl |= IXGBE_VLNCTRL_VFE; } if (hw->mac.type == ixgbe_mac_82598EB) ctrl |= IXGBE_VLNCTRL_VME; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); } static void ixgbe_enable_intr(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ix_queue *que = adapter->queues; u32 mask, fwsm; mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); /* Enable Fan Failure detection */ if (hw->device_id == IXGBE_DEV_ID_82598AT) mask |= IXGBE_EIMS_GPI_SDP1; switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: mask |= IXGBE_EIMS_ECC; mask |= IXGBE_EIMS_GPI_SDP0; mask |= IXGBE_EIMS_GPI_SDP1; mask |= IXGBE_EIMS_GPI_SDP2; #ifdef IXGBE_FDIR mask |= IXGBE_EIMS_FLOW_DIR; #endif break; case ixgbe_mac_X540: mask |= IXGBE_EIMS_ECC; /* Detect if Thermal Sensor is enabled */ fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); if (fwsm & IXGBE_FWSM_TS_ENABLED) mask |= IXGBE_EIMS_TS; #ifdef IXGBE_FDIR mask |= IXGBE_EIMS_FLOW_DIR; #endif /* falls through */ default: break; } IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); /* With RSS we use auto clear */ if (adapter->msix_mem) { mask = IXGBE_EIMS_ENABLE_MASK; /* Don't autoclear Link */ mask &= ~IXGBE_EIMS_OTHER; mask &= ~IXGBE_EIMS_LSC; IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); } /* ** Now enable all queues, this is done separately to ** allow for handling the extended (beyond 32) MSIX ** vectors that can be used by 82599 */ for (int i = 0; i < adapter->num_queues; i++, que++) ixgbe_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); return; } static void ixgbe_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); if (adapter->hw.mac.type == ixgbe_mac_82598EB) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); } else { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); } IXGBE_WRITE_FLUSH(&adapter->hw); return; } u16 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg) { u16 value; value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev, reg, 2); return (value); } void ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value) { pci_write_config(((struct ixgbe_osdep *)hw->back)->dev, reg, value, 2); return; } /* ** Get the width and transaction speed of ** the slot this adapter is plugged into. */ static void ixgbe_get_slot_info(struct ixgbe_hw *hw) { device_t dev = ((struct ixgbe_osdep *)hw->back)->dev; struct ixgbe_mac_info *mac = &hw->mac; u16 link; u32 offset; /* For most devices simply call the shared code routine */ if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { ixgbe_get_bus_info(hw); goto display; } /* ** For the Quad port adapter we need to parse back ** up the PCI tree to find the speed of the expansion ** slot into which this adapter is plugged. A bit more work. */ dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif /* Now get the PCI Express Capabilities offset */ pci_find_cap(dev, PCIY_EXPRESS, &offset); /* ...and read the Link Status Register */ link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); switch (link & IXGBE_PCI_LINK_WIDTH) { case IXGBE_PCI_LINK_WIDTH_1: hw->bus.width = ixgbe_bus_width_pcie_x1; break; case IXGBE_PCI_LINK_WIDTH_2: hw->bus.width = ixgbe_bus_width_pcie_x2; break; case IXGBE_PCI_LINK_WIDTH_4: hw->bus.width = ixgbe_bus_width_pcie_x4; break; case IXGBE_PCI_LINK_WIDTH_8: hw->bus.width = ixgbe_bus_width_pcie_x8; break; default: hw->bus.width = ixgbe_bus_width_unknown; break; } switch (link & IXGBE_PCI_LINK_SPEED) { case IXGBE_PCI_LINK_SPEED_2500: hw->bus.speed = ixgbe_bus_speed_2500; break; case IXGBE_PCI_LINK_SPEED_5000: hw->bus.speed = ixgbe_bus_speed_5000; break; case IXGBE_PCI_LINK_SPEED_8000: hw->bus.speed = ixgbe_bus_speed_8000; break; default: hw->bus.speed = ixgbe_bus_speed_unknown; break; } mac->ops.set_lan_id(hw); display: device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && (hw->bus.speed == ixgbe_bus_speed_2500))) { device_printf(dev, "PCI-Express bandwidth available" " for this card\n is not sufficient for" " optimal performance.\n"); device_printf(dev, "For optimal performance a x8 " "PCIE, or x4 PCIE Gen2 slot is required.\n"); } if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && (hw->bus.speed < ixgbe_bus_speed_8000))) { device_printf(dev, "PCI-Express bandwidth available" " for this card\n is not sufficient for" " optimal performance.\n"); device_printf(dev, "For optimal performance a x8 " "PCIE Gen3 slot is required.\n"); } return; } /* ** Setup the correct IVAR register for a particular MSIX interrupt ** (yes this is all very magic and confusing :) ** - entry is the register array entry ** - vector is the MSIX vector for this queue ** - type is RX/TX/MISC */ static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; switch (hw->mac.type) { case ixgbe_mac_82598EB: if (type == -1) entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; else entry += (type * 64); index = (entry >> 2) & 0x1F; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); ivar &= ~(0xFF << (8 * (entry & 0x3))); ivar |= (vector << (8 * (entry & 0x3))); IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: if (type == -1) { /* MISC IVAR */ index = (entry & 1) * 8; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); } default: break; } } static void ixgbe_configure_ivars(struct adapter *adapter) { struct ix_queue *que = adapter->queues; u32 newitr; if (ixgbe_max_interrupt_rate > 0) newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; else newitr = 0; for (int i = 0; i < adapter->num_queues; i++, que++) { /* First the RX queue entry */ ixgbe_set_ivar(adapter, i, que->msix, 0); /* ... and the TX */ ixgbe_set_ivar(adapter, i, que->msix, 1); /* Set an Initial EITR value */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), newitr); } /* For the Link interrupt */ ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1); } /* ** ixgbe_sfp_probe - called in the local timer to ** determine if a port had optics inserted. */ static bool ixgbe_sfp_probe(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; bool result = FALSE; if ((hw->phy.type == ixgbe_phy_nl) && (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { s32 ret = hw->phy.ops.identify_sfp(hw); if (ret) goto out; ret = hw->phy.ops.reset(hw); if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev,"Unsupported SFP+ module detected!"); printf(" Reload driver with supported module.\n"); adapter->sfp_probe = FALSE; goto out; } else device_printf(dev,"SFP+ module detected!\n"); /* We now have supported optics */ adapter->sfp_probe = FALSE; /* Set the optics type so system reports correctly */ ixgbe_setup_optics(adapter); result = TRUE; } out: return (result); } /* ** Tasklet handler for MSIX Link interrupts ** - do outside interrupt since it might sleep */ static void ixgbe_handle_link(void *context, int pending) { struct adapter *adapter = context; ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, 0); ixgbe_update_link_status(adapter); } /* ** Tasklet for handling SFP module interrupts */ static void ixgbe_handle_mod(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; u32 err; err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } err = hw->mac.ops.setup_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); return; } taskqueue_enqueue(adapter->tq, &adapter->msf_task); return; } /* ** Tasklet for handling MSF (multispeed fiber) interrupts */ static void ixgbe_handle_msf(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; u32 autoneg; bool negotiate; autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, autoneg, TRUE); return; } #ifdef IXGBE_FDIR /* ** Tasklet for reinitializing the Flow Director filter table */ static void ixgbe_reinit_fdir(void *context, int pending) { struct adapter *adapter = context; struct ifnet *ifp = adapter->ifp; if (adapter->fdir_reinit != 1) /* Shouldn't happen */ return; ixgbe_reinit_fdir_tables_82599(&adapter->hw); adapter->fdir_reinit = 0; /* re-enable flow director interrupts */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); /* Restart the interface */ ifp->if_drv_flags |= IFF_DRV_RUNNING; return; } #endif /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void ixgbe_update_stats_counters(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ixgbe_hw *hw = &adapter->hw; u32 missed_rx = 0, bprc, lxon, lxoff, total; u64 total_missed_rx = 0; adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); /* ** Note: these are for the 8 possible traffic classes, ** which in current implementation is unused, ** therefore only 0 should read real data. */ for (int i = 0; i < 8; i++) { u32 mp; mp = IXGBE_READ_REG(hw, IXGBE_MPC(i)); /* missed_rx tallies misses for the gprc workaround */ missed_rx += mp; /* global total per queue */ adapter->stats.mpc[i] += mp; /* Running comprehensive total for stats display */ total_missed_rx += adapter->stats.mpc[i]; if (hw->mac.type == ixgbe_mac_82598EB) { adapter->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i)); adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i)); adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i)); adapter->stats.pxonrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); } else adapter->stats.pxonrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); adapter->stats.pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); adapter->stats.pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i)); adapter->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); adapter->stats.pxon2offc[i] += IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i)); } for (int i = 0; i < 16; i++) { adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); } adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); /* Hardware workaround, gprc counts missed packets */ adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); adapter->stats.gprc -= missed_rx; if (hw->mac.type != ixgbe_mac_82598EB) { adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) + ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); } else { adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); /* 82598 only has a counter in the high register */ adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH); } /* * Workaround: mprc hardware is incorrectly counting * broadcasts, so for now we subtract those. */ bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); adapter->stats.bprc += bprc; adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); if (hw->mac.type == ixgbe_mac_82598EB) adapter->stats.mprc -= bprc; adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); adapter->stats.lxontxc += lxon; lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); adapter->stats.lxofftxc += lxoff; total = lxon + lxoff; adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); adapter->stats.gptc -= total; adapter->stats.mptc -= total; adapter->stats.ptc64 -= total; adapter->stats.gotc -= total * ETHER_MIN_LEN; adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC); adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC); adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC); adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC); adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR); adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT); adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC); adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); /* Only read FCOE on 82599 */ if (hw->mac.type != ixgbe_mac_82598EB) { adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); } /* Fill out the OS statistics structure */ ifp->if_ipackets = adapter->stats.gprc; ifp->if_opackets = adapter->stats.gptc; ifp->if_ibytes = adapter->stats.gorc; ifp->if_obytes = adapter->stats.gotc; ifp->if_imcasts = adapter->stats.mprc; ifp->if_omcasts = adapter->stats.mptc; ifp->if_collisions = 0; /* Rx Errors */ ifp->if_iqdrops = total_missed_rx; ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec; } /** ixgbe_sysctl_tdh_handler - Handler function * Retrieves the TDH value from the hardware */ static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS) { int error; struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); if (!txr) return 0; unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return 0; } /** ixgbe_sysctl_tdt_handler - Handler function * Retrieves the TDT value from the hardware */ static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS) { int error; struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); if (!txr) return 0; unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return 0; } /** ixgbe_sysctl_rdh_handler - Handler function * Retrieves the RDH value from the hardware */ static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS) { int error; struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); if (!rxr) return 0; unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return 0; } /** ixgbe_sysctl_rdt_handler - Handler function * Retrieves the RDT value from the hardware */ static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) { int error; struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); if (!rxr) return 0; unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return 0; } static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { int error; struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1); unsigned int reg, usec, rate; reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); usec = ((reg & 0x0FF8) >> 3); if (usec > 0) rate = 500000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; reg &= ~0xfff; /* default, no limitation */ ixgbe_max_interrupt_rate = 0; if (rate > 0 && rate < 500000) { if (rate < 1000) rate = 1000; ixgbe_max_interrupt_rate = rate; reg |= ((4000000/rate) & 0xff8 ); } IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); return 0; } /* * Add sysctl variables, one per statistic, to the system. */ static void ixgbe_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbe_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "m_defrag() failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); for (int i = 0; i < adapter->num_queues; i++, txr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i], sizeof(&adapter->queues[i]), ixgbe_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(adapter->queues[i].irqs), "irqs on this queue"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdh_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdt_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &txr->tso_tx, "TSO"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txr->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); } for (int i = 0; i < adapter->num_queues; i++, rxr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdh_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdt_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", CTLFLAG_RD, &rxr->rx_copies, "Copied RX Frames"); SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get the own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", CTLFLAG_RD, &stats->illerrc, "Illegal Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", CTLFLAG_RD, &stats->errbc, "Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", CTLFLAG_RD, &stats->mspdc, "MAC Short Packets Discarded"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", CTLFLAG_RD, &stats->mlfc, "MAC Local Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", CTLFLAG_RD, &stats->mrfc, "MAC Remote Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", CTLFLAG_RD, &stats->rlec, "Receive Length Errors"); /* Flow Control stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->lxontxc, "Link XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->lxonrxc, "Link XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->lxofftxc, "Link XOFF Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->lxoffrxc, "Link XOFF Received"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", CTLFLAG_RD, &stats->tor, "Total Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized", CTLFLAG_RD, &stats->ruc, "Receive Undersized"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", CTLFLAG_RD, &stats->roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", CTLFLAG_RD, &stats->rjc, "Received Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", CTLFLAG_RD, &stats->mngprc, "Management Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", CTLFLAG_RD, &stats->mngptc, "Management Packets Dropped"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", CTLFLAG_RD, &stats->xec, "Checksum Errors"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", CTLFLAG_RD, &stats->mngptc, "Management Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error, last; struct adapter *adapter = (struct adapter *) arg1; last = adapter->fc; error = sysctl_handle_int(oidp, &adapter->fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Don't bother if it's not changed */ if (adapter->fc == last) return (0); switch (adapter->fc) { case ixgbe_fc_rx_pause: case ixgbe_fc_tx_pause: case ixgbe_fc_full: adapter->hw.fc.requested_mode = adapter->fc; if (adapter->num_queues > 1) ixgbe_disable_rx_drop(adapter); break; case ixgbe_fc_none: adapter->hw.fc.requested_mode = ixgbe_fc_none; if (adapter->num_queues > 1) ixgbe_enable_rx_drop(adapter); break; default: adapter->fc = last; return (EINVAL); } /* Don't autoneg if forcing a value */ adapter->hw.fc.disable_fc_autoneg = TRUE; ixgbe_fc_enable(&adapter->hw); return error; } /* ** Control link advertise speed: ** 1 - advertise only 1G ** 2 - advertise 100Mb ** 3 - advertise normal */ static int ixgbe_set_advertise(SYSCTL_HANDLER_ARGS) { int error = 0; struct adapter *adapter; device_t dev; struct ixgbe_hw *hw; ixgbe_link_speed speed, last; adapter = (struct adapter *) arg1; dev = adapter->dev; hw = &adapter->hw; last = adapter->advertise; error = sysctl_handle_int(oidp, &adapter->advertise, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (adapter->advertise == last) /* no change */ return (0); if (!((hw->phy.media_type == ixgbe_media_type_copper) || (hw->phy.multispeed_fiber))) return (EINVAL); if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) { device_printf(dev, "Set Advertise: 100Mb on X540 only\n"); return (EINVAL); } if (adapter->advertise == 1) speed = IXGBE_LINK_SPEED_1GB_FULL; else if (adapter->advertise == 2) speed = IXGBE_LINK_SPEED_100_FULL; else if (adapter->advertise == 3) speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_10GB_FULL; else { /* bogus value */ adapter->advertise = last; return (EINVAL); } hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); return (error); } /* ** Thermal Shutdown Trigger ** - cause a Thermal Overtemp IRQ ** - this now requires firmware enabling */ static int ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS) { int error, fire = 0; struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; if (hw->mac.type != ixgbe_mac_X540) return (0); error = sysctl_handle_int(oidp, &fire, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (fire) { u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS); reg |= IXGBE_EICR_TS; IXGBE_WRITE_REG(hw, IXGBE_EICS, reg); } return (0); } /* ** Enable the hardware to drop packets when the buffer is ** full. This is useful when multiqueue,so that no single ** queue being full stalls the entire RX engine. We only ** enable this when Multiqueue AND when Flow Control is ** disabled. */ static void ixgbe_enable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; for (int i = 0; i < adapter->num_queues; i++) { u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); srrctl |= IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); } } static void ixgbe_disable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; for (int i = 0; i < adapter->num_queues; i++) { u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); srrctl &= ~IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); } } Index: head/sys/dev/mxge/if_mxge.c =================================================================== --- head/sys/dev/mxge/if_mxge.c (revision 263101) +++ head/sys/dev/mxge/if_mxge.c (revision 263102) @@ -1,5033 +1,5031 @@ /****************************************************************************** Copyright (c) 2006-2013, Myricom Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Myricom Inc, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX for pci_cfg_restore */ #include /* for pmap_mapdev() */ #include #if defined(__i386) || defined(__amd64) #include #endif #include #include /*#define MXGE_FAKE_IFP*/ #include #ifdef IFNET_BUF_RING #include #endif #include "opt_inet.h" #include "opt_inet6.h" /* tunable params */ static int mxge_nvidia_ecrc_enable = 1; static int mxge_force_firmware = 0; static int mxge_intr_coal_delay = 30; static int mxge_deassert_wait = 1; static int mxge_flow_control = 1; static int mxge_verbose = 0; static int mxge_ticks; static int mxge_max_slices = 1; static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; static int mxge_always_promisc = 0; static int mxge_initial_mtu = ETHERMTU_JUMBO; static int mxge_throttle = 0; static char *mxge_fw_unaligned = "mxge_ethp_z8e"; static char *mxge_fw_aligned = "mxge_eth_z8e"; static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; static int mxge_probe(device_t dev); static int mxge_attach(device_t dev); static int mxge_detach(device_t dev); static int mxge_shutdown(device_t dev); static void mxge_intr(void *arg); static device_method_t mxge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mxge_probe), DEVMETHOD(device_attach, mxge_attach), DEVMETHOD(device_detach, mxge_detach), DEVMETHOD(device_shutdown, mxge_shutdown), DEVMETHOD_END }; static driver_t mxge_driver = { "mxge", mxge_methods, sizeof(mxge_softc_t), }; static devclass_t mxge_devclass; /* Declare ourselves to be a child of the PCI bus.*/ DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); MODULE_DEPEND(mxge, firmware, 1, 1, 1); MODULE_DEPEND(mxge, zlib, 1, 1, 1); static int mxge_load_firmware(mxge_softc_t *sc, int adopt); static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); static int mxge_close(mxge_softc_t *sc, int down); static int mxge_open(mxge_softc_t *sc); static void mxge_tick(void *arg); static int mxge_probe(device_t dev) { int rev; if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { rev = pci_get_revid(dev); switch (rev) { case MXGE_PCI_REV_Z8E: device_set_desc(dev, "Myri10G-PCIE-8A"); break; case MXGE_PCI_REV_Z8ES: device_set_desc(dev, "Myri10G-PCIE-8B"); break; default: device_set_desc(dev, "Myri10G-PCIE-8??"); device_printf(dev, "Unrecognized rev %d NIC\n", rev); break; } return 0; } return ENXIO; } static void mxge_enable_wc(mxge_softc_t *sc) { #if defined(__i386) || defined(__amd64) vm_offset_t len; int err; sc->wc = 1; len = rman_get_size(sc->mem_res); err = pmap_change_attr((vm_offset_t) sc->sram, len, PAT_WRITE_COMBINING); if (err != 0) { device_printf(sc->dev, "pmap_change_attr failed, %d\n", err); sc->wc = 0; } #endif } /* callback to get our DMA address */ static void mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error == 0) { *(bus_addr_t *) arg = segs->ds_addr; } } static int mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, bus_size_t alignment) { int err; device_t dev = sc->dev; bus_size_t boundary, maxsegsize; if (bytes > 4096 && alignment == 4096) { boundary = 0; maxsegsize = bytes; } else { boundary = 4096; maxsegsize = 4096; } /* allocate DMAable memory tags */ err = bus_dma_tag_create(sc->parent_dmat, /* parent */ alignment, /* alignment */ boundary, /* boundary */ BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ bytes, /* maxsize */ 1, /* num segs */ maxsegsize, /* maxsegsize */ BUS_DMA_COHERENT, /* flags */ NULL, NULL, /* lock */ &dma->dmat); /* tag */ if (err != 0) { device_printf(dev, "couldn't alloc tag (err = %d)\n", err); return err; } /* allocate DMAable memory & map */ err = bus_dmamem_alloc(dma->dmat, &dma->addr, (BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO), &dma->map); if (err != 0) { device_printf(dev, "couldn't alloc mem (err = %d)\n", err); goto abort_with_dmat; } /* load the memory */ err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, mxge_dmamap_callback, (void *)&dma->bus_addr, 0); if (err != 0) { device_printf(dev, "couldn't load map (err = %d)\n", err); goto abort_with_mem; } return 0; abort_with_mem: bus_dmamem_free(dma->dmat, dma->addr, dma->map); abort_with_dmat: (void)bus_dma_tag_destroy(dma->dmat); return err; } static void mxge_dma_free(mxge_dma_t *dma) { bus_dmamap_unload(dma->dmat, dma->map); bus_dmamem_free(dma->dmat, dma->addr, dma->map); (void)bus_dma_tag_destroy(dma->dmat); } /* * The eeprom strings on the lanaiX have the format * SN=x\0 * MAC=x:x:x:x:x:x\0 * PC=text\0 */ static int mxge_parse_strings(mxge_softc_t *sc) { char *ptr; int i, found_mac, found_sn2; char *endptr; ptr = sc->eeprom_strings; found_mac = 0; found_sn2 = 0; while (*ptr != '\0') { if (strncmp(ptr, "MAC=", 4) == 0) { ptr += 4; for (i = 0;;) { sc->mac_addr[i] = strtoul(ptr, &endptr, 16); if (endptr - ptr != 2) goto abort; ptr = endptr; if (++i == 6) break; if (*ptr++ != ':') goto abort; } found_mac = 1; } else if (strncmp(ptr, "PC=", 3) == 0) { ptr += 3; strlcpy(sc->product_code_string, ptr, sizeof(sc->product_code_string)); } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { ptr += 3; strlcpy(sc->serial_number_string, ptr, sizeof(sc->serial_number_string)); } else if (strncmp(ptr, "SN2=", 4) == 0) { /* SN2 takes precedence over SN */ ptr += 4; found_sn2 = 1; strlcpy(sc->serial_number_string, ptr, sizeof(sc->serial_number_string)); } while (*ptr++ != '\0') {} } if (found_mac) return 0; abort: device_printf(sc->dev, "failed to parse eeprom_strings\n"); return ENXIO; } #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ static void mxge_enable_nvidia_ecrc(mxge_softc_t *sc) { uint32_t val; unsigned long base, off; char *va, *cfgptr; device_t pdev, mcp55; uint16_t vendor_id, device_id, word; uintptr_t bus, slot, func, ivend, idev; uint32_t *ptr32; if (!mxge_nvidia_ecrc_enable) return; pdev = device_get_parent(device_get_parent(sc->dev)); if (pdev == NULL) { device_printf(sc->dev, "could not find parent?\n"); return; } vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); device_id = pci_read_config(pdev, PCIR_DEVICE, 2); if (vendor_id != 0x10de) return; base = 0; if (device_id == 0x005d) { /* ck804, base address is magic */ base = 0xe0000000UL; } else if (device_id >= 0x0374 && device_id <= 0x378) { /* mcp55, base address stored in chipset */ mcp55 = pci_find_bsf(0, 0, 0); if (mcp55 && 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { word = pci_read_config(mcp55, 0x90, 2); base = ((unsigned long)word & 0x7ffeU) << 25; } } if (!base) return; /* XXXX Test below is commented because it is believed that doing config read/write beyond 0xff will access the config space for the next larger function. Uncomment this and remove the hacky pmap_mapdev() way of accessing config space when FreeBSD grows support for extended pcie config space access */ #if 0 /* See if we can, by some miracle, access the extended config space */ val = pci_read_config(pdev, 0x178, 4); if (val != 0xffffffff) { val |= 0x40; pci_write_config(pdev, 0x178, val, 4); return; } #endif /* Rather than using normal pci config space writes, we must * map the Nvidia config space ourselves. This is because on * opteron/nvidia class machine the 0xe000000 mapping is * handled by the nvidia chipset, that means the internal PCI * device (the on-chip northbridge), or the amd-8131 bridge * and things behind them are not visible by this method. */ BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_BUS, &bus); BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_SLOT, &slot); BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_FUNCTION, &func); BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_VENDOR, &ivend); BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_DEVICE, &idev); off = base + 0x00100000UL * (unsigned long)bus + 0x00001000UL * (unsigned long)(func + 8 * slot); /* map it into the kernel */ va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); if (va == NULL) { device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); return; } /* get a pointer to the config space mapped into the kernel */ cfgptr = va + (off & PAGE_MASK); /* make sure that we can really access it */ vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); if (! (vendor_id == ivend && device_id == idev)) { device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", vendor_id, device_id); pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); return; } ptr32 = (uint32_t*)(cfgptr + 0x178); val = *ptr32; if (val == 0xffffffff) { device_printf(sc->dev, "extended mapping failed\n"); pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); return; } *ptr32 = val | 0x40; pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); if (mxge_verbose) device_printf(sc->dev, "Enabled ECRC on upstream Nvidia bridge " "at %d:%d:%d\n", (int)bus, (int)slot, (int)func); return; } #else static void mxge_enable_nvidia_ecrc(mxge_softc_t *sc) { device_printf(sc->dev, "Nforce 4 chipset on non-x86/amd64!?!?!\n"); return; } #endif static int mxge_dma_test(mxge_softc_t *sc, int test_type) { mxge_cmd_t cmd; bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; int status; uint32_t len; char *test = " "; /* Run a small DMA test. * The magic multipliers to the length tell the firmware * to do DMA read, write, or read+write tests. The * results are returned in cmd.data0. The upper 16 * bits of the return is the number of transfers completed. * The lower 16 bits is the time in 0.5us ticks that the * transfers took to complete. */ len = sc->tx_boundary; cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); cmd.data2 = len * 0x10000; status = mxge_send_cmd(sc, test_type, &cmd); if (status != 0) { test = "read"; goto abort; } sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); cmd.data2 = len * 0x1; status = mxge_send_cmd(sc, test_type, &cmd); if (status != 0) { test = "write"; goto abort; } sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); cmd.data2 = len * 0x10001; status = mxge_send_cmd(sc, test_type, &cmd); if (status != 0) { test = "read/write"; goto abort; } sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / (cmd.data0 & 0xffff); abort: if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) device_printf(sc->dev, "DMA %s benchmark failed: %d\n", test, status); return status; } /* * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput * when the PCI-E Completion packets are aligned on an 8-byte * boundary. Some PCI-E chip sets always align Completion packets; on * the ones that do not, the alignment can be enforced by enabling * ECRC generation (if supported). * * When PCI-E Completion packets are not aligned, it is actually more * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. * * If the driver can neither enable ECRC nor verify that it has * already been enabled, then it must use a firmware image which works * around unaligned completion packets (ethp_z8e.dat), and it should * also ensure that it never gives the device a Read-DMA which is * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is * enabled, then the driver should use the aligned (eth_z8e.dat) * firmware image, and set tx_boundary to 4KB. */ static int mxge_firmware_probe(mxge_softc_t *sc) { device_t dev = sc->dev; int reg, status; uint16_t pectl; sc->tx_boundary = 4096; /* * Verify the max read request size was set to 4KB * before trying the test with 4KB. */ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { pectl = pci_read_config(dev, reg + 0x8, 2); if ((pectl & (5 << 12)) != (5 << 12)) { device_printf(dev, "Max Read Req. size != 4k (0x%x\n", pectl); sc->tx_boundary = 2048; } } /* * load the optimized firmware (which assumes aligned PCIe * completions) in order to see if it works on this host. */ sc->fw_name = mxge_fw_aligned; status = mxge_load_firmware(sc, 1); if (status != 0) { return status; } /* * Enable ECRC if possible */ mxge_enable_nvidia_ecrc(sc); /* * Run a DMA test which watches for unaligned completions and * aborts on the first one seen. Not required on Z8ES or newer. */ if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) return 0; status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); if (status == 0) return 0; /* keep the aligned firmware */ if (status != E2BIG) device_printf(dev, "DMA test failed: %d\n", status); if (status == ENOSYS) device_printf(dev, "Falling back to ethp! " "Please install up to date fw\n"); return status; } static int mxge_select_firmware(mxge_softc_t *sc) { int aligned = 0; int force_firmware = mxge_force_firmware; if (sc->throttle) force_firmware = sc->throttle; if (force_firmware != 0) { if (force_firmware == 1) aligned = 1; else aligned = 0; if (mxge_verbose) device_printf(sc->dev, "Assuming %s completions (forced)\n", aligned ? "aligned" : "unaligned"); goto abort; } /* if the PCIe link width is 4 or less, we can use the aligned firmware and skip any checks */ if (sc->link_width != 0 && sc->link_width <= 4) { device_printf(sc->dev, "PCIe x%d Link, expect reduced performance\n", sc->link_width); aligned = 1; goto abort; } if (0 == mxge_firmware_probe(sc)) return 0; abort: if (aligned) { sc->fw_name = mxge_fw_aligned; sc->tx_boundary = 4096; } else { sc->fw_name = mxge_fw_unaligned; sc->tx_boundary = 2048; } return (mxge_load_firmware(sc, 0)); } static int mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) { if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { device_printf(sc->dev, "Bad firmware type: 0x%x\n", be32toh(hdr->mcp_type)); return EIO; } /* save firmware version for sysctl */ strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); if (mxge_verbose) device_printf(sc->dev, "firmware id: %s\n", hdr->version); sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, &sc->fw_ver_minor, &sc->fw_ver_tiny); if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { device_printf(sc->dev, "Found firmware version %s\n", sc->fw_version); device_printf(sc->dev, "Driver needs %d.%d\n", MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); return EINVAL; } return 0; } static void * z_alloc(void *nil, u_int items, u_int size) { void *ptr; ptr = malloc(items * size, M_TEMP, M_NOWAIT); return ptr; } static void z_free(void *nil, void *ptr) { free(ptr, M_TEMP); } static int mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) { z_stream zs; char *inflate_buffer; const struct firmware *fw; const mcp_gen_header_t *hdr; unsigned hdr_offset; int status; unsigned int i; char dummy; size_t fw_len; fw = firmware_get(sc->fw_name); if (fw == NULL) { device_printf(sc->dev, "Could not find firmware image %s\n", sc->fw_name); return ENOENT; } /* setup zlib and decompress f/w */ bzero(&zs, sizeof (zs)); zs.zalloc = z_alloc; zs.zfree = z_free; status = inflateInit(&zs); if (status != Z_OK) { status = EIO; goto abort_with_fw; } /* the uncompressed size is stored as the firmware version, which would otherwise go unused */ fw_len = (size_t) fw->version; inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); if (inflate_buffer == NULL) goto abort_with_zs; zs.avail_in = fw->datasize; zs.next_in = __DECONST(char *, fw->data); zs.avail_out = fw_len; zs.next_out = inflate_buffer; status = inflate(&zs, Z_FINISH); if (status != Z_STREAM_END) { device_printf(sc->dev, "zlib %d\n", status); status = EIO; goto abort_with_buffer; } /* check id */ hdr_offset = htobe32(*(const uint32_t *) (inflate_buffer + MCP_HEADER_PTR_OFFSET)); if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { device_printf(sc->dev, "Bad firmware file"); status = EIO; goto abort_with_buffer; } hdr = (const void*)(inflate_buffer + hdr_offset); status = mxge_validate_firmware(sc, hdr); if (status != 0) goto abort_with_buffer; /* Copy the inflated firmware to NIC SRAM. */ for (i = 0; i < fw_len; i += 256) { mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, min(256U, (unsigned)(fw_len - i))); wmb(); dummy = *sc->sram; wmb(); } *limit = fw_len; status = 0; abort_with_buffer: free(inflate_buffer, M_TEMP); abort_with_zs: inflateEnd(&zs); abort_with_fw: firmware_put(fw, FIRMWARE_UNLOAD); return status; } /* * Enable or disable periodic RDMAs from the host to make certain * chipsets resend dropped PCIe messages */ static void mxge_dummy_rdma(mxge_softc_t *sc, int enable) { char buf_bytes[72]; volatile uint32_t *confirm; volatile char *submit; uint32_t *buf, dma_low, dma_high; int i; buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); /* clear confirmation addr */ confirm = (volatile uint32_t *)sc->cmd; *confirm = 0; wmb(); /* send an rdma command to the PCIe engine, and wait for the response in the confirmation address. The firmware should write a -1 there to indicate it is alive and well */ dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); buf[0] = htobe32(dma_high); /* confirm addr MSW */ buf[1] = htobe32(dma_low); /* confirm addr LSW */ buf[2] = htobe32(0xffffffff); /* confirm data */ dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); buf[3] = htobe32(dma_high); /* dummy addr MSW */ buf[4] = htobe32(dma_low); /* dummy addr LSW */ buf[5] = htobe32(enable); /* enable? */ submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); mxge_pio_copy(submit, buf, 64); wmb(); DELAY(1000); wmb(); i = 0; while (*confirm != 0xffffffff && i < 20) { DELAY(1000); i++; } if (*confirm != 0xffffffff) { device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", (enable ? "enable" : "disable"), confirm, *confirm); } return; } static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) { mcp_cmd_t *buf; char buf_bytes[sizeof(*buf) + 8]; volatile mcp_cmd_response_t *response = sc->cmd; volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; uint32_t dma_low, dma_high; int err, sleep_total = 0; /* ensure buf is aligned to 8 bytes */ buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); buf->data0 = htobe32(data->data0); buf->data1 = htobe32(data->data1); buf->data2 = htobe32(data->data2); buf->cmd = htobe32(cmd); dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); buf->response_addr.low = htobe32(dma_low); buf->response_addr.high = htobe32(dma_high); mtx_lock(&sc->cmd_mtx); response->result = 0xffffffff; wmb(); mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); /* wait up to 20ms */ err = EAGAIN; for (sleep_total = 0; sleep_total < 20; sleep_total++) { bus_dmamap_sync(sc->cmd_dma.dmat, sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); wmb(); switch (be32toh(response->result)) { case 0: data->data0 = be32toh(response->data); err = 0; break; case 0xffffffff: DELAY(1000); break; case MXGEFW_CMD_UNKNOWN: err = ENOSYS; break; case MXGEFW_CMD_ERROR_UNALIGNED: err = E2BIG; break; case MXGEFW_CMD_ERROR_BUSY: err = EBUSY; break; case MXGEFW_CMD_ERROR_I2C_ABSENT: err = ENXIO; break; default: device_printf(sc->dev, "mxge: command %d " "failed, result = %d\n", cmd, be32toh(response->result)); err = ENXIO; break; } if (err != EAGAIN) break; } if (err == EAGAIN) device_printf(sc->dev, "mxge: command %d timed out" "result = %d\n", cmd, be32toh(response->result)); mtx_unlock(&sc->cmd_mtx); return err; } static int mxge_adopt_running_firmware(mxge_softc_t *sc) { struct mcp_gen_header *hdr; const size_t bytes = sizeof (struct mcp_gen_header); size_t hdr_offset; int status; /* find running firmware header */ hdr_offset = htobe32(*(volatile uint32_t *) (sc->sram + MCP_HEADER_PTR_OFFSET)); if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { device_printf(sc->dev, "Running firmware has bad header offset (%d)\n", (int)hdr_offset); return EIO; } /* copy header of running firmware from SRAM to host memory to * validate firmware */ hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); if (hdr == NULL) { device_printf(sc->dev, "could not malloc firmware hdr\n"); return ENOMEM; } bus_space_read_region_1(rman_get_bustag(sc->mem_res), rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); status = mxge_validate_firmware(sc, hdr); free(hdr, M_DEVBUF); /* * check to see if adopted firmware has bug where adopting * it will cause broadcasts to be filtered unless the NIC * is kept in ALLMULTI mode */ if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { sc->adopted_rx_filter_bug = 1; device_printf(sc->dev, "Adopting fw %d.%d.%d: " "working around rx filter bug\n", sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); } return status; } static int mxge_load_firmware(mxge_softc_t *sc, int adopt) { volatile uint32_t *confirm; volatile char *submit; char buf_bytes[72]; uint32_t *buf, size, dma_low, dma_high; int status, i; buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); size = sc->sram_size; status = mxge_load_firmware_helper(sc, &size); if (status) { if (!adopt) return status; /* Try to use the currently running firmware, if it is new enough */ status = mxge_adopt_running_firmware(sc); if (status) { device_printf(sc->dev, "failed to adopt running firmware\n"); return status; } device_printf(sc->dev, "Successfully adopted running firmware\n"); if (sc->tx_boundary == 4096) { device_printf(sc->dev, "Using firmware currently running on NIC" ". For optimal\n"); device_printf(sc->dev, "performance consider loading optimized " "firmware\n"); } sc->fw_name = mxge_fw_unaligned; sc->tx_boundary = 2048; return 0; } /* clear confirmation addr */ confirm = (volatile uint32_t *)sc->cmd; *confirm = 0; wmb(); /* send a reload command to the bootstrap MCP, and wait for the response in the confirmation address. The firmware should write a -1 there to indicate it is alive and well */ dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); buf[0] = htobe32(dma_high); /* confirm addr MSW */ buf[1] = htobe32(dma_low); /* confirm addr LSW */ buf[2] = htobe32(0xffffffff); /* confirm data */ /* FIX: All newest firmware should un-protect the bottom of the sram before handoff. However, the very first interfaces do not. Therefore the handoff copy must skip the first 8 bytes */ /* where the code starts*/ buf[3] = htobe32(MXGE_FW_OFFSET + 8); buf[4] = htobe32(size - 8); /* length of code */ buf[5] = htobe32(8); /* where to copy to */ buf[6] = htobe32(0); /* where to jump to */ submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); mxge_pio_copy(submit, buf, 64); wmb(); DELAY(1000); wmb(); i = 0; while (*confirm != 0xffffffff && i < 20) { DELAY(1000*10); i++; bus_dmamap_sync(sc->cmd_dma.dmat, sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); } if (*confirm != 0xffffffff) { device_printf(sc->dev,"handoff failed (%p = 0x%x)", confirm, *confirm); return ENXIO; } return 0; } static int mxge_update_mac_address(mxge_softc_t *sc) { mxge_cmd_t cmd; uint8_t *addr = sc->mac_addr; int status; cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) | addr[3]); cmd.data1 = ((addr[4] << 8) | (addr[5])); status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); return status; } static int mxge_change_pause(mxge_softc_t *sc, int pause) { mxge_cmd_t cmd; int status; if (pause) status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); else status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); if (status) { device_printf(sc->dev, "Failed to set flow control mode\n"); return ENXIO; } sc->pause = pause; return 0; } static void mxge_change_promisc(mxge_softc_t *sc, int promisc) { mxge_cmd_t cmd; int status; if (mxge_always_promisc) promisc = 1; if (promisc) status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); else status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); if (status) { device_printf(sc->dev, "Failed to set promisc mode\n"); } } static void mxge_set_multicast_list(mxge_softc_t *sc) { mxge_cmd_t cmd; struct ifmultiaddr *ifma; struct ifnet *ifp = sc->ifp; int err; /* This firmware is known to not support multicast */ if (!sc->fw_multicast_support) return; /* Disable multicast filtering while we play with the lists*/ err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); if (err != 0) { device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," " error status: %d\n", err); return; } if (sc->adopted_rx_filter_bug) return; if (ifp->if_flags & IFF_ALLMULTI) /* request to disable multicast filtering, so quit here */ return; /* Flush all the filters */ err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); if (err != 0) { device_printf(sc->dev, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" ", error status: %d\n", err); return; } /* Walk the multicast list, and add each address */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &cmd.data0, 4); bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, &cmd.data1, 2); cmd.data0 = htonl(cmd.data0); cmd.data1 = htonl(cmd.data1); err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); if (err != 0) { device_printf(sc->dev, "Failed " "MXGEFW_JOIN_MULTICAST_GROUP, error status:" "%d\t", err); /* abort, leaving multicast filtering off */ if_maddr_runlock(ifp); return; } } if_maddr_runlock(ifp); /* Enable multicast filtering */ err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); if (err != 0) { device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" ", error status: %d\n", err); } } static int mxge_max_mtu(mxge_softc_t *sc) { mxge_cmd_t cmd; int status; if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) return MXGEFW_MAX_MTU - MXGEFW_PAD; /* try to set nbufs to see if it we can use virtually contiguous jumbos */ cmd.data0 = 0; status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); if (status == 0) return MXGEFW_MAX_MTU - MXGEFW_PAD; /* otherwise, we're limited to MJUMPAGESIZE */ return MJUMPAGESIZE - MXGEFW_PAD; } static int mxge_reset(mxge_softc_t *sc, int interrupts_setup) { struct mxge_slice_state *ss; mxge_rx_done_t *rx_done; volatile uint32_t *irq_claim; mxge_cmd_t cmd; int slice, status; /* try to send a reset command to the card to see if it is alive */ memset(&cmd, 0, sizeof (cmd)); status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); if (status != 0) { device_printf(sc->dev, "failed reset\n"); return ENXIO; } mxge_dummy_rdma(sc, 1); /* set the intrq size */ cmd.data0 = sc->rx_ring_size; status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); /* * Even though we already know how many slices are supported * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES * has magic side effects, and must be called after a reset. * It must be called prior to calling any RSS related cmds, * including assigning an interrupt queue for anything but * slice 0. It must also be called *after* * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by * the firmware to compute offsets. */ if (sc->num_slices > 1) { /* ask the maximum number of slices it supports */ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); if (status != 0) { device_printf(sc->dev, "failed to get number of slices\n"); return status; } /* * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior * to setting up the interrupt queue DMA */ cmd.data0 = sc->num_slices; cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; #ifdef IFNET_BUF_RING cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; #endif status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd); if (status != 0) { device_printf(sc->dev, "failed to set number of slices\n"); return status; } } if (interrupts_setup) { /* Now exchange information about interrupts */ for (slice = 0; slice < sc->num_slices; slice++) { rx_done = &sc->ss[slice].rx_done; memset(rx_done->entry, 0, sc->rx_ring_size); cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); cmd.data2 = slice; status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd); } } status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd); sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); if (status != 0) { device_printf(sc->dev, "failed set interrupt parameters\n"); return status; } *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); /* run a DMA benchmark */ (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; ss->irq_claim = irq_claim + (2 * slice); /* reset mcp/driver shared state back to 0 */ ss->rx_done.idx = 0; ss->rx_done.cnt = 0; ss->tx.req = 0; ss->tx.done = 0; ss->tx.pkt_done = 0; ss->tx.queue_active = 0; ss->tx.activate = 0; ss->tx.deactivate = 0; ss->tx.wake = 0; ss->tx.defrag = 0; ss->tx.stall = 0; ss->rx_big.cnt = 0; ss->rx_small.cnt = 0; ss->lc.lro_bad_csum = 0; ss->lc.lro_queued = 0; ss->lc.lro_flushed = 0; if (ss->fw_stats != NULL) { bzero(ss->fw_stats, sizeof *ss->fw_stats); } } sc->rdma_tags_available = 15; status = mxge_update_mac_address(sc); mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); mxge_change_pause(sc, sc->pause); mxge_set_multicast_list(sc); if (sc->throttle) { cmd.data0 = sc->throttle; if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd)) { device_printf(sc->dev, "can't enable throttle\n"); } } return status; } static int mxge_change_throttle(SYSCTL_HANDLER_ARGS) { mxge_cmd_t cmd; mxge_softc_t *sc; int err; unsigned int throttle; sc = arg1; throttle = sc->throttle; err = sysctl_handle_int(oidp, &throttle, arg2, req); if (err != 0) { return err; } if (throttle == sc->throttle) return 0; if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) return EINVAL; mtx_lock(&sc->driver_mtx); cmd.data0 = throttle; err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); if (err == 0) sc->throttle = throttle; mtx_unlock(&sc->driver_mtx); return err; } static int mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) { mxge_softc_t *sc; unsigned int intr_coal_delay; int err; sc = arg1; intr_coal_delay = sc->intr_coal_delay; err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); if (err != 0) { return err; } if (intr_coal_delay == sc->intr_coal_delay) return 0; if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) return EINVAL; mtx_lock(&sc->driver_mtx); *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); sc->intr_coal_delay = intr_coal_delay; mtx_unlock(&sc->driver_mtx); return err; } static int mxge_change_flow_control(SYSCTL_HANDLER_ARGS) { mxge_softc_t *sc; unsigned int enabled; int err; sc = arg1; enabled = sc->pause; err = sysctl_handle_int(oidp, &enabled, arg2, req); if (err != 0) { return err; } if (enabled == sc->pause) return 0; mtx_lock(&sc->driver_mtx); err = mxge_change_pause(sc, enabled); mtx_unlock(&sc->driver_mtx); return err; } static int mxge_handle_be32(SYSCTL_HANDLER_ARGS) { int err; if (arg1 == NULL) return EFAULT; arg2 = be32toh(*(int *)arg1); arg1 = NULL; err = sysctl_handle_int(oidp, arg1, arg2, req); return err; } static void mxge_rem_sysctls(mxge_softc_t *sc) { struct mxge_slice_state *ss; int slice; if (sc->slice_sysctl_tree == NULL) return; for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; if (ss == NULL || ss->sysctl_tree == NULL) continue; sysctl_ctx_free(&ss->sysctl_ctx); ss->sysctl_tree = NULL; } sysctl_ctx_free(&sc->slice_sysctl_ctx); sc->slice_sysctl_tree = NULL; } static void mxge_add_sysctls(mxge_softc_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; mcp_irq_data_t *fw; struct mxge_slice_state *ss; int slice; char slice_num[8]; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); fw = sc->ss[0].fw_stats; /* random information */ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", CTLFLAG_RD, &sc->product_code_string, 0, "product_code"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", CTLFLAG_RD, &sc->link_width, 0, "tx_boundary"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", CTLFLAG_RD, &sc->tx_boundary, 0, "tx_boundary"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", CTLFLAG_RD, &sc->wc, 0, "write combining PIO?"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", CTLFLAG_RD, &sc->read_write_dma, 0, "DMA concurrent Read/Write speed in MB/s"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", CTLFLAG_RD, &sc->watchdog_resets, 0, "Number of times NIC was reset"); /* performance related tunables */ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", "interrupt coalescing delay in usecs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I", "transmit throttling"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled", CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I", "interrupt coalescing delay in usecs"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", CTLFLAG_RW, &mxge_deassert_wait, 0, "Wait for IRQ line to go low in ihandler"); /* stats block from firmware is in network byte order. Need to swap it */ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up", CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0, mxge_handle_be32, "I", "link up"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available", CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0, mxge_handle_be32, "I", "rdma_tags_available"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I", "dropped_bad_crc32"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I", "dropped_link_error_or_filtered"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0, mxge_handle_be32, "I", "dropped_link_overflow"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I", "dropped_multicast_filtered"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I", "dropped_no_big_buffer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I", "dropped_no_small_buffer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0, mxge_handle_be32, "I", "dropped_overrun"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I", "dropped_unicast_filtered"); /* verbose printing? */ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "verbose", CTLFLAG_RW, &mxge_verbose, 0, "verbose printing"); /* add counters exported for debugging from all slices */ sysctl_ctx_init(&sc->slice_sysctl_ctx); sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; sysctl_ctx_init(&ss->sysctl_ctx); ctx = &ss->sysctl_ctx; children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); sprintf(slice_num, "%d", slice); ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, CTLFLAG_RD, 0, ""); children = SYSCTL_CHILDREN(ss->sysctl_tree); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt", CTLFLAG_RD, &ss->rx_small.cnt, 0, "rx_small_cnt"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt", CTLFLAG_RD, &ss->rx_big.cnt, 0, "rx_small_cnt"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 0, "number of lro merge queues flushed"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 0, "number of bad csums preventing LRO"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 0, "number of frames appended to lro merge" "queues"); #ifndef IFNET_BUF_RING /* only transmit from slice 0 for now */ if (slice > 0) continue; #endif SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req", CTLFLAG_RD, &ss->tx.req, 0, "tx_req"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", CTLFLAG_RD, &ss->tx.done, 0, "tx_done"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done", CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_stall", CTLFLAG_RD, &ss->tx.stall, 0, "tx_stall"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_wake", CTLFLAG_RD, &ss->tx.wake, 0, "tx_wake"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_defrag", CTLFLAG_RD, &ss->tx.defrag, 0, "tx_defrag"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active", CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate", CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate", CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate"); } } /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy backwards one at a time and handle ring wraps */ static inline void mxge_submit_req_backwards(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt) { int idx, starting_slot; starting_slot = tx->req; while (cnt > 1) { cnt--; idx = (starting_slot + cnt) & tx->mask; mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); wmb(); } } /* * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy * at most 32 bytes at a time, so as to avoid involving the software * pio handler in the nic. We re-write the first segment's flags * to mark them valid only after writing the entire chain */ static inline void mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt) { int idx, i; uint32_t *src_ints; volatile uint32_t *dst_ints; mcp_kreq_ether_send_t *srcp; volatile mcp_kreq_ether_send_t *dstp, *dst; uint8_t last_flags; idx = tx->req & tx->mask; last_flags = src->flags; src->flags = 0; wmb(); dst = dstp = &tx->lanai[idx]; srcp = src; if ((idx + cnt) < tx->mask) { for (i = 0; i < (cnt - 1); i += 2) { mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); wmb(); /* force write every 32 bytes */ srcp += 2; dstp += 2; } } else { /* submit all but the first request, and ensure that it is submitted below */ mxge_submit_req_backwards(tx, src, cnt); i = 0; } if (i < cnt) { /* submit the first request */ mxge_pio_copy(dstp, srcp, sizeof(*src)); wmb(); /* barrier before setting valid flag */ } /* re-write the last 32-bits with the valid flags */ src->flags = last_flags; src_ints = (uint32_t *)src; src_ints+=3; dst_ints = (volatile uint32_t *)dst; dst_ints+=3; *dst_ints = *src_ints; tx->req += cnt; wmb(); } static int mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, struct mxge_pkt_info *pi) { struct ether_vlan_header *eh; uint16_t etype; int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); #if IFCAP_TSO6 && defined(INET6) int nxt; #endif eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); pi->ip_off = ETHER_HDR_LEN; } switch (etype) { case ETHERTYPE_IP: /* * ensure ip header is in first mbuf, copy it to a * scratch buffer if not */ pi->ip = (struct ip *)(m->m_data + pi->ip_off); pi->ip6 = NULL; if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), ss->scratch); pi->ip = (struct ip *)(ss->scratch + pi->ip_off); } pi->ip_hlen = pi->ip->ip_hl << 2; if (!tso) return 0; if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + sizeof(struct tcphdr))) { m_copydata(m, 0, pi->ip_off + pi->ip_hlen + sizeof(struct tcphdr), ss->scratch); pi->ip = (struct ip *)(ss->scratch + pi->ip_off); } pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); break; #if IFCAP_TSO6 && defined(INET6) case ETHERTYPE_IPV6: pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), ss->scratch); pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); } nxt = 0; pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); pi->ip_hlen -= pi->ip_off; if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) return EINVAL; if (!tso) return 0; if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) return EINVAL; if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + sizeof(struct tcphdr))) { m_copydata(m, 0, pi->ip_off + pi->ip_hlen + sizeof(struct tcphdr), ss->scratch); pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); } pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); break; #endif default: return EINVAL; } return 0; } #if IFCAP_TSO4 static void mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, int busdma_seg_cnt, struct mxge_pkt_info *pi) { mxge_tx_ring_t *tx; mcp_kreq_ether_send_t *req; bus_dma_segment_t *seg; uint32_t low, high_swapped; int len, seglen, cum_len, cum_len_next; int next_is_first, chop, cnt, rdma_count, small; uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; uint8_t flags, flags_next; static int once; mss = m->m_pkthdr.tso_segsz; /* negative cum_len signifies to the * send loop that we are still in the * header portion of the TSO packet. */ cksum_offset = pi->ip_off + pi->ip_hlen; cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); /* TSO implies checksum offload on this hardware */ if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { /* * If packet has full TCP csum, replace it with pseudo hdr * sum that the NIC expects, otherwise the NIC will emit * packets with bad TCP checksums. */ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); if (pi->ip6) { #if (CSUM_TCP_IPV6 != 0) && defined(INET6) m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; sum = in6_cksum_pseudo(pi->ip6, m->m_pkthdr.len - cksum_offset, IPPROTO_TCP, 0); #endif } else { #ifdef INET m->m_pkthdr.csum_flags |= CSUM_TCP; sum = in_pseudo(pi->ip->ip_src.s_addr, pi->ip->ip_dst.s_addr, htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset))); #endif } m_copyback(m, offsetof(struct tcphdr, th_sum) + cksum_offset, sizeof(sum), (caddr_t)&sum); } flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; /* for TSO, pseudo_hdr_offset holds mss. * The firmware figures out where to put * the checksum by parsing the header. */ pseudo_hdr_offset = htobe16(mss); if (pi->ip6) { /* * for IPv6 TSO, the "checksum offset" is re-purposed * to store the TCP header len */ cksum_offset = (pi->tcp->th_off << 2); } tx = &ss->tx; req = tx->req_list; seg = tx->seg_list; cnt = 0; rdma_count = 0; /* "rdma_count" is the number of RDMAs belonging to the * current packet BEFORE the current send request. For * non-TSO packets, this is equal to "count". * For TSO packets, rdma_count needs to be reset * to 0 after a segment cut. * * The rdma_count field of the send request is * the number of RDMAs of the packet starting at * that request. For TSO send requests with one ore more cuts * in the middle, this is the number of RDMAs starting * after the last cut in the request. All previous * segments before the last cut implicitly have 1 RDMA. * * Since the number of RDMAs is not known beforehand, * it must be filled-in retroactively - after each * segmentation cut or at the end of the entire packet. */ while (busdma_seg_cnt) { /* Break the busdma segment up into pieces*/ low = MXGE_LOWPART_TO_U32(seg->ds_addr); high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); len = seg->ds_len; while (len) { flags_next = flags & ~MXGEFW_FLAGS_FIRST; seglen = len; cum_len_next = cum_len + seglen; (req-rdma_count)->rdma_count = rdma_count + 1; if (__predict_true(cum_len >= 0)) { /* payload */ chop = (cum_len_next > mss); cum_len_next = cum_len_next % mss; next_is_first = (cum_len_next == 0); flags |= chop * MXGEFW_FLAGS_TSO_CHOP; flags_next |= next_is_first * MXGEFW_FLAGS_FIRST; rdma_count |= -(chop | next_is_first); rdma_count += chop & !next_is_first; } else if (cum_len_next >= 0) { /* header ends */ rdma_count = -1; cum_len_next = 0; seglen = -cum_len; small = (mss <= MXGEFW_SEND_SMALL_SIZE); flags_next = MXGEFW_FLAGS_TSO_PLD | MXGEFW_FLAGS_FIRST | (small * MXGEFW_FLAGS_SMALL); } req->addr_high = high_swapped; req->addr_low = htobe32(low); req->pseudo_hdr_offset = pseudo_hdr_offset; req->pad = 0; req->rdma_count = 1; req->length = htobe16(seglen); req->cksum_offset = cksum_offset; req->flags = flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD); low += seglen; len -= seglen; cum_len = cum_len_next; flags = flags_next; req++; cnt++; rdma_count++; if (cksum_offset != 0 && !pi->ip6) { if (__predict_false(cksum_offset > seglen)) cksum_offset -= seglen; else cksum_offset = 0; } if (__predict_false(cnt > tx->max_desc)) goto drop; } busdma_seg_cnt--; seg++; } (req-rdma_count)->rdma_count = rdma_count; do { req--; req->flags |= MXGEFW_FLAGS_TSO_LAST; } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; mxge_submit_req(tx, tx->req_list, cnt); #ifdef IFNET_BUF_RING if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { /* tell the NIC to start polling this slice */ *tx->send_go = 1; tx->queue_active = 1; tx->activate++; wmb(); } #endif return; drop: bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); m_freem(m); ss->oerrors++; if (!once) { printf("tx->max_desc exceeded via TSO!\n"); printf("mss = %d, %ld, %d!\n", mss, (long)seg - (long)tx->seg_list, tx->max_desc); once = 1; } return; } #endif /* IFCAP_TSO4 */ #ifdef MXGE_NEW_VLAN_API /* * We reproduce the software vlan tag insertion from * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" * vlan tag insertion. We need to advertise this in order to have the * vlan interface respect our csum offload flags. */ static struct mbuf * mxge_vlan_tag_insert(struct mbuf *m) { struct ether_vlan_header *evl; M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); if (__predict_false(m == NULL)) return NULL; if (m->m_len < sizeof(*evl)) { m = m_pullup(m, sizeof(*evl)); if (__predict_false(m == NULL)) return NULL; } /* * Transform the Ethernet header into an Ethernet header * with 802.1Q encapsulation. */ evl = mtod(m, struct ether_vlan_header *); bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); evl->evl_encap_proto = htons(ETHERTYPE_VLAN); evl->evl_tag = htons(m->m_pkthdr.ether_vtag); m->m_flags &= ~M_VLANTAG; return m; } #endif /* MXGE_NEW_VLAN_API */ static void mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) { struct mxge_pkt_info pi = {0,0,0,0}; mxge_softc_t *sc; mcp_kreq_ether_send_t *req; bus_dma_segment_t *seg; struct mbuf *m_tmp; struct ifnet *ifp; mxge_tx_ring_t *tx; int cnt, cum_len, err, i, idx, odd_flag; uint16_t pseudo_hdr_offset; uint8_t flags, cksum_offset; sc = ss->sc; ifp = sc->ifp; tx = &ss->tx; #ifdef MXGE_NEW_VLAN_API if (m->m_flags & M_VLANTAG) { m = mxge_vlan_tag_insert(m); if (__predict_false(m == NULL)) goto drop_without_m; } #endif if (m->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { if (mxge_parse_tx(ss, m, &pi)) goto drop; } /* (try to) map the frame for DMA */ idx = tx->req & tx->mask; err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, m, tx->seg_list, &cnt, BUS_DMA_NOWAIT); if (__predict_false(err == EFBIG)) { /* Too many segments in the chain. Try to defrag */ m_tmp = m_defrag(m, M_NOWAIT); if (m_tmp == NULL) { goto drop; } ss->tx.defrag++; m = m_tmp; err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, m, tx->seg_list, &cnt, BUS_DMA_NOWAIT); } if (__predict_false(err != 0)) { device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" " packet len = %d\n", err, m->m_pkthdr.len); goto drop; } bus_dmamap_sync(tx->dmat, tx->info[idx].map, BUS_DMASYNC_PREWRITE); tx->info[idx].m = m; #if IFCAP_TSO4 /* TSO is different enough, we handle it in another routine */ if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { mxge_encap_tso(ss, m, cnt, &pi); return; } #endif req = tx->req_list; cksum_offset = 0; pseudo_hdr_offset = 0; flags = MXGEFW_FLAGS_NO_TSO; /* checksum offloading? */ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { /* ensure ip header is in first mbuf, copy it to a scratch buffer if not */ cksum_offset = pi.ip_off + pi.ip_hlen; pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; pseudo_hdr_offset = htobe16(pseudo_hdr_offset); req->cksum_offset = cksum_offset; flags |= MXGEFW_FLAGS_CKSUM; odd_flag = MXGEFW_FLAGS_ALIGN_ODD; } else { odd_flag = 0; } if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) flags |= MXGEFW_FLAGS_SMALL; /* convert segments into a request list */ cum_len = 0; seg = tx->seg_list; req->flags = MXGEFW_FLAGS_FIRST; for (i = 0; i < cnt; i++) { req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); req->length = htobe16(seg->ds_len); req->cksum_offset = cksum_offset; if (cksum_offset > seg->ds_len) cksum_offset -= seg->ds_len; else cksum_offset = 0; req->pseudo_hdr_offset = pseudo_hdr_offset; req->pad = 0; /* complete solid 16-byte block */ req->rdma_count = 1; req->flags |= flags | ((cum_len & 1) * odd_flag); cum_len += seg->ds_len; seg++; req++; req->flags = 0; } req--; /* pad runts to 60 bytes */ if (cum_len < 60) { req++; req->addr_low = htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); req->length = htobe16(60 - cum_len); req->cksum_offset = 0; req->pseudo_hdr_offset = pseudo_hdr_offset; req->pad = 0; /* complete solid 16-byte block */ req->rdma_count = 1; req->flags |= flags | ((cum_len & 1) * odd_flag); cnt++; } tx->req_list[0].rdma_count = cnt; #if 0 /* print what the firmware will see */ for (i = 0; i < cnt; i++) { printf("%d: addr: 0x%x 0x%x len:%d pso%d," "cso:%d, flags:0x%x, rdma:%d\n", i, (int)ntohl(tx->req_list[i].addr_high), (int)ntohl(tx->req_list[i].addr_low), (int)ntohs(tx->req_list[i].length), (int)ntohs(tx->req_list[i].pseudo_hdr_offset), tx->req_list[i].cksum_offset, tx->req_list[i].flags, tx->req_list[i].rdma_count); } printf("--------------\n"); #endif tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; mxge_submit_req(tx, tx->req_list, cnt); #ifdef IFNET_BUF_RING if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { /* tell the NIC to start polling this slice */ *tx->send_go = 1; tx->queue_active = 1; tx->activate++; wmb(); } #endif return; drop: m_freem(m); drop_without_m: ss->oerrors++; return; } #ifdef IFNET_BUF_RING static void mxge_qflush(struct ifnet *ifp) { mxge_softc_t *sc = ifp->if_softc; mxge_tx_ring_t *tx; struct mbuf *m; int slice; for (slice = 0; slice < sc->num_slices; slice++) { tx = &sc->ss[slice].tx; mtx_lock(&tx->mtx); while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) m_freem(m); mtx_unlock(&tx->mtx); } if_qflush(ifp); } static inline void mxge_start_locked(struct mxge_slice_state *ss) { mxge_softc_t *sc; struct mbuf *m; struct ifnet *ifp; mxge_tx_ring_t *tx; sc = ss->sc; ifp = sc->ifp; tx = &ss->tx; while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { m = drbr_dequeue(ifp, tx->br); if (m == NULL) { return; } /* let BPF see it */ BPF_MTAP(ifp, m); /* give it to the nic */ mxge_encap(ss, m); } /* ran out of transmit slots */ if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) && (!drbr_empty(ifp, tx->br))) { ss->if_drv_flags |= IFF_DRV_OACTIVE; tx->stall++; } } static int mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) { mxge_softc_t *sc; struct ifnet *ifp; mxge_tx_ring_t *tx; int err; sc = ss->sc; ifp = sc->ifp; tx = &ss->tx; if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { err = drbr_enqueue(ifp, tx->br, m); return (err); } if (!drbr_needs_enqueue(ifp, tx->br) && ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { /* let BPF see it */ BPF_MTAP(ifp, m); /* give it to the nic */ mxge_encap(ss, m); } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { return (err); } if (!drbr_empty(ifp, tx->br)) mxge_start_locked(ss); return (0); } static int mxge_transmit(struct ifnet *ifp, struct mbuf *m) { mxge_softc_t *sc = ifp->if_softc; struct mxge_slice_state *ss; mxge_tx_ring_t *tx; int err = 0; int slice; slice = m->m_pkthdr.flowid; slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ ss = &sc->ss[slice]; tx = &ss->tx; if (mtx_trylock(&tx->mtx)) { err = mxge_transmit_locked(ss, m); mtx_unlock(&tx->mtx); } else { err = drbr_enqueue(ifp, tx->br, m); } return (err); } #else static inline void mxge_start_locked(struct mxge_slice_state *ss) { mxge_softc_t *sc; struct mbuf *m; struct ifnet *ifp; mxge_tx_ring_t *tx; sc = ss->sc; ifp = sc->ifp; tx = &ss->tx; while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { return; } /* let BPF see it */ BPF_MTAP(ifp, m); /* give it to the nic */ mxge_encap(ss, m); } /* ran out of transmit slots */ if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; tx->stall++; } } #endif static void mxge_start(struct ifnet *ifp) { mxge_softc_t *sc = ifp->if_softc; struct mxge_slice_state *ss; /* only use the first slice for now */ ss = &sc->ss[0]; mtx_lock(&ss->tx.mtx); mxge_start_locked(ss); mtx_unlock(&ss->tx.mtx); } /* * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy * at most 32 bytes at a time, so as to avoid involving the software * pio handler in the nic. We re-write the first segment's low * DMA address to mark it valid only after we write the entire chunk * in a burst */ static inline void mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src) { uint32_t low; low = src->addr_low; src->addr_low = 0xffffffff; mxge_pio_copy(dst, src, 4 * sizeof (*src)); wmb(); mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); wmb(); src->addr_low = low; dst->addr_low = low; wmb(); } static int mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) { bus_dma_segment_t seg; struct mbuf *m; mxge_rx_ring_t *rx = &ss->rx_small; int cnt, err; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { rx->alloc_fail++; err = ENOBUFS; goto done; } m->m_len = MHLEN; err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, &seg, &cnt, BUS_DMA_NOWAIT); if (err != 0) { m_free(m); goto done; } rx->info[idx].m = m; rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); done: if ((idx & 7) == 7) mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); return err; } static int mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) { bus_dma_segment_t seg[3]; struct mbuf *m; mxge_rx_ring_t *rx = &ss->rx_big; int cnt, err, i; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); if (m == NULL) { rx->alloc_fail++; err = ENOBUFS; goto done; } m->m_len = rx->mlen; err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, seg, &cnt, BUS_DMA_NOWAIT); if (err != 0) { m_free(m); goto done; } rx->info[idx].m = m; rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); #if MXGE_VIRT_JUMBOS for (i = 1; i < cnt; i++) { rx->shadow[idx + i].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); rx->shadow[idx + i].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); } #endif done: for (i = 0; i < rx->nbufs; i++) { if ((idx & 7) == 7) { mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); } idx++; } return err; } #ifdef INET6 static uint16_t mxge_csum_generic(uint16_t *raw, int len) { uint32_t csum; csum = 0; while (len > 0) { csum += *raw; raw++; len -= 2; } csum = (csum >> 16) + (csum & 0xffff); csum = (csum >> 16) + (csum & 0xffff); return (uint16_t)csum; } static inline uint16_t mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) { uint32_t partial; int nxt, cksum_offset; struct ip6_hdr *ip6 = p; uint16_t c; nxt = ip6->ip6_nxt; cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, IPPROTO_IPV6, &nxt); if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) return (1); } /* * IPv6 headers do not contain a checksum, and hence * do not checksum to zero, so they don't "fall out" * of the partial checksum calculation like IPv4 * headers do. We need to fix the partial checksum by * subtracting the checksum of the IPv6 header. */ partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - ETHER_HDR_LEN); csum += ~partial; csum += (csum < ~partial); csum = (csum >> 16) + (csum & 0xFFFF); csum = (csum >> 16) + (csum & 0xFFFF); c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, csum); c ^= 0xffff; return (c); } #endif /* INET6 */ /* * Myri10GE hardware checksums are not valid if the sender * padded the frame with non-zero padding. This is because * the firmware just does a simple 16-bit 1s complement * checksum across the entire frame, excluding the first 14 * bytes. It is best to simply to check the checksum and * tell the stack about it only if the checksum is good */ static inline uint16_t mxge_rx_csum(struct mbuf *m, int csum) { struct ether_header *eh; #ifdef INET struct ip *ip; #endif #if defined(INET) || defined(INET6) int cap = m->m_pkthdr.rcvif->if_capenable; #endif uint16_t c, etype; eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); switch (etype) { #ifdef INET case ETHERTYPE_IP: if ((cap & IFCAP_RXCSUM) == 0) return (1); ip = (struct ip *)(eh + 1); if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) return (1); c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(ntohs(csum) + ntohs(ip->ip_len) - (ip->ip_hl << 2) + ip->ip_p)); c ^= 0xffff; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: if ((cap & IFCAP_RXCSUM_IPV6) == 0) return (1); c = mxge_rx_csum6((eh + 1), m, csum); break; #endif default: c = 1; } return (c); } static void mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) { struct ether_vlan_header *evl; struct ether_header *eh; uint32_t partial; evl = mtod(m, struct ether_vlan_header *); eh = mtod(m, struct ether_header *); /* * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes * after what the firmware thought was the end of the ethernet * header. */ /* put checksum into host byte order */ *csum = ntohs(*csum); partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); (*csum) += ~partial; (*csum) += ((*csum) < ~partial); (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); /* restore checksum to network byte order; later consumers expect this */ *csum = htons(*csum); /* save the tag */ #ifdef MXGE_NEW_VLAN_API m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); #else { struct m_tag *mtag; mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), M_NOWAIT); if (mtag == NULL) return; VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); m_tag_prepend(m, mtag); } #endif m->m_flags |= M_VLANTAG; /* * Remove the 802.1q header by copying the Ethernet * addresses over it and adjusting the beginning of * the data in the mbuf. The encapsulated Ethernet * type field is already in place. */ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); } static inline void mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum, int lro) { mxge_softc_t *sc; struct ifnet *ifp; struct mbuf *m; struct ether_header *eh; mxge_rx_ring_t *rx; bus_dmamap_t old_map; int idx; sc = ss->sc; ifp = sc->ifp; rx = &ss->rx_big; idx = rx->cnt & rx->mask; rx->cnt += rx->nbufs; /* save a pointer to the received mbuf */ m = rx->info[idx].m; /* try to replace the received mbuf */ if (mxge_get_buf_big(ss, rx->extra_map, idx)) { /* drop the frame -- the old mbuf is re-cycled */ ifp->if_ierrors++; return; } /* unmap the received buffer */ old_map = rx->info[idx].map; bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rx->dmat, old_map); /* swap the bus_dmamap_t's */ rx->info[idx].map = rx->extra_map; rx->extra_map = old_map; /* mcp implicitly skips 1st 2 bytes so that packet is properly * aligned */ m->m_data += MXGEFW_PAD; m->m_pkthdr.rcvif = ifp; m->m_len = m->m_pkthdr.len = len; ss->ipackets++; eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { mxge_vlan_tag_remove(m, &csum); } /* if the checksum is valid, mark it in the mbuf header */ if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && (0 == mxge_rx_csum(m, csum))) { /* Tell the stack that the checksum is good */ m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; #if defined(INET) || defined (INET6) if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) return; #endif } /* flowid only valid if RSS hashing is enabled */ if (sc->num_slices > 1) { m->m_pkthdr.flowid = (ss - sc->ss); m->m_flags |= M_FLOWID; } /* pass the frame up the stack */ (*ifp->if_input)(ifp, m); } static inline void mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum, int lro) { mxge_softc_t *sc; struct ifnet *ifp; struct ether_header *eh; struct mbuf *m; mxge_rx_ring_t *rx; bus_dmamap_t old_map; int idx; sc = ss->sc; ifp = sc->ifp; rx = &ss->rx_small; idx = rx->cnt & rx->mask; rx->cnt++; /* save a pointer to the received mbuf */ m = rx->info[idx].m; /* try to replace the received mbuf */ if (mxge_get_buf_small(ss, rx->extra_map, idx)) { /* drop the frame -- the old mbuf is re-cycled */ ifp->if_ierrors++; return; } /* unmap the received buffer */ old_map = rx->info[idx].map; bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rx->dmat, old_map); /* swap the bus_dmamap_t's */ rx->info[idx].map = rx->extra_map; rx->extra_map = old_map; /* mcp implicitly skips 1st 2 bytes so that packet is properly * aligned */ m->m_data += MXGEFW_PAD; m->m_pkthdr.rcvif = ifp; m->m_len = m->m_pkthdr.len = len; ss->ipackets++; eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { mxge_vlan_tag_remove(m, &csum); } /* if the checksum is valid, mark it in the mbuf header */ if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && (0 == mxge_rx_csum(m, csum))) { /* Tell the stack that the checksum is good */ m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; #if defined(INET) || defined (INET6) if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) return; #endif } /* flowid only valid if RSS hashing is enabled */ if (sc->num_slices > 1) { m->m_pkthdr.flowid = (ss - sc->ss); m->m_flags |= M_FLOWID; } /* pass the frame up the stack */ (*ifp->if_input)(ifp, m); } static inline void mxge_clean_rx_done(struct mxge_slice_state *ss) { mxge_rx_done_t *rx_done = &ss->rx_done; int limit = 0; uint16_t length; uint16_t checksum; int lro; lro = ss->sc->ifp->if_capenable & IFCAP_LRO; while (rx_done->entry[rx_done->idx].length != 0) { length = ntohs(rx_done->entry[rx_done->idx].length); rx_done->entry[rx_done->idx].length = 0; checksum = rx_done->entry[rx_done->idx].checksum; if (length <= (MHLEN - MXGEFW_PAD)) mxge_rx_done_small(ss, length, checksum, lro); else mxge_rx_done_big(ss, length, checksum, lro); rx_done->cnt++; rx_done->idx = rx_done->cnt & rx_done->mask; /* limit potential for livelock */ if (__predict_false(++limit > rx_done->mask / 2)) break; } #if defined(INET) || defined (INET6) while (!SLIST_EMPTY(&ss->lc.lro_active)) { struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active); SLIST_REMOVE_HEAD(&ss->lc.lro_active, next); tcp_lro_flush(&ss->lc, lro); } #endif } static inline void mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) { struct ifnet *ifp; mxge_tx_ring_t *tx; struct mbuf *m; bus_dmamap_t map; int idx; int *flags; tx = &ss->tx; ifp = ss->sc->ifp; while (tx->pkt_done != mcp_idx) { idx = tx->done & tx->mask; tx->done++; m = tx->info[idx].m; /* mbuf and DMA map only attached to the first segment per-mbuf */ if (m != NULL) { ss->obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) ss->omcasts++; ss->opackets++; tx->info[idx].m = NULL; map = tx->info[idx].map; bus_dmamap_unload(tx->dmat, map); m_freem(m); } if (tx->info[idx].flag) { tx->info[idx].flag = 0; tx->pkt_done++; } } /* If we have space, clear IFF_OACTIVE to tell the stack that its OK to send packets */ #ifdef IFNET_BUF_RING flags = &ss->if_drv_flags; #else flags = &ifp->if_drv_flags; #endif mtx_lock(&ss->tx.mtx); if ((*flags) & IFF_DRV_OACTIVE && tx->req - tx->done < (tx->mask + 1)/4) { *(flags) &= ~IFF_DRV_OACTIVE; ss->tx.wake++; mxge_start_locked(ss); } #ifdef IFNET_BUF_RING if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { /* let the NIC stop polling this queue, since there * are no more transmits pending */ if (tx->req == tx->done) { *tx->send_stop = 1; tx->queue_active = 0; tx->deactivate++; wmb(); } } #endif mtx_unlock(&ss->tx.mtx); } static struct mxge_media_type mxge_xfp_media_types[] = { {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, {0, (1 << 5), "10GBASE-ER"}, {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, {0, (1 << 3), "10GBASE-SW"}, {0, (1 << 2), "10GBASE-LW"}, {0, (1 << 1), "10GBASE-EW"}, {0, (1 << 0), "Reserved"} }; static struct mxge_media_type mxge_sfp_media_types[] = { {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, {0, (1 << 7), "Reserved"}, {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} }; static void mxge_media_set(mxge_softc_t *sc, int media_type) { ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 0, NULL); ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); sc->current_media = media_type; sc->media.ifm_media = sc->media.ifm_cur->ifm_media; } static void mxge_media_init(mxge_softc_t *sc) { char *ptr; int i; ifmedia_removeall(&sc->media); mxge_media_set(sc, IFM_AUTO); /* * parse the product code to deterimine the interface type * (CX4, XFP, Quad Ribbon Fiber) by looking at the character * after the 3rd dash in the driver's cached copy of the * EEPROM's product code string. */ ptr = sc->product_code_string; if (ptr == NULL) { device_printf(sc->dev, "Missing product code\n"); return; } for (i = 0; i < 3; i++, ptr++) { ptr = strchr(ptr, '-'); if (ptr == NULL) { device_printf(sc->dev, "only %d dashes in PC?!?\n", i); return; } } if (*ptr == 'C' || *(ptr +1) == 'C') { /* -C is CX4 */ sc->connector = MXGE_CX4; mxge_media_set(sc, IFM_10G_CX4); } else if (*ptr == 'Q') { /* -Q is Quad Ribbon Fiber */ sc->connector = MXGE_QRF; device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); /* FreeBSD has no media type for Quad ribbon fiber */ } else if (*ptr == 'R') { /* -R is XFP */ sc->connector = MXGE_XFP; } else if (*ptr == 'S' || *(ptr +1) == 'S') { /* -S or -2S is SFP+ */ sc->connector = MXGE_SFP; } else { device_printf(sc->dev, "Unknown media type: %c\n", *ptr); } } /* * Determine the media type for a NIC. Some XFPs will identify * themselves only when their link is up, so this is initiated via a * link up interrupt. However, this can potentially take up to * several milliseconds, so it is run via the watchdog routine, rather * than in the interrupt handler itself. */ static void mxge_media_probe(mxge_softc_t *sc) { mxge_cmd_t cmd; char *cage_type; struct mxge_media_type *mxge_media_types = NULL; int i, err, ms, mxge_media_type_entries; uint32_t byte; sc->need_media_probe = 0; if (sc->connector == MXGE_XFP) { /* -R is XFP */ mxge_media_types = mxge_xfp_media_types; mxge_media_type_entries = sizeof (mxge_xfp_media_types) / sizeof (mxge_xfp_media_types[0]); byte = MXGE_XFP_COMPLIANCE_BYTE; cage_type = "XFP"; } else if (sc->connector == MXGE_SFP) { /* -S or -2S is SFP+ */ mxge_media_types = mxge_sfp_media_types; mxge_media_type_entries = sizeof (mxge_sfp_media_types) / sizeof (mxge_sfp_media_types[0]); cage_type = "SFP+"; byte = 3; } else { /* nothing to do; media type cannot change */ return; } /* * At this point we know the NIC has an XFP cage, so now we * try to determine what is in the cage by using the * firmware's XFP I2C commands to read the XFP 10GbE compilance * register. We read just one byte, which may take over * a millisecond */ cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ cmd.data1 = byte; err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { device_printf(sc->dev, "failed to read XFP\n"); } if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); } if (err != MXGEFW_CMD_OK) { return; } /* now we wait for the data to be cached */ cmd.data0 = byte; err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { DELAY(1000); cmd.data0 = byte; err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); } if (err != MXGEFW_CMD_OK) { device_printf(sc->dev, "failed to read %s (%d, %dms)\n", cage_type, err, ms); return; } if (cmd.data0 == mxge_media_types[0].bitmask) { if (mxge_verbose) device_printf(sc->dev, "%s:%s\n", cage_type, mxge_media_types[0].name); if (sc->current_media != mxge_media_types[0].flag) { mxge_media_init(sc); mxge_media_set(sc, mxge_media_types[0].flag); } return; } for (i = 1; i < mxge_media_type_entries; i++) { if (cmd.data0 & mxge_media_types[i].bitmask) { if (mxge_verbose) device_printf(sc->dev, "%s:%s\n", cage_type, mxge_media_types[i].name); if (sc->current_media != mxge_media_types[i].flag) { mxge_media_init(sc); mxge_media_set(sc, mxge_media_types[i].flag); } return; } } if (mxge_verbose) device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, cmd.data0); return; } static void mxge_intr(void *arg) { struct mxge_slice_state *ss = arg; mxge_softc_t *sc = ss->sc; mcp_irq_data_t *stats = ss->fw_stats; mxge_tx_ring_t *tx = &ss->tx; mxge_rx_done_t *rx_done = &ss->rx_done; uint32_t send_done_count; uint8_t valid; #ifndef IFNET_BUF_RING /* an interrupt on a non-zero slice is implicitly valid since MSI-X irqs are not shared */ if (ss != sc->ss) { mxge_clean_rx_done(ss); *ss->irq_claim = be32toh(3); return; } #endif /* make sure the DMA has finished */ if (!stats->valid) { return; } valid = stats->valid; if (sc->legacy_irq) { /* lower legacy IRQ */ *sc->irq_deassert = 0; if (!mxge_deassert_wait) /* don't wait for conf. that irq is low */ stats->valid = 0; } else { stats->valid = 0; } /* loop while waiting for legacy irq deassertion */ do { /* check for transmit completes and receives */ send_done_count = be32toh(stats->send_done_count); while ((send_done_count != tx->pkt_done) || (rx_done->entry[rx_done->idx].length != 0)) { if (send_done_count != tx->pkt_done) mxge_tx_done(ss, (int)send_done_count); mxge_clean_rx_done(ss); send_done_count = be32toh(stats->send_done_count); } if (sc->legacy_irq && mxge_deassert_wait) wmb(); } while (*((volatile uint8_t *) &stats->valid)); /* fw link & error stats meaningful only on the first slice */ if (__predict_false((ss == sc->ss) && stats->stats_updated)) { if (sc->link_state != stats->link_up) { sc->link_state = stats->link_up; if (sc->link_state) { if_link_state_change(sc->ifp, LINK_STATE_UP); - if_initbaudrate(sc->ifp, IF_Gbps(10)); if (mxge_verbose) device_printf(sc->dev, "link up\n"); } else { if_link_state_change(sc->ifp, LINK_STATE_DOWN); - sc->ifp->if_baudrate = 0; if (mxge_verbose) device_printf(sc->dev, "link down\n"); } sc->need_media_probe = 1; } if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) { sc->rdma_tags_available = be32toh(stats->rdma_tags_available); device_printf(sc->dev, "RDMA timed out! %d tags " "left\n", sc->rdma_tags_available); } if (stats->link_down) { sc->down_cnt += stats->link_down; sc->link_state = 0; if_link_state_change(sc->ifp, LINK_STATE_DOWN); } } /* check to see if we have rx token to pass back */ if (valid & 0x1) *ss->irq_claim = be32toh(3); *(ss->irq_claim + 1) = be32toh(3); } static void mxge_init(void *arg) { mxge_softc_t *sc = arg; struct ifnet *ifp = sc->ifp; mtx_lock(&sc->driver_mtx); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) (void) mxge_open(sc); mtx_unlock(&sc->driver_mtx); } static void mxge_free_slice_mbufs(struct mxge_slice_state *ss) { int i; #if defined(INET) || defined(INET6) tcp_lro_free(&ss->lc); #endif for (i = 0; i <= ss->rx_big.mask; i++) { if (ss->rx_big.info[i].m == NULL) continue; bus_dmamap_unload(ss->rx_big.dmat, ss->rx_big.info[i].map); m_freem(ss->rx_big.info[i].m); ss->rx_big.info[i].m = NULL; } for (i = 0; i <= ss->rx_small.mask; i++) { if (ss->rx_small.info[i].m == NULL) continue; bus_dmamap_unload(ss->rx_small.dmat, ss->rx_small.info[i].map); m_freem(ss->rx_small.info[i].m); ss->rx_small.info[i].m = NULL; } /* transmit ring used only on the first slice */ if (ss->tx.info == NULL) return; for (i = 0; i <= ss->tx.mask; i++) { ss->tx.info[i].flag = 0; if (ss->tx.info[i].m == NULL) continue; bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map); m_freem(ss->tx.info[i].m); ss->tx.info[i].m = NULL; } } static void mxge_free_mbufs(mxge_softc_t *sc) { int slice; for (slice = 0; slice < sc->num_slices; slice++) mxge_free_slice_mbufs(&sc->ss[slice]); } static void mxge_free_slice_rings(struct mxge_slice_state *ss) { int i; if (ss->rx_done.entry != NULL) mxge_dma_free(&ss->rx_done.dma); ss->rx_done.entry = NULL; if (ss->tx.req_bytes != NULL) free(ss->tx.req_bytes, M_DEVBUF); ss->tx.req_bytes = NULL; if (ss->tx.seg_list != NULL) free(ss->tx.seg_list, M_DEVBUF); ss->tx.seg_list = NULL; if (ss->rx_small.shadow != NULL) free(ss->rx_small.shadow, M_DEVBUF); ss->rx_small.shadow = NULL; if (ss->rx_big.shadow != NULL) free(ss->rx_big.shadow, M_DEVBUF); ss->rx_big.shadow = NULL; if (ss->tx.info != NULL) { if (ss->tx.dmat != NULL) { for (i = 0; i <= ss->tx.mask; i++) { bus_dmamap_destroy(ss->tx.dmat, ss->tx.info[i].map); } bus_dma_tag_destroy(ss->tx.dmat); } free(ss->tx.info, M_DEVBUF); } ss->tx.info = NULL; if (ss->rx_small.info != NULL) { if (ss->rx_small.dmat != NULL) { for (i = 0; i <= ss->rx_small.mask; i++) { bus_dmamap_destroy(ss->rx_small.dmat, ss->rx_small.info[i].map); } bus_dmamap_destroy(ss->rx_small.dmat, ss->rx_small.extra_map); bus_dma_tag_destroy(ss->rx_small.dmat); } free(ss->rx_small.info, M_DEVBUF); } ss->rx_small.info = NULL; if (ss->rx_big.info != NULL) { if (ss->rx_big.dmat != NULL) { for (i = 0; i <= ss->rx_big.mask; i++) { bus_dmamap_destroy(ss->rx_big.dmat, ss->rx_big.info[i].map); } bus_dmamap_destroy(ss->rx_big.dmat, ss->rx_big.extra_map); bus_dma_tag_destroy(ss->rx_big.dmat); } free(ss->rx_big.info, M_DEVBUF); } ss->rx_big.info = NULL; } static void mxge_free_rings(mxge_softc_t *sc) { int slice; for (slice = 0; slice < sc->num_slices; slice++) mxge_free_slice_rings(&sc->ss[slice]); } static int mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, int tx_ring_entries) { mxge_softc_t *sc = ss->sc; size_t bytes; int err, i; /* allocate per-slice receive resources */ ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; ss->rx_done.mask = (2 * rx_ring_entries) - 1; /* allocate the rx shadow rings */ bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); /* allocate the rx host info rings */ bytes = rx_ring_entries * sizeof (*ss->rx_small.info); ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); bytes = rx_ring_entries * sizeof (*ss->rx_big.info); ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); /* allocate the rx busdma resources */ err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 1, /* alignment */ 4096, /* boundary */ BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ MHLEN, /* maxsize */ 1, /* num segs */ MHLEN, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, NULL, /* lock */ &ss->rx_small.dmat); /* tag */ if (err != 0) { device_printf(sc->dev, "Err %d allocating rx_small dmat\n", err); return err; } err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 1, /* alignment */ #if MXGE_VIRT_JUMBOS 4096, /* boundary */ #else 0, /* boundary */ #endif BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ 3*4096, /* maxsize */ #if MXGE_VIRT_JUMBOS 3, /* num segs */ 4096, /* maxsegsize*/ #else 1, /* num segs */ MJUM9BYTES, /* maxsegsize*/ #endif BUS_DMA_ALLOCNOW, /* flags */ NULL, NULL, /* lock */ &ss->rx_big.dmat); /* tag */ if (err != 0) { device_printf(sc->dev, "Err %d allocating rx_big dmat\n", err); return err; } for (i = 0; i <= ss->rx_small.mask; i++) { err = bus_dmamap_create(ss->rx_small.dmat, 0, &ss->rx_small.info[i].map); if (err != 0) { device_printf(sc->dev, "Err %d rx_small dmamap\n", err); return err; } } err = bus_dmamap_create(ss->rx_small.dmat, 0, &ss->rx_small.extra_map); if (err != 0) { device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); return err; } for (i = 0; i <= ss->rx_big.mask; i++) { err = bus_dmamap_create(ss->rx_big.dmat, 0, &ss->rx_big.info[i].map); if (err != 0) { device_printf(sc->dev, "Err %d rx_big dmamap\n", err); return err; } } err = bus_dmamap_create(ss->rx_big.dmat, 0, &ss->rx_big.extra_map); if (err != 0) { device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err); return err; } /* now allocate TX resources */ #ifndef IFNET_BUF_RING /* only use a single TX ring for now */ if (ss != ss->sc->ss) return 0; #endif ss->tx.mask = tx_ring_entries - 1; ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); /* allocate the tx request copy block */ bytes = 8 + sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); /* ensure req_list entries are aligned to 8 bytes */ ss->tx.req_list = (mcp_kreq_ether_send_t *) ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); /* allocate the tx busdma segment list */ bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; ss->tx.seg_list = (bus_dma_segment_t *) malloc(bytes, M_DEVBUF, M_WAITOK); /* allocate the tx host info ring */ bytes = tx_ring_entries * sizeof (*ss->tx.info); ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); /* allocate the tx busdma resources */ err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 1, /* alignment */ sc->tx_boundary, /* boundary */ BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ 65536 + 256, /* maxsize */ ss->tx.max_desc - 2, /* num segs */ sc->tx_boundary, /* maxsegsz */ BUS_DMA_ALLOCNOW, /* flags */ NULL, NULL, /* lock */ &ss->tx.dmat); /* tag */ if (err != 0) { device_printf(sc->dev, "Err %d allocating tx dmat\n", err); return err; } /* now use these tags to setup dmamaps for each slot in the ring */ for (i = 0; i <= ss->tx.mask; i++) { err = bus_dmamap_create(ss->tx.dmat, 0, &ss->tx.info[i].map); if (err != 0) { device_printf(sc->dev, "Err %d tx dmamap\n", err); return err; } } return 0; } static int mxge_alloc_rings(mxge_softc_t *sc) { mxge_cmd_t cmd; int tx_ring_size; int tx_ring_entries, rx_ring_entries; int err, slice; /* get ring sizes */ err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); tx_ring_size = cmd.data0; if (err != 0) { device_printf(sc->dev, "Cannot determine tx ring sizes\n"); goto abort; } tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; IFQ_SET_READY(&sc->ifp->if_snd); for (slice = 0; slice < sc->num_slices; slice++) { err = mxge_alloc_slice_rings(&sc->ss[slice], rx_ring_entries, tx_ring_entries); if (err != 0) goto abort; } return 0; abort: mxge_free_rings(sc); return err; } static void mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) { int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; if (bufsize < MCLBYTES) { /* easy, everything fits in a single buffer */ *big_buf_size = MCLBYTES; *cl_size = MCLBYTES; *nbufs = 1; return; } if (bufsize < MJUMPAGESIZE) { /* still easy, everything still fits in a single buffer */ *big_buf_size = MJUMPAGESIZE; *cl_size = MJUMPAGESIZE; *nbufs = 1; return; } #if MXGE_VIRT_JUMBOS /* now we need to use virtually contiguous buffers */ *cl_size = MJUM9BYTES; *big_buf_size = 4096; *nbufs = mtu / 4096 + 1; /* needs to be a power of two, so round up */ if (*nbufs == 3) *nbufs = 4; #else *cl_size = MJUM9BYTES; *big_buf_size = MJUM9BYTES; *nbufs = 1; #endif } static int mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) { mxge_softc_t *sc; mxge_cmd_t cmd; bus_dmamap_t map; int err, i, slice; sc = ss->sc; slice = ss - sc->ss; #if defined(INET) || defined(INET6) (void)tcp_lro_init(&ss->lc); #endif ss->lc.ifp = sc->ifp; /* get the lanai pointers to the send and receive rings */ err = 0; #ifndef IFNET_BUF_RING /* We currently only send from the first slice */ if (slice == 0) { #endif cmd.data0 = slice; err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); ss->tx.send_go = (volatile uint32_t *) (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); ss->tx.send_stop = (volatile uint32_t *) (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); #ifndef IFNET_BUF_RING } #endif cmd.data0 = slice; err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); ss->rx_small.lanai = (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); cmd.data0 = slice; err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); ss->rx_big.lanai = (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); if (err != 0) { device_printf(sc->dev, "failed to get ring sizes or locations\n"); return EIO; } /* stock receive rings */ for (i = 0; i <= ss->rx_small.mask; i++) { map = ss->rx_small.info[i].map; err = mxge_get_buf_small(ss, map, i); if (err) { device_printf(sc->dev, "alloced %d/%d smalls\n", i, ss->rx_small.mask + 1); return ENOMEM; } } for (i = 0; i <= ss->rx_big.mask; i++) { ss->rx_big.shadow[i].addr_low = 0xffffffff; ss->rx_big.shadow[i].addr_high = 0xffffffff; } ss->rx_big.nbufs = nbufs; ss->rx_big.cl_size = cl_size; ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { map = ss->rx_big.info[i].map; err = mxge_get_buf_big(ss, map, i); if (err) { device_printf(sc->dev, "alloced %d/%d bigs\n", i, ss->rx_big.mask + 1); return ENOMEM; } } return 0; } static int mxge_open(mxge_softc_t *sc) { mxge_cmd_t cmd; int err, big_bytes, nbufs, slice, cl_size, i; bus_addr_t bus; volatile uint8_t *itable; struct mxge_slice_state *ss; /* Copy the MAC address in case it was overridden */ bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); err = mxge_reset(sc, 1); if (err != 0) { device_printf(sc->dev, "failed to reset\n"); return EIO; } if (sc->num_slices > 1) { /* setup the indirection table */ cmd.data0 = sc->num_slices; err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); if (err != 0) { device_printf(sc->dev, "failed to setup rss tables\n"); return err; } /* just enable an identity mapping */ itable = sc->sram + cmd.data0; for (i = 0; i < sc->num_slices; i++) itable[i] = (uint8_t)i; cmd.data0 = 1; cmd.data1 = mxge_rss_hash_type; err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); if (err != 0) { device_printf(sc->dev, "failed to enable slices\n"); return err; } } mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); cmd.data0 = nbufs; err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); /* error is only meaningful if we're trying to set MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ if (err && nbufs > 1) { device_printf(sc->dev, "Failed to set alway-use-n to %d\n", nbufs); return EIO; } /* Give the firmware the mtu and the big and small buffer sizes. The firmware wants the big buf size to be a power of two. Luckily, FreeBSD's clusters are powers of two */ cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); cmd.data0 = MHLEN - MXGEFW_PAD; err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); cmd.data0 = big_bytes; err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); if (err != 0) { device_printf(sc->dev, "failed to setup params\n"); goto abort; } /* Now give him the pointer to the stats block */ for (slice = 0; #ifdef IFNET_BUF_RING slice < sc->num_slices; #else slice < 1; #endif slice++) { ss = &sc->ss[slice]; cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); cmd.data2 = sizeof(struct mcp_irq_data); cmd.data2 |= (slice << 16); err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); } if (err != 0) { bus = sc->ss->fw_stats_dma.bus_addr; bus += offsetof(struct mcp_irq_data, send_done_count); cmd.data0 = MXGE_LOWPART_TO_U32(bus); cmd.data1 = MXGE_HIGHPART_TO_U32(bus); err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd); /* Firmware cannot support multicast without STATS_DMA_V2 */ sc->fw_multicast_support = 0; } else { sc->fw_multicast_support = 1; } if (err != 0) { device_printf(sc->dev, "failed to setup params\n"); goto abort; } for (slice = 0; slice < sc->num_slices; slice++) { err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); if (err != 0) { device_printf(sc->dev, "couldn't open slice %d\n", slice); goto abort; } } /* Finally, start the firmware running */ err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); if (err) { device_printf(sc->dev, "Couldn't bring up link\n"); goto abort; } #ifdef IFNET_BUF_RING for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; ss->if_drv_flags |= IFF_DRV_RUNNING; ss->if_drv_flags &= ~IFF_DRV_OACTIVE; } #endif sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return 0; abort: mxge_free_mbufs(sc); return err; } static int mxge_close(mxge_softc_t *sc, int down) { mxge_cmd_t cmd; int err, old_down_cnt; #ifdef IFNET_BUF_RING struct mxge_slice_state *ss; int slice; #endif #ifdef IFNET_BUF_RING for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; ss->if_drv_flags &= ~IFF_DRV_RUNNING; } #endif sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if (!down) { old_down_cnt = sc->down_cnt; wmb(); err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); if (err) { device_printf(sc->dev, "Couldn't bring down link\n"); } if (old_down_cnt == sc->down_cnt) { /* wait for down irq */ DELAY(10 * sc->intr_coal_delay); } wmb(); if (old_down_cnt == sc->down_cnt) { device_printf(sc->dev, "never got down irq\n"); } } mxge_free_mbufs(sc); return 0; } static void mxge_setup_cfg_space(mxge_softc_t *sc) { device_t dev = sc->dev; int reg; uint16_t lnk, pectl; /* find the PCIe link width and set max read request to 4KB*/ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { lnk = pci_read_config(dev, reg + 0x12, 2); sc->link_width = (lnk >> 4) & 0x3f; if (sc->pectl == 0) { pectl = pci_read_config(dev, reg + 0x8, 2); pectl = (pectl & ~0x7000) | (5 << 12); pci_write_config(dev, reg + 0x8, pectl, 2); sc->pectl = pectl; } else { /* restore saved pectl after watchdog reset */ pci_write_config(dev, reg + 0x8, sc->pectl, 2); } } /* Enable DMA and Memory space access */ pci_enable_busmaster(dev); } static uint32_t mxge_read_reboot(mxge_softc_t *sc) { device_t dev = sc->dev; uint32_t vs; /* find the vendor specific offset */ if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { device_printf(sc->dev, "could not find vendor specific offset\n"); return (uint32_t)-1; } /* enable read32 mode */ pci_write_config(dev, vs + 0x10, 0x3, 1); /* tell NIC which register to read */ pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); return (pci_read_config(dev, vs + 0x14, 4)); } static void mxge_watchdog_reset(mxge_softc_t *sc) { struct pci_devinfo *dinfo; struct mxge_slice_state *ss; int err, running, s, num_tx_slices = 1; uint32_t reboot; uint16_t cmd; err = ENXIO; device_printf(sc->dev, "Watchdog reset!\n"); /* * check to see if the NIC rebooted. If it did, then all of * PCI config space has been reset, and things like the * busmaster bit will be zero. If this is the case, then we * must restore PCI config space before the NIC can be used * again */ cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); if (cmd == 0xffff) { /* * maybe the watchdog caught the NIC rebooting; wait * up to 100ms for it to finish. If it does not come * back, then give up */ DELAY(1000*100); cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); if (cmd == 0xffff) { device_printf(sc->dev, "NIC disappeared!\n"); } } if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { /* print the reboot status */ reboot = mxge_read_reboot(sc); device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", reboot); running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; if (running) { /* * quiesce NIC so that TX routines will not try to * xmit after restoration of BAR */ /* Mark the link as down */ if (sc->link_state) { sc->link_state = 0; if_link_state_change(sc->ifp, LINK_STATE_DOWN); } #ifdef IFNET_BUF_RING num_tx_slices = sc->num_slices; #endif /* grab all TX locks to ensure no tx */ for (s = 0; s < num_tx_slices; s++) { ss = &sc->ss[s]; mtx_lock(&ss->tx.mtx); } mxge_close(sc, 1); } /* restore PCI configuration space */ dinfo = device_get_ivars(sc->dev); pci_cfg_restore(sc->dev, dinfo); /* and redo any changes we made to our config space */ mxge_setup_cfg_space(sc); /* reload f/w */ err = mxge_load_firmware(sc, 0); if (err) { device_printf(sc->dev, "Unable to re-load f/w\n"); } if (running) { if (!err) err = mxge_open(sc); /* release all TX locks */ for (s = 0; s < num_tx_slices; s++) { ss = &sc->ss[s]; #ifdef IFNET_BUF_RING mxge_start_locked(ss); #endif mtx_unlock(&ss->tx.mtx); } } sc->watchdog_resets++; } else { device_printf(sc->dev, "NIC did not reboot, not resetting\n"); err = 0; } if (err) { device_printf(sc->dev, "watchdog reset failed\n"); } else { if (sc->dying == 2) sc->dying = 0; callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); } } static void mxge_watchdog_task(void *arg, int pending) { mxge_softc_t *sc = arg; mtx_lock(&sc->driver_mtx); mxge_watchdog_reset(sc); mtx_unlock(&sc->driver_mtx); } static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) { tx = &sc->ss[slice].tx; device_printf(sc->dev, "slice %d struck? ring state:\n", slice); device_printf(sc->dev, "tx.req=%d tx.done=%d, tx.queue_active=%d\n", tx->req, tx->done, tx->queue_active); device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", tx->activate, tx->deactivate); device_printf(sc->dev, "pkt_done=%d fw=%d\n", tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count)); } static int mxge_watchdog(mxge_softc_t *sc) { mxge_tx_ring_t *tx; uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); int i, err = 0; /* see if we have outstanding transmits, which have been pending for more than mxge_ticks */ for (i = 0; #ifdef IFNET_BUF_RING (i < sc->num_slices) && (err == 0); #else (i < 1) && (err == 0); #endif i++) { tx = &sc->ss[i].tx; if (tx->req != tx->done && tx->watchdog_req != tx->watchdog_done && tx->done == tx->watchdog_done) { /* check for pause blocking before resetting */ if (tx->watchdog_rx_pause == rx_pause) { mxge_warn_stuck(sc, tx, i); taskqueue_enqueue(sc->tq, &sc->watchdog_task); return (ENXIO); } else device_printf(sc->dev, "Flow control blocking " "xmits, check link partner\n"); } tx->watchdog_req = tx->req; tx->watchdog_done = tx->done; tx->watchdog_rx_pause = rx_pause; } if (sc->need_media_probe) mxge_media_probe(sc); return (err); } static u_long mxge_update_stats(mxge_softc_t *sc) { struct mxge_slice_state *ss; u_long pkts = 0; u_long ipackets = 0; u_long opackets = 0; #ifdef IFNET_BUF_RING u_long obytes = 0; u_long omcasts = 0; u_long odrops = 0; #endif u_long oerrors = 0; int slice; for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; ipackets += ss->ipackets; opackets += ss->opackets; #ifdef IFNET_BUF_RING obytes += ss->obytes; omcasts += ss->omcasts; odrops += ss->tx.br->br_drops; #endif oerrors += ss->oerrors; } pkts = (ipackets - sc->ifp->if_ipackets); pkts += (opackets - sc->ifp->if_opackets); sc->ifp->if_ipackets = ipackets; sc->ifp->if_opackets = opackets; #ifdef IFNET_BUF_RING sc->ifp->if_obytes = obytes; sc->ifp->if_omcasts = omcasts; sc->ifp->if_snd.ifq_drops = odrops; #endif sc->ifp->if_oerrors = oerrors; return pkts; } static void mxge_tick(void *arg) { mxge_softc_t *sc = arg; u_long pkts = 0; int err = 0; int running, ticks; uint16_t cmd; ticks = mxge_ticks; running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; if (running) { /* aggregate stats from different slices */ pkts = mxge_update_stats(sc); if (!sc->watchdog_countdown) { err = mxge_watchdog(sc); sc->watchdog_countdown = 4; } sc->watchdog_countdown--; } if (pkts == 0) { /* ensure NIC did not suffer h/w fault while idle */ cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { sc->dying = 2; taskqueue_enqueue(sc->tq, &sc->watchdog_task); err = ENXIO; } /* look less often if NIC is idle */ ticks *= 4; } if (err == 0) callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); } static int mxge_media_change(struct ifnet *ifp) { return EINVAL; } static int mxge_change_mtu(mxge_softc_t *sc, int mtu) { struct ifnet *ifp = sc->ifp; int real_mtu, old_mtu; int err = 0; real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; if ((real_mtu > sc->max_mtu) || real_mtu < 60) return EINVAL; mtx_lock(&sc->driver_mtx); old_mtu = ifp->if_mtu; ifp->if_mtu = mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mxge_close(sc, 0); err = mxge_open(sc); if (err != 0) { ifp->if_mtu = old_mtu; mxge_close(sc, 0); (void) mxge_open(sc); } } mtx_unlock(&sc->driver_mtx); return err; } static void mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { mxge_softc_t *sc = ifp->if_softc; if (sc == NULL) return; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER | IFM_FDX; ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; ifmr->ifm_active |= sc->current_media; } static int mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { mxge_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int err, mask; err = 0; switch (command) { case SIOCSIFADDR: case SIOCGIFADDR: err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: err = mxge_change_mtu(sc, ifr->ifr_mtu); break; case SIOCSIFFLAGS: mtx_lock(&sc->driver_mtx); if (sc->dying) { mtx_unlock(&sc->driver_mtx); return EINVAL; } if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { err = mxge_open(sc); } else { /* take care of promis can allmulti flag chages */ mxge_change_promisc(sc, ifp->if_flags & IFF_PROMISC); mxge_set_multicast_list(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mxge_close(sc, 0); } } mtx_unlock(&sc->driver_mtx); break; case SIOCADDMULTI: case SIOCDELMULTI: mtx_lock(&sc->driver_mtx); mxge_set_multicast_list(sc); mtx_unlock(&sc->driver_mtx); break; case SIOCSIFCAP: mtx_lock(&sc->driver_mtx); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); } } else if (mask & IFCAP_RXCSUM) { if (IFCAP_RXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_RXCSUM; } else { ifp->if_capenable |= IFCAP_RXCSUM; } } if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO4; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } else { printf("mxge requires tx checksum offload" " be enabled to use TSO\n"); err = EINVAL; } } #if IFCAP_TSO6 if (mask & IFCAP_TXCSUM_IPV6) { if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP); } else { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } } else if (mask & IFCAP_RXCSUM_IPV6) { if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; } else { ifp->if_capenable |= IFCAP_RXCSUM_IPV6; } } if (mask & IFCAP_TSO6) { if (IFCAP_TSO6 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO6; } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO6; ifp->if_hwassist |= CSUM_TSO; } else { printf("mxge requires tx checksum offload" " be enabled to use TSO\n"); err = EINVAL; } } #endif /*IFCAP_TSO6 */ if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; mtx_unlock(&sc->driver_mtx); VLAN_CAPABILITIES(ifp); break; case SIOCGIFMEDIA: mtx_lock(&sc->driver_mtx); mxge_media_probe(sc); mtx_unlock(&sc->driver_mtx); err = ifmedia_ioctl(ifp, (struct ifreq *)data, &sc->media, command); break; default: err = ENOTTY; } return err; } static void mxge_fetch_tunables(mxge_softc_t *sc) { TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", &mxge_flow_control); TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay); TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable); TUNABLE_INT_FETCH("hw.mxge.force_firmware", &mxge_force_firmware); TUNABLE_INT_FETCH("hw.mxge.deassert_wait", &mxge_deassert_wait); TUNABLE_INT_FETCH("hw.mxge.verbose", &mxge_verbose); TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); if (bootverbose) mxge_verbose = 1; if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) mxge_intr_coal_delay = 30; if (mxge_ticks == 0) mxge_ticks = hz / 2; sc->pause = mxge_flow_control; if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; } if (mxge_initial_mtu > ETHERMTU_JUMBO || mxge_initial_mtu < ETHER_MIN_LEN) mxge_initial_mtu = ETHERMTU_JUMBO; if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) mxge_throttle = MXGE_MAX_THROTTLE; if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) mxge_throttle = MXGE_MIN_THROTTLE; sc->throttle = mxge_throttle; } static void mxge_free_slices(mxge_softc_t *sc) { struct mxge_slice_state *ss; int i; if (sc->ss == NULL) return; for (i = 0; i < sc->num_slices; i++) { ss = &sc->ss[i]; if (ss->fw_stats != NULL) { mxge_dma_free(&ss->fw_stats_dma); ss->fw_stats = NULL; #ifdef IFNET_BUF_RING if (ss->tx.br != NULL) { drbr_free(ss->tx.br, M_DEVBUF); ss->tx.br = NULL; } #endif mtx_destroy(&ss->tx.mtx); } if (ss->rx_done.entry != NULL) { mxge_dma_free(&ss->rx_done.dma); ss->rx_done.entry = NULL; } } free(sc->ss, M_DEVBUF); sc->ss = NULL; } static int mxge_alloc_slices(mxge_softc_t *sc) { mxge_cmd_t cmd; struct mxge_slice_state *ss; size_t bytes; int err, i, max_intr_slots; err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); if (err != 0) { device_printf(sc->dev, "Cannot determine rx ring size\n"); return err; } sc->rx_ring_size = cmd.data0; max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); bytes = sizeof (*sc->ss) * sc->num_slices; sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->ss == NULL) return (ENOMEM); for (i = 0; i < sc->num_slices; i++) { ss = &sc->ss[i]; ss->sc = sc; /* allocate per-slice rx interrupt queues */ bytes = max_intr_slots * sizeof (*ss->rx_done.entry); err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); if (err != 0) goto abort; ss->rx_done.entry = ss->rx_done.dma.addr; bzero(ss->rx_done.entry, bytes); /* * allocate the per-slice firmware stats; stats * (including tx) are used used only on the first * slice for now */ #ifndef IFNET_BUF_RING if (i > 0) continue; #endif bytes = sizeof (*ss->fw_stats); err = mxge_dma_alloc(sc, &ss->fw_stats_dma, sizeof (*ss->fw_stats), 64); if (err != 0) goto abort; ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), "%s:tx(%d)", device_get_nameunit(sc->dev), i); mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); #ifdef IFNET_BUF_RING ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, &ss->tx.mtx); #endif } return (0); abort: mxge_free_slices(sc); return (ENOMEM); } static void mxge_slice_probe(mxge_softc_t *sc) { mxge_cmd_t cmd; char *old_fw; int msix_cnt, status, max_intr_slots; sc->num_slices = 1; /* * don't enable multiple slices if they are not enabled, * or if this is not an SMP system */ if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) return; /* see how many MSI-X interrupts are available */ msix_cnt = pci_msix_count(sc->dev); if (msix_cnt < 2) return; /* now load the slice aware firmware see what it supports */ old_fw = sc->fw_name; if (old_fw == mxge_fw_aligned) sc->fw_name = mxge_fw_rss_aligned; else sc->fw_name = mxge_fw_rss_unaligned; status = mxge_load_firmware(sc, 0); if (status != 0) { device_printf(sc->dev, "Falling back to a single slice\n"); return; } /* try to send a reset command to the card to see if it is alive */ memset(&cmd, 0, sizeof (cmd)); status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); if (status != 0) { device_printf(sc->dev, "failed reset\n"); goto abort_with_fw; } /* get rx ring size */ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); if (status != 0) { device_printf(sc->dev, "Cannot determine rx ring size\n"); goto abort_with_fw; } max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); /* tell it the size of the interrupt queues */ cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); if (status != 0) { device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); goto abort_with_fw; } /* ask the maximum number of slices it supports */ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); if (status != 0) { device_printf(sc->dev, "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); goto abort_with_fw; } sc->num_slices = cmd.data0; if (sc->num_slices > msix_cnt) sc->num_slices = msix_cnt; if (mxge_max_slices == -1) { /* cap to number of CPUs in system */ if (sc->num_slices > mp_ncpus) sc->num_slices = mp_ncpus; } else { if (sc->num_slices > mxge_max_slices) sc->num_slices = mxge_max_slices; } /* make sure it is a power of two */ while (sc->num_slices & (sc->num_slices - 1)) sc->num_slices--; if (mxge_verbose) device_printf(sc->dev, "using %d slices\n", sc->num_slices); return; abort_with_fw: sc->fw_name = old_fw; (void) mxge_load_firmware(sc, 0); } static int mxge_add_msix_irqs(mxge_softc_t *sc) { size_t bytes; int count, err, i, rid; rid = PCIR_BAR(2); sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->msix_table_res == NULL) { device_printf(sc->dev, "couldn't alloc MSIX table res\n"); return ENXIO; } count = sc->num_slices; err = pci_alloc_msix(sc->dev, &count); if (err != 0) { device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" "err = %d \n", sc->num_slices, err); goto abort_with_msix_table; } if (count < sc->num_slices) { device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", count, sc->num_slices); device_printf(sc->dev, "Try setting hw.mxge.max_slices to %d\n", count); err = ENOSPC; goto abort_with_msix; } bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); if (sc->msix_irq_res == NULL) { err = ENOMEM; goto abort_with_msix; } for (i = 0; i < sc->num_slices; i++) { rid = i + 1; sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->msix_irq_res[i] == NULL) { device_printf(sc->dev, "couldn't allocate IRQ res" " for message %d\n", i); err = ENXIO; goto abort_with_res; } } bytes = sizeof (*sc->msix_ih) * sc->num_slices; sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); for (i = 0; i < sc->num_slices; i++) { err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], INTR_TYPE_NET | INTR_MPSAFE, #if __FreeBSD_version > 700030 NULL, #endif mxge_intr, &sc->ss[i], &sc->msix_ih[i]); if (err != 0) { device_printf(sc->dev, "couldn't setup intr for " "message %d\n", i); goto abort_with_intr; } bus_describe_intr(sc->dev, sc->msix_irq_res[i], sc->msix_ih[i], "s%d", i); } if (mxge_verbose) { device_printf(sc->dev, "using %d msix IRQs:", sc->num_slices); for (i = 0; i < sc->num_slices; i++) printf(" %ld", rman_get_start(sc->msix_irq_res[i])); printf("\n"); } return (0); abort_with_intr: for (i = 0; i < sc->num_slices; i++) { if (sc->msix_ih[i] != NULL) { bus_teardown_intr(sc->dev, sc->msix_irq_res[i], sc->msix_ih[i]); sc->msix_ih[i] = NULL; } } free(sc->msix_ih, M_DEVBUF); abort_with_res: for (i = 0; i < sc->num_slices; i++) { rid = i + 1; if (sc->msix_irq_res[i] != NULL) bus_release_resource(sc->dev, SYS_RES_IRQ, rid, sc->msix_irq_res[i]); sc->msix_irq_res[i] = NULL; } free(sc->msix_irq_res, M_DEVBUF); abort_with_msix: pci_release_msi(sc->dev); abort_with_msix_table: bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), sc->msix_table_res); return err; } static int mxge_add_single_irq(mxge_softc_t *sc) { int count, err, rid; count = pci_msi_count(sc->dev); if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { rid = 1; } else { rid = 0; sc->legacy_irq = 1; } sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 1, RF_SHAREABLE | RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(sc->dev, "could not alloc interrupt\n"); return ENXIO; } if (mxge_verbose) device_printf(sc->dev, "using %s irq %ld\n", sc->legacy_irq ? "INTx" : "MSI", rman_get_start(sc->irq_res)); err = bus_setup_intr(sc->dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE, #if __FreeBSD_version > 700030 NULL, #endif mxge_intr, &sc->ss[0], &sc->ih); if (err != 0) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->legacy_irq ? 0 : 1, sc->irq_res); if (!sc->legacy_irq) pci_release_msi(sc->dev); } return err; } static void mxge_rem_msix_irqs(mxge_softc_t *sc) { int i, rid; for (i = 0; i < sc->num_slices; i++) { if (sc->msix_ih[i] != NULL) { bus_teardown_intr(sc->dev, sc->msix_irq_res[i], sc->msix_ih[i]); sc->msix_ih[i] = NULL; } } free(sc->msix_ih, M_DEVBUF); for (i = 0; i < sc->num_slices; i++) { rid = i + 1; if (sc->msix_irq_res[i] != NULL) bus_release_resource(sc->dev, SYS_RES_IRQ, rid, sc->msix_irq_res[i]); sc->msix_irq_res[i] = NULL; } free(sc->msix_irq_res, M_DEVBUF); bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), sc->msix_table_res); pci_release_msi(sc->dev); return; } static void mxge_rem_single_irq(mxge_softc_t *sc) { bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->legacy_irq ? 0 : 1, sc->irq_res); if (!sc->legacy_irq) pci_release_msi(sc->dev); } static void mxge_rem_irq(mxge_softc_t *sc) { if (sc->num_slices > 1) mxge_rem_msix_irqs(sc); else mxge_rem_single_irq(sc); } static int mxge_add_irq(mxge_softc_t *sc) { int err; if (sc->num_slices > 1) err = mxge_add_msix_irqs(sc); else err = mxge_add_single_irq(sc); if (0 && err == 0 && sc->num_slices > 1) { mxge_rem_msix_irqs(sc); err = mxge_add_msix_irqs(sc); } return err; } static int mxge_attach(device_t dev) { mxge_cmd_t cmd; mxge_softc_t *sc = device_get_softc(dev); struct ifnet *ifp; int err, rid; sc->dev = dev; mxge_fetch_tunables(sc); TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->tq); if (sc->tq == NULL) { err = ENOMEM; goto abort_with_nothing; } err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, /* alignment */ 0, /* boundary */ BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ 65536 + 256, /* maxsize */ MXGE_MAX_SEND_DESC, /* num segs */ 65536, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lock */ &sc->parent_dmat); /* tag */ if (err != 0) { device_printf(sc->dev, "Err %d allocating parent dmat\n", err); goto abort_with_tq; } ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); err = ENOSPC; goto abort_with_parent_dmat; } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", device_get_nameunit(dev)); mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), "%s:drv", device_get_nameunit(dev)); mtx_init(&sc->driver_mtx, sc->driver_mtx_name, MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); mxge_setup_cfg_space(sc); /* Map the board into the kernel */ rid = PCIR_BARS; sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (sc->mem_res == NULL) { device_printf(dev, "could not map memory\n"); err = ENXIO; goto abort_with_lock; } sc->sram = rman_get_virtual(sc->mem_res); sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; if (sc->sram_size > rman_get_size(sc->mem_res)) { device_printf(dev, "impossible memory region size %ld\n", rman_get_size(sc->mem_res)); err = ENXIO; goto abort_with_mem_res; } /* make NULL terminated copy of the EEPROM strings section of lanai SRAM */ bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); bus_space_read_region_1(rman_get_bustag(sc->mem_res), rman_get_bushandle(sc->mem_res), sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2); err = mxge_parse_strings(sc); if (err != 0) goto abort_with_mem_res; /* Enable write combining for efficient use of PCIe bus */ mxge_enable_wc(sc); /* Allocate the out of band dma memory */ err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof (mxge_cmd_t), 64); if (err != 0) goto abort_with_mem_res; sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); if (err != 0) goto abort_with_cmd_dma; err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); if (err != 0) goto abort_with_zeropad_dma; /* select & load the firmware */ err = mxge_select_firmware(sc); if (err != 0) goto abort_with_dmabench; sc->intr_coal_delay = mxge_intr_coal_delay; mxge_slice_probe(sc); err = mxge_alloc_slices(sc); if (err != 0) goto abort_with_dmabench; err = mxge_reset(sc, 0); if (err != 0) goto abort_with_slices; err = mxge_alloc_rings(sc); if (err != 0) { device_printf(sc->dev, "failed to allocate rings\n"); goto abort_with_slices; } err = mxge_add_irq(sc); if (err != 0) { device_printf(sc->dev, "failed to add irq\n"); goto abort_with_rings; } - if_initbaudrate(ifp, IF_Gbps(10)); + ifp->if_baudrate = IF_Gbps(10); ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6; #if defined(INET) || defined(INET6) ifp->if_capabilities |= IFCAP_LRO; #endif #ifdef MXGE_NEW_VLAN_API ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; /* Only FW 1.4.32 and newer can do TSO over vlans */ if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && sc->fw_ver_tiny >= 32) ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif sc->max_mtu = mxge_max_mtu(sc); if (sc->max_mtu >= 9000) ifp->if_capabilities |= IFCAP_JUMBO_MTU; else device_printf(dev, "MTU limited to %d. Install " "latest firmware for 9000 byte jumbo support\n", sc->max_mtu - ETHER_HDR_LEN); ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; /* check to see if f/w supports TSO for IPv6 */ if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { if (CSUM_TCP_IPV6) ifp->if_capabilities |= IFCAP_TSO6; sc->max_tso6_hlen = min(cmd.data0, sizeof (sc->ss[0].scratch)); } ifp->if_capenable = ifp->if_capabilities; if (sc->lro_cnt == 0) ifp->if_capenable &= ~IFCAP_LRO; ifp->if_init = mxge_init; ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = mxge_ioctl; ifp->if_start = mxge_start; /* Initialise the ifmedia structure */ ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status); mxge_media_init(sc); mxge_media_probe(sc); sc->dying = 0; ether_ifattach(ifp, sc->mac_addr); /* ether_ifattach sets mtu to ETHERMTU */ if (mxge_initial_mtu != ETHERMTU) mxge_change_mtu(sc, mxge_initial_mtu); mxge_add_sysctls(sc); #ifdef IFNET_BUF_RING ifp->if_transmit = mxge_transmit; ifp->if_qflush = mxge_qflush; #endif taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); return 0; abort_with_rings: mxge_free_rings(sc); abort_with_slices: mxge_free_slices(sc); abort_with_dmabench: mxge_dma_free(&sc->dmabench_dma); abort_with_zeropad_dma: mxge_dma_free(&sc->zeropad_dma); abort_with_cmd_dma: mxge_dma_free(&sc->cmd_dma); abort_with_mem_res: bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); abort_with_lock: pci_disable_busmaster(dev); mtx_destroy(&sc->cmd_mtx); mtx_destroy(&sc->driver_mtx); if_free(ifp); abort_with_parent_dmat: bus_dma_tag_destroy(sc->parent_dmat); abort_with_tq: if (sc->tq != NULL) { taskqueue_drain(sc->tq, &sc->watchdog_task); taskqueue_free(sc->tq); sc->tq = NULL; } abort_with_nothing: return err; } static int mxge_detach(device_t dev) { mxge_softc_t *sc = device_get_softc(dev); if (mxge_vlans_active(sc)) { device_printf(sc->dev, "Detach vlans before removing module\n"); return EBUSY; } mtx_lock(&sc->driver_mtx); sc->dying = 1; if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) mxge_close(sc, 0); mtx_unlock(&sc->driver_mtx); ether_ifdetach(sc->ifp); if (sc->tq != NULL) { taskqueue_drain(sc->tq, &sc->watchdog_task); taskqueue_free(sc->tq); sc->tq = NULL; } callout_drain(&sc->co_hdl); ifmedia_removeall(&sc->media); mxge_dummy_rdma(sc, 0); mxge_rem_sysctls(sc); mxge_rem_irq(sc); mxge_free_rings(sc); mxge_free_slices(sc); mxge_dma_free(&sc->dmabench_dma); mxge_dma_free(&sc->zeropad_dma); mxge_dma_free(&sc->cmd_dma); bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); pci_disable_busmaster(dev); mtx_destroy(&sc->cmd_mtx); mtx_destroy(&sc->driver_mtx); if_free(sc->ifp); bus_dma_tag_destroy(sc->parent_dmat); return 0; } static int mxge_shutdown(device_t dev) { return 0; } /* This file uses Myri10GE driver indentation. Local Variables: c-file-style:"linux" tab-width:8 End: */ Index: head/sys/dev/mxge/if_mxge_var.h =================================================================== --- head/sys/dev/mxge/if_mxge_var.h (revision 263101) +++ head/sys/dev/mxge/if_mxge_var.h (revision 263102) @@ -1,382 +1,378 @@ /******************************************************************************* Copyright (c) 2006-2013, Myricom Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Myricom Inc, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. $FreeBSD$ ***************************************************************************/ #define MXGE_ETH_STOPPED 0 #define MXGE_ETH_STOPPING 1 #define MXGE_ETH_STARTING 2 #define MXGE_ETH_RUNNING 3 #define MXGE_ETH_OPEN_FAILED 4 #define MXGE_FW_OFFSET 1024*1024 #define MXGE_EEPROM_STRINGS_SIZE 256 #define MXGE_MAX_SEND_DESC 128 #if ((__FreeBSD_version > 800000 && __FreeBSD_version < 800005) \ || __FreeBSD_version < 700111) #define MXGE_VIRT_JUMBOS 1 #else #define MXGE_VIRT_JUMBOS 0 #endif #if (__FreeBSD_version > 800082) #define IFNET_BUF_RING 1 #endif #if (__FreeBSD_version < 1000020) #undef IF_Kbps #undef IF_Mbps #undef IF_Gbps #define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ -static __inline void -if_initbaudrate(struct ifnet *ifp, uintmax_t baud) -{ - ifp->if_baudrate = baud; -} #endif + #ifndef VLAN_CAPABILITIES #define VLAN_CAPABILITIES(ifp) #define mxge_vlans_active(sc) (sc)->ifp->if_nvlans #else #define mxge_vlans_active(sc) (sc)->ifp->if_vlantrunk #endif #ifndef VLAN_TAG_VALUE #define MXGE_NEW_VLAN_API #endif #ifndef IFCAP_LRO #define IFCAP_LRO 0 #endif #ifndef IFCAP_TSO #define IFCAP_TSO 0 #endif #ifndef IFCAP_TSO4 #define IFCAP_TSO4 0 #endif #ifndef IFCAP_TSO6 #define IFCAP_TSO6 0 #endif #ifndef IFCAP_TXCSUM_IPV6 #define IFCAP_TXCSUM_IPV6 0 #endif #ifndef IFCAP_RXCSUM_IPV6 #define IFCAP_RXCSUM_IPV6 0 #endif #ifndef CSUM_TSO #define CSUM_TSO 0 #endif #ifndef CSUM_TCP_IPV6 #define CSUM_TCP_IPV6 0 #endif #ifndef CSUM_UDP_IPV6 #define CSUM_UDP_IPV6 0 #endif #ifndef CSUM_DELAY_DATA_IPV6 #define CSUM_DELAY_DATA_IPV6 0 #endif typedef struct { void *addr; bus_addr_t bus_addr; bus_dma_tag_t dmat; bus_dmamap_t map; } mxge_dma_t; typedef struct { mcp_slot_t *entry; mxge_dma_t dma; int cnt; int idx; int mask; } mxge_rx_done_t; typedef struct { uint32_t data0; uint32_t data1; uint32_t data2; } mxge_cmd_t; struct mxge_rx_buffer_state { struct mbuf *m; bus_dmamap_t map; }; struct mxge_tx_buffer_state { struct mbuf *m; bus_dmamap_t map; int flag; }; typedef struct { volatile mcp_kreq_ether_recv_t *lanai; /* lanai ptr for recv ring */ mcp_kreq_ether_recv_t *shadow; /* host shadow of recv ring */ struct mxge_rx_buffer_state *info; bus_dma_tag_t dmat; bus_dmamap_t extra_map; int cnt; int nbufs; int cl_size; int alloc_fail; int mask; /* number of rx slots -1 */ int mlen; } mxge_rx_ring_t; typedef struct { struct mtx mtx; #ifdef IFNET_BUF_RING struct buf_ring *br; #endif volatile mcp_kreq_ether_send_t *lanai; /* lanai ptr for sendq */ volatile uint32_t *send_go; /* doorbell for sendq */ volatile uint32_t *send_stop; /* doorbell for sendq */ mcp_kreq_ether_send_t *req_list; /* host shadow of sendq */ char *req_bytes; bus_dma_segment_t *seg_list; struct mxge_tx_buffer_state *info; bus_dma_tag_t dmat; int req; /* transmits submitted */ int mask; /* number of transmit slots -1 */ int done; /* transmits completed */ int pkt_done; /* packets completed */ int max_desc; /* max descriptors per xmit */ int queue_active; /* fw currently polling this queue*/ int activate; int deactivate; int stall; /* #times hw queue exhausted */ int wake; /* #times irq re-enabled xmit */ int watchdog_req; /* cache of req */ int watchdog_done; /* cache of done */ int watchdog_rx_pause; /* cache of pause rq recvd */ int defrag; char mtx_name[16]; } mxge_tx_ring_t; struct mxge_softc; typedef struct mxge_softc mxge_softc_t; struct mxge_slice_state { mxge_softc_t *sc; mxge_tx_ring_t tx; /* transmit ring */ mxge_rx_ring_t rx_small; mxge_rx_ring_t rx_big; mxge_rx_done_t rx_done; mcp_irq_data_t *fw_stats; volatile uint32_t *irq_claim; u_long ipackets; u_long opackets; u_long obytes; u_long omcasts; u_long oerrors; int if_drv_flags; struct lro_ctrl lc; mxge_dma_t fw_stats_dma; struct sysctl_oid *sysctl_tree; struct sysctl_ctx_list sysctl_ctx; char scratch[256]; }; struct mxge_softc { struct ifnet* ifp; struct mxge_slice_state *ss; int tx_boundary; /* boundary transmits cannot cross*/ int lro_cnt; bus_dma_tag_t parent_dmat; volatile uint8_t *sram; int sram_size; volatile uint32_t *irq_deassert; mcp_cmd_response_t *cmd; mxge_dma_t cmd_dma; mxge_dma_t zeropad_dma; struct pci_dev *pdev; int legacy_irq; int link_state; unsigned int rdma_tags_available; int intr_coal_delay; volatile uint32_t *intr_coal_delay_ptr; int wc; struct mtx cmd_mtx; struct mtx driver_mtx; int wake_queue; int stop_queue; int down_cnt; int watchdog_resets; int watchdog_countdown; int pause; struct resource *mem_res; struct resource *irq_res; struct resource **msix_irq_res; struct resource *msix_table_res; struct resource *msix_pba_res; void *ih; void **msix_ih; char *fw_name; char eeprom_strings[MXGE_EEPROM_STRINGS_SIZE]; char fw_version[128]; int fw_ver_major; int fw_ver_minor; int fw_ver_tiny; int adopted_rx_filter_bug; device_t dev; struct ifmedia media; int read_dma; int write_dma; int read_write_dma; int fw_multicast_support; int link_width; int max_mtu; int throttle; int tx_defrag; int media_flags; int need_media_probe; int num_slices; int rx_ring_size; int dying; int connector; int current_media; int max_tso6_hlen; mxge_dma_t dmabench_dma; struct callout co_hdl; struct taskqueue *tq; struct task watchdog_task; struct sysctl_oid *slice_sysctl_tree; struct sysctl_ctx_list slice_sysctl_ctx; char *mac_addr_string; uint8_t mac_addr[6]; /* eeprom mac address */ uint16_t pectl; /* save PCIe CTL state */ char product_code_string[64]; char serial_number_string[64]; char cmd_mtx_name[16]; char driver_mtx_name[16]; }; #define MXGE_PCI_VENDOR_MYRICOM 0x14c1 #define MXGE_PCI_DEVICE_Z8E 0x0008 #define MXGE_PCI_DEVICE_Z8E_9 0x0009 #define MXGE_PCI_REV_Z8E 0 #define MXGE_PCI_REV_Z8ES 1 #define MXGE_XFP_COMPLIANCE_BYTE 131 #define MXGE_SFP_COMPLIANCE_BYTE 3 #define MXGE_MIN_THROTTLE 416 #define MXGE_MAX_THROTTLE 4096 /* Types of connectors on NICs supported by this driver */ #define MXGE_CX4 0 #define MXGE_XFP 1 #define MXGE_SFP 2 #define MXGE_QRF 3 #define MXGE_HIGHPART_TO_U32(X) \ (sizeof (X) == 8) ? ((uint32_t)((uint64_t)(X) >> 32)) : (0) #define MXGE_LOWPART_TO_U32(X) ((uint32_t)(X)) struct mxge_media_type { int flag; uint8_t bitmask; char *name; }; struct mxge_pkt_info { int ip_off; int ip_hlen; struct ip *ip; struct ip6_hdr *ip6; struct tcphdr *tcp; }; /* implement our own memory barriers, since bus_space_barrier cannot handle write-combining regions */ #if __FreeBSD_version < 800053 #if defined (__GNUC__) #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ #define wmb() __asm__ __volatile__ ("sfence;": : :"memory") #elif #cpu(sparc64) || defined sparc64 || defined __sparcv9 #define wmb() __asm__ __volatile__ ("membar #MemIssue": : :"memory") #elif #cpu(sparc) || defined sparc || defined __sparc__ #define wmb() __asm__ __volatile__ ("stbar;": : :"memory") #else #define wmb() /* XXX just to make this compile */ #endif #else #error "unknown compiler" #endif #endif static inline void mxge_pio_copy(volatile void *to_v, void *from_v, size_t size) { register volatile uintptr_t *to; volatile uintptr_t *from; size_t i; to = (volatile uintptr_t *) to_v; from = from_v; for (i = (size / sizeof (uintptr_t)); i; i--) { *to = *from; to++; from++; } } void mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro); int mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum); /* This file uses Myri10GE driver indentation. Local Variables: c-file-style:"linux" tab-width:8 End: */ Index: head/sys/dev/oce/oce_if.c =================================================================== --- head/sys/dev/oce/oce_if.c (revision 263101) +++ head/sys/dev/oce/oce_if.c (revision 263102) @@ -1,2344 +1,2344 @@ /*- * Copyright (C) 2013 Emulex * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Emulex Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * freebsd-drivers@emulex.com * * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 */ /* $FreeBSD$ */ #include "opt_inet6.h" #include "opt_inet.h" #include "oce_if.h" /* UE Status Low CSR */ static char *ue_status_low_desc[] = { "CEV", "CTX", "DBUF", "ERX", "Host", "MPU", "NDMA", "PTC ", "RDMA ", "RXF ", "RXIPS ", "RXULP0 ", "RXULP1 ", "RXULP2 ", "TIM ", "TPOST ", "TPRE ", "TXIPS ", "TXULP0 ", "TXULP1 ", "UC ", "WDMA ", "TXULP2 ", "HOST1 ", "P0_OB_LINK ", "P1_OB_LINK ", "HOST_GPIO ", "MBOX ", "AXGMAC0", "AXGMAC1", "JTAG", "MPU_INTPEND" }; /* UE Status High CSR */ static char *ue_status_hi_desc[] = { "LPCMEMHOST", "MGMT_MAC", "PCS0ONLINE", "MPU_IRAM", "PCS1ONLINE", "PCTL0", "PCTL1", "PMEM", "RR", "TXPB", "RXPP", "XAUI", "TXP", "ARM", "IPC", "HOST2", "HOST3", "HOST4", "HOST5", "HOST6", "HOST7", "HOST8", "HOST9", "NETC", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown" }; /* Driver entry points prototypes */ static int oce_probe(device_t dev); static int oce_attach(device_t dev); static int oce_detach(device_t dev); static int oce_shutdown(device_t dev); static int oce_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void oce_init(void *xsc); static int oce_multiq_start(struct ifnet *ifp, struct mbuf *m); static void oce_multiq_flush(struct ifnet *ifp); /* Driver interrupt routines protypes */ static void oce_intr(void *arg, int pending); static int oce_setup_intr(POCE_SOFTC sc); static int oce_fast_isr(void *arg); static int oce_alloc_intr(POCE_SOFTC sc, int vector, void (*isr) (void *arg, int pending)); /* Media callbacks prototypes */ static void oce_media_status(struct ifnet *ifp, struct ifmediareq *req); static int oce_media_change(struct ifnet *ifp); /* Transmit routines prototypes */ static int oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index); static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq); static void oce_tx_complete(struct oce_wq *wq, uint32_t wqe_idx, uint32_t status); static int oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq); /* Receive routines prototypes */ static void oce_discard_rx_comp(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe); static int oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe); static int oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe); static void oce_rx(struct oce_rq *rq, uint32_t rqe_idx, struct oce_nic_rx_cqe *cqe); /* Helper function prototypes in this file */ static int oce_attach_ifp(POCE_SOFTC sc); static void oce_add_vlan(void *arg, struct ifnet *ifp, uint16_t vtag); static void oce_del_vlan(void *arg, struct ifnet *ifp, uint16_t vtag); static int oce_vid_config(POCE_SOFTC sc); static void oce_mac_addr_set(POCE_SOFTC sc); static int oce_handle_passthrough(struct ifnet *ifp, caddr_t data); static void oce_local_timer(void *arg); static void oce_if_deactivate(POCE_SOFTC sc); static void oce_if_activate(POCE_SOFTC sc); static void setup_max_queues_want(POCE_SOFTC sc); static void update_queues_got(POCE_SOFTC sc); static void process_link_state(POCE_SOFTC sc, struct oce_async_cqe_link_state *acqe); static int oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m); static void oce_get_config(POCE_SOFTC sc); static struct mbuf *oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete); /* IP specific */ #if defined(INET6) || defined(INET) static int oce_init_lro(POCE_SOFTC sc); static void oce_rx_flush_lro(struct oce_rq *rq); static struct mbuf * oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp); #endif static device_method_t oce_dispatch[] = { DEVMETHOD(device_probe, oce_probe), DEVMETHOD(device_attach, oce_attach), DEVMETHOD(device_detach, oce_detach), DEVMETHOD(device_shutdown, oce_shutdown), DEVMETHOD_END }; static driver_t oce_driver = { "oce", oce_dispatch, sizeof(OCE_SOFTC) }; static devclass_t oce_devclass; DRIVER_MODULE(oce, pci, oce_driver, oce_devclass, 0, 0); MODULE_DEPEND(oce, pci, 1, 1, 1); MODULE_DEPEND(oce, ether, 1, 1, 1); MODULE_VERSION(oce, 1); /* global vars */ const char component_revision[32] = {"///" COMPONENT_REVISION "///"}; /* Module capabilites and parameters */ uint32_t oce_max_rsp_handled = OCE_MAX_RSP_HANDLED; uint32_t oce_enable_rss = OCE_MODCAP_RSS; TUNABLE_INT("hw.oce.max_rsp_handled", &oce_max_rsp_handled); TUNABLE_INT("hw.oce.enable_rss", &oce_enable_rss); /* Supported devices table */ static uint32_t supportedDevices[] = { (PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE2, (PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE3, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_BE3, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201_VF, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_SH }; /***************************************************************************** * Driver entry points functions * *****************************************************************************/ static int oce_probe(device_t dev) { uint16_t vendor = 0; uint16_t device = 0; int i = 0; char str[256] = {0}; POCE_SOFTC sc; sc = device_get_softc(dev); bzero(sc, sizeof(OCE_SOFTC)); sc->dev = dev; vendor = pci_get_vendor(dev); device = pci_get_device(dev); for (i = 0; i < (sizeof(supportedDevices) / sizeof(uint32_t)); i++) { if (vendor == ((supportedDevices[i] >> 16) & 0xffff)) { if (device == (supportedDevices[i] & 0xffff)) { sprintf(str, "%s:%s", "Emulex CNA NIC function", component_revision); device_set_desc_copy(dev, str); switch (device) { case PCI_PRODUCT_BE2: sc->flags |= OCE_FLAGS_BE2; break; case PCI_PRODUCT_BE3: sc->flags |= OCE_FLAGS_BE3; break; case PCI_PRODUCT_XE201: case PCI_PRODUCT_XE201_VF: sc->flags |= OCE_FLAGS_XE201; break; case PCI_PRODUCT_SH: sc->flags |= OCE_FLAGS_SH; break; default: return ENXIO; } return BUS_PROBE_DEFAULT; } } } return ENXIO; } static int oce_attach(device_t dev) { POCE_SOFTC sc; int rc = 0; sc = device_get_softc(dev); rc = oce_hw_pci_alloc(sc); if (rc) return rc; sc->tx_ring_size = OCE_TX_RING_SIZE; sc->rx_ring_size = OCE_RX_RING_SIZE; sc->rq_frag_size = OCE_RQ_BUF_SIZE; sc->flow_control = OCE_DEFAULT_FLOW_CONTROL; sc->promisc = OCE_DEFAULT_PROMISCUOUS; LOCK_CREATE(&sc->bmbx_lock, "Mailbox_lock"); LOCK_CREATE(&sc->dev_lock, "Device_lock"); /* initialise the hardware */ rc = oce_hw_init(sc); if (rc) goto pci_res_free; oce_get_config(sc); setup_max_queues_want(sc); rc = oce_setup_intr(sc); if (rc) goto mbox_free; rc = oce_queue_init_all(sc); if (rc) goto intr_free; rc = oce_attach_ifp(sc); if (rc) goto queues_free; #if defined(INET6) || defined(INET) rc = oce_init_lro(sc); if (rc) goto ifp_free; #endif rc = oce_hw_start(sc); if (rc) goto lro_free; sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, oce_add_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, oce_del_vlan, sc, EVENTHANDLER_PRI_FIRST); rc = oce_stats_init(sc); if (rc) goto vlan_free; oce_add_sysctls(sc); callout_init(&sc->timer, CALLOUT_MPSAFE); rc = callout_reset(&sc->timer, 2 * hz, oce_local_timer, sc); if (rc) goto stats_free; return 0; stats_free: callout_drain(&sc->timer); oce_stats_free(sc); vlan_free: if (sc->vlan_attach) EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); if (sc->vlan_detach) EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); oce_hw_intr_disable(sc); lro_free: #if defined(INET6) || defined(INET) oce_free_lro(sc); ifp_free: #endif ether_ifdetach(sc->ifp); if_free(sc->ifp); queues_free: oce_queue_release_all(sc); intr_free: oce_intr_free(sc); mbox_free: oce_dma_free(sc, &sc->bsmbx); pci_res_free: oce_hw_pci_free(sc); LOCK_DESTROY(&sc->dev_lock); LOCK_DESTROY(&sc->bmbx_lock); return rc; } static int oce_detach(device_t dev) { POCE_SOFTC sc = device_get_softc(dev); LOCK(&sc->dev_lock); oce_if_deactivate(sc); UNLOCK(&sc->dev_lock); callout_drain(&sc->timer); if (sc->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); if (sc->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); ether_ifdetach(sc->ifp); if_free(sc->ifp); oce_hw_shutdown(sc); bus_generic_detach(dev); return 0; } static int oce_shutdown(device_t dev) { int rc; rc = oce_detach(dev); return rc; } static int oce_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; POCE_SOFTC sc = ifp->if_softc; int rc = 0; uint32_t u; switch (command) { case SIOCGIFMEDIA: rc = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFMTU: if (ifr->ifr_mtu > OCE_MAX_MTU) rc = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; oce_init(sc); } device_printf(sc->dev, "Interface Up\n"); } else { LOCK(&sc->dev_lock); sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); oce_if_deactivate(sc); UNLOCK(&sc->dev_lock); device_printf(sc->dev, "Interface Down\n"); } if ((ifp->if_flags & IFF_PROMISC) && !sc->promisc) { if (!oce_rxf_set_promiscuous(sc, (1 | (1 << 1)))) sc->promisc = TRUE; } else if (!(ifp->if_flags & IFF_PROMISC) && sc->promisc) { if (!oce_rxf_set_promiscuous(sc, 0)) sc->promisc = FALSE; } break; case SIOCADDMULTI: case SIOCDELMULTI: rc = oce_hw_update_multicast(sc); if (rc) device_printf(sc->dev, "Update multicast address failed\n"); break; case SIOCSIFCAP: u = ifr->ifr_reqcap ^ ifp->if_capenable; if (u & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO disabled due to -txcsum.\n"); } } if (u & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (u & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (IFCAP_TSO & ifp->if_capenable) { if (IFCAP_TXCSUM & ifp->if_capenable) ifp->if_hwassist |= CSUM_TSO; else { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "Enable txcsum first.\n"); rc = EAGAIN; } } else ifp->if_hwassist &= ~CSUM_TSO; } if (u & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (u & IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; oce_vid_config(sc); } #if defined(INET6) || defined(INET) if (u & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; #endif break; case SIOCGPRIVATE_0: rc = oce_handle_passthrough(ifp, data); break; default: rc = ether_ioctl(ifp, command, data); break; } return rc; } static void oce_init(void *arg) { POCE_SOFTC sc = arg; LOCK(&sc->dev_lock); if (sc->ifp->if_flags & IFF_UP) { oce_if_deactivate(sc); oce_if_activate(sc); } UNLOCK(&sc->dev_lock); } static int oce_multiq_start(struct ifnet *ifp, struct mbuf *m) { POCE_SOFTC sc = ifp->if_softc; struct oce_wq *wq = NULL; int queue_index = 0; int status = 0; if (!sc->link_status) return ENXIO; if ((m->m_flags & M_FLOWID) != 0) queue_index = m->m_pkthdr.flowid % sc->nwqs; wq = sc->wq[queue_index]; LOCK(&wq->tx_lock); status = oce_multiq_transmit(ifp, m, wq); UNLOCK(&wq->tx_lock); return status; } static void oce_multiq_flush(struct ifnet *ifp) { POCE_SOFTC sc = ifp->if_softc; struct mbuf *m; int i = 0; for (i = 0; i < sc->nwqs; i++) { while ((m = buf_ring_dequeue_sc(sc->wq[i]->br)) != NULL) m_freem(m); } if_qflush(ifp); } /***************************************************************************** * Driver interrupt routines functions * *****************************************************************************/ static void oce_intr(void *arg, int pending) { POCE_INTR_INFO ii = (POCE_INTR_INFO) arg; POCE_SOFTC sc = ii->sc; struct oce_eq *eq = ii->eq; struct oce_eqe *eqe; struct oce_cq *cq = NULL; int i, num_eqes = 0; bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map, BUS_DMASYNC_POSTWRITE); do { eqe = RING_GET_CONSUMER_ITEM_VA(eq->ring, struct oce_eqe); if (eqe->evnt == 0) break; eqe->evnt = 0; bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map, BUS_DMASYNC_POSTWRITE); RING_GET(eq->ring, 1); num_eqes++; } while (TRUE); if (!num_eqes) goto eq_arm; /* Spurious */ /* Clear EQ entries, but dont arm */ oce_arm_eq(sc, eq->eq_id, num_eqes, FALSE, FALSE); /* Process TX, RX and MCC. But dont arm CQ*/ for (i = 0; i < eq->cq_valid; i++) { cq = eq->cq[i]; (*cq->cq_handler)(cq->cb_arg); } /* Arm all cqs connected to this EQ */ for (i = 0; i < eq->cq_valid; i++) { cq = eq->cq[i]; oce_arm_cq(sc, cq->cq_id, 0, TRUE); } eq_arm: oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE); return; } static int oce_setup_intr(POCE_SOFTC sc) { int rc = 0, use_intx = 0; int vector = 0, req_vectors = 0; if (is_rss_enabled(sc)) req_vectors = MAX((sc->nrqs - 1), sc->nwqs); else req_vectors = 1; if (sc->flags & OCE_FLAGS_MSIX_CAPABLE) { sc->intr_count = req_vectors; rc = pci_alloc_msix(sc->dev, &sc->intr_count); if (rc != 0) { use_intx = 1; pci_release_msi(sc->dev); } else sc->flags |= OCE_FLAGS_USING_MSIX; } else use_intx = 1; if (use_intx) sc->intr_count = 1; /* Scale number of queues based on intr we got */ update_queues_got(sc); if (use_intx) { device_printf(sc->dev, "Using legacy interrupt\n"); rc = oce_alloc_intr(sc, vector, oce_intr); if (rc) goto error; } else { for (; vector < sc->intr_count; vector++) { rc = oce_alloc_intr(sc, vector, oce_intr); if (rc) goto error; } } return 0; error: oce_intr_free(sc); return rc; } static int oce_fast_isr(void *arg) { POCE_INTR_INFO ii = (POCE_INTR_INFO) arg; POCE_SOFTC sc = ii->sc; if (ii->eq == NULL) return FILTER_STRAY; oce_arm_eq(sc, ii->eq->eq_id, 0, FALSE, TRUE); taskqueue_enqueue_fast(ii->tq, &ii->task); ii->eq->intr++; return FILTER_HANDLED; } static int oce_alloc_intr(POCE_SOFTC sc, int vector, void (*isr) (void *arg, int pending)) { POCE_INTR_INFO ii = &sc->intrs[vector]; int rc = 0, rr; if (vector >= OCE_MAX_EQ) return (EINVAL); /* Set the resource id for the interrupt. * MSIx is vector + 1 for the resource id, * INTx is 0 for the resource id. */ if (sc->flags & OCE_FLAGS_USING_MSIX) rr = vector + 1; else rr = 0; ii->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rr, RF_ACTIVE|RF_SHAREABLE); ii->irq_rr = rr; if (ii->intr_res == NULL) { device_printf(sc->dev, "Could not allocate interrupt\n"); rc = ENXIO; return rc; } TASK_INIT(&ii->task, 0, isr, ii); ii->vector = vector; sprintf(ii->task_name, "oce_task[%d]", ii->vector); ii->tq = taskqueue_create_fast(ii->task_name, M_NOWAIT, taskqueue_thread_enqueue, &ii->tq); taskqueue_start_threads(&ii->tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); ii->sc = sc; rc = bus_setup_intr(sc->dev, ii->intr_res, INTR_TYPE_NET, oce_fast_isr, NULL, ii, &ii->tag); return rc; } void oce_intr_free(POCE_SOFTC sc) { int i = 0; for (i = 0; i < sc->intr_count; i++) { if (sc->intrs[i].tag != NULL) bus_teardown_intr(sc->dev, sc->intrs[i].intr_res, sc->intrs[i].tag); if (sc->intrs[i].tq != NULL) taskqueue_free(sc->intrs[i].tq); if (sc->intrs[i].intr_res != NULL) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intrs[i].irq_rr, sc->intrs[i].intr_res); sc->intrs[i].tag = NULL; sc->intrs[i].intr_res = NULL; } if (sc->flags & OCE_FLAGS_USING_MSIX) pci_release_msi(sc->dev); } /****************************************************************************** * Media callbacks functions * ******************************************************************************/ static void oce_media_status(struct ifnet *ifp, struct ifmediareq *req) { POCE_SOFTC sc = (POCE_SOFTC) ifp->if_softc; req->ifm_status = IFM_AVALID; req->ifm_active = IFM_ETHER; if (sc->link_status == 1) req->ifm_status |= IFM_ACTIVE; else return; switch (sc->link_speed) { case 1: /* 10 Mbps */ req->ifm_active |= IFM_10_T | IFM_FDX; sc->speed = 10; break; case 2: /* 100 Mbps */ req->ifm_active |= IFM_100_TX | IFM_FDX; sc->speed = 100; break; case 3: /* 1 Gbps */ req->ifm_active |= IFM_1000_T | IFM_FDX; sc->speed = 1000; break; case 4: /* 10 Gbps */ req->ifm_active |= IFM_10G_SR | IFM_FDX; sc->speed = 10000; break; case 7: /* 40 Gbps */ req->ifm_active |= IFM_40G_SR4 | IFM_FDX; sc->speed = 40000; break; } return; } int oce_media_change(struct ifnet *ifp) { return 0; } /***************************************************************************** * Transmit routines functions * *****************************************************************************/ static int oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index) { int rc = 0, i, retry_cnt = 0; bus_dma_segment_t segs[OCE_MAX_TX_ELEMENTS]; struct mbuf *m, *m_temp; struct oce_wq *wq = sc->wq[wq_index]; struct oce_packet_desc *pd; struct oce_nic_hdr_wqe *nichdr; struct oce_nic_frag_wqe *nicfrag; int num_wqes; uint32_t reg_value; boolean_t complete = TRUE; m = *mpp; if (!m) return EINVAL; if (!(m->m_flags & M_PKTHDR)) { rc = ENXIO; goto free_ret; } if(oce_tx_asic_stall_verify(sc, m)) { m = oce_insert_vlan_tag(sc, m, &complete); if(!m) { device_printf(sc->dev, "Insertion unsuccessful\n"); return 0; } } if (m->m_pkthdr.csum_flags & CSUM_TSO) { /* consolidate packet buffers for TSO/LSO segment offload */ #if defined(INET6) || defined(INET) m = oce_tso_setup(sc, mpp); #else m = NULL; #endif if (m == NULL) { rc = ENXIO; goto free_ret; } } pd = &wq->pckts[wq->pkt_desc_head]; retry: rc = bus_dmamap_load_mbuf_sg(wq->tag, pd->map, m, segs, &pd->nsegs, BUS_DMA_NOWAIT); if (rc == 0) { num_wqes = pd->nsegs + 1; if (IS_BE(sc) || IS_SH(sc)) { /*Dummy required only for BE3.*/ if (num_wqes & 1) num_wqes++; } if (num_wqes >= RING_NUM_FREE(wq->ring)) { bus_dmamap_unload(wq->tag, pd->map); return EBUSY; } atomic_store_rel_int(&wq->pkt_desc_head, (wq->pkt_desc_head + 1) % \ OCE_WQ_PACKET_ARRAY_SIZE); bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_PREWRITE); pd->mbuf = m; nichdr = RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_hdr_wqe); nichdr->u0.dw[0] = 0; nichdr->u0.dw[1] = 0; nichdr->u0.dw[2] = 0; nichdr->u0.dw[3] = 0; nichdr->u0.s.complete = complete; nichdr->u0.s.event = 1; nichdr->u0.s.crc = 1; nichdr->u0.s.forward = 0; nichdr->u0.s.ipcs = (m->m_pkthdr.csum_flags & CSUM_IP) ? 1 : 0; nichdr->u0.s.udpcs = (m->m_pkthdr.csum_flags & CSUM_UDP) ? 1 : 0; nichdr->u0.s.tcpcs = (m->m_pkthdr.csum_flags & CSUM_TCP) ? 1 : 0; nichdr->u0.s.num_wqe = num_wqes; nichdr->u0.s.total_length = m->m_pkthdr.len; if (m->m_flags & M_VLANTAG) { nichdr->u0.s.vlan = 1; /*Vlan present*/ nichdr->u0.s.vlan_tag = m->m_pkthdr.ether_vtag; } if (m->m_pkthdr.csum_flags & CSUM_TSO) { if (m->m_pkthdr.tso_segsz) { nichdr->u0.s.lso = 1; nichdr->u0.s.lso_mss = m->m_pkthdr.tso_segsz; } if (!IS_BE(sc) || !IS_SH(sc)) nichdr->u0.s.ipcs = 1; } RING_PUT(wq->ring, 1); atomic_add_int(&wq->ring->num_used, 1); for (i = 0; i < pd->nsegs; i++) { nicfrag = RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_frag_wqe); nicfrag->u0.s.rsvd0 = 0; nicfrag->u0.s.frag_pa_hi = ADDR_HI(segs[i].ds_addr); nicfrag->u0.s.frag_pa_lo = ADDR_LO(segs[i].ds_addr); nicfrag->u0.s.frag_len = segs[i].ds_len; pd->wqe_idx = wq->ring->pidx; RING_PUT(wq->ring, 1); atomic_add_int(&wq->ring->num_used, 1); } if (num_wqes > (pd->nsegs + 1)) { nicfrag = RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_frag_wqe); nicfrag->u0.dw[0] = 0; nicfrag->u0.dw[1] = 0; nicfrag->u0.dw[2] = 0; nicfrag->u0.dw[3] = 0; pd->wqe_idx = wq->ring->pidx; RING_PUT(wq->ring, 1); atomic_add_int(&wq->ring->num_used, 1); pd->nsegs++; } sc->ifp->if_opackets++; wq->tx_stats.tx_reqs++; wq->tx_stats.tx_wrbs += num_wqes; wq->tx_stats.tx_bytes += m->m_pkthdr.len; wq->tx_stats.tx_pkts++; bus_dmamap_sync(wq->ring->dma.tag, wq->ring->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); reg_value = (num_wqes << 16) | wq->wq_id; OCE_WRITE_REG32(sc, db, wq->db_offset, reg_value); } else if (rc == EFBIG) { if (retry_cnt == 0) { m_temp = m_defrag(m, M_NOWAIT); if (m_temp == NULL) goto free_ret; m = m_temp; *mpp = m_temp; retry_cnt = retry_cnt + 1; goto retry; } else goto free_ret; } else if (rc == ENOMEM) return rc; else goto free_ret; return 0; free_ret: m_freem(*mpp); *mpp = NULL; return rc; } static void oce_tx_complete(struct oce_wq *wq, uint32_t wqe_idx, uint32_t status) { struct oce_packet_desc *pd; POCE_SOFTC sc = (POCE_SOFTC) wq->parent; struct mbuf *m; pd = &wq->pckts[wq->pkt_desc_tail]; atomic_store_rel_int(&wq->pkt_desc_tail, (wq->pkt_desc_tail + 1) % OCE_WQ_PACKET_ARRAY_SIZE); atomic_subtract_int(&wq->ring->num_used, pd->nsegs + 1); bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(wq->tag, pd->map); m = pd->mbuf; m_freem(m); pd->mbuf = NULL; if (sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) { if (wq->ring->num_used < (wq->ring->num_items / 2)) { sc->ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE); oce_tx_restart(sc, wq); } } } static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq) { if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) return; #if __FreeBSD_version >= 800000 if (!drbr_empty(sc->ifp, wq->br)) #else if (!IFQ_DRV_IS_EMPTY(&sc->ifp->if_snd)) #endif taskqueue_enqueue_fast(taskqueue_swi, &wq->txtask); } #if defined(INET6) || defined(INET) static struct mbuf * oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp) { struct mbuf *m; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; #endif struct ether_vlan_header *eh; struct tcphdr *th; uint16_t etype; int total_len = 0, ehdrlen = 0; m = *mpp; if (M_WRITABLE(m) == 0) { m = m_dup(*mpp, M_NOWAIT); if (!m) return NULL; m_freem(*mpp); *mpp = m; } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } switch (etype) { #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(m->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return NULL; th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); total_len = ehdrlen + (ip->ip_hl << 2) + (th->th_off << 2); break; #endif #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(m->m_data + ehdrlen); if (ip6->ip6_nxt != IPPROTO_TCP) return NULL; th = (struct tcphdr *)((caddr_t)ip6 + sizeof(struct ip6_hdr)); total_len = ehdrlen + sizeof(struct ip6_hdr) + (th->th_off << 2); break; #endif default: return NULL; } m = m_pullup(m, total_len); if (!m) return NULL; *mpp = m; return m; } #endif /* INET6 || INET */ void oce_tx_task(void *arg, int npending) { struct oce_wq *wq = arg; POCE_SOFTC sc = wq->parent; struct ifnet *ifp = sc->ifp; int rc = 0; #if __FreeBSD_version >= 800000 LOCK(&wq->tx_lock); rc = oce_multiq_transmit(ifp, NULL, wq); if (rc) { device_printf(sc->dev, "TX[%d] restart failed\n", wq->queue_index); } UNLOCK(&wq->tx_lock); #else oce_start(ifp); #endif } void oce_start(struct ifnet *ifp) { POCE_SOFTC sc = ifp->if_softc; struct mbuf *m; int rc = 0; int def_q = 0; /* Defualt tx queue is 0*/ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!sc->link_status) return; do { IF_DEQUEUE(&sc->ifp->if_snd, m); if (m == NULL) break; LOCK(&sc->wq[def_q]->tx_lock); rc = oce_tx(sc, &m, def_q); UNLOCK(&sc->wq[def_q]->tx_lock); if (rc) { if (m != NULL) { sc->wq[def_q]->tx_stats.tx_stops ++; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m); m = NULL; } break; } if (m != NULL) ETHER_BPF_MTAP(ifp, m); } while (TRUE); return; } /* Handle the Completion Queue for transmit */ uint16_t oce_wq_handler(void *arg) { struct oce_wq *wq = (struct oce_wq *)arg; POCE_SOFTC sc = wq->parent; struct oce_cq *cq = wq->cq; struct oce_nic_tx_cqe *cqe; int num_cqes = 0; bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe); while (cqe->u0.dw[3]) { DW_SWAP((uint32_t *) cqe, sizeof(oce_wq_cqe)); wq->ring->cidx = cqe->u0.s.wqe_index + 1; if (wq->ring->cidx >= wq->ring->num_items) wq->ring->cidx -= wq->ring->num_items; oce_tx_complete(wq, cqe->u0.s.wqe_index, cqe->u0.s.status); wq->tx_stats.tx_compl++; cqe->u0.dw[3] = 0; RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe); num_cqes++; } if (num_cqes) oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE); return 0; } static int oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq) { POCE_SOFTC sc = ifp->if_softc; int status = 0, queue_index = 0; struct mbuf *next = NULL; struct buf_ring *br = NULL; br = wq->br; queue_index = wq->queue_index; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { if (m != NULL) status = drbr_enqueue(ifp, br, m); return status; } if (m != NULL) { if ((status = drbr_enqueue(ifp, br, m)) != 0) return status; } while ((next = drbr_peek(ifp, br)) != NULL) { if (oce_tx(sc, &next, queue_index)) { if (next == NULL) { drbr_advance(ifp, br); } else { drbr_putback(ifp, br, next); wq->tx_stats.tx_stops ++; ifp->if_drv_flags |= IFF_DRV_OACTIVE; status = drbr_enqueue(ifp, br, next); } break; } drbr_advance(ifp, br); ifp->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) ifp->if_omcasts++; ETHER_BPF_MTAP(ifp, next); } return status; } /***************************************************************************** * Receive routines functions * *****************************************************************************/ static void oce_rx(struct oce_rq *rq, uint32_t rqe_idx, struct oce_nic_rx_cqe *cqe) { uint32_t out; struct oce_packet_desc *pd; POCE_SOFTC sc = (POCE_SOFTC) rq->parent; int i, len, frag_len; struct mbuf *m = NULL, *tail = NULL; uint16_t vtag; len = cqe->u0.s.pkt_size; if (!len) { /*partial DMA workaround for Lancer*/ oce_discard_rx_comp(rq, cqe); goto exit; } /* Get vlan_tag value */ if(IS_BE(sc) || IS_SH(sc)) vtag = BSWAP_16(cqe->u0.s.vlan_tag); else vtag = cqe->u0.s.vlan_tag; for (i = 0; i < cqe->u0.s.num_fragments; i++) { if (rq->packets_out == rq->packets_in) { device_printf(sc->dev, "RQ transmit descriptor missing\n"); } out = rq->packets_out + 1; if (out == OCE_RQ_PACKET_ARRAY_SIZE) out = 0; pd = &rq->pckts[rq->packets_out]; rq->packets_out = out; bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(rq->tag, pd->map); rq->pending--; frag_len = (len > rq->cfg.frag_size) ? rq->cfg.frag_size : len; pd->mbuf->m_len = frag_len; if (tail != NULL) { /* additional fragments */ pd->mbuf->m_flags &= ~M_PKTHDR; tail->m_next = pd->mbuf; tail = pd->mbuf; } else { /* first fragment, fill out much of the packet header */ pd->mbuf->m_pkthdr.len = len; pd->mbuf->m_pkthdr.csum_flags = 0; if (IF_CSUM_ENABLED(sc)) { if (cqe->u0.s.l4_cksum_pass) { pd->mbuf->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); pd->mbuf->m_pkthdr.csum_data = 0xffff; } if (cqe->u0.s.ip_cksum_pass) { if (!cqe->u0.s.ip_ver) { /* IPV4 */ pd->mbuf->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); } } } m = tail = pd->mbuf; } pd->mbuf = NULL; len -= frag_len; } if (m) { if (!oce_cqe_portid_valid(sc, cqe)) { m_freem(m); goto exit; } m->m_pkthdr.rcvif = sc->ifp; #if __FreeBSD_version >= 800000 if (rq->queue_index) m->m_pkthdr.flowid = (rq->queue_index - 1); else m->m_pkthdr.flowid = rq->queue_index; m->m_flags |= M_FLOWID; #endif /* This deternies if vlan tag is Valid */ if (oce_cqe_vtp_valid(sc, cqe)) { if (sc->function_mode & FNM_FLEX10_MODE) { /* FLEX10. If QnQ is not set, neglect VLAN */ if (cqe->u0.s.qnq) { m->m_pkthdr.ether_vtag = vtag; m->m_flags |= M_VLANTAG; } } else if (sc->pvid != (vtag & VLAN_VID_MASK)) { /* In UMC mode generally pvid will be striped by hw. But in some cases we have seen it comes with pvid. So if pvid == vlan, neglect vlan. */ m->m_pkthdr.ether_vtag = vtag; m->m_flags |= M_VLANTAG; } } sc->ifp->if_ipackets++; #if defined(INET6) || defined(INET) /* Try to queue to LRO */ if (IF_LRO_ENABLED(sc) && (cqe->u0.s.ip_cksum_pass) && (cqe->u0.s.l4_cksum_pass) && (!cqe->u0.s.ip_ver) && (rq->lro.lro_cnt != 0)) { if (tcp_lro_rx(&rq->lro, m, 0) == 0) { rq->lro_pkts_queued ++; goto post_done; } /* If LRO posting fails then try to post to STACK */ } #endif (*sc->ifp->if_input) (sc->ifp, m); #if defined(INET6) || defined(INET) post_done: #endif /* Update rx stats per queue */ rq->rx_stats.rx_pkts++; rq->rx_stats.rx_bytes += cqe->u0.s.pkt_size; rq->rx_stats.rx_frags += cqe->u0.s.num_fragments; if (cqe->u0.s.pkt_type == OCE_MULTICAST_PACKET) rq->rx_stats.rx_mcast_pkts++; if (cqe->u0.s.pkt_type == OCE_UNICAST_PACKET) rq->rx_stats.rx_ucast_pkts++; } exit: return; } static void oce_discard_rx_comp(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe) { uint32_t out, i = 0; struct oce_packet_desc *pd; POCE_SOFTC sc = (POCE_SOFTC) rq->parent; int num_frags = cqe->u0.s.num_fragments; for (i = 0; i < num_frags; i++) { if (rq->packets_out == rq->packets_in) { device_printf(sc->dev, "RQ transmit descriptor missing\n"); } out = rq->packets_out + 1; if (out == OCE_RQ_PACKET_ARRAY_SIZE) out = 0; pd = &rq->pckts[rq->packets_out]; rq->packets_out = out; bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(rq->tag, pd->map); rq->pending--; m_freem(pd->mbuf); } } static int oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe) { struct oce_nic_rx_cqe_v1 *cqe_v1; int vtp = 0; if (sc->be3_native) { cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe; vtp = cqe_v1->u0.s.vlan_tag_present; } else vtp = cqe->u0.s.vlan_tag_present; return vtp; } static int oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe) { struct oce_nic_rx_cqe_v1 *cqe_v1; int port_id = 0; if (sc->be3_native && (IS_BE(sc) || IS_SH(sc))) { cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe; port_id = cqe_v1->u0.s.port; if (sc->port_id != port_id) return 0; } else ;/* For BE3 legacy and Lancer this is dummy */ return 1; } #if defined(INET6) || defined(INET) static void oce_rx_flush_lro(struct oce_rq *rq) { struct lro_ctrl *lro = &rq->lro; struct lro_entry *queued; POCE_SOFTC sc = (POCE_SOFTC) rq->parent; if (!IF_LRO_ENABLED(sc)) return; while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } rq->lro_pkts_queued = 0; return; } static int oce_init_lro(POCE_SOFTC sc) { struct lro_ctrl *lro = NULL; int i = 0, rc = 0; for (i = 0; i < sc->nrqs; i++) { lro = &sc->rq[i]->lro; rc = tcp_lro_init(lro); if (rc != 0) { device_printf(sc->dev, "LRO init failed\n"); return rc; } lro->ifp = sc->ifp; } return rc; } void oce_free_lro(POCE_SOFTC sc) { struct lro_ctrl *lro = NULL; int i = 0; for (i = 0; i < sc->nrqs; i++) { lro = &sc->rq[i]->lro; if (lro) tcp_lro_free(lro); } } #endif int oce_alloc_rx_bufs(struct oce_rq *rq, int count) { POCE_SOFTC sc = (POCE_SOFTC) rq->parent; int i, in, rc; struct oce_packet_desc *pd; bus_dma_segment_t segs[6]; int nsegs, added = 0; struct oce_nic_rqe *rqe; pd_rxulp_db_t rxdb_reg; bzero(&rxdb_reg, sizeof(pd_rxulp_db_t)); for (i = 0; i < count; i++) { in = rq->packets_in + 1; if (in == OCE_RQ_PACKET_ARRAY_SIZE) in = 0; if (in == rq->packets_out) break; /* no more room */ pd = &rq->pckts[rq->packets_in]; pd->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (pd->mbuf == NULL) break; pd->mbuf->m_len = pd->mbuf->m_pkthdr.len = MCLBYTES; rc = bus_dmamap_load_mbuf_sg(rq->tag, pd->map, pd->mbuf, segs, &nsegs, BUS_DMA_NOWAIT); if (rc) { m_free(pd->mbuf); break; } if (nsegs != 1) { i--; continue; } rq->packets_in = in; bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_PREREAD); rqe = RING_GET_PRODUCER_ITEM_VA(rq->ring, struct oce_nic_rqe); rqe->u0.s.frag_pa_hi = ADDR_HI(segs[0].ds_addr); rqe->u0.s.frag_pa_lo = ADDR_LO(segs[0].ds_addr); DW_SWAP(u32ptr(rqe), sizeof(struct oce_nic_rqe)); RING_PUT(rq->ring, 1); added++; rq->pending++; } if (added != 0) { for (i = added / OCE_MAX_RQ_POSTS; i > 0; i--) { rxdb_reg.bits.num_posted = OCE_MAX_RQ_POSTS; rxdb_reg.bits.qid = rq->rq_id; OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0); added -= OCE_MAX_RQ_POSTS; } if (added > 0) { rxdb_reg.bits.qid = rq->rq_id; rxdb_reg.bits.num_posted = added; OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0); } } return 0; } /* Handle the Completion Queue for receive */ uint16_t oce_rq_handler(void *arg) { struct oce_rq *rq = (struct oce_rq *)arg; struct oce_cq *cq = rq->cq; POCE_SOFTC sc = rq->parent; struct oce_nic_rx_cqe *cqe; int num_cqes = 0, rq_buffers_used = 0; bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe); while (cqe->u0.dw[2]) { DW_SWAP((uint32_t *) cqe, sizeof(oce_rq_cqe)); RING_GET(rq->ring, 1); if (cqe->u0.s.error == 0) { oce_rx(rq, cqe->u0.s.frag_index, cqe); } else { rq->rx_stats.rxcp_err++; sc->ifp->if_ierrors++; /* Post L3/L4 errors to stack.*/ oce_rx(rq, cqe->u0.s.frag_index, cqe); } rq->rx_stats.rx_compl++; cqe->u0.dw[2] = 0; #if defined(INET6) || defined(INET) if (IF_LRO_ENABLED(sc) && rq->lro_pkts_queued >= 16) { oce_rx_flush_lro(rq); } #endif RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe); num_cqes++; if (num_cqes >= (IS_XE201(sc) ? 8 : oce_max_rsp_handled)) break; } #if defined(INET6) || defined(INET) if (IF_LRO_ENABLED(sc)) oce_rx_flush_lro(rq); #endif if (num_cqes) { oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE); rq_buffers_used = OCE_RQ_PACKET_ARRAY_SIZE - rq->pending; if (rq_buffers_used > 1) oce_alloc_rx_bufs(rq, (rq_buffers_used - 1)); } return 0; } /***************************************************************************** * Helper function prototypes in this file * *****************************************************************************/ static int oce_attach_ifp(POCE_SOFTC sc) { sc->ifp = if_alloc(IFT_ETHER); if (!sc->ifp) return ENOMEM; ifmedia_init(&sc->media, IFM_IMASK, oce_media_change, oce_media_status); ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); sc->ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST; sc->ifp->if_ioctl = oce_ioctl; sc->ifp->if_start = oce_start; sc->ifp->if_init = oce_init; sc->ifp->if_mtu = ETHERMTU; sc->ifp->if_softc = sc; #if __FreeBSD_version >= 800000 sc->ifp->if_transmit = oce_multiq_start; sc->ifp->if_qflush = oce_multiq_flush; #endif if_initname(sc->ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); sc->ifp->if_snd.ifq_drv_maxlen = OCE_MAX_TX_DESC - 1; IFQ_SET_MAXLEN(&sc->ifp->if_snd, sc->ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&sc->ifp->if_snd); sc->ifp->if_hwassist = OCE_IF_HWASSIST; sc->ifp->if_hwassist |= CSUM_TSO; sc->ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP); sc->ifp->if_capabilities = OCE_IF_CAPABILITIES; sc->ifp->if_capabilities |= IFCAP_HWCSUM; sc->ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; #if defined(INET6) || defined(INET) sc->ifp->if_capabilities |= IFCAP_TSO; sc->ifp->if_capabilities |= IFCAP_LRO; sc->ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif sc->ifp->if_capenable = sc->ifp->if_capabilities; - if_initbaudrate(sc->ifp, IF_Gbps(10)); + sc->ifp->if_baudrate = IF_Gbps(10); #if __FreeBSD_version >= 1000000 sc->ifp->if_hw_tsomax = OCE_MAX_TSO_SIZE; #endif ether_ifattach(sc->ifp, sc->macaddr.mac_addr); return 0; } static void oce_add_vlan(void *arg, struct ifnet *ifp, uint16_t vtag) { POCE_SOFTC sc = ifp->if_softc; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) return; sc->vlan_tag[vtag] = 1; sc->vlans_added++; if (sc->vlans_added <= (sc->max_vlans + 1)) oce_vid_config(sc); } static void oce_del_vlan(void *arg, struct ifnet *ifp, uint16_t vtag) { POCE_SOFTC sc = ifp->if_softc; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) return; sc->vlan_tag[vtag] = 0; sc->vlans_added--; oce_vid_config(sc); } /* * A max of 64 vlans can be configured in BE. If the user configures * more, place the card in vlan promiscuous mode. */ static int oce_vid_config(POCE_SOFTC sc) { struct normal_vlan vtags[MAX_VLANFILTER_SIZE]; uint16_t ntags = 0, i; int status = 0; if ((sc->vlans_added <= MAX_VLANFILTER_SIZE) && (sc->ifp->if_capenable & IFCAP_VLAN_HWFILTER)) { for (i = 0; i < MAX_VLANS; i++) { if (sc->vlan_tag[i]) { vtags[ntags].vtag = i; ntags++; } } if (ntags) status = oce_config_vlan(sc, (uint8_t) sc->if_id, vtags, ntags, 1, 0); } else status = oce_config_vlan(sc, (uint8_t) sc->if_id, NULL, 0, 1, 1); return status; } static void oce_mac_addr_set(POCE_SOFTC sc) { uint32_t old_pmac_id = sc->pmac_id; int status = 0; status = bcmp((IF_LLADDR(sc->ifp)), sc->macaddr.mac_addr, sc->macaddr.size_of_struct); if (!status) return; status = oce_mbox_macaddr_add(sc, (uint8_t *)(IF_LLADDR(sc->ifp)), sc->if_id, &sc->pmac_id); if (!status) { status = oce_mbox_macaddr_del(sc, sc->if_id, old_pmac_id); bcopy((IF_LLADDR(sc->ifp)), sc->macaddr.mac_addr, sc->macaddr.size_of_struct); } if (status) device_printf(sc->dev, "Failed update macaddress\n"); } static int oce_handle_passthrough(struct ifnet *ifp, caddr_t data) { POCE_SOFTC sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int rc = ENXIO; char cookie[32] = {0}; void *priv_data = (void *)ifr->ifr_data; void *ioctl_ptr; uint32_t req_size; struct mbx_hdr req; OCE_DMA_MEM dma_mem; struct mbx_common_get_cntl_attr *fw_cmd; if (copyin(priv_data, cookie, strlen(IOCTL_COOKIE))) return EFAULT; if (memcmp(cookie, IOCTL_COOKIE, strlen(IOCTL_COOKIE))) return EINVAL; ioctl_ptr = (char *)priv_data + strlen(IOCTL_COOKIE); if (copyin(ioctl_ptr, &req, sizeof(struct mbx_hdr))) return EFAULT; req_size = le32toh(req.u0.req.request_length); if (req_size > 65536) return EINVAL; req_size += sizeof(struct mbx_hdr); rc = oce_dma_alloc(sc, req_size, &dma_mem, 0); if (rc) return ENOMEM; if (copyin(ioctl_ptr, OCE_DMAPTR(&dma_mem,char), req_size)) { rc = EFAULT; goto dma_free; } rc = oce_pass_through_mbox(sc, &dma_mem, req_size); if (rc) { rc = EIO; goto dma_free; } if (copyout(OCE_DMAPTR(&dma_mem,char), ioctl_ptr, req_size)) rc = EFAULT; /* firmware is filling all the attributes for this ioctl except the driver version..so fill it */ if(req.u0.rsp.opcode == OPCODE_COMMON_GET_CNTL_ATTRIBUTES) { fw_cmd = (struct mbx_common_get_cntl_attr *) ioctl_ptr; strncpy(fw_cmd->params.rsp.cntl_attr_info.hba_attr.drv_ver_str, COMPONENT_REVISION, strlen(COMPONENT_REVISION)); } dma_free: oce_dma_free(sc, &dma_mem); return rc; } static void oce_eqd_set_periodic(POCE_SOFTC sc) { struct oce_set_eqd set_eqd[OCE_MAX_EQ]; struct oce_aic_obj *aic; struct oce_eq *eqo; uint64_t now = 0, delta; int eqd, i, num = 0; uint32_t ips = 0; int tps; for (i = 0 ; i < sc->neqs; i++) { eqo = sc->eq[i]; aic = &sc->aic_obj[i]; /* When setting the static eq delay from the user space */ if (!aic->enable) { eqd = aic->et_eqd; goto modify_eqd; } now = ticks; /* Over flow check */ if ((now < aic->ticks) || (eqo->intr < aic->intr_prev)) goto done; delta = now - aic->ticks; tps = delta/hz; /* Interrupt rate based on elapsed ticks */ if(tps) ips = (uint32_t)(eqo->intr - aic->intr_prev) / tps; if (ips > INTR_RATE_HWM) eqd = aic->cur_eqd + 20; else if (ips < INTR_RATE_LWM) eqd = aic->cur_eqd / 2; else goto done; if (eqd < 10) eqd = 0; /* Make sure that the eq delay is in the known range */ eqd = min(eqd, aic->max_eqd); eqd = max(eqd, aic->min_eqd); modify_eqd: if (eqd != aic->cur_eqd) { set_eqd[num].delay_multiplier = (eqd * 65)/100; set_eqd[num].eq_id = eqo->eq_id; aic->cur_eqd = eqd; num++; } done: aic->intr_prev = eqo->intr; aic->ticks = now; } /* Is there atleast one eq that needs to be modified? */ if(num) oce_mbox_eqd_modify_periodic(sc, set_eqd, num); } static void oce_detect_hw_error(POCE_SOFTC sc) { uint32_t ue_low = 0, ue_high = 0, ue_low_mask = 0, ue_high_mask = 0; uint32_t sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; uint32_t i; if (sc->hw_error) return; if (IS_XE201(sc)) { sliport_status = OCE_READ_REG32(sc, db, SLIPORT_STATUS_OFFSET); if (sliport_status & SLIPORT_STATUS_ERR_MASK) { sliport_err1 = OCE_READ_REG32(sc, db, SLIPORT_ERROR1_OFFSET); sliport_err2 = OCE_READ_REG32(sc, db, SLIPORT_ERROR2_OFFSET); } } else { ue_low = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW); ue_high = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HIGH); ue_low_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW_MASK); ue_high_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HI_MASK); ue_low = (ue_low & ~ue_low_mask); ue_high = (ue_high & ~ue_high_mask); } /* On certain platforms BE hardware can indicate spurious UEs. * Allow the h/w to stop working completely in case of a real UE. * Hence not setting the hw_error for UE detection. */ if (sliport_status & SLIPORT_STATUS_ERR_MASK) { sc->hw_error = TRUE; device_printf(sc->dev, "Error detected in the card\n"); } if (sliport_status & SLIPORT_STATUS_ERR_MASK) { device_printf(sc->dev, "ERR: sliport status 0x%x\n", sliport_status); device_printf(sc->dev, "ERR: sliport error1 0x%x\n", sliport_err1); device_printf(sc->dev, "ERR: sliport error2 0x%x\n", sliport_err2); } if (ue_low) { for (i = 0; ue_low; ue_low >>= 1, i++) { if (ue_low & 1) device_printf(sc->dev, "UE: %s bit set\n", ue_status_low_desc[i]); } } if (ue_high) { for (i = 0; ue_high; ue_high >>= 1, i++) { if (ue_high & 1) device_printf(sc->dev, "UE: %s bit set\n", ue_status_hi_desc[i]); } } } static void oce_local_timer(void *arg) { POCE_SOFTC sc = arg; int i = 0; oce_detect_hw_error(sc); oce_refresh_nic_stats(sc); oce_refresh_queue_stats(sc); oce_mac_addr_set(sc); /* TX Watch Dog*/ for (i = 0; i < sc->nwqs; i++) oce_tx_restart(sc, sc->wq[i]); /* calculate and set the eq delay for optimal interrupt rate */ if (IS_BE(sc) || IS_SH(sc)) oce_eqd_set_periodic(sc); callout_reset(&sc->timer, hz, oce_local_timer, sc); } /* NOTE : This should only be called holding * DEVICE_LOCK. */ static void oce_if_deactivate(POCE_SOFTC sc) { int i, mtime = 0; int wait_req = 0; struct oce_rq *rq; struct oce_wq *wq; struct oce_eq *eq; sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /*Wait for max of 400ms for TX completions to be done */ while (mtime < 400) { wait_req = 0; for_all_wq_queues(sc, wq, i) { if (wq->ring->num_used) { wait_req = 1; DELAY(1); break; } } mtime += 1; if (!wait_req) break; } /* Stop intrs and finish any bottom halves pending */ oce_hw_intr_disable(sc); /* Since taskqueue_drain takes a Gaint Lock, We should not acquire any other lock. So unlock device lock and require after completing taskqueue_drain. */ UNLOCK(&sc->dev_lock); for (i = 0; i < sc->intr_count; i++) { if (sc->intrs[i].tq != NULL) { taskqueue_drain(sc->intrs[i].tq, &sc->intrs[i].task); } } LOCK(&sc->dev_lock); /* Delete RX queue in card with flush param */ oce_stop_rx(sc); /* Invalidate any pending cq and eq entries*/ for_all_evnt_queues(sc, eq, i) oce_drain_eq(eq); for_all_rq_queues(sc, rq, i) oce_drain_rq_cq(rq); for_all_wq_queues(sc, wq, i) oce_drain_wq_cq(wq); /* But still we need to get MCC aync events. So enable intrs and also arm first EQ */ oce_hw_intr_enable(sc); oce_arm_eq(sc, sc->eq[0]->eq_id, 0, TRUE, FALSE); DELAY(10); } static void oce_if_activate(POCE_SOFTC sc) { struct oce_eq *eq; struct oce_rq *rq; struct oce_wq *wq; int i, rc = 0; sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; oce_hw_intr_disable(sc); oce_start_rx(sc); for_all_rq_queues(sc, rq, i) { rc = oce_start_rq(rq); if (rc) device_printf(sc->dev, "Unable to start RX\n"); } for_all_wq_queues(sc, wq, i) { rc = oce_start_wq(wq); if (rc) device_printf(sc->dev, "Unable to start TX\n"); } for_all_evnt_queues(sc, eq, i) oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE); oce_hw_intr_enable(sc); } static void process_link_state(POCE_SOFTC sc, struct oce_async_cqe_link_state *acqe) { /* Update Link status */ if ((acqe->u0.s.link_status & ~ASYNC_EVENT_LOGICAL) == ASYNC_EVENT_LINK_UP) { sc->link_status = ASYNC_EVENT_LINK_UP; if_link_state_change(sc->ifp, LINK_STATE_UP); } else { sc->link_status = ASYNC_EVENT_LINK_DOWN; if_link_state_change(sc->ifp, LINK_STATE_DOWN); } } /* Handle the Completion Queue for the Mailbox/Async notifications */ uint16_t oce_mq_handler(void *arg) { struct oce_mq *mq = (struct oce_mq *)arg; POCE_SOFTC sc = mq->parent; struct oce_cq *cq = mq->cq; int num_cqes = 0, evt_type = 0, optype = 0; struct oce_mq_cqe *cqe; struct oce_async_cqe_link_state *acqe; struct oce_async_event_grp5_pvid_state *gcqe; struct oce_async_event_qnq *dbgcqe; bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe); while (cqe->u0.dw[3]) { DW_SWAP((uint32_t *) cqe, sizeof(oce_mq_cqe)); if (cqe->u0.s.async_event) { evt_type = cqe->u0.s.event_type; optype = cqe->u0.s.async_type; if (evt_type == ASYNC_EVENT_CODE_LINK_STATE) { /* Link status evt */ acqe = (struct oce_async_cqe_link_state *)cqe; process_link_state(sc, acqe); } else if ((evt_type == ASYNC_EVENT_GRP5) && (optype == ASYNC_EVENT_PVID_STATE)) { /* GRP5 PVID */ gcqe = (struct oce_async_event_grp5_pvid_state *)cqe; if (gcqe->enabled) sc->pvid = gcqe->tag & VLAN_VID_MASK; else sc->pvid = 0; } else if(evt_type == ASYNC_EVENT_CODE_DEBUG && optype == ASYNC_EVENT_DEBUG_QNQ) { dbgcqe = (struct oce_async_event_qnq *)cqe; if(dbgcqe->valid) sc->qnqid = dbgcqe->vlan_tag; sc->qnq_debug_event = TRUE; } } cqe->u0.dw[3] = 0; RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe); num_cqes++; } if (num_cqes) oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE); return 0; } static void setup_max_queues_want(POCE_SOFTC sc) { /* Check if it is FLEX machine. Is so dont use RSS */ if ((sc->function_mode & FNM_FLEX10_MODE) || (sc->function_mode & FNM_UMC_MODE) || (sc->function_mode & FNM_VNIC_MODE) || (!is_rss_enabled(sc)) || (sc->flags & OCE_FLAGS_BE2)) { sc->nrqs = 1; sc->nwqs = 1; } else { sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1; sc->nwqs = MIN(OCE_NCPUS, sc->nrssqs); } } static void update_queues_got(POCE_SOFTC sc) { if (is_rss_enabled(sc)) { sc->nrqs = sc->intr_count + 1; sc->nwqs = sc->intr_count; } else { sc->nrqs = 1; sc->nwqs = 1; } } static int oce_check_ipv6_ext_hdr(struct mbuf *m) { struct ether_header *eh = mtod(m, struct ether_header *); caddr_t m_datatemp = m->m_data; if (eh->ether_type == htons(ETHERTYPE_IPV6)) { m->m_data += sizeof(struct ether_header); struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if((ip6->ip6_nxt != IPPROTO_TCP) && \ (ip6->ip6_nxt != IPPROTO_UDP)){ struct ip6_ext *ip6e = NULL; m->m_data += sizeof(struct ip6_hdr); ip6e = (struct ip6_ext *) mtod(m, struct ip6_ext *); if(ip6e->ip6e_len == 0xff) { m->m_data = m_datatemp; return TRUE; } } m->m_data = m_datatemp; } return FALSE; } static int is_be3_a1(POCE_SOFTC sc) { if((sc->flags & OCE_FLAGS_BE3) && ((sc->asic_revision & 0xFF) < 2)) { return TRUE; } return FALSE; } static struct mbuf * oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete) { uint16_t vlan_tag = 0; if(!M_WRITABLE(m)) return NULL; /* Embed vlan tag in the packet if it is not part of it */ if(m->m_flags & M_VLANTAG) { vlan_tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag); m->m_flags &= ~M_VLANTAG; } /* if UMC, ignore vlan tag insertion and instead insert pvid */ if(sc->pvid) { if(!vlan_tag) vlan_tag = sc->pvid; *complete = FALSE; } if(vlan_tag) { m = ether_vlanencap(m, vlan_tag); } if(sc->qnqid) { m = ether_vlanencap(m, sc->qnqid); *complete = FALSE; } return m; } static int oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m) { if(is_be3_a1(sc) && IS_QNQ_OR_UMC(sc) && \ oce_check_ipv6_ext_hdr(m)) { return TRUE; } return FALSE; } static void oce_get_config(POCE_SOFTC sc) { int rc = 0; uint32_t max_rss = 0; if ((IS_BE(sc) || IS_SH(sc)) && (!sc->be3_native)) max_rss = OCE_LEGACY_MODE_RSS; else max_rss = OCE_MAX_RSS; if (!IS_BE(sc)) { rc = oce_get_profile_config(sc, max_rss); if (rc) { sc->nwqs = OCE_MAX_WQ; sc->nrssqs = max_rss; sc->nrqs = sc->nrssqs + 1; } } else { /* For BE3 don't rely on fw for determining the resources */ sc->nrssqs = max_rss; sc->nrqs = sc->nrssqs + 1; sc->nwqs = OCE_MAX_WQ; sc->max_vlans = MAX_VLANFILTER_SIZE; } } Index: head/sys/dev/qlxgb/qla_os.c =================================================================== --- head/sys/dev/qlxgb/qla_os.c (revision 263101) +++ head/sys/dev/qlxgb/qla_os.c (revision 263102) @@ -1,1486 +1,1486 @@ /* * Copyright (c) 2011-2013 Qlogic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * File: qla_os.c * Author : David C Somayajulu, Qlogic Corporation, Aliso Viejo, CA 92656. */ #include __FBSDID("$FreeBSD$"); #include "qla_os.h" #include "qla_reg.h" #include "qla_hw.h" #include "qla_def.h" #include "qla_inline.h" #include "qla_ver.h" #include "qla_glbl.h" #include "qla_dbg.h" /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif #ifndef PCI_PRODUCT_QLOGIC_ISP8020 #define PCI_PRODUCT_QLOGIC_ISP8020 0x8020 #endif #define PCI_QLOGIC_ISP8020 \ ((PCI_PRODUCT_QLOGIC_ISP8020 << 16) | PCI_VENDOR_QLOGIC) /* * static functions */ static int qla_alloc_parent_dma_tag(qla_host_t *ha); static void qla_free_parent_dma_tag(qla_host_t *ha); static int qla_alloc_xmt_bufs(qla_host_t *ha); static void qla_free_xmt_bufs(qla_host_t *ha); static int qla_alloc_rcv_bufs(qla_host_t *ha); static void qla_free_rcv_bufs(qla_host_t *ha); static void qla_init_ifnet(device_t dev, qla_host_t *ha); static int qla_sysctl_get_stats(SYSCTL_HANDLER_ARGS); static void qla_release(qla_host_t *ha); static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void qla_stop(qla_host_t *ha); static int qla_send(qla_host_t *ha, struct mbuf **m_headp); static void qla_tx_done(void *context, int pending); /* * Hooks to the Operating Systems */ static int qla_pci_probe (device_t); static int qla_pci_attach (device_t); static int qla_pci_detach (device_t); static void qla_init(void *arg); static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qla_media_change(struct ifnet *ifp); static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static device_method_t qla_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qla_pci_probe), DEVMETHOD(device_attach, qla_pci_attach), DEVMETHOD(device_detach, qla_pci_detach), { 0, 0 } }; static driver_t qla_pci_driver = { "ql", qla_pci_methods, sizeof (qla_host_t), }; static devclass_t qla80xx_devclass; DRIVER_MODULE(qla80xx, pci, qla_pci_driver, qla80xx_devclass, 0, 0); MODULE_DEPEND(qla80xx, pci, 1, 1, 1); MODULE_DEPEND(qla80xx, ether, 1, 1, 1); MALLOC_DEFINE(M_QLA8XXXBUF, "qla80xxbuf", "Buffers for qla80xx driver"); uint32_t std_replenish = 8; uint32_t jumbo_replenish = 2; uint32_t rcv_pkt_thres = 128; uint32_t rcv_pkt_thres_d = 32; uint32_t snd_pkt_thres = 16; uint32_t free_pkt_thres = (NUM_TX_DESCRIPTORS / 2); static char dev_str[64]; /* * Name: qla_pci_probe * Function: Validate the PCI device to be a QLA80XX device */ static int qla_pci_probe(device_t dev) { switch ((pci_get_device(dev) << 16) | (pci_get_vendor(dev))) { case PCI_QLOGIC_ISP8020: snprintf(dev_str, sizeof(dev_str), "%s v%d.%d.%d", "Qlogic ISP 80xx PCI CNA Adapter-Ethernet Function", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); device_set_desc(dev, dev_str); break; default: return (ENXIO); } if (bootverbose) printf("%s: %s\n ", __func__, dev_str); return (BUS_PROBE_DEFAULT); } static void qla_add_sysctls(qla_host_t *ha) { device_t dev = ha->pci_dev; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RD, (void *)ha, 0, qla_sysctl_get_stats, "I", "Statistics"); SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fw_version", CTLFLAG_RD, &ha->fw_ver_str, 0, "firmware version"); dbg_level = 0; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &dbg_level, dbg_level, "Debug Level"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "std_replenish", CTLFLAG_RW, &std_replenish, std_replenish, "Threshold for Replenishing Standard Frames"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "jumbo_replenish", CTLFLAG_RW, &jumbo_replenish, jumbo_replenish, "Threshold for Replenishing Jumbo Frames"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rcv_pkt_thres", CTLFLAG_RW, &rcv_pkt_thres, rcv_pkt_thres, "Threshold for # of rcv pkts to trigger indication isr"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rcv_pkt_thres_d", CTLFLAG_RW, &rcv_pkt_thres_d, rcv_pkt_thres_d, "Threshold for # of rcv pkts to trigger indication defered"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "snd_pkt_thres", CTLFLAG_RW, &snd_pkt_thres, snd_pkt_thres, "Threshold for # of snd packets"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "free_pkt_thres", CTLFLAG_RW, &free_pkt_thres, free_pkt_thres, "Threshold for # of packets to free at a time"); return; } static void qla_watchdog(void *arg) { qla_host_t *ha = arg; qla_hw_t *hw; struct ifnet *ifp; hw = &ha->hw; ifp = ha->ifp; if (ha->flags.qla_watchdog_exit) return; if (!ha->flags.qla_watchdog_pause) { if (qla_le32_to_host(*(hw->tx_cons)) != hw->txr_comp) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); } else if ((ifp->if_snd.ifq_head != NULL) && QL_RUNNING(ifp)) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); } } ha->watchdog_ticks = ha->watchdog_ticks++ % 1000; callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); } /* * Name: qla_pci_attach * Function: attaches the device to the operating system */ static int qla_pci_attach(device_t dev) { qla_host_t *ha = NULL; uint32_t rsrc_len, i; QL_DPRINT2((dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qla_host_t)); if (pci_get_device(dev) != PCI_PRODUCT_QLOGIC_ISP8020) { device_printf(dev, "device is not ISP8020\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev); ha->pci_dev = dev; pci_enable_busmaster(dev); ha->reg_rid = PCIR_BAR(0); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map any ports\n"); goto qla_pci_attach_err; } rsrc_len = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->reg_rid); mtx_init(&ha->hw_lock, "qla80xx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->tx_lock, "qla80xx_tx_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->rx_lock, "qla80xx_rx_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->rxj_lock, "qla80xx_rxj_lock", MTX_NETWORK_LOCK, MTX_DEF); ha->flags.lock_init = 1; ha->msix_count = pci_msix_count(dev); if (ha->msix_count < qla_get_msix_count(ha)) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto qla_pci_attach_err; } QL_DPRINT2((dev, "%s: ha %p irq %p pci_func 0x%x rsrc_count 0x%08x" " msix_count 0x%x pci_reg %p\n", __func__, ha, ha->irq, ha->pci_func, rsrc_len, ha->msix_count, ha->pci_reg)); ha->msix_count = qla_get_msix_count(ha); if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msi[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qla_pci_attach_err; } TASK_INIT(&ha->tx_task, 0, qla_tx_done, ha); ha->tx_tq = taskqueue_create_fast("qla_txq", M_NOWAIT, taskqueue_thread_enqueue, &ha->tx_tq); taskqueue_start_threads(&ha->tx_tq, 1, PI_NET, "%s txq", device_get_nameunit(ha->pci_dev)); for (i = 0; i < ha->msix_count; i++) { ha->irq_vec[i].irq_rid = i+1; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); goto qla_pci_attach_err; } if (bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, qla_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle)) { device_printf(dev, "could not setup interrupt\n"); goto qla_pci_attach_err; } TASK_INIT(&ha->irq_vec[i].rcv_task, 0, qla_rcv,\ &ha->irq_vec[i]); ha->irq_vec[i].rcv_tq = taskqueue_create_fast("qla_rcvq", M_NOWAIT, taskqueue_thread_enqueue, &ha->irq_vec[i].rcv_tq); taskqueue_start_threads(&ha->irq_vec[i].rcv_tq, 1, PI_NET, "%s rcvq", device_get_nameunit(ha->pci_dev)); } qla_add_sysctls(ha); /* add hardware specific sysctls */ qla_hw_add_sysctls(ha); /* initialize hardware */ if (qla_init_hw(ha)) { device_printf(dev, "%s: qla_init_hw failed\n", __func__); goto qla_pci_attach_err; } device_printf(dev, "%s: firmware[%d.%d.%d.%d]\n", __func__, ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); snprintf(ha->fw_ver_str, sizeof(ha->fw_ver_str), "%d.%d.%d.%d", ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); //qla_get_hw_caps(ha); qla_read_mac_addr(ha); /* allocate parent dma tag */ if (qla_alloc_parent_dma_tag(ha)) { device_printf(dev, "%s: qla_alloc_parent_dma_tag failed\n", __func__); goto qla_pci_attach_err; } /* alloc all dma buffers */ if (qla_alloc_dma(ha)) { device_printf(dev, "%s: qla_alloc_dma failed\n", __func__); goto qla_pci_attach_err; } /* create the o.s ethernet interface */ qla_init_ifnet(dev, ha); ha->flags.qla_watchdog_active = 1; ha->flags.qla_watchdog_pause = 1; callout_init(&ha->tx_callout, TRUE); /* create ioctl device interface */ if (qla_make_cdev(ha)) { device_printf(dev, "%s: qla_make_cdev failed\n", __func__); goto qla_pci_attach_err; } callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); QL_DPRINT2((dev, "%s: exit 0\n", __func__)); return (0); qla_pci_attach_err: qla_release(ha); QL_DPRINT2((dev, "%s: exit ENXIO\n", __func__)); return (ENXIO); } /* * Name: qla_pci_detach * Function: Unhooks the device from the operating system */ static int qla_pci_detach(device_t dev) { qla_host_t *ha = NULL; struct ifnet *ifp; int i; QL_DPRINT2((dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } ifp = ha->ifp; QLA_LOCK(ha, __func__); qla_stop(ha); QLA_UNLOCK(ha, __func__); if (ha->tx_tq) { taskqueue_drain(ha->tx_tq, &ha->tx_task); taskqueue_free(ha->tx_tq); } for (i = 0; i < ha->msix_count; i++) { taskqueue_drain(ha->irq_vec[i].rcv_tq, &ha->irq_vec[i].rcv_task); taskqueue_free(ha->irq_vec[i].rcv_tq); } qla_release(ha); QL_DPRINT2((dev, "%s: exit\n", __func__)); return (0); } /* * SYSCTL Related Callbacks */ static int qla_sysctl_get_stats(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qla_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err) return (err); ha = (qla_host_t *)arg1; //qla_get_stats(ha); QL_DPRINT2((ha->pci_dev, "%s: called ret %d\n", __func__, ret)); return (err); } /* * Name: qla_release * Function: Releases the resources allocated for the device */ static void qla_release(qla_host_t *ha) { device_t dev; int i; dev = ha->pci_dev; qla_del_cdev(ha); if (ha->flags.qla_watchdog_active) ha->flags.qla_watchdog_exit = 1; callout_stop(&ha->tx_callout); qla_mdelay(__func__, 100); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); qla_free_dma(ha); qla_free_parent_dma_tag(ha); for (i = 0; i < ha->msix_count; i++) { if (ha->irq_vec[i].handle) (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); if (ha->irq_vec[i].irq) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } if (ha->msix_count) pci_release_msi(dev); if (ha->flags.lock_init) { mtx_destroy(&ha->tx_lock); mtx_destroy(&ha->rx_lock); mtx_destroy(&ha->rxj_lock); mtx_destroy(&ha->hw_lock); } if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); } /* * DMA Related Functions */ static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } QL_ASSERT((nsegs == 1), ("%s: %d segments returned!", __func__, nsegs)); *((bus_addr_t *)arg) = segs[0].ds_addr; return; } int qla_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; QL_DPRINT2((dev, "%s: enter\n", __func__)); ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { device_printf(dev, "%s: could not create dma tag\n", __func__); goto qla_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); device_printf(dev, "%s: bus_dmamem_alloc failed\n", __func__); goto qla_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qla_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto qla_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; qla_alloc_dmabuf_exit: QL_DPRINT2((dev, "%s: exit ret 0x%08x tag %p map %p b %p sz 0x%x\n", __func__, ret, (void *)dma_buf->dma_tag, (void *)dma_buf->dma_map, (void *)dma_buf->dma_b, dma_buf->size)); return ret; } void qla_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); } static int qla_alloc_parent_dma_tag(qla_host_t *ha) { int ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { device_printf(dev, "%s: could not create parent dma tag\n", __func__); return (-1); } ha->flags.parent_tag = 1; return (0); } static void qla_free_parent_dma_tag(qla_host_t *ha) { if (ha->flags.parent_tag) { bus_dma_tag_destroy(ha->parent_tag); ha->flags.parent_tag = 0; } } /* * Name: qla_init_ifnet * Function: Creates the Network Device Interface and Registers it with the O.S */ static void qla_init_ifnet(device_t dev, qla_host_t *ha) { struct ifnet *ifp; QL_DPRINT2((dev, "%s: enter\n", __func__)); ifp = ha->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: cannot if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - if_initbaudrate(ifp, IF_Gbps(10)); + ifp->if_baudrate = IF_Gbps(10); ifp->if_init = qla_init; ifp->if_softc = ha; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = qla_ioctl; ifp->if_start = qla_start; IFQ_SET_MAXLEN(&ifp->if_snd, qla_get_ifq_snd_maxlen(ha)); ifp->if_snd.ifq_drv_maxlen = qla_get_ifq_snd_maxlen(ha); IFQ_SET_READY(&ifp->if_snd); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ether_ifattach(ifp, qla_get_mac_addr(ha)); ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_LINKSTATE; #if defined(__FreeBSD_version) && (__FreeBSD_version < 900002) ifp->if_timer = 0; ifp->if_watchdog = NULL; #endif /* #if defined(__FreeBSD_version) && (__FreeBSD_version < 900002) */ ifp->if_capenable = ifp->if_capabilities; ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifmedia_init(&ha->media, IFM_IMASK, qla_media_change, qla_media_status); ifmedia_add(&ha->media, (IFM_ETHER | qla_get_optics(ha) | IFM_FDX), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO)); QL_DPRINT2((dev, "%s: exit\n", __func__)); return; } static void qla_init_locked(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; qla_stop(ha); if (qla_alloc_xmt_bufs(ha) != 0) return; if (qla_alloc_rcv_bufs(ha) != 0) return; if (qla_config_lro(ha)) return; bcopy(IF_LLADDR(ha->ifp), ha->hw.mac_addr, ETHER_ADDR_LEN); ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; ha->flags.stop_rcv = 0; if (qla_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ha->flags.qla_watchdog_pause = 0; } return; } static void qla_init(void *arg) { qla_host_t *ha; ha = (qla_host_t *)arg; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); QLA_LOCK(ha, __func__); qla_init_locked(ha); QLA_UNLOCK(ha, __func__); QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); } static void qla_set_multi(qla_host_t *ha, uint32_t add_multi) { uint8_t mta[Q8_MAX_NUM_MULTICAST_ADDRS * Q8_MAC_ADDR_LEN]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == Q8_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * Q8_MAC_ADDR_LEN], Q8_MAC_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); qla_hw_set_multi(ha, mta, mcnt, add_multi); return; } static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; switch (cmd) { case SIOCSIFADDR: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFADDR (0x%lx)\n", __func__, cmd)); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { QLA_LOCK(ha, __func__); qla_init_locked(ha); QLA_UNLOCK(ha, __func__); } QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", __func__, cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr))); arp_ifinit(ifp, ifa); if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) { qla_config_ipv4_addr(ha, (IA_SIN(ifa)->sin_addr.s_addr)); } } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFMTU (0x%lx)\n", __func__, cmd)); if (ifr->ifr_mtu > QLA_MAX_FRAME_SIZE - ETHER_HDR_LEN) { ret = EINVAL; } else { QLA_LOCK(ha, __func__); ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { ret = qla_set_max_mtu(ha, ha->max_frame_size, (ha->hw.rx_cntxt_rsp)->rx_rsp.cntxt_id); } QLA_UNLOCK(ha, __func__); if (ret) ret = EINVAL; } break; case SIOCSIFFLAGS: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFFLAGS (0x%lx)\n", __func__, cmd)); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { qla_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { qla_set_allmulti(ha); } } else { QLA_LOCK(ha, __func__); qla_init_locked(ha); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ret = qla_set_max_mtu(ha, ha->max_frame_size, (ha->hw.rx_cntxt_rsp)->rx_rsp.cntxt_id); QLA_UNLOCK(ha, __func__); } } else { QLA_LOCK(ha, __func__); if (ifp->if_drv_flags & IFF_DRV_RUNNING) qla_stop(ha); ha->if_flags = ifp->if_flags; QLA_UNLOCK(ha, __func__); } break; case SIOCADDMULTI: QL_DPRINT4((ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCADDMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qla_set_multi(ha, 1); } break; case SIOCDELMULTI: QL_DPRINT4((ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCDELMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qla_set_multi(ha, 0); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", __func__, cmd)); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFCAP (0x%lx)\n", __func__, cmd)); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) qla_init(ha); VLAN_CAPABILITIES(ifp); break; } default: QL_DPRINT4((ha->pci_dev, "%s: default (0x%lx)\n", __func__, cmd)); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qla_media_change(struct ifnet *ifp) { qla_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; qla_update_link_state(ha); if (ha->hw.flags.link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qla_get_optics(ha)); } QL_DPRINT2((ha->pci_dev, "%s: exit (%s)\n", __func__,\ (ha->hw.flags.link_up ? "link_up" : "link_down"))); return; } void qla_start(struct ifnet *ifp) { struct mbuf *m_head; qla_host_t *ha = (qla_host_t *)ifp->if_softc; QL_DPRINT8((ha->pci_dev, "%s: enter\n", __func__)); if (!mtx_trylock(&ha->tx_lock)) { QL_DPRINT8((ha->pci_dev, "%s: mtx_trylock(&ha->tx_lock) failed\n", __func__)); return; } if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { QL_DPRINT8((ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); QLA_TX_UNLOCK(ha); return; } if (!ha->watchdog_ticks) qla_update_link_state(ha); if (!ha->hw.flags.link_up) { QL_DPRINT8((ha->pci_dev, "%s: link down\n", __func__)); QLA_TX_UNLOCK(ha); return; } while (ifp->if_snd.ifq_head != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) { QL_DPRINT8((ha->pci_dev, "%s: m_head == NULL\n", __func__)); break; } if (qla_send(ha, &m_head)) { if (m_head == NULL) break; QL_DPRINT8((ha->pci_dev, "%s: PREPEND\n", __func__)); ifp->if_drv_flags |= IFF_DRV_OACTIVE; IF_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } QLA_TX_UNLOCK(ha); QL_DPRINT8((ha->pci_dev, "%s: exit\n", __func__)); return; } static int qla_send(qla_host_t *ha, struct mbuf **m_headp) { bus_dma_segment_t segs[QLA_MAX_SEGMENTS]; bus_dmamap_t map; int nsegs; int ret = -1; uint32_t tx_idx; struct mbuf *m_head = *m_headp; QL_DPRINT8((ha->pci_dev, "%s: enter\n", __func__)); if ((ret = bus_dmamap_create(ha->tx_tag, BUS_DMA_NOWAIT, &map))) { ha->err_tx_dmamap_create++; device_printf(ha->pci_dev, "%s: bus_dmamap_create failed[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); return (ret); } ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ret == EFBIG) { struct mbuf *m; QL_DPRINT8((ha->pci_dev, "%s: EFBIG [%d]\n", __func__, m_head->m_pkthdr.len)); m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { ha->err_tx_defrag++; m_freem(m_head); *m_headp = NULL; device_printf(ha->pci_dev, "%s: m_defrag() = NULL [%d]\n", __func__, ret); return (ENOBUFS); } m_head = m; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed0[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); bus_dmamap_destroy(ha->tx_tag, map); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } } else if (ret) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed1[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); bus_dmamap_destroy(ha->tx_tag, map); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } QL_ASSERT((nsegs != 0), ("qla_send: empty packet")); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); if (!(ret = qla_hw_send(ha, segs, nsegs, &tx_idx, m_head))) { ha->tx_buf[tx_idx].m_head = m_head; ha->tx_buf[tx_idx].map = map; } else { if (ret == EINVAL) { m_freem(m_head); *m_headp = NULL; } } QL_DPRINT8((ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qla_stop(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; dev = ha->pci_dev; ha->flags.qla_watchdog_pause = 1; qla_mdelay(__func__, 100); ha->flags.stop_rcv = 1; qla_hw_stop_rcv(ha); qla_del_hw_if(ha); qla_free_lro(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); return; } /* * Buffer Management Functions for Transmit and Receive Rings */ static int qla_alloc_xmt_bufs(qla_host_t *ha) { if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ QLA_MAX_TSO_FRAME_SIZE, /* maxsize */ QLA_MAX_SEGMENTS, /* nsegments */ PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->tx_tag)) { device_printf(ha->pci_dev, "%s: tx_tag alloc failed\n", __func__); return (ENOMEM); } bzero((void *)ha->tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); return 0; } /* * Release mbuf after it sent on the wire */ static void qla_clear_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb) { QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); if (txb->m_head) { bus_dmamap_unload(ha->tx_tag, txb->map); bus_dmamap_destroy(ha->tx_tag, txb->map); m_freem(txb->m_head); txb->m_head = NULL; } QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); } static void qla_free_xmt_bufs(qla_host_t *ha) { int i; for (i = 0; i < NUM_TX_DESCRIPTORS; i++) qla_clear_tx_buf(ha, &ha->tx_buf[i]); if (ha->tx_tag != NULL) { bus_dma_tag_destroy(ha->tx_tag); ha->tx_tag = NULL; } bzero((void *)ha->tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); return; } static int qla_alloc_rcv_bufs(qla_host_t *ha) { int i, j, ret = 0; qla_rx_buf_t *rxb; if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->rx_tag)) { device_printf(ha->pci_dev, "%s: rx_tag alloc failed\n", __func__); return (ENOMEM); } bzero((void *)ha->rx_buf, (sizeof(qla_rx_buf_t) * NUM_RX_DESCRIPTORS)); bzero((void *)ha->rx_jbuf, (sizeof(qla_rx_buf_t) * NUM_RX_JUMBO_DESCRIPTORS)); for (i = 0; i < MAX_SDS_RINGS; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; ha->hw.sds[i].rxjb_free = NULL; ha->hw.sds[i].rxj_free = 0; } for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_buf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d] failed\n", __func__, i); for (j = 0; j < i; j++) { bus_dmamap_destroy(ha->rx_tag, ha->rx_buf[j].map); } goto qla_alloc_rcv_bufs_failed; } } qla_init_hw_rcv_descriptors(ha, RDS_RING_INDEX_NORMAL); for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_buf[i]; rxb->handle = i; if (!(ret = qla_get_mbuf(ha, rxb, NULL, 0))) { /* * set the physical address in the corresponding * descriptor entry in the receive ring/queue for the * hba */ qla_set_hw_rcv_desc(ha, RDS_RING_INDEX_NORMAL, i, rxb->handle, rxb->paddr, (rxb->m_head)->m_pkthdr.len); } else { device_printf(ha->pci_dev, "%s: qla_get_mbuf [standard(%d)] failed\n", __func__, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qla_alloc_rcv_bufs_failed; } } for (i = 0; i < NUM_RX_JUMBO_DESCRIPTORS; i++) { rxb = &ha->rx_jbuf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d] failed\n", __func__, i); for (j = 0; j < i; j++) { bus_dmamap_destroy(ha->rx_tag, ha->rx_jbuf[j].map); } goto qla_alloc_rcv_bufs_failed; } } qla_init_hw_rcv_descriptors(ha, RDS_RING_INDEX_JUMBO); for (i = 0; i < NUM_RX_JUMBO_DESCRIPTORS; i++) { rxb = &ha->rx_jbuf[i]; rxb->handle = i; if (!(ret = qla_get_mbuf(ha, rxb, NULL, 1))) { /* * set the physical address in the corresponding * descriptor entry in the receive ring/queue for the * hba */ qla_set_hw_rcv_desc(ha, RDS_RING_INDEX_JUMBO, i, rxb->handle, rxb->paddr, (rxb->m_head)->m_pkthdr.len); } else { device_printf(ha->pci_dev, "%s: qla_get_mbuf [jumbo(%d)] failed\n", __func__, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qla_alloc_rcv_bufs_failed; } } return (0); qla_alloc_rcv_bufs_failed: qla_free_rcv_bufs(ha); return (ret); } static void qla_free_rcv_bufs(qla_host_t *ha) { int i; qla_rx_buf_t *rxb; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_buf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); rxb->m_head = NULL; } } for (i = 0; i < NUM_RX_JUMBO_DESCRIPTORS; i++) { rxb = &ha->rx_jbuf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); rxb->m_head = NULL; } } if (ha->rx_tag != NULL) { bus_dma_tag_destroy(ha->rx_tag); ha->rx_tag = NULL; } bzero((void *)ha->rx_buf, (sizeof(qla_rx_buf_t) * NUM_RX_DESCRIPTORS)); bzero((void *)ha->rx_jbuf, (sizeof(qla_rx_buf_t) * NUM_RX_JUMBO_DESCRIPTORS)); for (i = 0; i < MAX_SDS_RINGS; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; ha->hw.sds[i].rxjb_free = NULL; ha->hw.sds[i].rxj_free = 0; } return; } int qla_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp, uint32_t jumbo) { register struct mbuf *mp = nmp; struct ifnet *ifp; int ret = 0; uint32_t offset; QL_DPRINT2((ha->pci_dev, "%s: jumbo(0x%x) enter\n", __func__, jumbo)); ifp = ha->ifp; if (mp == NULL) { if (!jumbo) { mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mp == NULL) { ha->err_m_getcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getcl failed\n", __func__); goto exit_qla_get_mbuf; } mp->m_len = mp->m_pkthdr.len = MCLBYTES; } else { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (mp == NULL) { ha->err_m_getjcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getjcl failed\n", __func__); goto exit_qla_get_mbuf; } mp->m_len = mp->m_pkthdr.len = MJUM9BYTES; } } else { if (!jumbo) mp->m_len = mp->m_pkthdr.len = MCLBYTES; else mp->m_len = mp->m_pkthdr.len = MJUM9BYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } offset = (uint32_t)((unsigned long long)mp->m_data & 0x7ULL); if (offset) { offset = 8 - offset; m_adj(mp, offset); } /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ ret = bus_dmamap_load(ha->rx_tag, rxb->map, mtod(mp, void *), mp->m_len, qla_dmamap_callback, &rxb->paddr, BUS_DMA_NOWAIT); if (ret || !rxb->paddr) { m_free(mp); rxb->m_head = NULL; device_printf(ha->pci_dev, "%s: bus_dmamap_load failed\n", __func__); ret = -1; goto exit_qla_get_mbuf; } rxb->m_head = mp; bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_PREREAD); exit_qla_get_mbuf: QL_DPRINT2((ha->pci_dev, "%s: exit ret = 0x%08x\n", __func__, ret)); return (ret); } static void qla_tx_done(void *context, int pending) { qla_host_t *ha = context; qla_hw_tx_done(ha); qla_start(ha->ifp); } Index: head/sys/dev/qlxgbe/ql_os.c =================================================================== --- head/sys/dev/qlxgbe/ql_os.c (revision 263101) +++ head/sys/dev/qlxgbe/ql_os.c (revision 263102) @@ -1,1705 +1,1699 @@ /* * Copyright (c) 2013-2014 Qlogic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * and ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * File: ql_os.c * Author : David C Somayajulu, Qlogic Corporation, Aliso Viejo, CA 92656. */ #include __FBSDID("$FreeBSD$"); #include "ql_os.h" #include "ql_hw.h" #include "ql_def.h" #include "ql_inline.h" #include "ql_ver.h" #include "ql_glbl.h" #include "ql_dbg.h" #include /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif #ifndef PCI_PRODUCT_QLOGIC_ISP8030 #define PCI_PRODUCT_QLOGIC_ISP8030 0x8030 #endif #define PCI_QLOGIC_ISP8030 \ ((PCI_PRODUCT_QLOGIC_ISP8030 << 16) | PCI_VENDOR_QLOGIC) /* * static functions */ static int qla_alloc_parent_dma_tag(qla_host_t *ha); static void qla_free_parent_dma_tag(qla_host_t *ha); static int qla_alloc_xmt_bufs(qla_host_t *ha); static void qla_free_xmt_bufs(qla_host_t *ha); static int qla_alloc_rcv_bufs(qla_host_t *ha); static void qla_free_rcv_bufs(qla_host_t *ha); static void qla_clear_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb); static void qla_init_ifnet(device_t dev, qla_host_t *ha); static int qla_sysctl_get_stats(SYSCTL_HANDLER_ARGS); static int qla_sysctl_get_link_status(SYSCTL_HANDLER_ARGS); static void qla_release(qla_host_t *ha); static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void qla_stop(qla_host_t *ha); static int qla_send(qla_host_t *ha, struct mbuf **m_headp); static void qla_tx_done(void *context, int pending); static void qla_get_peer(qla_host_t *ha); static void qla_error_recovery(void *context, int pending); /* * Hooks to the Operating Systems */ static int qla_pci_probe (device_t); static int qla_pci_attach (device_t); static int qla_pci_detach (device_t); static void qla_init(void *arg); static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qla_media_change(struct ifnet *ifp); static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static void qla_start(struct ifnet *ifp); static device_method_t qla_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qla_pci_probe), DEVMETHOD(device_attach, qla_pci_attach), DEVMETHOD(device_detach, qla_pci_detach), { 0, 0 } }; static driver_t qla_pci_driver = { "ql", qla_pci_methods, sizeof (qla_host_t), }; static devclass_t qla83xx_devclass; DRIVER_MODULE(qla83xx, pci, qla_pci_driver, qla83xx_devclass, 0, 0); MODULE_DEPEND(qla83xx, pci, 1, 1, 1); MODULE_DEPEND(qla83xx, ether, 1, 1, 1); MALLOC_DEFINE(M_QLA83XXBUF, "qla83xxbuf", "Buffers for qla83xx driver"); #define QL_STD_REPLENISH_THRES 0 #define QL_JUMBO_REPLENISH_THRES 32 static char dev_str[64]; /* * Name: qla_pci_probe * Function: Validate the PCI device to be a QLA80XX device */ static int qla_pci_probe(device_t dev) { switch ((pci_get_device(dev) << 16) | (pci_get_vendor(dev))) { case PCI_QLOGIC_ISP8030: snprintf(dev_str, sizeof(dev_str), "%s v%d.%d.%d", "Qlogic ISP 83xx PCI CNA Adapter-Ethernet Function", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); device_set_desc(dev, dev_str); break; default: return (ENXIO); } if (bootverbose) printf("%s: %s\n ", __func__, dev_str); return (BUS_PROBE_DEFAULT); } static void qla_add_sysctls(qla_host_t *ha) { device_t dev = ha->pci_dev; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qla_sysctl_get_stats, "I", "Statistics"); SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fw_version", CTLFLAG_RD, &ha->fw_ver_str, 0, "firmware version"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "link_status", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qla_sysctl_get_link_status, "I", "Link Status"); ha->dbg_level = 0; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &ha->dbg_level, ha->dbg_level, "Debug Level"); ha->std_replenish = QL_STD_REPLENISH_THRES; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "std_replenish", CTLFLAG_RW, &ha->std_replenish, ha->std_replenish, "Threshold for Replenishing Standard Frames"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ipv4_lro", CTLFLAG_RD, &ha->ipv4_lro, "number of ipv4 lro completions"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ipv6_lro", CTLFLAG_RD, &ha->ipv6_lro, "number of ipv6 lro completions"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "tx_tso_frames", CTLFLAG_RD, &ha->tx_tso_frames, "number of Tx TSO Frames"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "hw_vlan_tx_frames", CTLFLAG_RD, &ha->hw_vlan_tx_frames, "number of Tx VLAN Frames"); return; } static void qla_watchdog(void *arg) { qla_host_t *ha = arg; qla_hw_t *hw; struct ifnet *ifp; uint32_t i; qla_hw_tx_cntxt_t *hw_tx_cntxt; hw = &ha->hw; ifp = ha->ifp; if (ha->flags.qla_watchdog_exit) { ha->qla_watchdog_exited = 1; return; } ha->qla_watchdog_exited = 0; if (!ha->flags.qla_watchdog_pause) { if (ql_hw_check_health(ha) || ha->qla_initiate_recovery || (ha->msg_from_peer == QL_PEER_MSG_RESET)) { ha->qla_watchdog_paused = 1; ha->flags.qla_watchdog_pause = 1; ha->qla_initiate_recovery = 0; ha->err_inject = 0; taskqueue_enqueue(ha->err_tq, &ha->err_task); } else { for (i = 0; i < ha->hw.num_tx_rings; i++) { hw_tx_cntxt = &hw->tx_cntxt[i]; if (qla_le32_to_host(*(hw_tx_cntxt->tx_cons)) != hw_tx_cntxt->txr_comp) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); break; } } if ((ifp->if_snd.ifq_head != NULL) && QL_RUNNING(ifp)) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); } ha->qla_watchdog_paused = 0; } } else { ha->qla_watchdog_paused = 1; } ha->watchdog_ticks = ha->watchdog_ticks++ % 1000; callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); } /* * Name: qla_pci_attach * Function: attaches the device to the operating system */ static int qla_pci_attach(device_t dev) { qla_host_t *ha = NULL; uint32_t rsrc_len; int i; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qla_host_t)); if (pci_get_device(dev) != PCI_PRODUCT_QLOGIC_ISP8030) { device_printf(dev, "device is not ISP8030\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev); ha->pci_dev = dev; pci_enable_busmaster(dev); ha->reg_rid = PCIR_BAR(0); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map any ports\n"); goto qla_pci_attach_err; } rsrc_len = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->reg_rid); mtx_init(&ha->hw_lock, "qla83xx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->tx_lock, "qla83xx_tx_lock", MTX_NETWORK_LOCK, MTX_DEF); qla_add_sysctls(ha); ql_hw_add_sysctls(ha); ha->flags.lock_init = 1; ha->reg_rid1 = PCIR_BAR(2); ha->pci_reg1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid1, RF_ACTIVE); ha->msix_count = pci_msix_count(dev); if (ha->msix_count < (ha->hw.num_sds_rings + 1)) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto qla_pci_attach_err; } QL_DPRINT2(ha, (dev, "%s: ha %p pci_func 0x%x rsrc_count 0x%08x" " msix_count 0x%x pci_reg %p\n", __func__, ha, ha->pci_func, rsrc_len, ha->msix_count, ha->pci_reg)); ha->msix_count = ha->hw.num_sds_rings + 1; if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msi[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qla_pci_attach_err; } ha->mbx_irq_rid = 1; ha->mbx_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->mbx_irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->mbx_irq == NULL) { device_printf(dev, "could not allocate mbx interrupt\n"); goto qla_pci_attach_err; } if (bus_setup_intr(dev, ha->mbx_irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, ql_mbx_isr, ha, &ha->mbx_handle)) { device_printf(dev, "could not setup mbx interrupt\n"); goto qla_pci_attach_err; } for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->irq_vec[i].sds_idx = i; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq_rid = 2 + i; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); goto qla_pci_attach_err; } if (bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, ql_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle)) { device_printf(dev, "could not setup interrupt\n"); goto qla_pci_attach_err; } } printf("%s: mp__ncpus %d sds %d rds %d msi-x %d\n", __func__, mp_ncpus, ha->hw.num_sds_rings, ha->hw.num_rds_rings, ha->msix_count); /* initialize hardware */ if (ql_init_hw(ha)) { device_printf(dev, "%s: ql_init_hw failed\n", __func__); goto qla_pci_attach_err; } device_printf(dev, "%s: firmware[%d.%d.%d.%d]\n", __func__, ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); snprintf(ha->fw_ver_str, sizeof(ha->fw_ver_str), "%d.%d.%d.%d", ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); ql_read_mac_addr(ha); /* allocate parent dma tag */ if (qla_alloc_parent_dma_tag(ha)) { device_printf(dev, "%s: qla_alloc_parent_dma_tag failed\n", __func__); goto qla_pci_attach_err; } /* alloc all dma buffers */ if (ql_alloc_dma(ha)) { device_printf(dev, "%s: ql_alloc_dma failed\n", __func__); goto qla_pci_attach_err; } qla_get_peer(ha); /* create the o.s ethernet interface */ qla_init_ifnet(dev, ha); ha->flags.qla_watchdog_active = 1; ha->flags.qla_watchdog_pause = 1; TASK_INIT(&ha->tx_task, 0, qla_tx_done, ha); ha->tx_tq = taskqueue_create_fast("qla_txq", M_NOWAIT, taskqueue_thread_enqueue, &ha->tx_tq); taskqueue_start_threads(&ha->tx_tq, 1, PI_NET, "%s txq", device_get_nameunit(ha->pci_dev)); callout_init(&ha->tx_callout, TRUE); ha->flags.qla_callout_init = 1; /* create ioctl device interface */ if (ql_make_cdev(ha)) { device_printf(dev, "%s: ql_make_cdev failed\n", __func__); goto qla_pci_attach_err; } callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); TASK_INIT(&ha->err_task, 0, qla_error_recovery, ha); ha->err_tq = taskqueue_create_fast("qla_errq", M_NOWAIT, taskqueue_thread_enqueue, &ha->err_tq); taskqueue_start_threads(&ha->err_tq, 1, PI_NET, "%s errq", device_get_nameunit(ha->pci_dev)); QL_DPRINT2(ha, (dev, "%s: exit 0\n", __func__)); return (0); qla_pci_attach_err: qla_release(ha); QL_DPRINT2(ha, (dev, "%s: exit ENXIO\n", __func__)); return (ENXIO); } /* * Name: qla_pci_detach * Function: Unhooks the device from the operating system */ static int qla_pci_detach(device_t dev) { qla_host_t *ha = NULL; struct ifnet *ifp; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } ifp = ha->ifp; (void)QLA_LOCK(ha, __func__, 0); qla_stop(ha); QLA_UNLOCK(ha, __func__); qla_release(ha); QL_DPRINT2(ha, (dev, "%s: exit\n", __func__)); return (0); } /* * SYSCTL Related Callbacks */ static int qla_sysctl_get_stats(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qla_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qla_host_t *)arg1; ql_get_stats(ha); } return (err); } static int qla_sysctl_get_link_status(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qla_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qla_host_t *)arg1; ql_hw_link_status(ha); } return (err); } /* * Name: qla_release * Function: Releases the resources allocated for the device */ static void qla_release(qla_host_t *ha) { device_t dev; int i; dev = ha->pci_dev; if (ha->err_tq) { taskqueue_drain(ha->err_tq, &ha->err_task); taskqueue_free(ha->err_tq); } if (ha->tx_tq) { taskqueue_drain(ha->tx_tq, &ha->tx_task); taskqueue_free(ha->tx_tq); } ql_del_cdev(ha); if (ha->flags.qla_watchdog_active) { ha->flags.qla_watchdog_exit = 1; while (ha->qla_watchdog_exited == 0) qla_mdelay(__func__, 1); } if (ha->flags.qla_callout_init) callout_stop(&ha->tx_callout); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); ql_free_dma(ha); qla_free_parent_dma_tag(ha); if (ha->mbx_handle) (void)bus_teardown_intr(dev, ha->mbx_irq, ha->mbx_handle); if (ha->mbx_irq) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->mbx_irq_rid, ha->mbx_irq); for (i = 0; i < ha->hw.num_sds_rings; i++) { if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); } if (ha->irq_vec[i].irq) { (void)bus_release_resource(dev, SYS_RES_IRQ, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } } if (ha->msix_count) pci_release_msi(dev); if (ha->flags.lock_init) { mtx_destroy(&ha->tx_lock); mtx_destroy(&ha->hw_lock); } if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); if (ha->pci_reg1) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid1, ha->pci_reg1); } /* * DMA Related Functions */ static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } *((bus_addr_t *)arg) = segs[0].ds_addr; return; } int ql_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { device_printf(dev, "%s: could not create dma tag\n", __func__); goto ql_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); device_printf(dev, "%s: bus_dmamem_alloc failed\n", __func__); goto ql_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qla_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto ql_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; ql_alloc_dmabuf_exit: QL_DPRINT2(ha, (dev, "%s: exit ret 0x%08x tag %p map %p b %p sz 0x%x\n", __func__, ret, (void *)dma_buf->dma_tag, (void *)dma_buf->dma_map, (void *)dma_buf->dma_b, dma_buf->size)); return ret; } void ql_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); } static int qla_alloc_parent_dma_tag(qla_host_t *ha) { int ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { device_printf(dev, "%s: could not create parent dma tag\n", __func__); return (-1); } ha->flags.parent_tag = 1; return (0); } static void qla_free_parent_dma_tag(qla_host_t *ha) { if (ha->flags.parent_tag) { bus_dma_tag_destroy(ha->parent_tag); ha->flags.parent_tag = 0; } } /* * Name: qla_init_ifnet * Function: Creates the Network Device Interface and Registers it with the O.S */ static void qla_init_ifnet(device_t dev, qla_host_t *ha) { struct ifnet *ifp; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); ifp = ha->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: cannot if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); -#if __FreeBSD_version >= 1000000 - if_initbaudrate(ifp, IF_Gbps(10)); + ifp->if_baudrate = IF_Gbps(10); ifp->if_capabilities = IFCAP_LINKSTATE; -#else - ifp->if_mtu = ETHERMTU; - ifp->if_baudrate = (1 * 1000 * 1000 *1000); - -#endif /* #if __FreeBSD_version >= 1000000 */ ifp->if_init = qla_init; ifp->if_softc = ha; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = qla_ioctl; ifp->if_start = qla_start; IFQ_SET_MAXLEN(&ifp->if_snd, qla_get_ifq_snd_maxlen(ha)); ifp->if_snd.ifq_drv_maxlen = qla_get_ifq_snd_maxlen(ha); IFQ_SET_READY(&ifp->if_snd); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ether_ifattach(ifp, qla_get_mac_addr(ha)); ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTSO; ifp->if_capenable = ifp->if_capabilities; ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifmedia_init(&ha->media, IFM_IMASK, qla_media_change, qla_media_status); ifmedia_add(&ha->media, (IFM_ETHER | qla_get_optics(ha) | IFM_FDX), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO)); QL_DPRINT2(ha, (dev, "%s: exit\n", __func__)); return; } static void qla_init_locked(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; qla_stop(ha); if (qla_alloc_xmt_bufs(ha) != 0) return; if (qla_alloc_rcv_bufs(ha) != 0) return; bcopy(IF_LLADDR(ha->ifp), ha->hw.mac_addr, ETHER_ADDR_LEN); ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; ha->flags.stop_rcv = 0; if (ql_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ha->flags.qla_watchdog_pause = 0; ha->hw_vlan_tx_frames = 0; ha->tx_tso_frames = 0; } return; } static void qla_init(void *arg) { qla_host_t *ha; ha = (qla_host_t *)arg; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); (void)QLA_LOCK(ha, __func__, 0); qla_init_locked(ha); QLA_UNLOCK(ha, __func__); QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); } static int qla_set_multi(qla_host_t *ha, uint32_t add_multi) { uint8_t mta[Q8_MAX_NUM_MULTICAST_ADDRS * Q8_MAC_ADDR_LEN]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; int ret = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == Q8_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * Q8_MAC_ADDR_LEN], Q8_MAC_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); if (QLA_LOCK(ha, __func__, 1) == 0) { ret = ql_hw_set_multi(ha, mta, mcnt, add_multi); QLA_UNLOCK(ha, __func__); } return (ret); } static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; switch (cmd) { case SIOCSIFADDR: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFADDR (0x%lx)\n", __func__, cmd)); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { (void)QLA_LOCK(ha, __func__, 0); qla_init_locked(ha); QLA_UNLOCK(ha, __func__); } QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", __func__, cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr))); arp_ifinit(ifp, ifa); } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFMTU (0x%lx)\n", __func__, cmd)); if (ifr->ifr_mtu > QLA_MAX_MTU) { ret = EINVAL; } else { (void) QLA_LOCK(ha, __func__, 0); ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { ret = ql_set_max_mtu(ha, ha->max_frame_size, ha->hw.rcv_cntxt_id); } if (ifp->if_mtu > ETHERMTU) ha->std_replenish = QL_JUMBO_REPLENISH_THRES; else ha->std_replenish = QL_STD_REPLENISH_THRES; QLA_UNLOCK(ha, __func__); if (ret) ret = EINVAL; } break; case SIOCSIFFLAGS: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFFLAGS (0x%lx)\n", __func__, cmd)); (void)QLA_LOCK(ha, __func__, 0); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { ret = ql_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { ret = ql_set_allmulti(ha); } } else { qla_init_locked(ha); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ret = ql_set_max_mtu(ha, ha->max_frame_size, ha->hw.rcv_cntxt_id); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) qla_stop(ha); ha->if_flags = ifp->if_flags; } QLA_UNLOCK(ha, __func__); break; case SIOCADDMULTI: QL_DPRINT4(ha, (ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCADDMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (qla_set_multi(ha, 1)) ret = EINVAL; } break; case SIOCDELMULTI: QL_DPRINT4(ha, (ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCDELMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (qla_set_multi(ha, 0)) ret = EINVAL; } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", __func__, cmd)); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFCAP (0x%lx)\n", __func__, cmd)); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) qla_init(ha); VLAN_CAPABILITIES(ifp); break; } default: QL_DPRINT4(ha, (ha->pci_dev, "%s: default (0x%lx)\n", __func__, cmd)); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qla_media_change(struct ifnet *ifp) { qla_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; ql_update_link_state(ha); if (ha->hw.link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qla_get_optics(ha)); } QL_DPRINT2(ha, (ha->pci_dev, "%s: exit (%s)\n", __func__,\ (ha->hw.link_up ? "link_up" : "link_down"))); return; } static void qla_start(struct ifnet *ifp) { struct mbuf *m_head; qla_host_t *ha = (qla_host_t *)ifp->if_softc; QL_DPRINT8(ha, (ha->pci_dev, "%s: enter\n", __func__)); if (!mtx_trylock(&ha->tx_lock)) { QL_DPRINT8(ha, (ha->pci_dev, "%s: mtx_trylock(&ha->tx_lock) failed\n", __func__)); return; } if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { QL_DPRINT8(ha, (ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); QLA_TX_UNLOCK(ha); return; } if (!ha->watchdog_ticks) ql_update_link_state(ha); if (!ha->hw.link_up) { QL_DPRINT8(ha, (ha->pci_dev, "%s: link down\n", __func__)); QLA_TX_UNLOCK(ha); return; } while (ifp->if_snd.ifq_head != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) { QL_DPRINT8(ha, (ha->pci_dev, "%s: m_head == NULL\n", __func__)); break; } if (qla_send(ha, &m_head)) { if (m_head == NULL) break; QL_DPRINT8(ha, (ha->pci_dev, "%s: PREPEND\n", __func__)); ifp->if_drv_flags |= IFF_DRV_OACTIVE; IF_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } QLA_TX_UNLOCK(ha); QL_DPRINT8(ha, (ha->pci_dev, "%s: exit\n", __func__)); return; } static int qla_send(qla_host_t *ha, struct mbuf **m_headp) { bus_dma_segment_t segs[QLA_MAX_SEGMENTS]; bus_dmamap_t map; int nsegs; int ret = -1; uint32_t tx_idx; struct mbuf *m_head = *m_headp; uint32_t txr_idx = ha->txr_idx; QL_DPRINT8(ha, (ha->pci_dev, "%s: enter\n", __func__)); if (m_head->m_flags & M_FLOWID) txr_idx = m_head->m_pkthdr.flowid & (ha->hw.num_tx_rings - 1); tx_idx = ha->hw.tx_cntxt[txr_idx].txr_next; map = ha->tx_ring[txr_idx].tx_buf[tx_idx].map; ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ret == EFBIG) { struct mbuf *m; QL_DPRINT8(ha, (ha->pci_dev, "%s: EFBIG [%d]\n", __func__, m_head->m_pkthdr.len)); m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { ha->err_tx_defrag++; m_freem(m_head); *m_headp = NULL; device_printf(ha->pci_dev, "%s: m_defrag() = NULL [%d]\n", __func__, ret); return (ENOBUFS); } m_head = m; *m_headp = m_head; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed0[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } } else if (ret) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed1[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } QL_ASSERT(ha, (nsegs != 0), ("qla_send: empty packet")); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); if (!(ret = ql_hw_send(ha, segs, nsegs, tx_idx, m_head, txr_idx))) { ha->tx_ring[txr_idx].count++; ha->tx_ring[txr_idx].tx_buf[tx_idx].m_head = m_head; } else { if (ret == EINVAL) { if (m_head) m_freem(m_head); *m_headp = NULL; } } QL_DPRINT8(ha, (ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qla_stop(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; dev = ha->pci_dev; ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); ha->flags.qla_watchdog_pause = 1; while (!ha->qla_watchdog_paused) qla_mdelay(__func__, 1); ha->flags.stop_rcv = 1; ql_hw_stop_rcv(ha); ql_del_hw_if(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); return; } /* * Buffer Management Functions for Transmit and Receive Rings */ static int qla_alloc_xmt_bufs(qla_host_t *ha) { int ret = 0; uint32_t i, j; qla_tx_buf_t *txb; if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ QLA_MAX_TSO_FRAME_SIZE, /* maxsize */ QLA_MAX_SEGMENTS, /* nsegments */ PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->tx_tag)) { device_printf(ha->pci_dev, "%s: tx_tag alloc failed\n", __func__); return (ENOMEM); } for (i = 0; i < ha->hw.num_tx_rings; i++) { bzero((void *)ha->tx_ring[i].tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); } for (j = 0; j < ha->hw.num_tx_rings; j++) { for (i = 0; i < NUM_TX_DESCRIPTORS; i++) { txb = &ha->tx_ring[j].tx_buf[i]; if ((ret = bus_dmamap_create(ha->tx_tag, BUS_DMA_NOWAIT, &txb->map))) { ha->err_tx_dmamap_create++; device_printf(ha->pci_dev, "%s: bus_dmamap_create failed[%d]\n", __func__, ret); qla_free_xmt_bufs(ha); return (ret); } } } return 0; } /* * Release mbuf after it sent on the wire */ static void qla_clear_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb) { QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); if (txb->m_head && txb->map) { bus_dmamap_unload(ha->tx_tag, txb->map); m_freem(txb->m_head); txb->m_head = NULL; } if (txb->map) bus_dmamap_destroy(ha->tx_tag, txb->map); QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); } static void qla_free_xmt_bufs(qla_host_t *ha) { int i, j; for (j = 0; j < ha->hw.num_tx_rings; j++) { for (i = 0; i < NUM_TX_DESCRIPTORS; i++) qla_clear_tx_buf(ha, &ha->tx_ring[j].tx_buf[i]); } if (ha->tx_tag != NULL) { bus_dma_tag_destroy(ha->tx_tag); ha->tx_tag = NULL; } for (i = 0; i < ha->hw.num_tx_rings; i++) { bzero((void *)ha->tx_ring[i].tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); } return; } static int qla_alloc_rcv_std(qla_host_t *ha) { int i, j, k, r, ret = 0; qla_rx_buf_t *rxb; qla_rx_ring_t *rx_ring; for (r = 0; r < ha->hw.num_rds_rings; r++) { rx_ring = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rx_ring->rx_buf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d, %d] failed\n", __func__, r, i); for (k = 0; k < r; k++) { for (j = 0; j < NUM_RX_DESCRIPTORS; j++) { rxb = &ha->rx_ring[k].rx_buf[j]; bus_dmamap_destroy(ha->rx_tag, rxb->map); } } for (j = 0; j < i; j++) { bus_dmamap_destroy(ha->rx_tag, rx_ring->rx_buf[j].map); } goto qla_alloc_rcv_std_err; } } } qla_init_hw_rcv_descriptors(ha); for (r = 0; r < ha->hw.num_rds_rings; r++) { rx_ring = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rx_ring->rx_buf[i]; rxb->handle = i; if (!(ret = ql_get_mbuf(ha, rxb, NULL))) { /* * set the physical address in the * corresponding descriptor entry in the * receive ring/queue for the hba */ qla_set_hw_rcv_desc(ha, r, i, rxb->handle, rxb->paddr, (rxb->m_head)->m_pkthdr.len); } else { device_printf(ha->pci_dev, "%s: ql_get_mbuf [%d, %d] failed\n", __func__, r, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qla_alloc_rcv_std_err; } } } return 0; qla_alloc_rcv_std_err: return (-1); } static void qla_free_rcv_std(qla_host_t *ha) { int i, r; qla_rx_buf_t *rxb; for (r = 0; r < ha->hw.num_rds_rings; r++) { for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_ring[r].rx_buf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); rxb->m_head = NULL; } } } return; } static int qla_alloc_rcv_bufs(qla_host_t *ha) { int i, ret = 0; if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->rx_tag)) { device_printf(ha->pci_dev, "%s: rx_tag alloc failed\n", __func__); return (ENOMEM); } bzero((void *)ha->rx_ring, (sizeof(qla_rx_ring_t) * MAX_RDS_RINGS)); for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; } ret = qla_alloc_rcv_std(ha); return (ret); } static void qla_free_rcv_bufs(qla_host_t *ha) { int i; qla_free_rcv_std(ha); if (ha->rx_tag != NULL) { bus_dma_tag_destroy(ha->rx_tag); ha->rx_tag = NULL; } bzero((void *)ha->rx_ring, (sizeof(qla_rx_ring_t) * MAX_RDS_RINGS)); for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; } return; } int ql_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp) { register struct mbuf *mp = nmp; struct ifnet *ifp; int ret = 0; uint32_t offset; bus_dma_segment_t segs[1]; int nsegs; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifp = ha->ifp; if (mp == NULL) { mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mp == NULL) { ha->err_m_getcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getcl failed\n", __func__); goto exit_ql_get_mbuf; } mp->m_len = mp->m_pkthdr.len = MCLBYTES; } else { mp->m_len = mp->m_pkthdr.len = MCLBYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } offset = (uint32_t)((unsigned long long)mp->m_data & 0x7ULL); if (offset) { offset = 8 - offset; m_adj(mp, offset); } /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, rxb->map, mp, segs, &nsegs, BUS_DMA_NOWAIT); rxb->paddr = segs[0].ds_addr; if (ret || !rxb->paddr || (nsegs != 1)) { m_free(mp); rxb->m_head = NULL; device_printf(ha->pci_dev, "%s: bus_dmamap_load failed[%d, 0x%016llx, %d]\n", __func__, ret, (long long unsigned int)rxb->paddr, nsegs); ret = -1; goto exit_ql_get_mbuf; } rxb->m_head = mp; bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_PREREAD); exit_ql_get_mbuf: QL_DPRINT2(ha, (ha->pci_dev, "%s: exit ret = 0x%08x\n", __func__, ret)); return (ret); } static void qla_tx_done(void *context, int pending) { qla_host_t *ha = context; struct ifnet *ifp; ifp = ha->ifp; if (!ifp) return; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { QL_DPRINT8(ha, (ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); return; } ql_hw_tx_done(ha); qla_start(ha->ifp); } static void qla_get_peer(qla_host_t *ha) { device_t *peers; int count, i, slot; int my_slot = pci_get_slot(ha->pci_dev); if (device_get_children(device_get_parent(ha->pci_dev), &peers, &count)) return; for (i = 0; i < count; i++) { slot = pci_get_slot(peers[i]); if ((slot >= 0) && (slot == my_slot) && (pci_get_device(peers[i]) == pci_get_device(ha->pci_dev))) { if (ha->pci_dev != peers[i]) ha->peer_dev = peers[i]; } } } static void qla_send_msg_to_peer(qla_host_t *ha, uint32_t msg_to_peer) { qla_host_t *ha_peer; if (ha->peer_dev) { if ((ha_peer = device_get_softc(ha->peer_dev)) != NULL) { ha_peer->msg_from_peer = msg_to_peer; } } } static void qla_error_recovery(void *context, int pending) { qla_host_t *ha = context; uint32_t msecs_100 = 100; struct ifnet *ifp = ha->ifp; (void)QLA_LOCK(ha, __func__, 0); ha->flags.stop_rcv = 1; ql_hw_stop_rcv(ha); ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); QLA_UNLOCK(ha, __func__); if ((ha->pci_func & 0x1) == 0) { if (!ha->msg_from_peer) { qla_send_msg_to_peer(ha, QL_PEER_MSG_RESET); while ((ha->msg_from_peer != QL_PEER_MSG_ACK) && msecs_100--) qla_mdelay(__func__, 100); } ha->msg_from_peer = 0; ql_minidump(ha); (void) ql_init_hw(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); qla_send_msg_to_peer(ha, QL_PEER_MSG_ACK); } else { if (ha->msg_from_peer == QL_PEER_MSG_RESET) { ha->msg_from_peer = 0; qla_send_msg_to_peer(ha, QL_PEER_MSG_ACK); } else { qla_send_msg_to_peer(ha, QL_PEER_MSG_RESET); } while ((ha->msg_from_peer != QL_PEER_MSG_ACK) && msecs_100--) qla_mdelay(__func__, 100); ha->msg_from_peer = 0; (void) ql_init_hw(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); } (void)QLA_LOCK(ha, __func__, 0); if (qla_alloc_xmt_bufs(ha) != 0) { QLA_UNLOCK(ha, __func__); return; } if (qla_alloc_rcv_bufs(ha) != 0) { QLA_UNLOCK(ha, __func__); return; } ha->flags.stop_rcv = 0; if (ql_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ha->flags.qla_watchdog_pause = 0; } QLA_UNLOCK(ha, __func__); } Index: head/sys/dev/qlxge/qls_os.c =================================================================== --- head/sys/dev/qlxge/qls_os.c (revision 263101) +++ head/sys/dev/qlxge/qls_os.c (revision 263102) @@ -1,1536 +1,1530 @@ /* * Copyright (c) 2013-2014 Qlogic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * and ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * File: qls_os.c * Author : David C Somayajulu, Qlogic Corporation, Aliso Viejo, CA 92656. */ #include __FBSDID("$FreeBSD$"); #include "qls_os.h" #include "qls_hw.h" #include "qls_def.h" #include "qls_inline.h" #include "qls_ver.h" #include "qls_glbl.h" #include "qls_dbg.h" #include /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif #ifndef PCI_DEVICE_QLOGIC_8000 #define PCI_DEVICE_QLOGIC_8000 0x8000 #endif #define PCI_QLOGIC_DEV8000 \ ((PCI_DEVICE_QLOGIC_8000 << 16) | PCI_VENDOR_QLOGIC) /* * static functions */ static int qls_alloc_parent_dma_tag(qla_host_t *ha); static void qls_free_parent_dma_tag(qla_host_t *ha); static void qls_flush_xmt_bufs(qla_host_t *ha); static int qls_alloc_rcv_bufs(qla_host_t *ha); static void qls_free_rcv_bufs(qla_host_t *ha); static void qls_init_ifnet(device_t dev, qla_host_t *ha); static void qls_release(qla_host_t *ha); static void qls_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void qls_stop(qla_host_t *ha); static int qls_send(qla_host_t *ha, struct mbuf **m_headp); static void qls_tx_done(void *context, int pending); static int qls_config_lro(qla_host_t *ha); static void qls_free_lro(qla_host_t *ha); static void qls_error_recovery(void *context, int pending); /* * Hooks to the Operating Systems */ static int qls_pci_probe (device_t); static int qls_pci_attach (device_t); static int qls_pci_detach (device_t); static void qls_start(struct ifnet *ifp); static void qls_init(void *arg); static int qls_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qls_media_change(struct ifnet *ifp); static void qls_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static device_method_t qla_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qls_pci_probe), DEVMETHOD(device_attach, qls_pci_attach), DEVMETHOD(device_detach, qls_pci_detach), { 0, 0 } }; static driver_t qla_pci_driver = { "ql", qla_pci_methods, sizeof (qla_host_t), }; static devclass_t qla8000_devclass; DRIVER_MODULE(qla8000, pci, qla_pci_driver, qla8000_devclass, 0, 0); MODULE_DEPEND(qla8000, pci, 1, 1, 1); MODULE_DEPEND(qla8000, ether, 1, 1, 1); MALLOC_DEFINE(M_QLA8000BUF, "qla8000buf", "Buffers for qla8000 driver"); static char dev_str[64]; static char ver_str[64]; /* * Name: qls_pci_probe * Function: Validate the PCI device to be a QLA80XX device */ static int qls_pci_probe(device_t dev) { switch ((pci_get_device(dev) << 16) | (pci_get_vendor(dev))) { case PCI_QLOGIC_DEV8000: snprintf(dev_str, sizeof(dev_str), "%s v%d.%d.%d", "Qlogic ISP 8000 PCI CNA Adapter-Ethernet Function", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); snprintf(ver_str, sizeof(ver_str), "v%d.%d.%d", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); device_set_desc(dev, dev_str); break; default: return (ENXIO); } if (bootverbose) printf("%s: %s\n ", __func__, dev_str); return (BUS_PROBE_DEFAULT); } static int qls_sysctl_get_drvr_stats(SYSCTL_HANDLER_ARGS) { int err = 0, ret; qla_host_t *ha; uint32_t i; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qla_host_t *)arg1; for (i = 0; i < ha->num_tx_rings; i++) { device_printf(ha->pci_dev, "%s: tx_ring[%d].tx_frames= %p\n", __func__, i, (void *)ha->tx_ring[i].tx_frames); device_printf(ha->pci_dev, "%s: tx_ring[%d].tx_tso_frames= %p\n", __func__, i, (void *)ha->tx_ring[i].tx_tso_frames); device_printf(ha->pci_dev, "%s: tx_ring[%d].tx_vlan_frames= %p\n", __func__, i, (void *)ha->tx_ring[i].tx_vlan_frames); device_printf(ha->pci_dev, "%s: tx_ring[%d].txr_free= 0x%08x\n", __func__, i, ha->tx_ring[i].txr_free); device_printf(ha->pci_dev, "%s: tx_ring[%d].txr_next= 0x%08x\n", __func__, i, ha->tx_ring[i].txr_next); device_printf(ha->pci_dev, "%s: tx_ring[%d].txr_done= 0x%08x\n", __func__, i, ha->tx_ring[i].txr_done); device_printf(ha->pci_dev, "%s: tx_ring[%d].txr_cons_idx= 0x%08x\n", __func__, i, *(ha->tx_ring[i].txr_cons_vaddr)); } for (i = 0; i < ha->num_rx_rings; i++) { device_printf(ha->pci_dev, "%s: rx_ring[%d].rx_int= %p\n", __func__, i, (void *)ha->rx_ring[i].rx_int); device_printf(ha->pci_dev, "%s: rx_ring[%d].rss_int= %p\n", __func__, i, (void *)ha->rx_ring[i].rss_int); device_printf(ha->pci_dev, "%s: rx_ring[%d].lbq_next= 0x%08x\n", __func__, i, ha->rx_ring[i].lbq_next); device_printf(ha->pci_dev, "%s: rx_ring[%d].lbq_free= 0x%08x\n", __func__, i, ha->rx_ring[i].lbq_free); device_printf(ha->pci_dev, "%s: rx_ring[%d].lbq_in= 0x%08x\n", __func__, i, ha->rx_ring[i].lbq_in); device_printf(ha->pci_dev, "%s: rx_ring[%d].sbq_next= 0x%08x\n", __func__, i, ha->rx_ring[i].sbq_next); device_printf(ha->pci_dev, "%s: rx_ring[%d].sbq_free= 0x%08x\n", __func__, i, ha->rx_ring[i].sbq_free); device_printf(ha->pci_dev, "%s: rx_ring[%d].sbq_in= 0x%08x\n", __func__, i, ha->rx_ring[i].sbq_in); } device_printf(ha->pci_dev, "%s: err_m_getcl = 0x%08x\n", __func__, ha->err_m_getcl); device_printf(ha->pci_dev, "%s: err_m_getjcl = 0x%08x\n", __func__, ha->err_m_getjcl); device_printf(ha->pci_dev, "%s: err_tx_dmamap_create = 0x%08x\n", __func__, ha->err_tx_dmamap_create); device_printf(ha->pci_dev, "%s: err_tx_dmamap_load = 0x%08x\n", __func__, ha->err_tx_dmamap_load); device_printf(ha->pci_dev, "%s: err_tx_defrag = 0x%08x\n", __func__, ha->err_tx_defrag); } return (err); } static void qls_add_sysctls(qla_host_t *ha) { device_t dev = ha->pci_dev; SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "version", CTLFLAG_RD, ver_str, 0, "Driver Version"); qls_dbg_level = 0; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &qls_dbg_level, qls_dbg_level, "Debug Level"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "drvr_stats", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qls_sysctl_get_drvr_stats, "I", "Driver Maintained Statistics"); return; } static void qls_watchdog(void *arg) { qla_host_t *ha = arg; struct ifnet *ifp; ifp = ha->ifp; if (ha->flags.qla_watchdog_exit) { ha->qla_watchdog_exited = 1; return; } ha->qla_watchdog_exited = 0; if (!ha->flags.qla_watchdog_pause) { if (ha->qla_initiate_recovery) { ha->qla_watchdog_paused = 1; ha->qla_initiate_recovery = 0; ha->err_inject = 0; taskqueue_enqueue(ha->err_tq, &ha->err_task); } else if ((ifp->if_snd.ifq_head != NULL) && QL_RUNNING(ifp)) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); } ha->qla_watchdog_paused = 0; } else { ha->qla_watchdog_paused = 1; } ha->watchdog_ticks = ha->watchdog_ticks++ % 1000; callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qls_watchdog, ha); return; } /* * Name: qls_pci_attach * Function: attaches the device to the operating system */ static int qls_pci_attach(device_t dev) { qla_host_t *ha = NULL; int i; QL_DPRINT2((dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qla_host_t)); if (pci_get_device(dev) != PCI_DEVICE_QLOGIC_8000) { device_printf(dev, "device is not QLE8000\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev); ha->pci_dev = dev; pci_enable_busmaster(dev); ha->reg_rid = PCIR_BAR(1); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map any ports\n"); goto qls_pci_attach_err; } ha->reg_rid1 = PCIR_BAR(3); ha->pci_reg1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid1, RF_ACTIVE); if (ha->pci_reg1 == NULL) { device_printf(dev, "unable to map any ports\n"); goto qls_pci_attach_err; } mtx_init(&ha->hw_lock, "qla80xx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->tx_lock, "qla80xx_tx_lock", MTX_NETWORK_LOCK, MTX_DEF); qls_add_sysctls(ha); qls_hw_add_sysctls(ha); ha->flags.lock_init = 1; ha->msix_count = pci_msix_count(dev); if (ha->msix_count < qls_get_msix_count(ha)) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto qls_pci_attach_err; } ha->msix_count = qls_get_msix_count(ha); device_printf(dev, "\n%s: ha %p pci_func 0x%x msix_count 0x%x" " pci_reg %p pci_reg1 %p\n", __func__, ha, ha->pci_func, ha->msix_count, ha->pci_reg, ha->pci_reg1); if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msi[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qls_pci_attach_err; } for (i = 0; i < ha->num_rx_rings; i++) { ha->irq_vec[i].cq_idx = i; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq_rid = 1 + i; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); goto qls_pci_attach_err; } if (bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, qls_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle)) { device_printf(dev, "could not setup interrupt\n"); goto qls_pci_attach_err; } } qls_rd_nic_params(ha); /* allocate parent dma tag */ if (qls_alloc_parent_dma_tag(ha)) { device_printf(dev, "%s: qls_alloc_parent_dma_tag failed\n", __func__); goto qls_pci_attach_err; } /* alloc all dma buffers */ if (qls_alloc_dma(ha)) { device_printf(dev, "%s: qls_alloc_dma failed\n", __func__); goto qls_pci_attach_err; } /* create the o.s ethernet interface */ qls_init_ifnet(dev, ha); ha->flags.qla_watchdog_active = 1; ha->flags.qla_watchdog_pause = 1; TASK_INIT(&ha->tx_task, 0, qls_tx_done, ha); ha->tx_tq = taskqueue_create_fast("qla_txq", M_NOWAIT, taskqueue_thread_enqueue, &ha->tx_tq); taskqueue_start_threads(&ha->tx_tq, 1, PI_NET, "%s txq", device_get_nameunit(ha->pci_dev)); callout_init(&ha->tx_callout, TRUE); ha->flags.qla_callout_init = 1; /* create ioctl device interface */ if (qls_make_cdev(ha)) { device_printf(dev, "%s: qls_make_cdev failed\n", __func__); goto qls_pci_attach_err; } callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qls_watchdog, ha); TASK_INIT(&ha->err_task, 0, qls_error_recovery, ha); ha->err_tq = taskqueue_create_fast("qla_errq", M_NOWAIT, taskqueue_thread_enqueue, &ha->err_tq); taskqueue_start_threads(&ha->err_tq, 1, PI_NET, "%s errq", device_get_nameunit(ha->pci_dev)); QL_DPRINT2((dev, "%s: exit 0\n", __func__)); return (0); qls_pci_attach_err: qls_release(ha); QL_DPRINT2((dev, "%s: exit ENXIO\n", __func__)); return (ENXIO); } /* * Name: qls_pci_detach * Function: Unhooks the device from the operating system */ static int qls_pci_detach(device_t dev) { qla_host_t *ha = NULL; struct ifnet *ifp; QL_DPRINT2((dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } ifp = ha->ifp; (void)QLA_LOCK(ha, __func__, 0); qls_stop(ha); QLA_UNLOCK(ha, __func__); qls_release(ha); QL_DPRINT2((dev, "%s: exit\n", __func__)); return (0); } /* * Name: qls_release * Function: Releases the resources allocated for the device */ static void qls_release(qla_host_t *ha) { device_t dev; int i; dev = ha->pci_dev; if (ha->err_tq) { taskqueue_drain(ha->err_tq, &ha->err_task); taskqueue_free(ha->err_tq); } if (ha->tx_tq) { taskqueue_drain(ha->tx_tq, &ha->tx_task); taskqueue_free(ha->tx_tq); } qls_del_cdev(ha); if (ha->flags.qla_watchdog_active) { ha->flags.qla_watchdog_exit = 1; while (ha->qla_watchdog_exited == 0) qls_mdelay(__func__, 1); } if (ha->flags.qla_callout_init) callout_stop(&ha->tx_callout); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); qls_free_dma(ha); qls_free_parent_dma_tag(ha); for (i = 0; i < ha->num_rx_rings; i++) { if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); } if (ha->irq_vec[i].irq) { (void)bus_release_resource(dev, SYS_RES_IRQ, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } } if (ha->msix_count) pci_release_msi(dev); if (ha->flags.lock_init) { mtx_destroy(&ha->tx_lock); mtx_destroy(&ha->hw_lock); } if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); if (ha->pci_reg1) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid1, ha->pci_reg1); } /* * DMA Related Functions */ static void qls_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } *((bus_addr_t *)arg) = segs[0].ds_addr; return; } int qls_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; QL_DPRINT2((dev, "%s: enter\n", __func__)); ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { device_printf(dev, "%s: could not create dma tag\n", __func__); goto qls_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); device_printf(dev, "%s: bus_dmamem_alloc failed\n", __func__); goto qls_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qls_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto qls_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; qls_alloc_dmabuf_exit: QL_DPRINT2((dev, "%s: exit ret 0x%08x tag %p map %p b %p sz 0x%x\n", __func__, ret, (void *)dma_buf->dma_tag, (void *)dma_buf->dma_map, (void *)dma_buf->dma_b, dma_buf->size)); return ret; } void qls_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); } static int qls_alloc_parent_dma_tag(qla_host_t *ha) { int ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { device_printf(dev, "%s: could not create parent dma tag\n", __func__); return (-1); } ha->flags.parent_tag = 1; return (0); } static void qls_free_parent_dma_tag(qla_host_t *ha) { if (ha->flags.parent_tag) { bus_dma_tag_destroy(ha->parent_tag); ha->flags.parent_tag = 0; } } /* * Name: qls_init_ifnet * Function: Creates the Network Device Interface and Registers it with the O.S */ static void qls_init_ifnet(device_t dev, qla_host_t *ha) { struct ifnet *ifp; QL_DPRINT2((dev, "%s: enter\n", __func__)); ifp = ha->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: cannot if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - -#if __FreeBSD_version >= 1000000 - if_initbaudrate(ifp, IF_Gbps(10)); -#else - ifp->if_baudrate = 1 * 1000 * 1000 * 1000; -#endif /* #if (__FreeBSD_version > 1000000) */ - + ifp->if_baudrate = IF_Gbps(10); ifp->if_init = qls_init; ifp->if_softc = ha; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = qls_ioctl; ifp->if_start = qls_start; IFQ_SET_MAXLEN(&ifp->if_snd, qls_get_ifq_snd_maxlen(ha)); ifp->if_snd.ifq_drv_maxlen = qls_get_ifq_snd_maxlen(ha); IFQ_SET_READY(&ifp->if_snd); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if (ha->max_frame_size <= MCLBYTES) { ha->msize = MCLBYTES; } else if (ha->max_frame_size <= MJUMPAGESIZE) { ha->msize = MJUMPAGESIZE; } else ha->msize = MJUM9BYTES; ether_ifattach(ifp, qls_get_mac_addr(ha)); ifp->if_capabilities = IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_TSO4; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable = ifp->if_capabilities; ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifmedia_init(&ha->media, IFM_IMASK, qls_media_change, qls_media_status); ifmedia_add(&ha->media, (IFM_ETHER | qls_get_optics(ha) | IFM_FDX), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO)); QL_DPRINT2((dev, "%s: exit\n", __func__)); return; } static void qls_init_locked(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; qls_stop(ha); qls_flush_xmt_bufs(ha); if (qls_alloc_rcv_bufs(ha) != 0) return; if (qls_config_lro(ha)) return; bcopy(IF_LLADDR(ha->ifp), ha->mac_addr, ETHER_ADDR_LEN); ifp->if_hwassist = CSUM_IP; ifp->if_hwassist |= CSUM_TCP; ifp->if_hwassist |= CSUM_UDP; ifp->if_hwassist |= CSUM_TSO; if (qls_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ha->flags.qla_watchdog_pause = 0; } return; } static void qls_init(void *arg) { qla_host_t *ha; ha = (qla_host_t *)arg; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); (void)QLA_LOCK(ha, __func__, 0); qls_init_locked(ha); QLA_UNLOCK(ha, __func__); QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); } static void qls_set_multi(qla_host_t *ha, uint32_t add_multi) { uint8_t mta[Q8_MAX_NUM_MULTICAST_ADDRS * Q8_MAC_ADDR_LEN]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == Q8_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * Q8_MAC_ADDR_LEN], Q8_MAC_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); if (QLA_LOCK(ha, __func__, 1) == 0) { qls_hw_set_multi(ha, mta, mcnt, add_multi); QLA_UNLOCK(ha, __func__); } return; } static int qls_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; switch (cmd) { case SIOCSIFADDR: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFADDR (0x%lx)\n", __func__, cmd)); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { (void)QLA_LOCK(ha, __func__, 0); qls_init_locked(ha); QLA_UNLOCK(ha, __func__); } QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", __func__, cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr))); arp_ifinit(ifp, ifa); } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFMTU (0x%lx)\n", __func__, cmd)); if (ifr->ifr_mtu > QLA_MAX_MTU) { ret = EINVAL; } else { (void) QLA_LOCK(ha, __func__, 0); ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; QLA_UNLOCK(ha, __func__); if (ret) ret = EINVAL; } break; case SIOCSIFFLAGS: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFFLAGS (0x%lx)\n", __func__, cmd)); (void)QLA_LOCK(ha, __func__, 0); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { ret = qls_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { ret = qls_set_allmulti(ha); } } else { ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; qls_init_locked(ha); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) qls_stop(ha); ha->if_flags = ifp->if_flags; } QLA_UNLOCK(ha, __func__); break; case SIOCADDMULTI: QL_DPRINT4((ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCADDMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qls_set_multi(ha, 1); } break; case SIOCDELMULTI: QL_DPRINT4((ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCDELMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qls_set_multi(ha, 0); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", __func__, cmd)); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4((ha->pci_dev, "%s: SIOCSIFCAP (0x%lx)\n", __func__, cmd)); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) qls_init(ha); VLAN_CAPABILITIES(ifp); break; } default: QL_DPRINT4((ha->pci_dev, "%s: default (0x%lx)\n", __func__, cmd)); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qls_media_change(struct ifnet *ifp) { qla_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qls_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; qls_update_link_state(ha); if (ha->link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qls_get_optics(ha)); } QL_DPRINT2((ha->pci_dev, "%s: exit (%s)\n", __func__,\ (ha->link_up ? "link_up" : "link_down"))); return; } static void qls_start(struct ifnet *ifp) { int i, ret = 0; struct mbuf *m_head; qla_host_t *ha = (qla_host_t *)ifp->if_softc; QL_DPRINT8((ha->pci_dev, "%s: enter\n", __func__)); if (!mtx_trylock(&ha->tx_lock)) { QL_DPRINT8((ha->pci_dev, "%s: mtx_trylock(&ha->tx_lock) failed\n", __func__)); return; } if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == IFF_DRV_RUNNING) { for (i = 0; i < ha->num_tx_rings; i++) { ret |= qls_hw_tx_done(ha, i); } if (ret == 0) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { QL_DPRINT8((ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); QLA_TX_UNLOCK(ha); return; } if (!ha->link_up) { qls_update_link_state(ha); if (!ha->link_up) { QL_DPRINT8((ha->pci_dev, "%s: link down\n", __func__)); QLA_TX_UNLOCK(ha); return; } } while (ifp->if_snd.ifq_head != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) { QL_DPRINT8((ha->pci_dev, "%s: m_head == NULL\n", __func__)); break; } if (qls_send(ha, &m_head)) { if (m_head == NULL) break; QL_DPRINT8((ha->pci_dev, "%s: PREPEND\n", __func__)); ifp->if_drv_flags |= IFF_DRV_OACTIVE; IF_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } QLA_TX_UNLOCK(ha); QL_DPRINT8((ha->pci_dev, "%s: exit\n", __func__)); return; } static int qls_send(qla_host_t *ha, struct mbuf **m_headp) { bus_dma_segment_t segs[QLA_MAX_SEGMENTS]; bus_dmamap_t map; int nsegs; int ret = -1; uint32_t tx_idx; struct mbuf *m_head = *m_headp; uint32_t txr_idx = 0; QL_DPRINT8((ha->pci_dev, "%s: enter\n", __func__)); if (m_head->m_flags & M_FLOWID) txr_idx = m_head->m_pkthdr.flowid & (ha->num_tx_rings - 1); tx_idx = ha->tx_ring[txr_idx].txr_next; map = ha->tx_ring[txr_idx].tx_buf[tx_idx].map; ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ret == EFBIG) { struct mbuf *m; QL_DPRINT8((ha->pci_dev, "%s: EFBIG [%d]\n", __func__, m_head->m_pkthdr.len)); m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { ha->err_tx_defrag++; m_freem(m_head); *m_headp = NULL; device_printf(ha->pci_dev, "%s: m_defrag() = NULL [%d]\n", __func__, ret); return (ENOBUFS); } m_head = m; *m_headp = m_head; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed0[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } } else if (ret) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed1[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } QL_ASSERT(ha, (nsegs != 0), ("qls_send: empty packet")); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); if (!(ret = qls_hw_send(ha, segs, nsegs, tx_idx, m_head, txr_idx))) { ha->tx_ring[txr_idx].count++; ha->tx_ring[txr_idx].tx_buf[tx_idx].m_head = m_head; ha->tx_ring[txr_idx].tx_buf[tx_idx].map = map; } else { if (ret == EINVAL) { if (m_head) m_freem(m_head); *m_headp = NULL; } } QL_DPRINT8((ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qls_stop(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; dev = ha->pci_dev; ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); ha->flags.qla_watchdog_pause = 1; while (!ha->qla_watchdog_paused) qls_mdelay(__func__, 1); qls_del_hw_if(ha); qls_free_lro(ha); qls_flush_xmt_bufs(ha); qls_free_rcv_bufs(ha); return; } /* * Buffer Management Functions for Transmit and Receive Rings */ /* * Release mbuf after it sent on the wire */ static void qls_flush_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb) { QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); if (txb->m_head) { bus_dmamap_unload(ha->tx_tag, txb->map); m_freem(txb->m_head); txb->m_head = NULL; } QL_DPRINT2((ha->pci_dev, "%s: exit\n", __func__)); } static void qls_flush_xmt_bufs(qla_host_t *ha) { int i, j; for (j = 0; j < ha->num_tx_rings; j++) { for (i = 0; i < NUM_TX_DESCRIPTORS; i++) qls_flush_tx_buf(ha, &ha->tx_ring[j].tx_buf[i]); } return; } static int qls_alloc_rcv_mbufs(qla_host_t *ha, int r) { int i, j, ret = 0; qla_rx_buf_t *rxb; qla_rx_ring_t *rx_ring; volatile q81_bq_addr_e_t *sbq_e; rx_ring = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rx_ring->rx_buf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d, %d] failed\n", __func__, r, i); for (j = 0; j < i; j++) { rxb = &rx_ring->rx_buf[j]; bus_dmamap_destroy(ha->rx_tag, rxb->map); } goto qls_alloc_rcv_mbufs_err; } } rx_ring = &ha->rx_ring[r]; sbq_e = rx_ring->sbq_vaddr; rxb = &rx_ring->rx_buf[0]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { if (!(ret = qls_get_mbuf(ha, rxb, NULL))) { /* * set the physical address in the * corresponding descriptor entry in the * receive ring/queue for the hba */ sbq_e->addr_lo = rxb->paddr & 0xFFFFFFFF; sbq_e->addr_hi = (rxb->paddr >> 32) & 0xFFFFFFFF; } else { device_printf(ha->pci_dev, "%s: qls_get_mbuf [%d, %d] failed\n", __func__, r, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qls_alloc_rcv_mbufs_err; } rxb++; sbq_e++; } return 0; qls_alloc_rcv_mbufs_err: return (-1); } static void qls_free_rcv_bufs(qla_host_t *ha) { int i, r; qla_rx_buf_t *rxb; qla_rx_ring_t *rxr; for (r = 0; r < ha->num_rx_rings; r++) { rxr = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rxr->rx_buf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); } } bzero(rxr->rx_buf, (sizeof(qla_rx_buf_t) * NUM_RX_DESCRIPTORS)); } return; } static int qls_alloc_rcv_bufs(qla_host_t *ha) { int r, ret = 0; qla_rx_ring_t *rxr; for (r = 0; r < ha->num_rx_rings; r++) { rxr = &ha->rx_ring[r]; bzero(rxr->rx_buf, (sizeof(qla_rx_buf_t) * NUM_RX_DESCRIPTORS)); } for (r = 0; r < ha->num_rx_rings; r++) { ret = qls_alloc_rcv_mbufs(ha, r); if (ret) qls_free_rcv_bufs(ha); } return (ret); } int qls_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp) { register struct mbuf *mp = nmp; struct ifnet *ifp; int ret = 0; uint32_t offset; bus_dma_segment_t segs[1]; int nsegs; QL_DPRINT2((ha->pci_dev, "%s: enter\n", __func__)); ifp = ha->ifp; if (mp == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ha->msize); if (mp == NULL) { if (ha->msize == MCLBYTES) ha->err_m_getcl++; else ha->err_m_getjcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getcl failed\n", __func__); goto exit_qls_get_mbuf; } mp->m_len = mp->m_pkthdr.len = ha->msize; } else { mp->m_len = mp->m_pkthdr.len = ha->msize; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } /* align the receive buffers to 8 byte boundary */ offset = (uint32_t)((unsigned long long)mp->m_data & 0x7ULL); if (offset) { offset = 8 - offset; m_adj(mp, offset); } /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, rxb->map, mp, segs, &nsegs, BUS_DMA_NOWAIT); rxb->paddr = segs[0].ds_addr; if (ret || !rxb->paddr || (nsegs != 1)) { m_freem(mp); rxb->m_head = NULL; device_printf(ha->pci_dev, "%s: bus_dmamap_load failed[%d, 0x%016llx, %d]\n", __func__, ret, (long long unsigned int)rxb->paddr, nsegs); ret = -1; goto exit_qls_get_mbuf; } rxb->m_head = mp; bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_PREREAD); exit_qls_get_mbuf: QL_DPRINT2((ha->pci_dev, "%s: exit ret = 0x%08x\n", __func__, ret)); return (ret); } static void qls_tx_done(void *context, int pending) { qla_host_t *ha = context; struct ifnet *ifp; ifp = ha->ifp; if (!ifp) return; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { QL_DPRINT8((ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); return; } qls_start(ha->ifp); return; } static int qls_config_lro(qla_host_t *ha) { int i; struct lro_ctrl *lro; for (i = 0; i < ha->num_rx_rings; i++) { lro = &ha->rx_ring[i].lro; if (tcp_lro_init(lro)) { device_printf(ha->pci_dev, "%s: tcp_lro_init failed\n", __func__); return (-1); } lro->ifp = ha->ifp; } ha->flags.lro_init = 1; QL_DPRINT2((ha->pci_dev, "%s: LRO initialized\n", __func__)); return (0); } static void qls_free_lro(qla_host_t *ha) { int i; struct lro_ctrl *lro; if (!ha->flags.lro_init) return; for (i = 0; i < ha->num_rx_rings; i++) { lro = &ha->rx_ring[i].lro; tcp_lro_free(lro); } ha->flags.lro_init = 0; } static void qls_error_recovery(void *context, int pending) { qla_host_t *ha = context; qls_init(ha); return; } Index: head/sys/dev/sbni/if_sbni.c =================================================================== --- head/sys/dev/sbni/if_sbni.c (revision 263101) +++ head/sys/dev/sbni/if_sbni.c (revision 263102) @@ -1,1278 +1,1278 @@ /*- * Copyright (c) 1997-2001 Granch, Ltd. All rights reserved. * Author: Denis I.Timofeev * * Redistributon and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Device driver for Granch SBNI12 leased line adapters * * Revision 2.0.0 1997/08/06 * Initial revision by Alexey Zverev * * Revision 2.0.1 1997/08/11 * Additional internal statistics support (tx statistics) * * Revision 2.0.2 1997/11/05 * if_bpf bug has been fixed * * Revision 2.0.3 1998/12/20 * Memory leakage has been eliminated in * the sbni_st and sbni_timeout routines. * * Revision 3.0 2000/08/10 by Yaroslav Polyakov * Support for PCI cards. 4.1 modification. * * Revision 3.1 2000/09/12 * Removed extra #defines around bpf functions * * Revision 4.0 2000/11/23 by Denis Timofeev * Completely redesigned the buffer management * * Revision 4.1 2001/01/21 * Support for PCI Dual cards and new SBNI12D-10, -11 Dual/ISA cards * * Written with reference to NE2000 driver developed by David Greenman. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void sbni_init(void *); static void sbni_init_locked(struct sbni_softc *); static void sbni_start(struct ifnet *); static void sbni_start_locked(struct ifnet *); static int sbni_ioctl(struct ifnet *, u_long, caddr_t); static void sbni_stop(struct sbni_softc *); static void handle_channel(struct sbni_softc *); static void card_start(struct sbni_softc *); static int recv_frame(struct sbni_softc *); static void send_frame(struct sbni_softc *); static int upload_data(struct sbni_softc *, u_int, u_int, u_int, u_int32_t); static int skip_tail(struct sbni_softc *, u_int, u_int32_t); static void interpret_ack(struct sbni_softc *, u_int); static void download_data(struct sbni_softc *, u_int32_t *); static void prepare_to_send(struct sbni_softc *); static void drop_xmit_queue(struct sbni_softc *); static int get_rx_buf(struct sbni_softc *); static void indicate_pkt(struct sbni_softc *); static void change_level(struct sbni_softc *); static int check_fhdr(struct sbni_softc *, u_int *, u_int *, u_int *, u_int *, u_int32_t *); static int append_frame_to_pkt(struct sbni_softc *, u_int, u_int32_t); static void timeout_change_level(struct sbni_softc *); static void send_frame_header(struct sbni_softc *, u_int32_t *); static void set_initial_values(struct sbni_softc *, struct sbni_flags); static u_int32_t calc_crc32(u_int32_t, caddr_t, u_int); static timeout_t sbni_timeout; static __inline u_char sbni_inb(struct sbni_softc *, enum sbni_reg); static __inline void sbni_outb(struct sbni_softc *, enum sbni_reg, u_char); static __inline void sbni_insb(struct sbni_softc *, u_char *, u_int); static __inline void sbni_outsb(struct sbni_softc *, u_char *, u_int); static u_int32_t crc32tab[]; #ifdef SBNI_DUAL_COMPOUND static struct mtx headlist_lock; MTX_SYSINIT(headlist_lock, &headlist_lock, "sbni headlist", MTX_DEF); static struct sbni_softc *sbni_headlist; #endif /* -------------------------------------------------------------------------- */ static __inline u_char sbni_inb(struct sbni_softc *sc, enum sbni_reg reg) { return bus_space_read_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg); } static __inline void sbni_outb(struct sbni_softc *sc, enum sbni_reg reg, u_char value) { bus_space_write_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg, value); } static __inline void sbni_insb(struct sbni_softc *sc, u_char *to, u_int len) { bus_space_read_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, to, len); } static __inline void sbni_outsb(struct sbni_softc *sc, u_char *from, u_int len) { bus_space_write_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, from, len); } /* Valid combinations in CSR0 (for probing): VALID_DECODER 0000,0011,1011,1010 ; 0 ; - TR_REQ ; 1 ; + TR_RDY ; 2 ; - TR_RDY TR_REQ ; 3 ; + BU_EMP ; 4 ; + BU_EMP TR_REQ ; 5 ; + BU_EMP TR_RDY ; 6 ; - BU_EMP TR_RDY TR_REQ ; 7 ; + RC_RDY ; 8 ; + RC_RDY TR_REQ ; 9 ; + RC_RDY TR_RDY ; 10 ; - RC_RDY TR_RDY TR_REQ ; 11 ; - RC_RDY BU_EMP ; 12 ; - RC_RDY BU_EMP TR_REQ ; 13 ; - RC_RDY BU_EMP TR_RDY ; 14 ; - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - */ #define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) int sbni_probe(struct sbni_softc *sc) { u_char csr0; csr0 = sbni_inb(sc, CSR0); if (csr0 != 0xff && csr0 != 0x00) { csr0 &= ~EN_INT; if (csr0 & BU_EMP) csr0 |= EN_INT; if (VALID_DECODER & (1 << (csr0 >> 4))) return (0); } return (ENXIO); } /* * Install interface into kernel networking data structures */ int sbni_attach(struct sbni_softc *sc, int unit, struct sbni_flags flags) { struct ifnet *ifp; u_char csr0; ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOMEM); sbni_outb(sc, CSR0, 0); set_initial_values(sc, flags); /* Initialize ifnet structure */ ifp->if_softc = sc; if_initname(ifp, "sbni", unit); ifp->if_init = sbni_init; ifp->if_start = sbni_start; ifp->if_ioctl = sbni_ioctl; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); /* report real baud rate */ csr0 = sbni_inb(sc, CSR0); ifp->if_baudrate = (csr0 & 0x01 ? 500000 : 2000000) / (1 << flags.rate); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; mtx_init(&sc->lock, ifp->if_xname, MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->wch, &sc->lock, 0); ether_ifattach(ifp, sc->enaddr); /* device attach does transition from UNCONFIGURED to IDLE state */ - if_printf(ifp, "speed %ld, rxl ", ifp->if_baudrate); + if_printf(ifp, "speed %ju, rxl ", (uintmax_t)ifp->if_baudrate); if (sc->delta_rxl) printf("auto\n"); else printf("%d (fixed)\n", sc->cur_rxl_index); return (0); } void sbni_detach(struct sbni_softc *sc) { SBNI_LOCK(sc); sbni_stop(sc); SBNI_UNLOCK(sc); callout_drain(&sc->wch); ether_ifdetach(sc->ifp); if (sc->irq_handle) bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handle); mtx_destroy(&sc->lock); if_free(sc->ifp); } void sbni_release_resources(struct sbni_softc *sc) { if (sc->irq_res) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); if (sc->io_res && sc->io_off == 0) bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io_rid, sc->io_res); } /* -------------------------------------------------------------------------- */ static void sbni_init(void *xsc) { struct sbni_softc *sc; sc = (struct sbni_softc *)xsc; SBNI_LOCK(sc); sbni_init_locked(sc); SBNI_UNLOCK(sc); } static void sbni_init_locked(struct sbni_softc *sc) { struct ifnet *ifp; ifp = sc->ifp; /* * kludge to avoid multiple initialization when more than once * protocols configured */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; card_start(sc); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* attempt to start output */ sbni_start_locked(ifp); } static void sbni_start(struct ifnet *ifp) { struct sbni_softc *sc = ifp->if_softc; SBNI_LOCK(sc); sbni_start_locked(ifp); SBNI_UNLOCK(sc); } static void sbni_start_locked(struct ifnet *ifp) { struct sbni_softc *sc = ifp->if_softc; if (sc->tx_frameno == 0) prepare_to_send(sc); } static void sbni_stop(struct sbni_softc *sc) { sbni_outb(sc, CSR0, 0); drop_xmit_queue(sc); if (sc->rx_buf_p) { m_freem(sc->rx_buf_p); sc->rx_buf_p = NULL; } callout_stop(&sc->wch); sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } /* -------------------------------------------------------------------------- */ /* interrupt handler */ /* * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not * be looked as two independent single-channel devices. Every channel seems * as Ethernet interface but interrupt handler must be common. Really, first * channel ("master") driver only registers the handler. In it's struct softc * it has got pointer to "slave" channel's struct softc and handles that's * interrupts too. * softc of successfully attached ISA SBNI boards is linked to list. * While next board driver is initialized, it scans this list. If one * has found softc with same irq and ioaddr different by 4 then it assumes * this board to be "master". */ void sbni_intr(void *arg) { struct sbni_softc *sc; int repeat; sc = (struct sbni_softc *)arg; do { repeat = 0; SBNI_LOCK(sc); if (sbni_inb(sc, CSR0) & (RC_RDY | TR_RDY)) { handle_channel(sc); repeat = 1; } SBNI_UNLOCK(sc); if (sc->slave_sc) { /* second channel present */ SBNI_LOCK(sc->slave_sc); if (sbni_inb(sc->slave_sc, CSR0) & (RC_RDY | TR_RDY)) { handle_channel(sc->slave_sc); repeat = 1; } SBNI_UNLOCK(sc->slave_sc); } } while (repeat); } static void handle_channel(struct sbni_softc *sc) { int req_ans; u_char csr0; sbni_outb(sc, CSR0, (sbni_inb(sc, CSR0) & ~EN_INT) | TR_REQ); sc->timer_ticks = CHANGE_LEVEL_START_TICKS; for (;;) { csr0 = sbni_inb(sc, CSR0); if ((csr0 & (RC_RDY | TR_RDY)) == 0) break; req_ans = !(sc->state & FL_PREV_OK); if (csr0 & RC_RDY) req_ans = recv_frame(sc); /* * TR_RDY always equals 1 here because we have owned the marker, * and we set TR_REQ when disabled interrupts */ csr0 = sbni_inb(sc, CSR0); if ((csr0 & TR_RDY) == 0 || (csr0 & RC_RDY) != 0) if_printf(sc->ifp, "internal error!\n"); /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */ if (req_ans || sc->tx_frameno != 0) send_frame(sc); else { /* send the marker without any data */ sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) & ~TR_REQ); } } sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | EN_INT); } /* * Routine returns 1 if it need to acknoweledge received frame. * Empty frame received without errors won't be acknoweledged. */ static int recv_frame(struct sbni_softc *sc) { u_int32_t crc; u_int framelen, frameno, ack; u_int is_first, frame_ok; crc = CRC32_INITIAL; if (check_fhdr(sc, &framelen, &frameno, &ack, &is_first, &crc)) { frame_ok = framelen > 4 ? upload_data(sc, framelen, frameno, is_first, crc) : skip_tail(sc, framelen, crc); if (frame_ok) interpret_ack(sc, ack); } else { framelen = 0; frame_ok = 0; } sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) ^ CT_ZER); if (frame_ok) { sc->state |= FL_PREV_OK; if (framelen > 4) sc->in_stats.all_rx_number++; } else { sc->state &= ~FL_PREV_OK; change_level(sc); sc->in_stats.all_rx_number++; sc->in_stats.bad_rx_number++; } return (!frame_ok || framelen > 4); } static void send_frame(struct sbni_softc *sc) { u_int32_t crc; u_char csr0; crc = CRC32_INITIAL; if (sc->state & FL_NEED_RESEND) { /* if frame was sended but not ACK'ed - resend it */ if (sc->trans_errors) { sc->trans_errors--; if (sc->framelen != 0) sc->in_stats.resend_tx_number++; } else { /* cannot xmit with many attempts */ drop_xmit_queue(sc); goto do_send; } } else sc->trans_errors = TR_ERROR_COUNT; send_frame_header(sc, &crc); sc->state |= FL_NEED_RESEND; /* * FL_NEED_RESEND will be cleared after ACK, but if empty * frame sended then in prepare_to_send next frame */ if (sc->framelen) { download_data(sc, &crc); sc->in_stats.all_tx_number++; sc->state |= FL_WAIT_ACK; } sbni_outsb(sc, (u_char *)&crc, sizeof crc); do_send: csr0 = sbni_inb(sc, CSR0); sbni_outb(sc, CSR0, csr0 & ~TR_REQ); if (sc->tx_frameno) { /* next frame exists - request to send */ sbni_outb(sc, CSR0, csr0 | TR_REQ); } } static void download_data(struct sbni_softc *sc, u_int32_t *crc_p) { struct mbuf *m; caddr_t data_p; u_int data_len, pos, slice; data_p = NULL; /* initialized to avoid warn */ pos = 0; for (m = sc->tx_buf_p; m != NULL && pos < sc->pktlen; m = m->m_next) { if (pos + m->m_len > sc->outpos) { data_len = m->m_len - (sc->outpos - pos); data_p = mtod(m, caddr_t) + (sc->outpos - pos); goto do_copy; } else pos += m->m_len; } data_len = 0; do_copy: pos = 0; do { if (data_len) { slice = min(data_len, sc->framelen - pos); sbni_outsb(sc, data_p, slice); *crc_p = calc_crc32(*crc_p, data_p, slice); pos += slice; if (data_len -= slice) data_p += slice; else { do { m = m->m_next; } while (m != NULL && m->m_len == 0); if (m) { data_len = m->m_len; data_p = mtod(m, caddr_t); } } } else { /* frame too short - zero padding */ pos = sc->framelen - pos; while (pos--) { sbni_outb(sc, DAT, 0); *crc_p = CRC32(0, *crc_p); } return; } } while (pos < sc->framelen); } static int upload_data(struct sbni_softc *sc, u_int framelen, u_int frameno, u_int is_first, u_int32_t crc) { int frame_ok; if (is_first) { sc->wait_frameno = frameno; sc->inppos = 0; } if (sc->wait_frameno == frameno) { if (sc->inppos + framelen <= ETHER_MAX_LEN) { frame_ok = append_frame_to_pkt(sc, framelen, crc); /* * if CRC is right but framelen incorrect then transmitter * error was occured... drop entire packet */ } else if ((frame_ok = skip_tail(sc, framelen, crc)) != 0) { sc->wait_frameno = 0; sc->inppos = 0; sc->ifp->if_ierrors++; /* now skip all frames until is_first != 0 */ } } else frame_ok = skip_tail(sc, framelen, crc); if (is_first && !frame_ok) { /* * Frame has been violated, but we have stored * is_first already... Drop entire packet. */ sc->wait_frameno = 0; sc->ifp->if_ierrors++; } return (frame_ok); } static __inline void send_complete(struct sbni_softc *); static __inline void send_complete(struct sbni_softc *sc) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; sc->ifp->if_opackets++; } static void interpret_ack(struct sbni_softc *sc, u_int ack) { if (ack == FRAME_SENT_OK) { sc->state &= ~FL_NEED_RESEND; if (sc->state & FL_WAIT_ACK) { sc->outpos += sc->framelen; if (--sc->tx_frameno) { sc->framelen = min( sc->maxframe, sc->pktlen - sc->outpos); } else { send_complete(sc); prepare_to_send(sc); } } } sc->state &= ~FL_WAIT_ACK; } /* * Glue received frame with previous fragments of packet. * Indicate packet when last frame would be accepted. */ static int append_frame_to_pkt(struct sbni_softc *sc, u_int framelen, u_int32_t crc) { caddr_t p; if (sc->inppos + framelen > ETHER_MAX_LEN) return (0); if (!sc->rx_buf_p && !get_rx_buf(sc)) return (0); p = sc->rx_buf_p->m_data + sc->inppos; sbni_insb(sc, p, framelen); if (calc_crc32(crc, p, framelen) != CRC32_REMAINDER) return (0); sc->inppos += framelen - 4; if (--sc->wait_frameno == 0) { /* last frame received */ indicate_pkt(sc); sc->ifp->if_ipackets++; } return (1); } /* * Prepare to start output on adapter. Current priority must be set to splimp * before this routine is called. * Transmitter will be actually activated when marker has been accepted. */ static void prepare_to_send(struct sbni_softc *sc) { struct mbuf *m; u_int len; /* sc->tx_buf_p == NULL here! */ if (sc->tx_buf_p) printf("sbni: memory leak!\n"); sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, sc->tx_buf_p); if (!sc->tx_buf_p) { /* nothing to transmit... */ sc->pktlen = 0; sc->tx_frameno = 0; sc->framelen = 0; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } for (len = 0, m = sc->tx_buf_p; m; m = m->m_next) len += m->m_len; if (len != 0) break; m_freem(sc->tx_buf_p); } if (len < SBNI_MIN_LEN) len = SBNI_MIN_LEN; sc->pktlen = len; sc->tx_frameno = (len + sc->maxframe - 1) / sc->maxframe; sc->framelen = min(len, sc->maxframe); sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | TR_REQ); sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; BPF_MTAP(sc->ifp, sc->tx_buf_p); } static void drop_xmit_queue(struct sbni_softc *sc) { struct mbuf *m; if (sc->tx_buf_p) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; sc->ifp->if_oerrors++; } for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, m); if (m == NULL) break; m_freem(m); sc->ifp->if_oerrors++; } sc->tx_frameno = 0; sc->framelen = 0; sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } static void send_frame_header(struct sbni_softc *sc, u_int32_t *crc_p) { u_int32_t crc; u_int len_field; u_char value; crc = *crc_p; len_field = sc->framelen + 6; /* CRC + frameno + reserved */ if (sc->state & FL_NEED_RESEND) len_field |= FRAME_RETRY; /* non-first attempt... */ if (sc->outpos == 0) len_field |= FRAME_FIRST; len_field |= (sc->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD; sbni_outb(sc, DAT, SBNI_SIG); value = (u_char)len_field; sbni_outb(sc, DAT, value); crc = CRC32(value, crc); value = (u_char)(len_field >> 8); sbni_outb(sc, DAT, value); crc = CRC32(value, crc); sbni_outb(sc, DAT, sc->tx_frameno); crc = CRC32(sc->tx_frameno, crc); sbni_outb(sc, DAT, 0); crc = CRC32(0, crc); *crc_p = crc; } /* * if frame tail not needed (incorrect number or received twice), * it won't store, but CRC will be calculated */ static int skip_tail(struct sbni_softc *sc, u_int tail_len, u_int32_t crc) { while (tail_len--) crc = CRC32(sbni_inb(sc, DAT), crc); return (crc == CRC32_REMAINDER); } static int check_fhdr(struct sbni_softc *sc, u_int *framelen, u_int *frameno, u_int *ack, u_int *is_first, u_int32_t *crc_p) { u_int32_t crc; u_char value; crc = *crc_p; if (sbni_inb(sc, DAT) != SBNI_SIG) return (0); value = sbni_inb(sc, DAT); *framelen = (u_int)value; crc = CRC32(value, crc); value = sbni_inb(sc, DAT); *framelen |= ((u_int)value) << 8; crc = CRC32(value, crc); *ack = *framelen & FRAME_ACK_MASK; *is_first = (*framelen & FRAME_FIRST) != 0; if ((*framelen &= FRAME_LEN_MASK) < 6 || *framelen > SBNI_MAX_FRAME - 3) return (0); value = sbni_inb(sc, DAT); *frameno = (u_int)value; crc = CRC32(value, crc); crc = CRC32(sbni_inb(sc, DAT), crc); /* reserved byte */ *framelen -= 2; *crc_p = crc; return (1); } static int get_rx_buf(struct sbni_softc *sc) { struct mbuf *m; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_printf(sc->ifp, "cannot allocate header mbuf\n"); return (0); } /* * We always put the received packet in a single buffer - * either with just an mbuf header or in a cluster attached * to the header. The +2 is to compensate for the alignment * fixup below. */ if (ETHER_MAX_LEN + 2 > MHLEN) { /* Attach an mbuf cluster */ MCLGET(m, M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_freem(m); return (0); } } m->m_pkthdr.len = m->m_len = ETHER_MAX_LEN + 2; /* * The +2 is to longword align the start of the real packet. * (sizeof ether_header == 14) * This is important for NFS. */ m_adj(m, 2); sc->rx_buf_p = m; return (1); } static void indicate_pkt(struct sbni_softc *sc) { struct ifnet *ifp = sc->ifp; struct mbuf *m; m = sc->rx_buf_p; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = sc->inppos; sc->rx_buf_p = NULL; SBNI_UNLOCK(sc); (*ifp->if_input)(ifp, m); SBNI_LOCK(sc); } /* -------------------------------------------------------------------------- */ /* * Routine checks periodically wire activity and regenerates marker if * connect was inactive for a long time. */ static void sbni_timeout(void *xsc) { struct sbni_softc *sc; u_char csr0; sc = (struct sbni_softc *)xsc; SBNI_ASSERT_LOCKED(sc); csr0 = sbni_inb(sc, CSR0); if (csr0 & RC_CHK) { if (sc->timer_ticks) { if (csr0 & (RC_RDY | BU_EMP)) /* receiving not active */ sc->timer_ticks--; } else { sc->in_stats.timeout_number++; if (sc->delta_rxl) timeout_change_level(sc); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); csr0 = sbni_inb(sc, CSR0); } } sbni_outb(sc, CSR0, csr0 | RC_CHK); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); } /* -------------------------------------------------------------------------- */ static void card_start(struct sbni_softc *sc) { sc->timer_ticks = CHANGE_LEVEL_START_TICKS; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->state |= FL_PREV_OK; sc->inppos = 0; sc->wait_frameno = 0; sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); sbni_outb(sc, CSR0, EN_INT); } /* -------------------------------------------------------------------------- */ static u_char rxl_tab[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f }; #define SIZE_OF_TIMEOUT_RXL_TAB 4 static u_char timeout_rxl_tab[] = { 0x03, 0x05, 0x08, 0x0b }; static void set_initial_values(struct sbni_softc *sc, struct sbni_flags flags) { if (flags.fixed_rxl) { sc->delta_rxl = 0; /* disable receive level autodetection */ sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->maxframe = DEFAULT_FRAME_LEN; /* * generate Ethernet address (0x00ff01xxxxxx) */ *(u_int16_t *) sc->enaddr = htons(0x00ff); if (flags.mac_addr) { *(u_int32_t *) (sc->enaddr + 2) = htonl(flags.mac_addr | 0x01000000); } else { *(u_char *) (sc->enaddr + 2) = 0x01; read_random(sc->enaddr + 3, 3); } } #ifdef SBNI_DUAL_COMPOUND void sbni_add(struct sbni_softc *sc) { mtx_lock(&headlist_lock); sc->link = sbni_headlist; sbni_headlist = sc; mtx_unlock(&headlist_lock); } struct sbni_softc * connect_to_master(struct sbni_softc *sc) { struct sbni_softc *p, *p_prev; mtx_lock(&headlist_lock); for (p = sbni_headlist, p_prev = NULL; p; p_prev = p, p = p->link) { if (rman_get_start(p->io_res) == rman_get_start(sc->io_res) + 4 || rman_get_start(p->io_res) == rman_get_start(sc->io_res) - 4) { p->slave_sc = sc; if (p_prev) p_prev->link = p->link; else sbni_headlist = p->link; mtx_unlock(&headlist_lock); return p; } } mtx_unlock(&headlist_lock); return (NULL); } #endif /* SBNI_DUAL_COMPOUND */ /* Receive level auto-selection */ static void change_level(struct sbni_softc *sc) { if (sc->delta_rxl == 0) /* do not auto-negotiate RxL */ return; if (sc->cur_rxl_index == 0) sc->delta_rxl = 1; else if (sc->cur_rxl_index == 15) sc->delta_rxl = -1; else if (sc->cur_rxl_rcvd < sc->prev_rxl_rcvd) sc->delta_rxl = -sc->delta_rxl; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index += sc->delta_rxl]; sbni_inb(sc, CSR0); /* it needed for PCI cards */ sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } static void timeout_change_level(struct sbni_softc *sc) { sc->cur_rxl_index = timeout_rxl_tab[sc->timeout_rxl]; if (++sc->timeout_rxl >= 4) sc->timeout_rxl = 0; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sbni_inb(sc, CSR0); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } /* -------------------------------------------------------------------------- */ /* * Process an ioctl request. This code needs some work - it looks * pretty ugly. */ static int sbni_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct sbni_softc *sc; struct ifreq *ifr; struct thread *td; struct sbni_in_stats *in_stats; struct sbni_flags flags; int error; sc = ifp->if_softc; ifr = (struct ifreq *)data; td = curthread; error = 0; switch (command) { case SIOCSIFFLAGS: /* * If the interface is marked up and stopped, then start it. * If it is marked down and running, then stop it. */ SBNI_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) sbni_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { sbni_stop(sc); } } SBNI_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * Multicast list has changed; set the hardware filter * accordingly. */ error = 0; /* if (ifr == NULL) error = EAFNOSUPPORT; */ break; /* * SBNI specific ioctl */ case SIOCGHWFLAGS: /* get flags */ SBNI_LOCK(sc); bcopy((caddr_t)IF_LLADDR(sc->ifp)+3, (caddr_t) &flags, 3); flags.rxl = sc->cur_rxl_index; flags.rate = sc->csr1.rate; flags.fixed_rxl = (sc->delta_rxl == 0); flags.fixed_rate = 1; SBNI_UNLOCK(sc); ifr->ifr_data = *(caddr_t*) &flags; break; case SIOCGINSTATS: in_stats = malloc(sizeof(struct sbni_in_stats), M_DEVBUF, M_WAITOK); SBNI_LOCK(sc); bcopy(&sc->in_stats, in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); error = copyout(ifr->ifr_data, in_stats, sizeof(struct sbni_in_stats)); free(in_stats, M_DEVBUF); break; case SIOCSHWFLAGS: /* set flags */ /* root only */ error = priv_check(td, PRIV_DRIVER); if (error) break; flags = *(struct sbni_flags*)&ifr->ifr_data; SBNI_LOCK(sc); if (flags.fixed_rxl) { sc->delta_rxl = 0; sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE; if (flags.mac_addr) bcopy((caddr_t) &flags, (caddr_t) IF_LLADDR(sc->ifp)+3, 3); /* Don't be afraid... */ sbni_outb(sc, CSR1, *(char*)(&sc->csr1) | PR_RES); SBNI_UNLOCK(sc); break; case SIOCRINSTATS: SBNI_LOCK(sc); if (!(error = priv_check(td, PRIV_DRIVER))) /* root only */ bzero(&sc->in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* -------------------------------------------------------------------------- */ static u_int32_t calc_crc32(u_int32_t crc, caddr_t p, u_int len) { while (len--) crc = CRC32(*p++, crc); return (crc); } static u_int32_t crc32tab[] __aligned(8) = { 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76, 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A, 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 }; Index: head/sys/dev/virtio/network/if_vtnet.c =================================================================== --- head/sys/dev/virtio/network/if_vtnet.c (revision 263101) +++ head/sys/dev/virtio/network/if_vtnet.c (revision 263102) @@ -1,3840 +1,3840 @@ /*- * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Driver for VirtIO network devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "virtio_if.h" #include "opt_inet.h" #include "opt_inet6.h" static int vtnet_modevent(module_t, int, void *); static int vtnet_probe(device_t); static int vtnet_attach(device_t); static int vtnet_detach(device_t); static int vtnet_suspend(device_t); static int vtnet_resume(device_t); static int vtnet_shutdown(device_t); static int vtnet_attach_completed(device_t); static int vtnet_config_change(device_t); static void vtnet_negotiate_features(struct vtnet_softc *); static void vtnet_setup_features(struct vtnet_softc *); static int vtnet_init_rxq(struct vtnet_softc *, int); static int vtnet_init_txq(struct vtnet_softc *, int); static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); static void vtnet_free_rxtx_queues(struct vtnet_softc *); static int vtnet_alloc_rx_filters(struct vtnet_softc *); static void vtnet_free_rx_filters(struct vtnet_softc *); static int vtnet_alloc_virtqueues(struct vtnet_softc *); static int vtnet_setup_interface(struct vtnet_softc *); static int vtnet_change_mtu(struct vtnet_softc *, int); static int vtnet_ioctl(struct ifnet *, u_long, caddr_t); static int vtnet_rxq_populate(struct vtnet_rxq *); static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_new_buf(struct vtnet_rxq *); static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_rxq_eof(struct vtnet_rxq *); static void vtnet_rx_vq_intr(void *); static void vtnet_rxq_tq_intr(void *, int); static void vtnet_txq_free_mbufs(struct vtnet_txq *); static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, int *, int *, int *); static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, int, struct virtio_net_hdr *); static struct mbuf * vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, struct virtio_net_hdr *); static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **); #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); #else static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *); static void vtnet_txq_tq_deferred(void *, int); #endif static void vtnet_txq_start(struct vtnet_txq *); static void vtnet_txq_tq_intr(void *, int); static void vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); static void vtnet_tx_start_all(struct vtnet_softc *); #ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *); #endif static int vtnet_watchdog(struct vtnet_txq *); static void vtnet_rxq_accum_stats(struct vtnet_rxq *, struct vtnet_rxq_stats *); static void vtnet_txq_accum_stats(struct vtnet_txq *, struct vtnet_txq_stats *); static void vtnet_accumulate_stats(struct vtnet_softc *); static void vtnet_tick(void *); static void vtnet_start_taskqueues(struct vtnet_softc *); static void vtnet_free_taskqueues(struct vtnet_softc *); static void vtnet_drain_taskqueues(struct vtnet_softc *); static void vtnet_drain_rxtx_queues(struct vtnet_softc *); static void vtnet_stop_rendezvous(struct vtnet_softc *); static void vtnet_stop(struct vtnet_softc *); static int vtnet_virtio_reinit(struct vtnet_softc *); static void vtnet_init_rx_filters(struct vtnet_softc *); static int vtnet_init_rx_queues(struct vtnet_softc *); static int vtnet_init_tx_queues(struct vtnet_softc *); static int vtnet_init_rxtx_queues(struct vtnet_softc *); static void vtnet_set_active_vq_pairs(struct vtnet_softc *); static int vtnet_reinit(struct vtnet_softc *); static void vtnet_init_locked(struct vtnet_softc *); static void vtnet_init(void *); static void vtnet_free_ctrl_vq(struct vtnet_softc *); static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); static int vtnet_set_promisc(struct vtnet_softc *, int); static int vtnet_set_allmulti(struct vtnet_softc *, int); static void vtnet_attach_disable_promisc(struct vtnet_softc *); static void vtnet_rx_filter(struct vtnet_softc *); static void vtnet_rx_filter_mac(struct vtnet_softc *); static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_rx_filter_vlan(struct vtnet_softc *); static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); static int vtnet_is_link_up(struct vtnet_softc *); static void vtnet_update_link_status(struct vtnet_softc *); static int vtnet_ifmedia_upd(struct ifnet *); static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void vtnet_get_hwaddr(struct vtnet_softc *); static void vtnet_set_hwaddr(struct vtnet_softc *); static void vtnet_vlan_tag_remove(struct mbuf *); static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_rxq *); static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_txq *); static void vtnet_setup_queue_sysctl(struct vtnet_softc *); static void vtnet_setup_sysctl(struct vtnet_softc *); static int vtnet_rxq_enable_intr(struct vtnet_rxq *); static void vtnet_rxq_disable_intr(struct vtnet_rxq *); static int vtnet_txq_enable_intr(struct vtnet_txq *); static void vtnet_txq_disable_intr(struct vtnet_txq *); static void vtnet_enable_rx_interrupts(struct vtnet_softc *); static void vtnet_enable_tx_interrupts(struct vtnet_softc *); static void vtnet_enable_interrupts(struct vtnet_softc *); static void vtnet_disable_rx_interrupts(struct vtnet_softc *); static void vtnet_disable_tx_interrupts(struct vtnet_softc *); static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); /* Tunables. */ static int vtnet_csum_disable = 0; TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); static int vtnet_mq_disable = 0; TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); static int vtnet_mq_max_pairs = 0; TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); static int vtnet_rx_process_limit = 512; TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); /* * Reducing the number of transmit completed interrupts can improve * performance. To do so, the define below keeps the Tx vq interrupt * disabled and adds calls to vtnet_txeof() in the start and watchdog * paths. The price to pay for this is the m_free'ing of transmitted * mbufs may be delayed until the watchdog fires. * * BMV: Reintroduce this later as a run-time option, if it makes * sense after the EVENT_IDX feature is supported. * * #define VTNET_TX_INTR_MODERATION */ static uma_zone_t vtnet_tx_header_zone; static struct virtio_feature_desc vtnet_feature_desc[] = { { VIRTIO_NET_F_CSUM, "TxChecksum" }, { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, { VIRTIO_NET_F_MAC, "MacAddress" }, { VIRTIO_NET_F_GSO, "TxAllGSO" }, { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" }, { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" }, { VIRTIO_NET_F_GUEST_ECN, "RxECN" }, { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, { VIRTIO_NET_F_STATUS, "Status" }, { VIRTIO_NET_F_CTRL_VQ, "ControlVq" }, { VIRTIO_NET_F_CTRL_RX, "RxMode" }, { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, { VIRTIO_NET_F_MQ, "Multiqueue" }, { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, { 0, NULL } }; static device_method_t vtnet_methods[] = { /* Device methods. */ DEVMETHOD(device_probe, vtnet_probe), DEVMETHOD(device_attach, vtnet_attach), DEVMETHOD(device_detach, vtnet_detach), DEVMETHOD(device_suspend, vtnet_suspend), DEVMETHOD(device_resume, vtnet_resume), DEVMETHOD(device_shutdown, vtnet_shutdown), /* VirtIO methods. */ DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), DEVMETHOD(virtio_config_change, vtnet_config_change), DEVMETHOD_END }; static driver_t vtnet_driver = { "vtnet", vtnet_methods, sizeof(struct vtnet_softc) }; static devclass_t vtnet_devclass; DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); MODULE_VERSION(vtnet, 1); MODULE_DEPEND(vtnet, virtio, 1, 1, 1); static int vtnet_modevent(module_t mod, int type, void *unused) { int error; error = 0; switch (type) { case MOD_LOAD: vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr", sizeof(struct vtnet_tx_header), NULL, NULL, NULL, NULL, 0, 0); break; case MOD_QUIESCE: case MOD_UNLOAD: if (uma_zone_get_cur(vtnet_tx_header_zone) > 0) error = EBUSY; else if (type == MOD_UNLOAD) { uma_zdestroy(vtnet_tx_header_zone); vtnet_tx_header_zone = NULL; } break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static int vtnet_probe(device_t dev) { if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK) return (ENXIO); device_set_desc(dev, "VirtIO Networking Adapter"); return (BUS_PROBE_DEFAULT); } static int vtnet_attach(device_t dev) { struct vtnet_softc *sc; int error; sc = device_get_softc(dev); sc->vtnet_dev = dev; /* Register our feature descriptions. */ virtio_set_feature_desc(dev, vtnet_feature_desc); VTNET_CORE_LOCK_INIT(sc); callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); vtnet_setup_sysctl(sc); vtnet_setup_features(sc); error = vtnet_alloc_rx_filters(sc); if (error) { device_printf(dev, "cannot allocate Rx filters\n"); goto fail; } error = vtnet_alloc_rxtx_queues(sc); if (error) { device_printf(dev, "cannot allocate queues\n"); goto fail; } error = vtnet_alloc_virtqueues(sc); if (error) { device_printf(dev, "cannot allocate virtqueues\n"); goto fail; } error = vtnet_setup_interface(sc); if (error) { device_printf(dev, "cannot setup interface\n"); goto fail; } error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { device_printf(dev, "cannot setup virtqueue interrupts\n"); /* BMV: This will crash if during boot! */ ether_ifdetach(sc->vtnet_ifp); goto fail; } vtnet_start_taskqueues(sc); fail: if (error) vtnet_detach(dev); return (error); } static int vtnet_detach(device_t dev) { struct vtnet_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; if (device_is_attached(dev)) { VTNET_CORE_LOCK(sc); vtnet_stop(sc); VTNET_CORE_UNLOCK(sc); callout_drain(&sc->vtnet_tick_ch); vtnet_drain_taskqueues(sc); ether_ifdetach(ifp); } vtnet_free_taskqueues(sc); if (sc->vtnet_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); sc->vtnet_vlan_attach = NULL; } if (sc->vtnet_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_unconfg, sc->vtnet_vlan_detach); sc->vtnet_vlan_detach = NULL; } ifmedia_removeall(&sc->vtnet_media); if (ifp != NULL) { if_free(ifp); sc->vtnet_ifp = NULL; } vtnet_free_rxtx_queues(sc); vtnet_free_rx_filters(sc); if (sc->vtnet_ctrl_vq != NULL) vtnet_free_ctrl_vq(sc); VTNET_CORE_LOCK_DESTROY(sc); return (0); } static int vtnet_suspend(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_stop(sc); sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_resume(device_t dev) { struct vtnet_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) vtnet_init_locked(sc); sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; VTNET_CORE_UNLOCK(sc); return (0); } static int vtnet_shutdown(device_t dev) { /* * Suspend already does all of what we need to * do here; we just never expect to be resumed. */ return (vtnet_suspend(dev)); } static int vtnet_attach_completed(device_t dev) { vtnet_attach_disable_promisc(device_get_softc(dev)); return (0); } static int vtnet_config_change(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); VTNET_CORE_LOCK(sc); vtnet_update_link_status(sc); if (sc->vtnet_link_active != 0) vtnet_tx_start_all(sc); VTNET_CORE_UNLOCK(sc); return (0); } static void vtnet_negotiate_features(struct vtnet_softc *sc) { device_t dev; uint64_t mask, features; dev = sc->vtnet_dev; mask = 0; /* * TSO and LRO are only available when their corresponding checksum * offload feature is also negotiated. */ if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; } if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) mask |= VTNET_TSO_FEATURES; if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) mask |= VTNET_LRO_FEATURES; if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) mask |= VIRTIO_NET_F_MQ; #ifdef VTNET_LEGACY_TX mask |= VIRTIO_NET_F_MQ; #endif features = VTNET_FEATURES & ~mask; sc->vtnet_features = virtio_negotiate_features(dev, features); if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0) return; if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) return; /* * LRO without mergeable buffers requires special care. This is not * ideal because every receive buffer must be large enough to hold * the maximum TCP packet, the Ethernet header, and the header. This * requires up to 34 descriptors with MCLBYTES clusters. If we do * not have indirect descriptors, LRO is disabled since the virtqueue * will not contain very many receive buffers. */ if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) { device_printf(dev, "LRO disabled due to both mergeable buffers and indirect " "descriptors not negotiated\n"); features &= ~VTNET_LRO_FEATURES; sc->vtnet_features = virtio_negotiate_features(dev, features); } else sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; } static void vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; int max_pairs, max; dev = sc->vtnet_dev; vtnet_negotiate_features(sc); if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { /* This feature should always be negotiated. */ sc->vtnet_flags |= VTNET_FLAG_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; else sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; else sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; } if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { max_pairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) max_pairs = 1; } else max_pairs = 1; if (max_pairs > 1) { /* * Limit the maximum number of queue pairs to the number of * CPUs or the configured maximum. The actual number of * queues that get used may be less. */ max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); if (max > 0 && max_pairs > max) max_pairs = max; if (max_pairs > mp_ncpus) max_pairs = mp_ncpus; if (max_pairs > VTNET_MAX_QUEUE_PAIRS) max_pairs = VTNET_MAX_QUEUE_PAIRS; if (max_pairs > 1) sc->vtnet_flags |= VTNET_FLAG_MULTIQ; } sc->vtnet_max_vq_pairs = max_pairs; } static int vtnet_init_rxq(struct vtnet_softc *sc, int id) { struct vtnet_rxq *rxq; rxq = &sc->vtnet_rxqs[id]; snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); rxq->vtnrx_sc = sc; rxq->vtnrx_id = id; rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); if (rxq->vtnrx_sg == NULL) return (ENOMEM); TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, taskqueue_thread_enqueue, &rxq->vtnrx_tq); return (rxq->vtnrx_tq == NULL ? ENOMEM : 0); } static int vtnet_init_txq(struct vtnet_softc *sc, int id) { struct vtnet_txq *txq; txq = &sc->vtnet_txqs[id]; snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", device_get_nameunit(sc->vtnet_dev), id); mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); txq->vtntx_sc = sc; txq->vtntx_id = id; txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); if (txq->vtntx_sg == NULL) return (ENOMEM); #ifndef VTNET_LEGACY_TX txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &txq->vtntx_mtx); if (txq->vtntx_br == NULL) return (ENOMEM); TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); #endif TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, taskqueue_thread_enqueue, &txq->vtntx_tq); if (txq->vtntx_tq == NULL) return (ENOMEM); return (0); } static int vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) { int i, npairs, error; npairs = sc->vtnet_max_vq_pairs; sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) return (ENOMEM); for (i = 0; i < npairs; i++) { error = vtnet_init_rxq(sc, i); if (error) return (error); error = vtnet_init_txq(sc, i); if (error) return (error); } vtnet_setup_queue_sysctl(sc); return (0); } static void vtnet_destroy_rxq(struct vtnet_rxq *rxq) { rxq->vtnrx_sc = NULL; rxq->vtnrx_id = -1; if (rxq->vtnrx_sg != NULL) { sglist_free(rxq->vtnrx_sg); rxq->vtnrx_sg = NULL; } if (mtx_initialized(&rxq->vtnrx_mtx) != 0) mtx_destroy(&rxq->vtnrx_mtx); } static void vtnet_destroy_txq(struct vtnet_txq *txq) { txq->vtntx_sc = NULL; txq->vtntx_id = -1; if (txq->vtntx_sg != NULL) { sglist_free(txq->vtntx_sg); txq->vtntx_sg = NULL; } #ifndef VTNET_LEGACY_TX if (txq->vtntx_br != NULL) { buf_ring_free(txq->vtntx_br, M_DEVBUF); txq->vtntx_br = NULL; } #endif if (mtx_initialized(&txq->vtntx_mtx) != 0) mtx_destroy(&txq->vtntx_mtx); } static void vtnet_free_rxtx_queues(struct vtnet_softc *sc) { int i; if (sc->vtnet_rxqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); free(sc->vtnet_rxqs, M_DEVBUF); sc->vtnet_rxqs = NULL; } if (sc->vtnet_txqs != NULL) { for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_destroy_txq(&sc->vtnet_txqs[i]); free(sc->vtnet_txqs, M_DEVBUF); sc->vtnet_txqs = NULL; } } static int vtnet_alloc_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_mac_filter == NULL) return (ENOMEM); } if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vtnet_vlan_filter == NULL) return (ENOMEM); } return (0); } static void vtnet_free_rx_filters(struct vtnet_softc *sc) { if (sc->vtnet_mac_filter != NULL) { free(sc->vtnet_mac_filter, M_DEVBUF); sc->vtnet_mac_filter = NULL; } if (sc->vtnet_vlan_filter != NULL) { free(sc->vtnet_vlan_filter, M_DEVBUF); sc->vtnet_vlan_filter = NULL; } } static int vtnet_alloc_virtqueues(struct vtnet_softc *sc) { device_t dev; struct vq_alloc_info *info; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, idx, flags, nvqs, error; dev = sc->vtnet_dev; flags = 0; nvqs = sc->vtnet_max_vq_pairs * 2; if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) nvqs++; info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); if (info == NULL) return (ENOMEM); for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { rxq = &sc->vtnet_rxqs[i]; VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); txq = &sc->vtnet_txqs[i]; VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, vtnet_tx_vq_intr, txq, &txq->vtntx_vq, "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); } if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); } /* * Enable interrupt binding if this is multiqueue. This only matters * when per-vq MSIX is available. */ if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) flags |= 0; error = virtio_alloc_virtqueues(dev, flags, nvqs, info); free(info, M_TEMP); return (error); } static int vtnet_setup_interface(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; int limit; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure\n"); return (ENOSPC); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - if_initbaudrate(ifp, IF_Gbps(10)); /* Approx. */ + ifp->if_baudrate = IF_Gbps(10); /* Approx. */ ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vtnet_init; ifp->if_ioctl = vtnet_ioctl; #ifndef VTNET_LEGACY_TX ifp->if_transmit = vtnet_txq_mq_start; ifp->if_qflush = vtnet_qflush; #else struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; ifp->if_start = vtnet_start; IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1); ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1; IFQ_SET_READY(&ifp->if_snd); #endif ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, vtnet_ifmedia_sts); ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); /* Read (or generate) the MAC address for the adapter. */ vtnet_get_hwaddr(sc); ether_ifattach(ifp, sc->vtnet_hwaddr); if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) ifp->if_capabilities |= IFCAP_LINKSTATE; /* Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } else { if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) ifp->if_capabilities |= IFCAP_TSO4; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) ifp->if_capabilities |= IFCAP_TSO6; if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; } if (ifp->if_capabilities & IFCAP_TSO) ifp->if_capabilities |= IFCAP_VLAN_HWTSO; } if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; if (ifp->if_capabilities & IFCAP_HWCSUM) { /* * VirtIO does not support VLAN tagging, but we can fake * it by inserting and removing the 802.1Q header during * transmit and receive. We are then able to do checksum * offloading of VLAN frames. */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; } ifp->if_capenable = ifp->if_capabilities; /* * Capabilities after here are not enabled by default. */ if (ifp->if_capabilities & IFCAP_RXCSUM) { if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) ifp->if_capabilities |= IFCAP_LRO; } if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } limit = vtnet_tunable_int(sc, "rx_process_limit", vtnet_rx_process_limit); if (limit < 0) limit = INT_MAX; sc->vtnet_rx_process_limit = limit; return (0); } static int vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) { struct ifnet *ifp; int frame_size, clsize; ifp = sc->vtnet_ifp; if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) return (EINVAL); frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + new_mtu; /* * Based on the new MTU (and hence frame size) determine which * cluster size is most appropriate for the receive queues. */ if (frame_size <= MCLBYTES) { clsize = MCLBYTES; } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { /* Avoid going past 9K jumbos. */ if (frame_size > MJUM9BYTES) return (EINVAL); clsize = MJUM9BYTES; } else clsize = MJUMPAGESIZE; ifp->if_mtu = new_mtu; sc->vtnet_rx_new_clsize = clsize; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } return (0); } static int vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vtnet_softc *sc; struct ifreq *ifr; int reinit, mask, error; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifp->if_mtu != ifr->ifr_mtu) { VTNET_CORE_LOCK(sc); error = vtnet_change_mtu(sc, ifr->ifr_mtu); VTNET_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: VTNET_CORE_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_stop(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ sc->vtnet_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) vtnet_rx_filter(sc); else error = ENOTSUP; } } else vtnet_init_locked(sc); if (error == 0) sc->vtnet_if_flags = ifp->if_flags; VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) break; VTNET_CORE_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_rx_filter_mac(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd); break; case SIOCSIFCAP: VTNET_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | IFCAP_VLAN_HWFILTER)) { /* These Rx features require us to renegotiate. */ reinit = 1; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } else reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } VTNET_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int vtnet_rxq_populate(struct vtnet_rxq *rxq) { struct virtqueue *vq; int nbufs, error; vq = rxq->vtnrx_vq; error = ENOSPC; for (nbufs = 0; !virtqueue_full(vq); nbufs++) { error = vtnet_rxq_new_buf(rxq); if (error) break; } if (nbufs > 0) { virtqueue_notify(vq); /* * EMSGSIZE signifies the virtqueue did not have enough * entries available to hold the last mbuf. This is not * an error. */ if (error == EMSGSIZE) error = 0; } return (error); } static void vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) { struct virtqueue *vq; struct mbuf *m; int last; vq = rxq->vtnrx_vq; last = 0; while ((m = virtqueue_drain(vq, &last)) != NULL) m_freem(m); KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in rx queue %p", __func__, rxq)); } static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { struct mbuf *m_head, *m_tail, *m; int i, clsize; clsize = sc->vtnet_rx_clsize; KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); if (m_head == NULL) goto fail; m_head->m_len = clsize; m_tail = m_head; /* Allocate the rest of the chain. */ for (i = 1; i < nbufs; i++) { m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); if (m == NULL) goto fail; m->m_len = clsize; m_tail->m_next = m; m_tail = m; } if (m_tailp != NULL) *m_tailp = m_tail; return (m_head); fail: sc->vtnet_stats.mbuf_alloc_failed++; m_freem(m_head); return (NULL); } /* * Slow path for when LRO without mergeable buffers is negotiated. */ static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, int len0) { struct vtnet_softc *sc; struct mbuf *m, *m_prev; struct mbuf *m_new, *m_tail; int len, clsize, nreplace, error; sc = rxq->vtnrx_sc; clsize = sc->vtnet_rx_clsize; m_prev = NULL; m_tail = NULL; nreplace = 0; m = m0; len = len0; /* * Since these mbuf chains are so large, we avoid allocating an * entire replacement chain if possible. When the received frame * did not consume the entire chain, the unused mbufs are moved * to the replacement chain. */ while (len > 0) { /* * Something is seriously wrong if we received a frame * larger than the chain. Drop it. */ if (m == NULL) { sc->vtnet_stats.rx_frame_too_large++; return (EMSGSIZE); } /* We always allocate the same cluster size. */ KASSERT(m->m_len == clsize, ("%s: mbuf size %d is not the cluster size %d", __func__, m->m_len, clsize)); m->m_len = MIN(m->m_len, len); len -= m->m_len; m_prev = m; m = m->m_next; nreplace++; } KASSERT(nreplace <= sc->vtnet_rx_nmbufs, ("%s: too many replacement mbufs %d max %d", __func__, nreplace, sc->vtnet_rx_nmbufs)); m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail); if (m_new == NULL) { m_prev->m_len = clsize; return (ENOBUFS); } /* * Move any unused mbufs from the received chain onto the end * of the new chain. */ if (m_prev->m_next != NULL) { m_tail->m_next = m_prev->m_next; m_prev->m_next = NULL; } error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * BAD! We could not enqueue the replacement mbuf chain. We * must restore the m0 chain to the original state if it was * modified so we can subsequently discard it. * * NOTE: The replacement is suppose to be an identical copy * to the one just dequeued so this is an unexpected error. */ sc->vtnet_stats.rx_enq_replacement_failed++; if (m_tail->m_next != NULL) { m_prev->m_next = m_tail->m_next; m_tail->m_next = NULL; } m_prev->m_len = clsize; m_freem(m_new); } return (error); } static int vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) { struct vtnet_softc *sc; struct mbuf *m_new; int error; sc = rxq->vtnrx_sc; KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); if (m->m_next == NULL) { /* Fast-path for the common case of just one mbuf. */ if (m->m_len < len) return (EINVAL); m_new = vtnet_rx_alloc_buf(sc, 1, NULL); if (m_new == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * The new mbuf is suppose to be an identical * copy of the one just dequeued so this is an * unexpected error. */ m_freem(m_new); sc->vtnet_stats.rx_enq_replacement_failed++; } else m->m_len = len; } else error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); return (error); } static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) { struct vtnet_softc *sc; struct sglist *sg; struct vtnet_rx_header *rxhdr; uint8_t *mdata; int offset, error; sc = rxq->vtnrx_sc; sg = rxq->vtnrx_sg; mdata = mtod(m, uint8_t *); VTNET_RXQ_LOCK_ASSERT(rxq); KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, ("%s: chained mbuf without LRO_NOMRG", __func__)); KASSERT(m->m_len == sc->vtnet_rx_clsize, ("%s: unexpected cluster size %d/%d", __func__, m->m_len, sc->vtnet_rx_clsize)); sglist_reset(sg); if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); rxhdr = (struct vtnet_rx_header *) mdata; sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); offset = sizeof(struct vtnet_rx_header); } else offset = 0; sglist_append(sg, mdata + offset, m->m_len - offset); if (m->m_next != NULL) { error = sglist_append_mbuf(sg, m->m_next); MPASS(error == 0); } error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg); return (error); } static int vtnet_rxq_new_buf(struct vtnet_rxq *rxq) { struct vtnet_softc *sc; struct mbuf *m; int error; sc = rxq->vtnrx_sc; m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); if (m == NULL) return (ENOBUFS); error = vtnet_rxq_enqueue_buf(rxq, m); if (error) m_freem(m); return (error); } /* * Use the checksum offset in the VirtIO header to set the * correct CSUM_* flags. */ static int vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; #if defined(INET) || defined(INET6) int offset = hdr->csum_start + hdr->csum_offset; #endif sc = rxq->vtnrx_sc; /* Only do a basic sanity check on the offset. */ switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: if (__predict_false(offset < ip_start + sizeof(struct ip))) return (1); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } /* * Use the offset to determine the appropriate CSUM_* flags. This is * a bit dirty, but we can get by with it since the checksum offsets * happen to be different. We assume the host host does not do IPv4 * header checksum offloading. */ switch (hdr->csum_offset) { case offsetof(struct udphdr, uh_sum): case offsetof(struct tcphdr, th_sum): m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case offsetof(struct sctphdr, checksum): m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; default: sc->vtnet_stats.rx_csum_bad_offset++; return (1); } return (0); } static int vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int offset, proto; sc = rxq->vtnrx_sc; switch (eth_type) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip; if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) return (1); ip = (struct ip *)(m->m_data + ip_start); proto = ip->ip_p; offset = ip_start + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < ip_start + sizeof(struct ip6_hdr))) return (1); offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); if (__predict_false(offset < 0)) return (1); break; #endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } switch (proto) { case IPPROTO_TCP: if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_UDP: if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; case IPPROTO_SCTP: if (__predict_false(m->m_len < offset + sizeof(struct sctphdr))) return (1); m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; default: /* * For the remaining protocols, FreeBSD does not support * checksum offloading, so the checksum will be recomputed. */ #if 0 if_printf(sc->vtnet_ifp, "cksum offload of unsupported " "protocol eth_type=%#x proto=%d csum_start=%d " "csum_offset=%d\n", __func__, eth_type, proto, hdr->csum_start, hdr->csum_offset); #endif break; } return (0); } /* * Set the appropriate CSUM_* flags. Unfortunately, the information * provided is not directly useful to us. The VirtIO header gives the * offset of the checksum, which is all Linux needs, but this is not * how FreeBSD does things. We are forced to peek inside the packet * a bit. * * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD * could accept the offsets and let the stack figure it out. */ static int vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct ether_header *eh; struct ether_vlan_header *evh; uint16_t eth_type; int offset, error; eh = mtod(m, struct ether_header *); eth_type = ntohs(eh->ether_type); if (eth_type == ETHERTYPE_VLAN) { /* BMV: We should handle nested VLAN tags too. */ evh = mtod(m, struct ether_vlan_header *); eth_type = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else offset = sizeof(struct ether_header); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); else error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); return (error); } static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) { struct mbuf *m; while (--nbufs > 0) { m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); if (m == NULL) break; vtnet_rxq_discard_buf(rxq, m); } } static void vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) { int error; /* * Requeue the discarded mbuf. This should always be successful * since it was just dequeued. */ error = vtnet_rxq_enqueue_buf(rxq, m); KASSERT(error == 0, ("%s: cannot requeue discarded mbuf %d", __func__, error)); } static int vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) { struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; struct mbuf *m, *m_tail; int len; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; m_tail = m_head; while (--nbufs > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) { rxq->vtnrx_stats.vrxs_ierrors++; goto fail; } if (vtnet_rxq_new_buf(rxq) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); goto fail; } if (m->m_len < len) len = m->m_len; m->m_len = len; m->m_flags &= ~M_PKTHDR; m_head->m_pkthdr.len += len; m_tail->m_next = m; m_tail = m; } return (0); fail: sc->vtnet_stats.rx_mergeable_failed++; m_freem(m_head); return (1); } static void vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; struct ifnet *ifp; struct ether_header *eh; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { vtnet_vlan_tag_remove(m); /* * With the 802.1Q header removed, update the * checksum starting location accordingly. */ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; } } m->m_pkthdr.flowid = rxq->vtnrx_id; m->m_flags |= M_FLOWID; /* * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum * distinction that Linux does. Need to reevaluate if performing * offloading for the NEEDS_CSUM case is really appropriate. */ if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { if (vtnet_rxq_csum(rxq, m, hdr) == 0) rxq->vtnrx_stats.vrxs_csum++; else rxq->vtnrx_stats.vrxs_csum_failed++; } rxq->vtnrx_stats.vrxs_ipackets++; rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; VTNET_RXQ_UNLOCK(rxq); (*ifp->if_input)(ifp, m); VTNET_RXQ_LOCK(rxq); } static int vtnet_rxq_eof(struct vtnet_rxq *rxq) { struct virtio_net_hdr lhdr, *hdr; struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; struct mbuf *m; struct virtio_net_hdr_mrg_rxbuf *mhdr; int len, deq, nbufs, adjsz, count; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; hdr = &lhdr; deq = 0; count = sc->vtnet_rx_process_limit; VTNET_RXQ_LOCK_ASSERT(rxq); while (count-- > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) break; deq++; if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { rxq->vtnrx_stats.vrxs_ierrors++; vtnet_rxq_discard_buf(rxq, m); continue; } if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* * Account for our pad inserted between the header * and the actual start of the frame. */ len += VTNET_RX_HEADER_PAD; } else { mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); nbufs = mhdr->num_buffers; adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) vtnet_rxq_discard_merged_bufs(rxq, nbufs); continue; } m->m_pkthdr.len = len; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.csum_flags = 0; if (nbufs > 1) { /* Dequeue the rest of chain. */ if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) continue; } /* * Save copy of header before we strip it. For both mergeable * and non-mergeable, the header is at the beginning of the * mbuf data. We no longer need num_buffers, so always use a * regular header. * * BMV: Is this memcpy() expensive? We know the mbuf data is * still valid even after the m_adj(). */ memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); m_adj(m, adjsz); vtnet_rxq_input(rxq, m, hdr); /* Must recheck after dropping the Rx lock. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (deq > 0) virtqueue_notify(vq); return (count > 0 ? 0 : EAGAIN); } static void vtnet_rx_vq_intr(void *xrxq) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; struct ifnet *ifp; int tries, more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; tries = 0; if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_rxq_disable_intr(rxq); return; } VTNET_RXQ_LOCK(rxq); again: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); /* * This is an occasional condition or race (when !more), * so retry a few times before scheduling the taskqueue. */ if (tries++ < VTNET_INTR_DISABLE_RETRIES) goto again; VTNET_RXQ_UNLOCK(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } else VTNET_RXQ_UNLOCK(rxq); } static void vtnet_rxq_tq_intr(void *xrxq, int pending) { struct vtnet_softc *sc; struct vtnet_rxq *rxq; struct ifnet *ifp; int more; rxq = xrxq; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; VTNET_RXQ_LOCK(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_RXQ_UNLOCK(rxq); return; } more = vtnet_rxq_eof(rxq); if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) vtnet_rxq_disable_intr(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } VTNET_RXQ_UNLOCK(rxq); } static void vtnet_txq_free_mbufs(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; int last; vq = txq->vtntx_vq; last = 0; while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in tx queue %p", __func__, txq)); } /* * BMV: Much of this can go away once we finally have offsets in * the mbuf packet header. Bug andre@. */ static int vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, int *proto, int *start) { struct vtnet_softc *sc; struct ether_vlan_header *evh; int offset; sc = txq->vtntx_sc; evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. */ *etype = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else { *etype = ntohs(evh->evl_encap_proto); offset = sizeof(struct ether_header); } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip, iphdr; if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = (struct ip *)(m->m_data + offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: sc->vtnet_stats.tx_csum_bad_ethtype++; return (EINVAL); } return (0); } static int vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, int offset, struct virtio_net_hdr *hdr) { static struct timeval lastecn; static int curecn; struct vtnet_softc *sc; struct tcphdr *tcp, tcphdr; sc = txq->vtntx_sc; if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else tcp = (struct tcphdr *)(m->m_data + offset); hdr->hdr_len = offset + (tcp->th_off << 2); hdr->gso_size = m->m_pkthdr.tso_segsz; hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : VIRTIO_NET_HDR_GSO_TCPV6; if (tcp->th_flags & TH_CWR) { /* * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, * ECN support is not on a per-interface basis, but globally via * the net.inet.tcp.ecn.enable sysctl knob. The default is off. */ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { if (ppsratecheck(&lastecn, &curecn, 1)) if_printf(sc->vtnet_ifp, "TSO with ECN not negotiated with host\n"); return (ENOTSUP); } hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } txq->vtntx_stats.vtxs_tso++; return (0); } static struct mbuf * vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; int flags, etype, csum_start, proto, error; sc = txq->vtntx_sc; flags = m->m_pkthdr.csum_flags; error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); if (error) goto drop; if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { /* * We could compare the IP protocol vs the CSUM_ flag too, * but that really should not be necessary. */ hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = csum_start; hdr->csum_offset = m->m_pkthdr.csum_data; txq->vtntx_stats.vtxs_csum++; } if (flags & CSUM_TSO) { if (__predict_false(proto != IPPROTO_TCP)) { /* Likely failed to correctly parse the mbuf. */ sc->vtnet_stats.tx_tso_not_tcp++; goto drop; } KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, ("%s: mbuf %p TSO without checksum offload %#x", __func__, m, flags)); error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); if (error) goto drop; } return (m); drop: m_freem(m); return (NULL); } static int vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { struct vtnet_softc *sc; struct virtqueue *vq; struct sglist *sg; struct mbuf *m; int error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; sg = txq->vtntx_sg; m = *m_head; sglist_reset(sg); error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); KASSERT(error == 0 && sg->sg_nseg == 1, ("%s: error %d adding header to sglist", __func__, error)); error = sglist_append_mbuf(sg, m); if (error) { m = m_defrag(m, M_NOWAIT); if (m == NULL) goto fail; *m_head = m; sc->vtnet_stats.tx_defragged++; error = sglist_append_mbuf(sg, m); if (error) goto fail; } txhdr->vth_mbuf = m; error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); return (error); fail: sc->vtnet_stats.tx_defrag_failed++; m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } static int vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) { struct vtnet_softc *sc; struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; struct mbuf *m; int error; sc = txq->vtntx_sc; m = *m_head; M_ASSERTPKTHDR(m); txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO); if (txhdr == NULL) { m_freem(m); *m_head = NULL; return (ENOMEM); } /* * Always use the non-mergeable header, regardless if the feature * was negotiated. For transmit, num_buffers is always zero. The * vtnet_hdr_size is used to enqueue the correct header size. */ hdr = &txhdr->vth_uhdr.hdr; if (m->m_flags & M_VLANTAG) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } m->m_flags &= ~M_VLANTAG; } if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } } error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); if (error == 0) return (0); fail: uma_zfree(vtnet_tx_header_zone, txhdr); return (error); } #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m0; int enq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; enq = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) return; vtnet_txq_eof(txq); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (virtqueue_full(vq)) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; if (vtnet_txq_encap(txq, &m0) != 0) { if (m0 != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m0); break; } enq++; ETHER_BPF_MTAP(ifp, m0); } if (enq > 0) { virtqueue_notify(vq); txq->vtntx_watchdog = VTNET_TX_TIMEOUT; } } static void vtnet_start(struct ifnet *ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; sc = ifp->if_softc; txq = &sc->vtnet_txqs[0]; VTNET_TXQ_LOCK(txq); vtnet_start_locked(txq, ifp); VTNET_TXQ_UNLOCK(txq); } #else /* !VTNET_LEGACY_TX */ static int vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) { struct vtnet_softc *sc; struct virtqueue *vq; struct buf_ring *br; struct ifnet *ifp; int enq, error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; br = txq->vtntx_br; ifp = sc->vtnet_ifp; enq = 0; error = 0; VTNET_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vtnet_link_active == 0) { if (m != NULL) error = drbr_enqueue(ifp, br, m); return (error); } if (m != NULL) { error = drbr_enqueue(ifp, br, m); if (error) return (error); } vtnet_txq_eof(txq); while ((m = drbr_peek(ifp, br)) != NULL) { if (virtqueue_full(vq)) { drbr_putback(ifp, br, m); error = ENOBUFS; break; } error = vtnet_txq_encap(txq, &m); if (error) { if (m != NULL) drbr_putback(ifp, br, m); else drbr_advance(ifp, br); break; } drbr_advance(ifp, br); enq++; ETHER_BPF_MTAP(ifp, m); } if (enq > 0) { virtqueue_notify(vq); txq->vtntx_watchdog = VTNET_TX_TIMEOUT; } return (error); } static int vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; int i, npairs, error; sc = ifp->if_softc; npairs = sc->vtnet_act_vq_pairs; if (m->m_flags & M_FLOWID) i = m->m_pkthdr.flowid % npairs; else i = curcpu % npairs; txq = &sc->vtnet_txqs[i]; if (VTNET_TXQ_TRYLOCK(txq) != 0) { error = vtnet_txq_mq_start_locked(txq, m); VTNET_TXQ_UNLOCK(txq); } else { error = drbr_enqueue(ifp, txq->vtntx_br, m); taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); } return (error); } static void vtnet_txq_tq_deferred(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; txq = xtxq; sc = txq->vtntx_sc; VTNET_TXQ_LOCK(txq); if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); VTNET_TXQ_UNLOCK(txq); } #endif /* VTNET_LEGACY_TX */ static void vtnet_txq_start(struct vtnet_txq *txq) { struct vtnet_softc *sc; struct ifnet *ifp; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; #ifdef VTNET_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vtnet_start_locked(txq, ifp); #else if (!drbr_empty(ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); #endif } static void vtnet_txq_tq_intr(void *xtxq, int pending) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); if (vtnet_txq_enable_intr(txq) != 0) { vtnet_txq_disable_intr(txq); txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } VTNET_TXQ_UNLOCK(txq); } static void vtnet_txq_eof(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; struct mbuf *m; vq = txq->vtntx_vq; VTNET_TXQ_LOCK_ASSERT(txq); while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { m = txhdr->vth_mbuf; txq->vtntx_stats.vtxs_opackets++; txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) txq->vtntx_stats.vtxs_omcasts++; m_freem(m); uma_zfree(vtnet_tx_header_zone, txhdr); } if (virtqueue_empty(vq)) txq->vtntx_watchdog = 0; } static void vtnet_tx_vq_intr(void *xtxq) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; int tries; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; tries = 0; if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { /* * Ignore this interrupt. Either this is a spurious interrupt * or multiqueue without per-VQ MSIX so every queue needs to * be polled (a brain dead configuration we could try harder * to avoid). */ vtnet_txq_disable_intr(txq); return; } VTNET_TXQ_LOCK(txq); again: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); vtnet_txq_start(txq); if (vtnet_txq_enable_intr(txq) != 0) { vtnet_txq_disable_intr(txq); /* * This is an occasional race, so retry a few times * before scheduling the taskqueue. */ if (tries++ < VTNET_INTR_DISABLE_RETRIES) goto again; VTNET_TXQ_UNLOCK(txq); txq->vtntx_stats.vtxs_rescheduled++; taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } else VTNET_TXQ_UNLOCK(txq); } static void vtnet_tx_start_all(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; VTNET_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); vtnet_txq_start(txq); VTNET_TXQ_UNLOCK(txq); } } #ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *ifp) { struct vtnet_softc *sc; struct vtnet_txq *txq; struct mbuf *m; int i; sc = ifp->if_softc; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) m_freem(m); VTNET_TXQ_UNLOCK(txq); } if_qflush(ifp); } #endif static int vtnet_watchdog(struct vtnet_txq *txq) { struct vtnet_softc *sc; sc = txq->vtntx_sc; VTNET_TXQ_LOCK(txq); if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX) vtnet_txq_eof(txq); if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { VTNET_TXQ_UNLOCK(txq); return (0); } VTNET_TXQ_UNLOCK(txq); if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); return (1); } static void vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum) { struct vtnet_rxq_stats *st; st = &rxq->vtnrx_stats; accum->vrxs_ipackets += st->vrxs_ipackets; accum->vrxs_ibytes += st->vrxs_ibytes; accum->vrxs_iqdrops += st->vrxs_iqdrops; accum->vrxs_csum += st->vrxs_csum; accum->vrxs_csum_failed += st->vrxs_csum_failed; accum->vrxs_rescheduled += st->vrxs_rescheduled; } static void vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum) { struct vtnet_txq_stats *st; st = &txq->vtntx_stats; accum->vtxs_opackets += st->vtxs_opackets; accum->vtxs_obytes += st->vtxs_obytes; accum->vtxs_csum += st->vtxs_csum; accum->vtxs_tso += st->vtxs_tso; accum->vtxs_rescheduled += st->vtxs_rescheduled; } static void vtnet_accumulate_stats(struct vtnet_softc *sc) { struct ifnet *ifp; struct vtnet_statistics *st; struct vtnet_rxq_stats rxaccum; struct vtnet_txq_stats txaccum; int i; ifp = sc->vtnet_ifp; st = &sc->vtnet_stats; bzero(&rxaccum, sizeof(struct vtnet_rxq_stats)); bzero(&txaccum, sizeof(struct vtnet_txq_stats)); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum); vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum); } st->rx_csum_offloaded = rxaccum.vrxs_csum; st->rx_csum_failed = rxaccum.vrxs_csum_failed; st->rx_task_rescheduled = rxaccum.vrxs_rescheduled; st->tx_csum_offloaded = txaccum.vtxs_csum; st->tx_tso_offloaded = txaccum.vtxs_tso; st->tx_task_rescheduled = txaccum.vtxs_rescheduled; /* * With the exception of if_ierrors, these ifnet statistics are * only updated in the driver, so just set them to our accumulated * values. if_ierrors is updated in ether_input() for malformed * frames that we should have already discarded. */ ifp->if_ipackets = rxaccum.vrxs_ipackets; ifp->if_iqdrops = rxaccum.vrxs_iqdrops; ifp->if_ierrors = rxaccum.vrxs_ierrors; ifp->if_opackets = txaccum.vtxs_opackets; #ifndef VTNET_LEGACY_TX ifp->if_obytes = txaccum.vtxs_obytes; ifp->if_omcasts = txaccum.vtxs_omcasts; #endif } static void vtnet_tick(void *xsc) { struct vtnet_softc *sc; struct ifnet *ifp; int i, timedout; sc = xsc; ifp = sc->vtnet_ifp; timedout = 0; VTNET_CORE_LOCK_ASSERT(sc); vtnet_accumulate_stats(sc); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); if (timedout != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } else callout_schedule(&sc->vtnet_tick_ch, hz); } static void vtnet_start_taskqueues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i, error; dev = sc->vtnet_dev; /* * Errors here are very difficult to recover from - we cannot * easily fail because, if this is during boot, we will hang * when freeing any successfully started taskqueues because * the scheduler isn't up yet. * * Most drivers just ignore the return value - it only fails * with ENOMEM so an error is not likely. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); if (error) { device_printf(dev, "failed to start rx taskq %d\n", rxq->vtnrx_id); } txq = &sc->vtnet_txqs[i]; error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); if (error) { device_printf(dev, "failed to start tx taskq %d\n", txq->vtntx_id); } } } static void vtnet_free_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) { taskqueue_free(rxq->vtnrx_tq); rxq->vtnrx_vq = NULL; } txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_free(txq->vtntx_tq); txq->vtntx_tq = NULL; } } } static void vtnet_drain_taskqueues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; if (rxq->vtnrx_tq != NULL) taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); txq = &sc->vtnet_txqs[i]; if (txq->vtntx_tq != NULL) { taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); #ifndef VTNET_LEGACY_TX taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); #endif } } } static void vtnet_drain_rxtx_queues(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; vtnet_rxq_free_mbufs(rxq); txq = &sc->vtnet_txqs[i]; vtnet_txq_free_mbufs(txq); } } static void vtnet_stop_rendezvous(struct vtnet_softc *sc) { struct vtnet_rxq *rxq; struct vtnet_txq *txq; int i; /* * Lock and unlock the per-queue mutex so we known the stop * state is visible. Doing only the active queues should be * sufficient, but it does not cost much extra to do all the * queues. Note we hold the core mutex here too. */ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; VTNET_RXQ_LOCK(rxq); VTNET_RXQ_UNLOCK(rxq); txq = &sc->vtnet_txqs[i]; VTNET_TXQ_LOCK(txq); VTNET_TXQ_UNLOCK(txq); } } static void vtnet_stop(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->vtnet_link_active = 0; callout_stop(&sc->vtnet_tick_ch); /* Only advisory. */ vtnet_disable_interrupts(sc); /* * Stop the host adapter. This resets it to the pre-initialized * state. It will not generate any interrupts until after it is * reinitialized. */ virtio_stop(dev); vtnet_stop_rendezvous(sc); /* Free any mbufs left in the virtqueues. */ vtnet_drain_rxtx_queues(sc); } static int vtnet_virtio_reinit(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; uint64_t features; int mask, error; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; features = sc->vtnet_features; mask = 0; #if defined(INET) mask |= IFCAP_RXCSUM; #endif #if defined (INET6) mask |= IFCAP_RXCSUM_IPV6; #endif /* * Re-negotiate with the host, removing any disabled receive * features. Transmit features are disabled only on our side * via if_capenable and if_hwassist. */ if (ifp->if_capabilities & mask) { /* * We require both IPv4 and IPv6 offloading to be enabled * in order to negotiated it: VirtIO does not distinguish * between the two. */ if ((ifp->if_capenable & mask) != mask) features &= ~VIRTIO_NET_F_GUEST_CSUM; } if (ifp->if_capabilities & IFCAP_LRO) { if ((ifp->if_capenable & IFCAP_LRO) == 0) features &= ~VTNET_LRO_FEATURES; } if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) { if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) features &= ~VIRTIO_NET_F_CTRL_VLAN; } error = virtio_reinit(dev, features); if (error) device_printf(dev, "virtio reinit error %d\n", error); return (error); } static void vtnet_init_rx_filters(struct vtnet_softc *sc) { struct ifnet *ifp; ifp = sc->vtnet_ifp; if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { /* Restore promiscuous and all-multicast modes. */ vtnet_rx_filter(sc); /* Restore filtered MAC addresses. */ vtnet_rx_filter_mac(sc); } if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) vtnet_rx_filter_vlan(sc); } static int vtnet_init_rx_queues(struct vtnet_softc *sc) { device_t dev; struct vtnet_rxq *rxq; int i, clsize, error; dev = sc->vtnet_dev; /* * Use the new cluster size if one has been set (via a MTU * change). Otherwise, use the standard 2K clusters. * * BMV: It might make sense to use page sized clusters as * the default (depending on the features negotiated). */ if (sc->vtnet_rx_new_clsize != 0) { clsize = sc->vtnet_rx_new_clsize; sc->vtnet_rx_new_clsize = 0; } else clsize = MCLBYTES; sc->vtnet_rx_clsize = clsize; sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, ("%s: too many rx mbufs %d for %d segments", __func__, sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; /* Hold the lock to satisfy asserts. */ VTNET_RXQ_LOCK(rxq); error = vtnet_rxq_populate(rxq); VTNET_RXQ_UNLOCK(rxq); if (error) { device_printf(dev, "cannot allocate mbufs for Rx queue %d\n", i); return (error); } } return (0); } static int vtnet_init_tx_queues(struct vtnet_softc *sc) { struct vtnet_txq *txq; int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; txq->vtntx_watchdog = 0; } return (0); } static int vtnet_init_rxtx_queues(struct vtnet_softc *sc) { int error; error = vtnet_init_rx_queues(sc); if (error) return (error); error = vtnet_init_tx_queues(sc); if (error) return (error); return (0); } static void vtnet_set_active_vq_pairs(struct vtnet_softc *sc) { device_t dev; int npairs; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { MPASS(sc->vtnet_max_vq_pairs == 1); sc->vtnet_act_vq_pairs = 1; return; } /* BMV: Just use the maximum configured for now. */ npairs = sc->vtnet_max_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, "cannot set active queue pairs to %d\n", npairs); npairs = 1; } sc->vtnet_act_vq_pairs = npairs; } static int vtnet_reinit(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; int error; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; /* Use the current MAC address. */ bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); vtnet_set_hwaddr(sc); vtnet_set_active_vq_pairs(sc); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */ if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) vtnet_init_rx_filters(sc); error = vtnet_init_rxtx_queues(sc); if (error) return (error); vtnet_enable_interrupts(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; return (0); } static void vtnet_init_locked(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; vtnet_stop(sc); /* Reinitialize with the host. */ if (vtnet_virtio_reinit(sc) != 0) goto fail; if (vtnet_reinit(sc) != 0) goto fail; virtio_reinit_complete(dev); vtnet_update_link_status(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); return; fail: vtnet_stop(sc); } static void vtnet_init(void *xsc) { struct vtnet_softc *sc; sc = xsc; VTNET_CORE_LOCK(sc); vtnet_init_locked(sc); VTNET_CORE_UNLOCK(sc); } static void vtnet_free_ctrl_vq(struct vtnet_softc *sc) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; /* * The control virtqueue is only polled and therefore it should * already be empty. */ KASSERT(virtqueue_empty(vq), ("%s: ctrl vq %p not empty", __func__, vq)); } static void vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, struct sglist *sg, int readable, int writable) { struct virtqueue *vq; vq = sc->vtnet_ctrl_vq; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, ("%s: CTRL_VQ feature not negotiated", __func__)); if (!virtqueue_empty(vq)) return; if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) return; /* * Poll for the response, but the command is likely already * done when we return from the notify. */ virtqueue_notify(vq); virtqueue_poll(vq, NULL); } static int vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { struct virtio_net_ctrl_hdr hdr; struct sglist_seg segs[3]; struct sglist sg; uint8_t ack; int error; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding set MAC msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); return (ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; struct virtio_net_ctrl_mq mq; uint8_t pad2; uint8_t ack; } s; int error; s.hdr.class = VIRTIO_NET_CTRL_MQ; s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; s.mq.virtqueue_pairs = npairs; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding MQ message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint8_t onoff; uint8_t pad2; uint8_t ack; } s; int error; KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); s.hdr.class = VIRTIO_NET_CTRL_RX; s.hdr.cmd = cmd; s.onoff = !!on; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding Rx message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_set_promisc(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); } static int vtnet_set_allmulti(struct vtnet_softc *sc, int on) { return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } /* * The device defaults to promiscuous mode for backwards compatibility. * Turn it off at attach time if possible. */ static void vtnet_attach_disable_promisc(struct vtnet_softc *sc) { struct ifnet *ifp; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK(sc); if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { ifp->if_flags |= IFF_PROMISC; } else if (vtnet_set_promisc(sc, 0) != 0) { ifp->if_flags |= IFF_PROMISC; device_printf(sc->vtnet_dev, "cannot disable default promiscuous mode\n"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_rx_filter(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) device_printf(dev, "cannot %s promiscuous mode\n", ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) device_printf(dev, "cannot %s all-multicast mode\n", ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); } static void vtnet_rx_filter_mac(struct vtnet_softc *sc) { struct virtio_net_ctrl_hdr hdr; struct vtnet_mac_filter *filter; struct sglist_seg segs[4]; struct sglist sg; struct ifnet *ifp; struct ifaddr *ifa; struct ifmultiaddr *ifma; int ucnt, mcnt, promisc, allmulti, error; uint8_t ack; ifp = sc->vtnet_ifp; filter = sc->vtnet_mac_filter; ucnt = 0; mcnt = 0; promisc = 0; allmulti = 0; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, ("%s: CTRL_RX feature not negotiated", __func__)); /* Unicast MAC addresses: */ if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) continue; else if (ucnt == VTNET_MAX_MAC_ENTRIES) { promisc = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); ucnt++; } if_addr_runlock(ifp); if (promisc != 0) { filter->vmf_unicast.nentries = 0; if_printf(ifp, "more than %d MAC addresses assigned, " "falling back to promiscuous mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_unicast.nentries = ucnt; /* Multicast MAC addresses: */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; else if (mcnt == VTNET_MAX_MAC_ENTRIES) { allmulti = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); if (allmulti != 0) { filter->vmf_multicast.nentries = 0; if_printf(ifp, "more than %d multicast MAC addresses " "assigned, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_multicast.nentries = mcnt; if (promisc != 0 && allmulti != 0) goto out; hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; ack = VIRTIO_NET_ERR; sglist_init(&sg, 4, segs); error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &filter->vmf_multicast, sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 4, ("%s: error %d adding MAC filter msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); if (ack != VIRTIO_NET_OK) if_printf(ifp, "error setting host MAC filter table\n"); out: if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) if_printf(ifp, "cannot enable promiscuous mode\n"); if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) if_printf(ifp, "cannot enable all-multicast mode\n"); } static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct sglist_seg segs[3]; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr; uint8_t pad1; uint16_t tag; uint8_t pad2; uint8_t ack; } s; int error; s.hdr.class = VIRTIO_NET_CTRL_VLAN; s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; s.tag = tag; s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); error = 0; error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, ("%s: error %d adding VLAN message to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { uint32_t w; uint16_t tag; int i, bit; VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, ("%s: VLAN_FILTER feature not negotiated", __func__)); /* Enable the filter for each configured VLAN. */ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { w = sc->vtnet_vlan_filter[i]; while ((bit = ffs(w) - 1) != -1) { w &= ~(1 << bit); tag = sizeof(w) * CHAR_BIT * i + bit; if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { device_printf(sc->vtnet_dev, "cannot enable VLAN %d filter\n", tag); } } } } static void vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; ifp = sc->vtnet_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VTNET_CORE_LOCK(sc); if (add) sc->vtnet_vlan_filter[idx] |= (1 << bit); else sc->vtnet_vlan_filter[idx] &= ~(1 << bit); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && vtnet_exec_vlan_filter(sc, add, tag) != 0) { device_printf(sc->vtnet_dev, "cannot %s VLAN %d %s the host filter table\n", add ? "add" : "remove", tag, add ? "to" : "from"); } VTNET_CORE_UNLOCK(sc); } static void vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc != arg) return; vtnet_update_vlan_filter(arg, 1, tag); } static void vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc != arg) return; vtnet_update_vlan_filter(arg, 0, tag); } static int vtnet_is_link_up(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; uint16_t status; dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) status = VIRTIO_NET_S_LINK_UP; else status = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, status)); return ((status & VIRTIO_NET_S_LINK_UP) != 0); } static void vtnet_update_link_status(struct vtnet_softc *sc) { struct ifnet *ifp; int link; ifp = sc->vtnet_ifp; VTNET_CORE_LOCK_ASSERT(sc); link = vtnet_is_link_up(sc); /* Notify if the link status has changed. */ if (link != 0 && sc->vtnet_link_active == 0) { sc->vtnet_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vtnet_link_active != 0) { sc->vtnet_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static int vtnet_ifmedia_upd(struct ifnet *ifp) { struct vtnet_softc *sc; struct ifmedia *ifm; sc = ifp->if_softc; ifm = &sc->vtnet_media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vtnet_softc *sc; sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= VTNET_MEDIATYPE; } else ifmr->ifm_active |= IFM_NONE; VTNET_CORE_UNLOCK(sc); } static void vtnet_set_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) device_printf(dev, "unable to set MAC address\n"); } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { for (i = 0; i < ETHER_ADDR_LEN; i++) { virtio_write_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i, sc->vtnet_hwaddr[i]); } } } static void vtnet_get_hwaddr(struct vtnet_softc *sc) { device_t dev; int i; dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { /* * Generate a random locally administered unicast address. * * It would be nice to generate the same MAC address across * reboots, but it seems all the hosts currently available * support the MAC feature, so this isn't too important. */ sc->vtnet_hwaddr[0] = 0xB2; arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); vtnet_set_hwaddr(sc); return; } for (i = 0; i < ETHER_ADDR_LEN; i++) { sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i); } } static void vtnet_vlan_tag_remove(struct mbuf *m) { struct ether_vlan_header *evh; evh = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); m->m_flags |= M_VLANTAG; /* Strip the 802.1Q header. */ bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); } static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_rxq *rxq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_rxq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Receive Queue"); list = SYSCTL_CHILDREN(node); stats = &rxq->vtnrx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, &stats->vrxs_ipackets, "Receive packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, &stats->vrxs_ibytes, "Receive bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, &stats->vrxs_iqdrops, "Receive drops"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, &stats->vrxs_ierrors, "Receive errors"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vrxs_csum, "Receive checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, &stats->vrxs_csum_failed, "Receive checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vrxs_rescheduled, "Receive interrupt handler rescheduled"); } static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_txq *txq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct vtnet_txq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Transmit Queue"); list = SYSCTL_CHILDREN(node); stats = &txq->vtntx_stats; SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, &stats->vtxs_opackets, "Transmit packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, &stats->vtxs_obytes, "Transmit bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, &stats->vtxs_omcasts, "Transmit multicasts"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, &stats->vtxs_tso, "Transmit segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vtxs_rescheduled, "Transmit interrupt handler rescheduled"); } static void vtnet_setup_queue_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; int i; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); } } static void vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_softc *sc) { struct vtnet_statistics *stats; stats = &sc->vtnet_stats; SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", CTLFLAG_RD, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", CTLFLAG_RD, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", CTLFLAG_RD, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", CTLFLAG_RD, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", CTLFLAG_RD, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", CTLFLAG_RD, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", CTLFLAG_RD, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", CTLFLAG_RD, &stats->rx_csum_bad_proto, "Received checksum offloaded buffer with incorrect protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", CTLFLAG_RD, &stats->rx_csum_failed, "Received buffer checksum offload failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", CTLFLAG_RD, &stats->rx_csum_offloaded, "Received buffer checksum offload succeeded"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", CTLFLAG_RD, &stats->tx_csum_bad_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", CTLFLAG_RD, &stats->tx_tso_bad_ethtype, "Aborted transmit of TSO buffer with unknown Ethernet type"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", CTLFLAG_RD, &stats->tx_tso_not_tcp, "Aborted transmit of TSO buffer with non TCP protocol"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", CTLFLAG_RD, &stats->tx_defragged, "Transmit mbufs defragged"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", CTLFLAG_RD, &stats->tx_defrag_failed, "Aborted transmit of buffer because defrag failed"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", CTLFLAG_RD, &stats->tx_csum_offloaded, "Offloaded checksum of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", CTLFLAG_RD, &stats->tx_tso_offloaded, "Segmentation offload of transmitted buffer"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", CTLFLAG_RD, &stats->tx_task_rescheduled, "Times the transmit interrupt task rescheduled itself"); } static void vtnet_setup_sysctl(struct vtnet_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vtnet_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, "Maximum number of supported virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active virtqueue pairs"); vtnet_setup_stat_sysctl(ctx, child, sc); } static int vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { return (virtqueue_enable_intr(rxq->vtnrx_vq)); } static void vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) { virtqueue_disable_intr(rxq->vtnrx_vq); } static int vtnet_txq_enable_intr(struct vtnet_txq *txq) { return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG)); } static void vtnet_txq_disable_intr(struct vtnet_txq *txq) { virtqueue_disable_intr(txq->vtntx_vq); } static void vtnet_enable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_enable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); } static void vtnet_enable_interrupts(struct vtnet_softc *sc) { vtnet_enable_rx_interrupts(sc); vtnet_enable_tx_interrupts(sc); } static void vtnet_disable_rx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); } static void vtnet_disable_tx_interrupts(struct vtnet_softc *sc) { int i; for (i = 0; i < sc->vtnet_act_vq_pairs; i++) vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); } static void vtnet_disable_interrupts(struct vtnet_softc *sc) { vtnet_disable_rx_interrupts(sc); vtnet_disable_tx_interrupts(sc); } static int vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } Index: head/sys/dev/vmware/vmxnet3/if_vmx.c =================================================================== --- head/sys/dev/vmware/vmxnet3/if_vmx.c (revision 263101) +++ head/sys/dev/vmware/vmxnet3/if_vmx.c (revision 263102) @@ -1,3350 +1,3346 @@ /*- * Copyright (c) 2013 Tsubai Masanari * Copyright (c) 2013 Bryan Venteicher * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $ */ /* Driver for VMware vmxnet3 virtual ethernet devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "if_vmxreg.h" #include "if_vmxvar.h" #include "opt_inet.h" #include "opt_inet6.h" /* Always enable for now - useful for queue hangs. */ #define VMXNET3_DEBUG_SYSCTL #ifdef VMXNET3_FAILPOINTS #include static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0, "vmxnet3 fail points"); #define VMXNET3_FP _debug_fail_point_vmxnet3 #endif static int vmxnet3_probe(device_t); static int vmxnet3_attach(device_t); static int vmxnet3_detach(device_t); static int vmxnet3_shutdown(device_t); static int vmxnet3_alloc_resources(struct vmxnet3_softc *); static void vmxnet3_free_resources(struct vmxnet3_softc *); static int vmxnet3_check_version(struct vmxnet3_softc *); static void vmxnet3_initial_config(struct vmxnet3_softc *); static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int, struct vmxnet3_interrupt *); static int vmxnet3_alloc_intr_resources(struct vmxnet3_softc *); static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *); static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *); static int vmxnet3_setup_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *); static void vmxnet3_free_interrupt(struct vmxnet3_softc *, struct vmxnet3_interrupt *); static void vmxnet3_free_interrupts(struct vmxnet3_softc *); static int vmxnet3_init_rxq(struct vmxnet3_softc *, int); static int vmxnet3_init_txq(struct vmxnet3_softc *, int); static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *); static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *); static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *); static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *); static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *); static void vmxnet3_free_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *); static void vmxnet3_free_txq_data(struct vmxnet3_softc *); static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *); static void vmxnet3_free_rxq_data(struct vmxnet3_softc *); static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *); static void vmxnet3_free_queue_data(struct vmxnet3_softc *); static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *); static void vmxnet3_init_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_interface(struct vmxnet3_softc *); static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_data(struct vmxnet3_softc *); static void vmxnet3_free_data(struct vmxnet3_softc *); static int vmxnet3_setup_interface(struct vmxnet3_softc *); static void vmxnet3_evintr(struct vmxnet3_softc *); static void vmxnet3_txq_eof(struct vmxnet3_txqueue *); static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *); static int vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *); static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *, struct vmxnet3_rxring *, int); static void vmxnet3_rxq_eof(struct vmxnet3_rxqueue *); static void vmxnet3_legacy_intr(void *); static void vmxnet3_txq_intr(void *); static void vmxnet3_rxq_intr(void *); static void vmxnet3_event_intr(void *); static void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static void vmxnet3_stop(struct vmxnet3_softc *); static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static int vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static int vmxnet3_reinit_queues(struct vmxnet3_softc *); static int vmxnet3_enable_device(struct vmxnet3_softc *); static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *); static int vmxnet3_reinit(struct vmxnet3_softc *); static void vmxnet3_init_locked(struct vmxnet3_softc *); static void vmxnet3_init(void *); static int vmxnet3_txq_offload_ctx(struct mbuf *, int *, int *, int *); static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, bus_dmamap_t, bus_dma_segment_t [], int *); static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t); static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **); static void vmxnet3_start_locked(struct ifnet *); static void vmxnet3_start(struct ifnet *); static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int, uint16_t); static void vmxnet3_register_vlan(void *, struct ifnet *, uint16_t); static void vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t); static void vmxnet3_set_rxfilter(struct vmxnet3_softc *); static int vmxnet3_change_mtu(struct vmxnet3_softc *, int); static int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t); static int vmxnet3_watchdog(struct vmxnet3_txqueue *); static void vmxnet3_tick(void *); static void vmxnet3_link_status(struct vmxnet3_softc *); static void vmxnet3_media_status(struct ifnet *, struct ifmediareq *); static int vmxnet3_media_change(struct ifnet *); static void vmxnet3_set_lladdr(struct vmxnet3_softc *); static void vmxnet3_get_lladdr(struct vmxnet3_softc *); static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_sysctl(struct vmxnet3_softc *); static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t, uint32_t); static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t); static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t, uint32_t); static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t); static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t); static void vmxnet3_enable_intr(struct vmxnet3_softc *, int); static void vmxnet3_disable_intr(struct vmxnet3_softc *, int); static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *); static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *); static int vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t, bus_size_t, struct vmxnet3_dma_alloc *); static void vmxnet3_dma_free(struct vmxnet3_softc *, struct vmxnet3_dma_alloc *); static int vmxnet3_tunable_int(struct vmxnet3_softc *, const char *, int); typedef enum { VMXNET3_BARRIER_RD, VMXNET3_BARRIER_WR, VMXNET3_BARRIER_RDWR, } vmxnet3_barrier_t; static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t); /* Tunables. */ static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC; TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc); static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC; TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc); static device_method_t vmxnet3_methods[] = { /* Device interface. */ DEVMETHOD(device_probe, vmxnet3_probe), DEVMETHOD(device_attach, vmxnet3_attach), DEVMETHOD(device_detach, vmxnet3_detach), DEVMETHOD(device_shutdown, vmxnet3_shutdown), DEVMETHOD_END }; static driver_t vmxnet3_driver = { "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc) }; static devclass_t vmxnet3_devclass; DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0); MODULE_DEPEND(vmx, pci, 1, 1, 1); MODULE_DEPEND(vmx, ether, 1, 1, 1); #define VMXNET3_VMWARE_VENDOR_ID 0x15AD #define VMXNET3_VMWARE_DEVICE_ID 0x07B0 static int vmxnet3_probe(device_t dev) { if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID && pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) { device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int vmxnet3_attach(device_t dev) { struct vmxnet3_softc *sc; int error; sc = device_get_softc(dev); sc->vmx_dev = dev; pci_enable_busmaster(dev); VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev)); callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0); vmxnet3_initial_config(sc); error = vmxnet3_alloc_resources(sc); if (error) goto fail; error = vmxnet3_check_version(sc); if (error) goto fail; error = vmxnet3_alloc_rxtx_queues(sc); if (error) goto fail; error = vmxnet3_alloc_interrupts(sc); if (error) goto fail; error = vmxnet3_alloc_data(sc); if (error) goto fail; error = vmxnet3_setup_interface(sc); if (error) goto fail; error = vmxnet3_setup_interrupts(sc); if (error) { ether_ifdetach(sc->vmx_ifp); device_printf(dev, "could not set up interrupt\n"); goto fail; } vmxnet3_setup_sysctl(sc); vmxnet3_link_status(sc); fail: if (error) vmxnet3_detach(dev); return (error); } static int vmxnet3_detach(device_t dev) { struct vmxnet3_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vmx_ifp; if (device_is_attached(dev)) { ether_ifdetach(ifp); VMXNET3_CORE_LOCK(sc); vmxnet3_stop(sc); VMXNET3_CORE_UNLOCK(sc); callout_drain(&sc->vmx_tick); } if (sc->vmx_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach); sc->vmx_vlan_attach = NULL; } if (sc->vmx_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach); sc->vmx_vlan_detach = NULL; } vmxnet3_free_interrupts(sc); if (ifp != NULL) { if_free(ifp); sc->vmx_ifp = NULL; } ifmedia_removeall(&sc->vmx_media); vmxnet3_free_data(sc); vmxnet3_free_resources(sc); vmxnet3_free_rxtx_queues(sc); VMXNET3_CORE_LOCK_DESTROY(sc); return (0); } static int vmxnet3_shutdown(device_t dev) { return (0); } static int vmxnet3_alloc_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; rid = PCIR_BAR(0); sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res0 == NULL) { device_printf(dev, "could not map BAR0 memory\n"); return (ENXIO); } sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0); sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0); rid = PCIR_BAR(1); sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res1 == NULL) { device_printf(dev, "could not map BAR1 memory\n"); return (ENXIO); } sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1); sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1); if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) { rid = PCIR_BAR(2); sc->vmx_msix_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); } if (sc->vmx_msix_res == NULL) sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX; return (0); } static void vmxnet3_free_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; if (sc->vmx_res0 != NULL) { rid = PCIR_BAR(0); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0); sc->vmx_res0 = NULL; } if (sc->vmx_res1 != NULL) { rid = PCIR_BAR(1); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1); sc->vmx_res1 = NULL; } if (sc->vmx_msix_res != NULL) { rid = PCIR_BAR(2); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_msix_res); sc->vmx_msix_res = NULL; } } static int vmxnet3_check_version(struct vmxnet3_softc *sc) { device_t dev; uint32_t version; dev = sc->vmx_dev; version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported hardware version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1); version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported UPT version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1); return (0); } static void vmxnet3_initial_config(struct vmxnet3_softc *sc) { int ndesc; /* * BMV Much of the work is already done, but this driver does * not support multiqueue yet. */ sc->vmx_ntxqueues = VMXNET3_TX_QUEUES; sc->vmx_nrxqueues = VMXNET3_RX_QUEUES; ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc); if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC) ndesc = VMXNET3_DEF_TX_NDESC; if (ndesc & VMXNET3_MASK_TX_NDESC) ndesc &= ~VMXNET3_MASK_TX_NDESC; sc->vmx_ntxdescs = ndesc; ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc); if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC) ndesc = VMXNET3_DEF_RX_NDESC; if (ndesc & VMXNET3_MASK_RX_NDESC) ndesc &= ~VMXNET3_MASK_RX_NDESC; sc->vmx_nrxdescs = ndesc; sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS; } static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc) { device_t dev; int nmsix, cnt, required; dev = sc->vmx_dev; if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) return (1); /* Allocate an additional vector for the events interrupt. */ required = sc->vmx_nrxqueues + sc->vmx_ntxqueues + 1; nmsix = pci_msix_count(dev); if (nmsix < required) return (1); cnt = required; if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { sc->vmx_nintrs = required; return (0); } else pci_release_msi(dev); return (1); } static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc) { device_t dev; int nmsi, cnt, required; dev = sc->vmx_dev; required = 1; nmsi = pci_msi_count(dev); if (nmsi < required) return (1); cnt = required; if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) { sc->vmx_nintrs = 1; return (0); } else pci_release_msi(dev); return (1); } static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc) { sc->vmx_nintrs = 1; return (0); } static int vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags, struct vmxnet3_interrupt *intr) { struct resource *irq; irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags); if (irq == NULL) return (ENXIO); intr->vmxi_irq = irq; intr->vmxi_rid = rid; return (0); } static int vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc) { int i, rid, flags, error; rid = 0; flags = RF_ACTIVE; if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) flags |= RF_SHAREABLE; else rid = 1; for (i = 0; i < sc->vmx_nintrs; i++, rid++) { error = vmxnet3_alloc_interrupt(sc, rid, flags, &sc->vmx_intrs[i]); if (error) return (error); } return (0); } /* * NOTE: We only support the simple case of each Rx and Tx queue on its * own MSIX vector. This is good enough until we support mulitqueue. */ static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_rxqueue *rxq; struct vmxnet3_interrupt *intr; enum intr_type type; int i, error; dev = sc->vmx_dev; intr = &sc->vmx_intrs[0]; type = INTR_TYPE_NET | INTR_MPSAFE; for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) { txq = &sc->vmx_txq[i]; error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_txq_intr, txq, &intr->vmxi_handler); if (error) return (error); txq->vxtxq_intr_idx = intr->vmxi_rid - 1; } for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) { rxq = &sc->vmx_rxq[i]; error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_rxq_intr, rxq, &intr->vmxi_handler); if (error) return (error); rxq->vxrxq_intr_idx = intr->vmxi_rid - 1; } error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_event_intr, sc, &intr->vmxi_handler); if (error) return (error); sc->vmx_event_intr_idx = intr->vmxi_rid - 1; return (0); } static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc) { struct vmxnet3_interrupt *intr; int i, error; intr = &sc->vmx_intrs[0]; error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc, &intr->vmxi_handler); for (i = 0; i < sc->vmx_ntxqueues; i++) sc->vmx_txq[i].vxtxq_intr_idx = 0; for (i = 0; i < sc->vmx_nrxqueues; i++) sc->vmx_rxq[i].vxrxq_intr_idx = 0; sc->vmx_event_intr_idx = 0; return (error); } /* * XXX BMV Should probably reorganize the attach and just do * this in vmxnet3_init_shared_data(). */ static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc) { struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; sc->vmx_ds->evintr = sc->vmx_event_intr_idx; for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->intr_idx = txq->vxtxq_intr_idx; } for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->intr_idx = rxq->vxrxq_intr_idx; } } static int vmxnet3_setup_interrupts(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_intr_resources(sc); if (error) return (error); switch (sc->vmx_intr_type) { case VMXNET3_IT_MSIX: error = vmxnet3_setup_msix_interrupts(sc); break; case VMXNET3_IT_MSI: case VMXNET3_IT_LEGACY: error = vmxnet3_setup_legacy_interrupt(sc); break; default: panic("%s: invalid interrupt type %d", __func__, sc->vmx_intr_type); } if (error == 0) vmxnet3_set_interrupt_idx(sc); return (error); } static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc) { device_t dev; uint32_t config; int error; dev = sc->vmx_dev; config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG); sc->vmx_intr_type = config & 0x03; sc->vmx_intr_mask_mode = (config >> 2) & 0x03; switch (sc->vmx_intr_type) { case VMXNET3_IT_AUTO: sc->vmx_intr_type = VMXNET3_IT_MSIX; /* FALLTHROUGH */ case VMXNET3_IT_MSIX: error = vmxnet3_alloc_msix_interrupts(sc); if (error == 0) break; sc->vmx_intr_type = VMXNET3_IT_MSI; /* FALLTHROUGH */ case VMXNET3_IT_MSI: error = vmxnet3_alloc_msi_interrupts(sc); if (error == 0) break; sc->vmx_intr_type = VMXNET3_IT_LEGACY; /* FALLTHROUGH */ case VMXNET3_IT_LEGACY: error = vmxnet3_alloc_legacy_interrupts(sc); if (error == 0) break; /* FALLTHROUGH */ default: sc->vmx_intr_type = -1; device_printf(dev, "cannot allocate any interrupt resources\n"); return (ENXIO); } return (error); } static void vmxnet3_free_interrupt(struct vmxnet3_softc *sc, struct vmxnet3_interrupt *intr) { device_t dev; dev = sc->vmx_dev; if (intr->vmxi_handler != NULL) { bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler); intr->vmxi_handler = NULL; } if (intr->vmxi_irq != NULL) { bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid, intr->vmxi_irq); intr->vmxi_irq = NULL; intr->vmxi_rid = -1; } } static void vmxnet3_free_interrupts(struct vmxnet3_softc *sc) { int i; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]); if (sc->vmx_intr_type == VMXNET3_IT_MSI || sc->vmx_intr_type == VMXNET3_IT_MSIX) pci_release_msi(sc->vmx_dev); } static int vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; int i; rxq = &sc->vmx_rxq[q]; snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d", device_get_nameunit(sc->vmx_dev), q); mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF); rxq->vxrxq_sc = sc; rxq->vxrxq_id = q; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_rid = i; rxr->vxrxr_ndesc = sc->vmx_nrxdescs; rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->vxrxr_rxbuf == NULL) return (ENOMEM); rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs; } return (0); } static int vmxnet3_init_txq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d", device_get_nameunit(sc->vmx_dev), q); mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF); txq->vxtxq_sc = sc; txq->vxtxq_id = q; txr->vxtxr_ndesc = sc->vmx_ntxdescs; txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc * sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO); if (txr->vxtxr_txbuf == NULL) return (ENOMEM); txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs; return (0); } static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc) { int i, error; sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) * sc->vmx_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) * sc->vmx_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL) return (ENOMEM); for (i = 0; i < sc->vmx_nrxqueues; i++) { error = vmxnet3_init_rxq(sc, i); if (error) return (error); } for (i = 0; i < sc->vmx_ntxqueues; i++) { error = vmxnet3_init_txq(sc, i); if (error) return (error); } return (0); } static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; int i; rxq->vxrxq_sc = NULL; rxq->vxrxq_id = -1; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_rxbuf != NULL) { free(rxr->vxrxr_rxbuf, M_DEVBUF); rxr->vxrxr_rxbuf = NULL; } } if (mtx_initialized(&rxq->vxrxq_mtx) != 0) mtx_destroy(&rxq->vxrxq_mtx); } static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; txr = &txq->vxtxq_cmd_ring; txq->vxtxq_sc = NULL; txq->vxtxq_id = -1; if (txr->vxtxr_txbuf != NULL) { free(txr->vxtxr_txbuf, M_DEVBUF); txr->vxtxr_txbuf = NULL; } if (mtx_initialized(&txq->vxtxq_mtx) != 0) mtx_destroy(&txq->vxtxq_mtx); } static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc) { int i; if (sc->vmx_rxq != NULL) { for (i = 0; i < sc->vmx_nrxqueues; i++) vmxnet3_destroy_rxq(&sc->vmx_rxq[i]); free(sc->vmx_rxq, M_DEVBUF); sc->vmx_rxq = NULL; } if (sc->vmx_txq != NULL) { for (i = 0; i < sc->vmx_ntxqueues; i++) vmxnet3_destroy_txq(&sc->vmx_txq[i]); free(sc->vmx_txq, M_DEVBUF); sc->vmx_txq = NULL; } } static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc) { device_t dev; uint8_t *kva; size_t size; int i, error; dev = sc->vmx_dev; size = sizeof(struct vmxnet3_driver_shared); error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma); if (error) { device_printf(dev, "cannot alloc shared memory\n"); return (error); } sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr; size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) + sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared); error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma); if (error) { device_printf(dev, "cannot alloc queue shared memory\n"); return (error); } sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr; kva = sc->vmx_qs; for (i = 0; i < sc->vmx_ntxqueues; i++) { sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva; kva += sizeof(struct vmxnet3_txq_shared); } for (i = 0; i < sc->vmx_nrxqueues; i++) { sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva; kva += sizeof(struct vmxnet3_rxq_shared); } return (0); } static void vmxnet3_free_shared_data(struct vmxnet3_softc *sc) { if (sc->vmx_qs != NULL) { vmxnet3_dma_free(sc, &sc->vmx_qs_dma); sc->vmx_qs = NULL; } if (sc->vmx_ds != NULL) { vmxnet3_dma_free(sc, &sc->vmx_ds_dma); sc->vmx_ds = NULL; } } static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; size_t descsz, compsz; int i, q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) { txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc); compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc); error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ VMXNET3_TSO_MAXSIZE, /* maxsize */ VMXNET3_TX_MAXSEGS, /* nsegments */ VMXNET3_TX_MAXSEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &txr->vxtxr_txtag); if (error) { device_printf(dev, "unable to create Tx buffer tag for queue %d\n", q); return (error); } error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma); if (error) { device_printf(dev, "cannot alloc Tx descriptors for " "queue %d error %d\n", q, error); return (error); } txr->vxtxr_txd = (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr; error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma); if (error) { device_printf(dev, "cannot alloc Tx comp descriptors " "for queue %d error %d\n", q, error); return (error); } txc->vxcr_u.txcd = (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr; for (i = 0; i < txr->vxtxr_ndesc; i++) { error = bus_dmamap_create(txr->vxtxr_txtag, 0, &txr->vxtxr_txbuf[i].vtxb_dmamap); if (error) { device_printf(dev, "unable to create Tx buf " "dmamap for queue %d idx %d\n", q, i); return (error); } } } return (0); } static void vmxnet3_free_txq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; struct vmxnet3_txbuf *txb; int i, q; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) { txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; for (i = 0; i < txr->vxtxr_ndesc; i++) { txb = &txr->vxtxr_txbuf[i]; if (txb->vtxb_dmamap != NULL) { bus_dmamap_destroy(txr->vxtxr_txtag, txb->vtxb_dmamap); txb->vtxb_dmamap = NULL; } } if (txc->vxcr_u.txcd != NULL) { vmxnet3_dma_free(sc, &txc->vxcr_dma); txc->vxcr_u.txcd = NULL; } if (txr->vxtxr_txd != NULL) { vmxnet3_dma_free(sc, &txr->vxtxr_dma); txr->vxtxr_txd = NULL; } if (txr->vxtxr_txtag != NULL) { bus_dma_tag_destroy(txr->vxtxr_txtag); txr->vxtxr_txtag = NULL; } } } static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int descsz, compsz; int i, j, q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_nrxqueues; q++) { rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; compsz = 0; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; descsz = rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc); compsz += rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxcompdesc); error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUMPAGESIZE, /* maxsize */ 1, /* nsegments */ MJUMPAGESIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &rxr->vxrxr_rxtag); if (error) { device_printf(dev, "unable to create Rx buffer tag for " "queue %d\n", q); return (error); } error = vmxnet3_dma_malloc(sc, descsz, 512, &rxr->vxrxr_dma); if (error) { device_printf(dev, "cannot allocate Rx " "descriptors for queue %d/%d error %d\n", i, q, error); return (error); } rxr->vxrxr_rxd = (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr; } error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma); if (error) { device_printf(dev, "cannot alloc Rx comp descriptors " "for queue %d error %d\n", q, error); return (error); } rxc->vxcr_u.rxcd = (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; error = bus_dmamap_create(rxr->vxrxr_rxtag, 0, &rxr->vxrxr_spare_dmap); if (error) { device_printf(dev, "unable to create spare " "dmamap for queue %d/%d error %d\n", q, i, error); return (error); } for (j = 0; j < rxr->vxrxr_ndesc; j++) { error = bus_dmamap_create(rxr->vxrxr_rxtag, 0, &rxr->vxrxr_rxbuf[j].vrxb_dmamap); if (error) { device_printf(dev, "unable to create " "dmamap for queue %d/%d slot %d " "error %d\n", q, i, j, error); return (error); } } } } return (0); } static void vmxnet3_free_rxq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxbuf *rxb; int i, j, q; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_nrxqueues; q++) { rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_spare_dmap != NULL) { bus_dmamap_destroy(rxr->vxrxr_rxtag, rxr->vxrxr_spare_dmap); rxr->vxrxr_spare_dmap = NULL; } for (j = 0; j < rxr->vxrxr_ndesc; j++) { rxb = &rxr->vxrxr_rxbuf[j]; if (rxb->vrxb_dmamap != NULL) { bus_dmamap_destroy(rxr->vxrxr_rxtag, rxb->vrxb_dmamap); rxb->vrxb_dmamap = NULL; } } } if (rxc->vxcr_u.rxcd != NULL) { vmxnet3_dma_free(sc, &rxc->vxcr_dma); rxc->vxcr_u.rxcd = NULL; } for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_rxd != NULL) { vmxnet3_dma_free(sc, &rxr->vxrxr_dma); rxr->vxrxr_rxd = NULL; } if (rxr->vxrxr_rxtag != NULL) { bus_dma_tag_destroy(rxr->vxrxr_rxtag); rxr->vxrxr_rxtag = NULL; } } } } static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_txq_data(sc); if (error) return (error); error = vmxnet3_alloc_rxq_data(sc); if (error) return (error); return (0); } static void vmxnet3_free_queue_data(struct vmxnet3_softc *sc) { if (sc->vmx_rxq != NULL) vmxnet3_free_rxq_data(sc); if (sc->vmx_txq != NULL) vmxnet3_free_txq_data(sc); } static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc) { int error; error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma); if (error) device_printf(sc->vmx_dev, "unable to alloc multicast table\n"); else sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr; return (error); } static void vmxnet3_free_mcast_table(struct vmxnet3_softc *sc) { if (sc->vmx_mcast != NULL) { vmxnet3_dma_free(sc, &sc->vmx_mcast_dma); sc->vmx_mcast = NULL; } } static void vmxnet3_init_shared_data(struct vmxnet3_softc *sc) { struct vmxnet3_driver_shared *ds; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; ds = sc->vmx_ds; /* * Initialize fields of the shared data that remains the same across * reinits. Note the shared data is zero'd when allocated. */ ds->magic = VMXNET3_REV1_MAGIC; /* DriverInfo */ ds->version = VMXNET3_DRIVER_VERSION; ds->guest = VMXNET3_GOS_FREEBSD | #ifdef __LP64__ VMXNET3_GOS_64BIT; #else VMXNET3_GOS_32BIT; #endif ds->vmxnet3_revision = 1; ds->upt_version = 1; /* Misc. conf */ ds->driver_data = vtophys(sc); ds->driver_data_len = sizeof(struct vmxnet3_softc); ds->queue_shared = sc->vmx_qs_dma.dma_paddr; ds->queue_shared_len = sc->vmx_qs_dma.dma_size; ds->nrxsg_max = sc->vmx_max_rxsegs; /* Interrupt control. */ ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; ds->nintr = sc->vmx_nintrs; ds->evintr = sc->vmx_event_intr_idx; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) ds->modlevel[i] = UPT1_IMOD_ADAPTIVE; /* Receive filter. */ ds->mcast_table = sc->vmx_mcast_dma.dma_paddr; ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size; /* Tx queues */ for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr; txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc; txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr; txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc; txs->driver_data = vtophys(txq); txs->driver_data_len = sizeof(struct vmxnet3_txqueue); } /* Rx queues */ for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr; rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc; rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr; rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc; rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr; rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc; rxs->driver_data = vtophys(rxq); rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue); } } static void vmxnet3_reinit_interface(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; /* Use the current MAC address. */ bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN); vmxnet3_set_lladdr(sc); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */ } static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc) { struct ifnet *ifp; struct vmxnet3_driver_shared *ds; ifp = sc->vmx_ifp; ds = sc->vmx_ds; ds->upt_features = 0; if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) ds->upt_features |= UPT1_F_CSUM; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) ds->upt_features |= UPT1_F_VLAN; if (ifp->if_capenable & IFCAP_LRO) ds->upt_features |= UPT1_F_LRO; ds->mtu = ifp->if_mtu; ds->ntxqueue = sc->vmx_ntxqueues; ds->nrxqueue = sc->vmx_nrxqueues; vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr); vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH, (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32); } static int vmxnet3_alloc_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_shared_data(sc); if (error) return (error); error = vmxnet3_alloc_queue_data(sc); if (error) return (error); error = vmxnet3_alloc_mcast_table(sc); if (error) return (error); vmxnet3_init_shared_data(sc); return (0); } static void vmxnet3_free_data(struct vmxnet3_softc *sc) { vmxnet3_free_mcast_table(sc); vmxnet3_free_queue_data(sc); vmxnet3_free_shared_data(sc); } static int vmxnet3_setup_interface(struct vmxnet3_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vmx_dev; ifp = sc->vmx_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure\n"); return (ENOSPC); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); -#if __FreeBSD_version < 1000025 - ifp->if_baudrate = 1000000000; -#else - if_initbaudrate(ifp, IF_Gbps(10)); -#endif + ifp->if_baudrate = IF_Gbps(10); ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vmxnet3_init; ifp->if_ioctl = vmxnet3_ioctl; ifp->if_start = vmxnet3_start; ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1; IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1); IFQ_SET_READY(&ifp->if_snd); vmxnet3_get_lladdr(sc); ether_ifattach(ifp, sc->vmx_lladdr); ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; ifp->if_capenable = ifp->if_capabilities; /* These capabilities are not enabled by default. */ ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config, vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change, vmxnet3_media_status); ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO); return (0); } static void vmxnet3_evintr(struct vmxnet3_softc *sc) { device_t dev; struct ifnet *ifp; struct vmxnet3_txq_shared *ts; struct vmxnet3_rxq_shared *rs; uint32_t event; int reset; dev = sc->vmx_dev; ifp = sc->vmx_ifp; reset = 0; VMXNET3_CORE_LOCK(sc); /* Clear events. */ event = sc->vmx_ds->event; vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event); if (event & VMXNET3_EVENT_LINK) vmxnet3_link_status(sc); if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) { reset = 1; vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS); ts = sc->vmx_txq[0].vxtxq_ts; if (ts->stopped != 0) device_printf(dev, "Tx queue error %#x\n", ts->error); rs = sc->vmx_rxq[0].vxrxq_rs; if (rs->stopped != 0) device_printf(dev, "Rx queue error %#x\n", rs->error); device_printf(dev, "Rx/Tx queue error event ... resetting\n"); } if (event & VMXNET3_EVENT_DIC) device_printf(dev, "device implementation change event\n"); if (event & VMXNET3_EVENT_DEBUG) device_printf(dev, "debug event\n"); if (reset != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } VMXNET3_CORE_UNLOCK(sc); } static void vmxnet3_txq_eof(struct vmxnet3_txqueue *txq) { struct vmxnet3_softc *sc; struct ifnet *ifp; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; struct vmxnet3_txcompdesc *txcd; struct vmxnet3_txbuf *txb; u_int sop; sc = txq->vxtxq_sc; ifp = sc->vmx_ifp; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; VMXNET3_TXQ_LOCK_ASSERT(txq); for (;;) { txcd = &txc->vxcr_u.txcd[txc->vxcr_next]; if (txcd->gen != txc->vxcr_gen) break; vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++txc->vxcr_next == txc->vxcr_ndesc) { txc->vxcr_next = 0; txc->vxcr_gen ^= 1; } sop = txr->vxtxr_next; txb = &txr->vxtxr_txbuf[sop]; if (txb->vtxb_m != NULL) { bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap); m_freem(txb->vtxb_m); txb->vtxb_m = NULL; ifp->if_opackets++; } txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc; } if (txr->vxtxr_head == txr->vxtxr_next) txq->vxtxq_watchdog = 0; } static int vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr) { struct ifnet *ifp; struct mbuf *m; struct vmxnet3_rxdesc *rxd; struct vmxnet3_rxbuf *rxb; bus_dma_tag_t tag; bus_dmamap_t dmap; bus_dma_segment_t segs[1]; int idx, clsize, btype, flags, nsegs, error; ifp = sc->vmx_ifp; tag = rxr->vxrxr_rxtag; dmap = rxr->vxrxr_spare_dmap; idx = rxr->vxrxr_fill; rxd = &rxr->vxrxr_rxd[idx]; rxb = &rxr->vxrxr_rxbuf[idx]; #ifdef VMXNET3_FAILPOINTS KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS); if (rxr->vxrxr_rid != 0) KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS); #endif if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) { flags = M_PKTHDR; clsize = MCLBYTES; btype = VMXNET3_BTYPE_HEAD; } else { #if __FreeBSD_version < 902001 /* * These mbufs will never be used for the start of a frame. * Roughly prior to branching releng/9.2, the load_mbuf_sg() * required the mbuf to always be a packet header. Avoid * unnecessary mbuf initialization in newer versions where * that is not the case. */ flags = M_PKTHDR; #else flags = 0; #endif clsize = MJUMPAGESIZE; btype = VMXNET3_BTYPE_BODY; } m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize); if (m == NULL) { sc->vmx_stats.vmst_mgetcl_failed++; return (ENOBUFS); } if (btype == VMXNET3_BTYPE_HEAD) { m->m_len = m->m_pkthdr.len = clsize; m_adj(m, ETHER_ALIGN); } else m->m_len = clsize; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs, BUS_DMA_NOWAIT); if (error) { m_freem(m); sc->vmx_stats.vmst_mbuf_load_failed++; return (error); } KASSERT(nsegs == 1, ("%s: mbuf %p with too many segments %d", __func__, m, nsegs)); #if __FreeBSD_version < 902001 if (btype == VMXNET3_BTYPE_BODY) m->m_flags &= ~M_PKTHDR; #endif if (rxb->vrxb_m != NULL) { bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(tag, rxb->vrxb_dmamap); } rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap; rxb->vrxb_dmamap = dmap; rxb->vrxb_m = m; rxd->addr = segs[0].ds_addr; rxd->len = segs[0].ds_len; rxd->btype = btype; rxd->gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); return (0); } static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq, struct vmxnet3_rxring *rxr, int idx) { struct vmxnet3_rxdesc *rxd; rxd = &rxr->vxrxr_rxd[idx]; rxd->gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); } static void vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; int idx, eof; sc = rxq->vxrxq_sc; rxc = &rxq->vxrxq_comp_ring; do { rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next]; if (rxcd->gen != rxc->vxcr_gen) break; /* Not expected. */ vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++rxc->vxcr_next == rxc->vxcr_ndesc) { rxc->vxcr_next = 0; rxc->vxcr_gen ^= 1; } idx = rxcd->rxd_idx; eof = rxcd->eop; if (rxcd->qid < sc->vmx_nrxqueues) rxr = &rxq->vxrxq_cmd_ring[0]; else rxr = &rxq->vxrxq_cmd_ring[1]; vmxnet3_rxq_eof_discard(rxq, rxr, idx); } while (!eof); } static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m) { if (rxcd->ipv4) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (rxcd->ipcsum_ok) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (!rxcd->fragment) { if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; } } } static void vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq, struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m) { struct vmxnet3_softc *sc; struct ifnet *ifp; sc = rxq->vxrxq_sc; ifp = sc->vmx_ifp; if (rxcd->error) { ifp->if_ierrors++; m_freem(m); return; } if (!rxcd->no_csum) vmxnet3_rx_csum(rxcd, m); if (rxcd->vlan) { m->m_flags |= M_VLANTAG; m->m_pkthdr.ether_vtag = rxcd->vtag; } ifp->if_ipackets++; VMXNET3_RXQ_UNLOCK(rxq); (*ifp->if_input)(ifp, m); VMXNET3_RXQ_LOCK(rxq); } static void vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_softc *sc; struct ifnet *ifp; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxdesc *rxd; struct vmxnet3_rxcompdesc *rxcd; struct mbuf *m, *m_head, *m_tail; int idx, length; sc = rxq->vxrxq_sc; ifp = sc->vmx_ifp; rxc = &rxq->vxrxq_comp_ring; m_head = m_tail = NULL; VMXNET3_RXQ_LOCK_ASSERT(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; for (;;) { rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next]; if (rxcd->gen != rxc->vxcr_gen) break; vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++rxc->vxcr_next == rxc->vxcr_ndesc) { rxc->vxcr_next = 0; rxc->vxcr_gen ^= 1; } idx = rxcd->rxd_idx; length = rxcd->len; if (rxcd->qid < sc->vmx_nrxqueues) rxr = &rxq->vxrxq_cmd_ring[0]; else rxr = &rxq->vxrxq_cmd_ring[1]; rxd = &rxr->vxrxr_rxd[idx]; m = rxr->vxrxr_rxbuf[idx].vrxb_m; KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf", __func__, rxcd->qid, idx)); /* * The host may skip descriptors. We detect this when this * descriptor does not match the previous fill index. Catch * up with the host now. */ if (__predict_false(rxr->vxrxr_fill != idx)) { while (rxr->vxrxr_fill != idx) { rxr->vxrxr_rxd[rxr->vxrxr_fill].gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); } } if (rxcd->sop) { KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD, ("%s: start of frame w/o head buffer", __func__)); KASSERT(rxr == &rxq->vxrxq_cmd_ring[0], ("%s: start of frame not in ring 0", __func__)); KASSERT((idx % sc->vmx_rx_max_chain) == 0, ("%s: start of frame at unexcepted index %d (%d)", __func__, idx, sc->vmx_rx_max_chain)); KASSERT(m_head == NULL, ("%s: duplicate start of frame?", __func__)); if (length == 0) { /* Just ignore this descriptor. */ vmxnet3_rxq_eof_discard(rxq, rxr, idx); goto nextp; } if (vmxnet3_newbuf(sc, rxr) != 0) { ifp->if_iqdrops++; vmxnet3_rxq_eof_discard(rxq, rxr, idx); if (!rxcd->eop) vmxnet3_rxq_discard_chain(rxq); goto nextp; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = length; m->m_pkthdr.csum_flags = 0; m_head = m_tail = m; } else { KASSERT(rxd->btype == VMXNET3_BTYPE_BODY, ("%s: non start of frame w/o body buffer", __func__)); KASSERT(m_head != NULL, ("%s: frame not started?", __func__)); if (vmxnet3_newbuf(sc, rxr) != 0) { ifp->if_iqdrops++; vmxnet3_rxq_eof_discard(rxq, rxr, idx); if (!rxcd->eop) vmxnet3_rxq_discard_chain(rxq); m_freem(m_head); m_head = m_tail = NULL; goto nextp; } m->m_len = length; m_head->m_pkthdr.len += length; m_tail->m_next = m; m_tail = m; } if (rxcd->eop) { vmxnet3_rxq_input(rxq, rxcd, m_head); m_head = m_tail = NULL; /* Must recheck after dropping the Rx lock. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } nextp: if (__predict_false(rxq->vxrxq_rs->update_rxhead)) { int qid = rxcd->qid; bus_size_t r; idx = (idx + 1) % rxr->vxrxr_ndesc; if (qid >= sc->vmx_nrxqueues) { qid -= sc->vmx_nrxqueues; r = VMXNET3_BAR0_RXH2(qid); } else r = VMXNET3_BAR0_RXH1(qid); vmxnet3_write_bar0(sc, r, idx); } } } static void vmxnet3_legacy_intr(void *xsc) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_txqueue *txq; struct ifnet *ifp; sc = xsc; rxq = &sc->vmx_rxq[0]; txq = &sc->vmx_txq[0]; ifp = sc->vmx_ifp; if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) { if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0) return; } if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_all_intrs(sc); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); VMXNET3_RXQ_LOCK(rxq); vmxnet3_rxq_eof(rxq); VMXNET3_RXQ_UNLOCK(rxq); VMXNET3_TXQ_LOCK(txq); vmxnet3_txq_eof(txq); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vmxnet3_start_locked(ifp); VMXNET3_TXQ_UNLOCK(txq); vmxnet3_enable_all_intrs(sc); } static void vmxnet3_txq_intr(void *xtxq) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct ifnet *ifp; txq = xtxq; sc = txq->vxtxq_sc; ifp = sc->vmx_ifp; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx); VMXNET3_TXQ_LOCK(txq); vmxnet3_txq_eof(txq); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vmxnet3_start_locked(ifp); VMXNET3_TXQ_UNLOCK(txq); vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx); } static void vmxnet3_rxq_intr(void *xrxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; rxq = xrxq; sc = rxq->vxrxq_sc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx); VMXNET3_RXQ_LOCK(rxq); vmxnet3_rxq_eof(rxq); VMXNET3_RXQ_UNLOCK(rxq); vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx); } static void vmxnet3_event_intr(void *xsc) { struct vmxnet3_softc *sc; sc = xsc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx); } static void vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_txbuf *txb; int i; txr = &txq->vxtxq_cmd_ring; for (i = 0; i < txr->vxtxr_ndesc; i++) { txb = &txr->vxtxr_txbuf[i]; if (txb->vtxb_m == NULL) continue; bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap); m_freem(txb->vtxb_m); txb->vtxb_m = NULL; } } static void vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; struct vmxnet3_rxbuf *rxb; int i, j; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; for (j = 0; j < rxr->vxrxr_ndesc; j++) { rxb = &rxr->vxrxr_rxbuf[j]; if (rxb->vrxb_m == NULL) continue; bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap); m_freem(rxb->vrxb_m); rxb->vrxb_m = NULL; } } } static void vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_txqueue *txq; int i; for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; VMXNET3_RXQ_LOCK(rxq); VMXNET3_RXQ_UNLOCK(rxq); } for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; VMXNET3_TXQ_LOCK(txq); VMXNET3_TXQ_UNLOCK(txq); } } static void vmxnet3_stop(struct vmxnet3_softc *sc) { struct ifnet *ifp; int q; ifp = sc->vmx_ifp; VMXNET3_CORE_LOCK_ASSERT(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->vmx_link_active = 0; callout_stop(&sc->vmx_tick); /* Disable interrupts. */ vmxnet3_disable_all_intrs(sc); vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE); vmxnet3_stop_rendezvous(sc); for (q = 0; q < sc->vmx_ntxqueues; q++) vmxnet3_txstop(sc, &sc->vmx_txq[q]); for (q = 0; q < sc->vmx_nrxqueues; q++) vmxnet3_rxstop(sc, &sc->vmx_rxq[q]); vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET); } static void vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txr = &txq->vxtxq_cmd_ring; txr->vxtxr_head = 0; txr->vxtxr_next = 0; txr->vxtxr_gen = VMXNET3_INIT_GEN; bzero(txr->vxtxr_txd, txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc)); txc = &txq->vxtxq_comp_ring; txc->vxcr_next = 0; txc->vxcr_gen = VMXNET3_INIT_GEN; bzero(txc->vxcr_u.txcd, txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc)); } static int vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct ifnet *ifp; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int i, populate, idx, frame_size, error; ifp = sc->vmx_ifp; frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) + ifp->if_mtu; /* * If the MTU causes us to exceed what a regular sized cluster can * handle, we allocate a second MJUMPAGESIZE cluster after it in * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters. * * Keep rx_max_chain a divisor of the maximum Rx ring size to make * our life easier. We do not support changing the ring size after * the attach. */ if (frame_size <= MCLBYTES) sc->vmx_rx_max_chain = 1; else sc->vmx_rx_max_chain = 2; /* * Only populate ring 1 if the configuration will take advantage * of it. That is either when LRO is enabled or the frame size * exceeds what ring 0 can contain. */ if ((ifp->if_capenable & IFCAP_LRO) == 0 && frame_size <= MCLBYTES + MJUMPAGESIZE) populate = 1; else populate = VMXNET3_RXRINGS_PERQ; for (i = 0; i < populate; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_fill = 0; rxr->vxrxr_gen = VMXNET3_INIT_GEN; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) { error = vmxnet3_newbuf(sc, rxr); if (error) return (error); } } for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_fill = 0; rxr->vxrxr_gen = 0; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); } rxc = &rxq->vxrxq_comp_ring; rxc->vxcr_next = 0; rxc->vxcr_gen = VMXNET3_INIT_GEN; bzero(rxc->vxcr_u.rxcd, rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc)); return (0); } static int vmxnet3_reinit_queues(struct vmxnet3_softc *sc) { device_t dev; int q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) vmxnet3_txinit(sc, &sc->vmx_txq[q]); for (q = 0; q < sc->vmx_nrxqueues; q++) { error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]); if (error) { device_printf(dev, "cannot populate Rx queue %d\n", q); return (error); } } return (0); } static int vmxnet3_enable_device(struct vmxnet3_softc *sc) { int q; if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) { device_printf(sc->vmx_dev, "device enable command failed!\n"); return (1); } /* Reset the Rx queue heads. */ for (q = 0; q < sc->vmx_nrxqueues; q++) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0); vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0); } return (0); } static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; vmxnet3_set_rxfilter(sc); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); else bzero(sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } static int vmxnet3_reinit(struct vmxnet3_softc *sc) { vmxnet3_reinit_interface(sc); vmxnet3_reinit_shared_data(sc); if (vmxnet3_reinit_queues(sc) != 0) return (ENXIO); if (vmxnet3_enable_device(sc) != 0) return (ENXIO); vmxnet3_reinit_rxfilters(sc); return (0); } static void vmxnet3_init_locked(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; vmxnet3_stop(sc); if (vmxnet3_reinit(sc) != 0) { vmxnet3_stop(sc); return; } ifp->if_drv_flags |= IFF_DRV_RUNNING; vmxnet3_link_status(sc); vmxnet3_enable_all_intrs(sc); callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc); } static void vmxnet3_init(void *xsc) { struct vmxnet3_softc *sc; sc = xsc; VMXNET3_CORE_LOCK(sc); vmxnet3_init_locked(sc); VMXNET3_CORE_UNLOCK(sc); } /* * BMV: Much of this can go away once we finally have offsets in * the mbuf packet header. Bug andre@. */ static int vmxnet3_txq_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start) { struct ether_vlan_header *evh; int offset; evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. */ *etype = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else { *etype = ntohs(evh->evl_encap_proto); offset = sizeof(struct ether_header); } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: { struct ip *ip, iphdr; if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = (struct ip *)(m->m_data + offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; } #endif #if defined(INET6) case ETHERTYPE_IPV6: *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: return (EINVAL); } if (m->m_pkthdr.csum_flags & CSUM_TSO) { struct tcphdr *tcp, tcphdr; if (__predict_false(*proto != IPPROTO_TCP)) { /* Likely failed to correctly parse the mbuf. */ return (EINVAL); } if (m->m_len < *start + sizeof(struct tcphdr)) { m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else tcp = (struct tcphdr *)(m->m_data + *start); /* * For TSO, the size of the protocol header is also * included in the descriptor header size. */ *start += (tcp->th_off << 2); } return (0); } static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0, bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs) { struct vmxnet3_txring *txr; struct mbuf *m; bus_dma_tag_t tag; int maxsegs, error; txr = &txq->vxtxq_cmd_ring; m = *m0; tag = txr->vxtxr_txtag; maxsegs = VMXNET3_TX_MAXSEGS; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0); if (error == 0 || error != EFBIG) return (error); m = m_collapse(m, M_NOWAIT, maxsegs); if (m != NULL) { *m0 = m; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0); } else error = ENOBUFS; if (error) { m_freem(*m0); *m0 = NULL; } else txq->vxtxq_sc->vmx_stats.vmst_collapsed++; return (error); } static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap) { struct vmxnet3_txring *txr; txr = &txq->vxtxq_cmd_ring; bus_dmamap_unload(txr->vxtxr_txtag, dmap); } static int vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0) { struct vmxnet3_softc *sc; struct ifnet *ifp; struct vmxnet3_txring *txr; struct vmxnet3_txdesc *txd, *sop; struct mbuf *m; bus_dmamap_t dmap; bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS]; int i, gen, nsegs, etype, proto, start, error; sc = txq->vxtxq_sc; ifp = sc->vmx_ifp; start = 0; txd = NULL; txr = &txq->vxtxq_cmd_ring; dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap; error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs); if (error) return (error); m = *m0; M_ASSERTPKTHDR(m); KASSERT(nsegs <= VMXNET3_TX_MAXSEGS, ("%s: mbuf %p with too many segments %d", __func__, m, nsegs)); if (VMXNET3_TXRING_AVAIL(txr) < nsegs) { txq->vxtxq_stats.vtxrs_full++; vmxnet3_txq_unload_mbuf(txq, dmap); return (ENOSPC); } else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) { error = vmxnet3_txq_offload_ctx(m, &etype, &proto, &start); if (error) { txq->vxtxq_stats.vtxrs_offload_failed++; vmxnet3_txq_unload_mbuf(txq, dmap); m_freem(m); *m0 = NULL; return (error); } } txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m = *m0; sop = &txr->vxtxr_txd[txr->vxtxr_head]; gen = txr->vxtxr_gen ^ 1; /* Owned by cpu (yet) */ for (i = 0; i < nsegs; i++) { txd = &txr->vxtxr_txd[txr->vxtxr_head]; txd->addr = segs[i].ds_addr; txd->len = segs[i].ds_len; txd->gen = gen; txd->dtype = 0; txd->offload_mode = VMXNET3_OM_NONE; txd->offload_pos = 0; txd->hlen = 0; txd->eop = 0; txd->compreq = 0; txd->vtag_mode = 0; txd->vtag = 0; if (++txr->vxtxr_head == txr->vxtxr_ndesc) { txr->vxtxr_head = 0; txr->vxtxr_gen ^= 1; } gen = txr->vxtxr_gen; } txd->eop = 1; txd->compreq = 1; if (m->m_flags & M_VLANTAG) { sop->vtag_mode = 1; sop->vtag = m->m_pkthdr.ether_vtag; } if (m->m_pkthdr.csum_flags & CSUM_TSO) { sop->offload_mode = VMXNET3_OM_TSO; sop->hlen = start; sop->offload_pos = m->m_pkthdr.tso_segsz; } else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6)) { sop->offload_mode = VMXNET3_OM_CSUM; sop->hlen = start; sop->offload_pos = start + m->m_pkthdr.csum_data; } /* Finally, change the ownership. */ vmxnet3_barrier(sc, VMXNET3_BARRIER_WR); sop->gen ^= 1; if (++txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) { txq->vxtxq_ts->npending = 0; vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), txr->vxtxr_head); } return (0); } static void vmxnet3_start_locked(struct ifnet *ifp) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct mbuf *m_head; int tx, avail; sc = ifp->if_softc; txq = &sc->vmx_txq[0]; txr = &txq->vxtxq_cmd_ring; tx = 0; VMXNET3_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vmx_link_active == 0) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* Assume worse case if this mbuf is the head of a chain. */ if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) { IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } if (vmxnet3_txq_encap(txq, &m_head) != 0) { if (m_head != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } tx++; ETHER_BPF_MTAP(ifp, m_head); } if (tx > 0) { if (txq->vxtxq_ts->npending > 0) { txq->vxtxq_ts->npending = 0; vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), txr->vxtxr_head); } txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT; } } static void vmxnet3_start(struct ifnet *ifp) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; sc = ifp->if_softc; txq = &sc->vmx_txq[0]; VMXNET3_TXQ_LOCK(txq); vmxnet3_start_locked(ifp); VMXNET3_TXQ_UNLOCK(txq); } static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; ifp = sc->vmx_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VMXNET3_CORE_LOCK(sc); /* Update our private VLAN bitvector. */ if (add) sc->vmx_vlan_filter[idx] |= (1 << bit); else sc->vmx_vlan_filter[idx] &= ~(1 << bit); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { if (add) sc->vmx_ds->vlan_filter[idx] |= (1 << bit); else sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } VMXNET3_CORE_UNLOCK(sc); } static void vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc == arg) vmxnet3_update_vlan_filter(arg, 1, tag); } static void vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc == arg) vmxnet3_update_vlan_filter(arg, 0, tag); } static void vmxnet3_set_rxfilter(struct vmxnet3_softc *sc) { struct ifnet *ifp; struct vmxnet3_driver_shared *ds; struct ifmultiaddr *ifma; u_int mode; ifp = sc->vmx_ifp; ds = sc->vmx_ds; mode = VMXNET3_RXMODE_UCAST; if (ifp->if_flags & IFF_BROADCAST) mode |= VMXNET3_RXMODE_BCAST; if (ifp->if_flags & IFF_PROMISC) mode |= VMXNET3_RXMODE_PROMISC; if (ifp->if_flags & IFF_ALLMULTI) mode |= VMXNET3_RXMODE_ALLMULTI; else { int cnt = 0, overflow = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; else if (cnt == VMXNET3_MULTICAST_MAX) { overflow = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN); cnt++; } if_maddr_runlock(ifp); if (overflow != 0) { cnt = 0; mode |= VMXNET3_RXMODE_ALLMULTI; } else if (cnt > 0) mode |= VMXNET3_RXMODE_MCAST; ds->mcast_tablelen = cnt * ETHER_ADDR_LEN; } ds->rxmode = mode; vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER); vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE); } static int vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu) { struct ifnet *ifp; ifp = sc->vmx_ifp; if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU) return (EINVAL); ifp->if_mtu = mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } return (0); } static int vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vmxnet3_softc *sc; struct ifreq *ifr; int reinit, mask, error; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifp->if_mtu != ifr->ifr_mtu) { VMXNET3_CORE_LOCK(sc); error = vmxnet3_change_mtu(sc, ifr->ifr_mtu); VMXNET3_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: VMXNET3_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ sc->vmx_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { vmxnet3_set_rxfilter(sc); } } else vmxnet3_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vmxnet3_stop(sc); } sc->vmx_if_flags = ifp->if_flags; VMXNET3_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: VMXNET3_CORE_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) vmxnet3_set_rxfilter(sc); VMXNET3_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd); break; case SIOCSIFCAP: VMXNET3_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) { /* Changing these features requires us to reinit. */ reinit = 1; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } else reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } VMXNET3_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int vmxnet3_watchdog(struct vmxnet3_txqueue *txq) { struct vmxnet3_softc *sc; sc = txq->vxtxq_sc; VMXNET3_TXQ_LOCK(txq); if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) { VMXNET3_TXQ_UNLOCK(txq); return (0); } VMXNET3_TXQ_UNLOCK(txq); if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n", txq->vxtxq_id); return (1); } static void vmxnet3_refresh_stats(struct vmxnet3_softc *sc) { vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS); } static void vmxnet3_tick(void *xsc) { struct vmxnet3_softc *sc; struct ifnet *ifp; int i, timedout; sc = xsc; ifp = sc->vmx_ifp; timedout = 0; VMXNET3_CORE_LOCK_ASSERT(sc); vmxnet3_refresh_stats(sc); for (i = 0; i < sc->vmx_ntxqueues; i++) timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]); if (timedout != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } else callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc); } static int vmxnet3_link_is_up(struct vmxnet3_softc *sc) { uint32_t status; /* Also update the link speed while here. */ status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK); sc->vmx_link_speed = status >> 16; return !!(status & 0x1); } static void vmxnet3_link_status(struct vmxnet3_softc *sc) { struct ifnet *ifp; int link; ifp = sc->vmx_ifp; link = vmxnet3_link_is_up(sc); if (link != 0 && sc->vmx_link_active == 0) { sc->vmx_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vmx_link_active != 0) { sc->vmx_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static void vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vmxnet3_softc *sc; sc = ifp->if_softc; ifmr->ifm_active = IFM_ETHER | IFM_AUTO; ifmr->ifm_status = IFM_AVALID; VMXNET3_CORE_LOCK(sc); if (vmxnet3_link_is_up(sc) != 0) ifmr->ifm_status |= IFM_ACTIVE; else ifmr->ifm_status |= IFM_NONE; VMXNET3_CORE_UNLOCK(sc); } static int vmxnet3_media_change(struct ifnet *ifp) { /* Ignore. */ return (0); } static void vmxnet3_set_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = sc->vmx_lladdr[0]; ml |= sc->vmx_lladdr[1] << 8; ml |= sc->vmx_lladdr[2] << 16; ml |= sc->vmx_lladdr[3] << 24; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml); mh = sc->vmx_lladdr[4]; mh |= sc->vmx_lladdr[5] << 8; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh); } static void vmxnet3_get_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL); mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH); sc->vmx_lladdr[0] = ml; sc->vmx_lladdr[1] = ml >> 8; sc->vmx_lladdr[2] = ml >> 16; sc->vmx_lladdr[3] = ml >> 24; sc->vmx_lladdr[4] = mh; sc->vmx_lladdr[5] = mh >> 8; } static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *txsnode; struct sysctl_oid_list *list, *txslist; struct vmxnet3_txq_stats *stats; struct UPT1_TxStats *txstats; char namebuf[16]; stats = &txq->vxtxq_stats; txstats = &txq->vxtxq_ts->stats; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Transmit Queue"); txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD, &stats->vtxrs_full, "Tx ring full"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD, &stats->vtxrs_offload_failed, "Tx checksum offload failed"); /* * Add statistics reported by the host. These are updated once * per second. */ txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD, NULL, "Host Statistics"); txslist = SYSCTL_CHILDREN(txsnode); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD, &txstats->TSO_packets, "TSO packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD, &txstats->TSO_bytes, "TSO bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &txstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &txstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &txstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &txstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD, &txstats->error, "Errors"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD, &txstats->discard, "Discards"); } static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *rxsnode; struct sysctl_oid_list *list, *rxslist; struct vmxnet3_rxq_stats *stats; struct UPT1_RxStats *rxstats; char namebuf[16]; stats = &rxq->vxrxq_stats; rxstats = &rxq->vxrxq_rs->stats; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Receive Queue"); rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node); /* * Add statistics reported by the host. These are updated once * per second. */ rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD, NULL, "Host Statistics"); rxslist = SYSCTL_CHILDREN(rxsnode); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD, &rxstats->LRO_packets, "LRO packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD, &rxstats->LRO_bytes, "LRO bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &rxstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &rxstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &rxstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &rxstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD, &rxstats->bcast_packets, "Broadcast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD, &rxstats->bcast_bytes, "Broadcast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD, &rxstats->nobuffer, "No buffer"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD, &rxstats->error, "Errors"); } #ifdef VMXNET3_DEBUG_SYSCTL static void vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node; struct sysctl_oid_list *list; int i; for (i = 0; i < sc->vmx_ntxqueues; i++) { struct vmxnet3_txqueue *txq = &sc->vmx_txq[i]; node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_head, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_gen, 0, ""); } for (i = 0; i < sc->vmx_nrxqueues; i++) { struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i]; node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_gen, 0, ""); } } #endif static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { int i; for (i = 0; i < sc->vmx_ntxqueues; i++) vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child); for (i = 0; i < sc->vmx_nrxqueues; i++) vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child); #ifdef VMXNET3_DEBUG_SYSCTL vmxnet3_setup_debug_sysctl(sc, ctx, child); #endif } static void vmxnet3_setup_sysctl(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_statistics *stats; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vmx_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD, &sc->vmx_ntxqueues, 0, "Number of Tx queues"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD, &sc->vmx_nrxqueues, 0, "Number of Rx queues"); stats = &sc->vmx_stats; SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "collapsed", CTLFLAG_RD, &stats->vmst_collapsed, 0, "Tx mbuf chains collapsed"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD, &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD, &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed"); vmxnet3_setup_queue_sysctl(sc, ctx, child); } static void vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v); } static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r) { return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r)); } static void vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v); } static void vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd); } static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_cmd(sc, cmd); bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD)); } static void vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0); } static void vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1); } static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc) { int i; sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_enable_intr(sc, i); } static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc) { int i; sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_disable_intr(sc, i); } static void vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *baddr = arg; if (error == 0) *baddr = segs->ds_addr; } static int vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align, struct vmxnet3_dma_alloc *dma) { device_t dev; int error; dev = sc->vmx_dev; bzero(dma, sizeof(struct vmxnet3_dma_alloc)); error = bus_dma_tag_create(bus_get_dma_tag(dev), align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &dma->dma_tag); if (error) { device_printf(dev, "bus_dma_tag_create failed: %d\n", error); goto fail; } error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map); if (error) { device_printf(dev, "bus_dmamem_alloc failed: %d\n", error); goto fail; } error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "bus_dmamap_load failed: %d\n", error); goto fail; } dma->dma_size = size; fail: if (error) vmxnet3_dma_free(sc, dma); return (error); } static void vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma) { if (dma->dma_tag != NULL) { if (dma->dma_map != NULL) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); } bus_dma_tag_destroy(dma->dma_tag); } bzero(dma, sizeof(struct vmxnet3_dma_alloc)); } static int vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } /* * Since this is a purely paravirtualized device, we do not have * to worry about DMA coherency. But at times, we must make sure * both the compiler and CPU do not reorder memory operations. */ static inline void vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type) { switch (type) { case VMXNET3_BARRIER_RD: rmb(); break; case VMXNET3_BARRIER_WR: wmb(); break; case VMXNET3_BARRIER_RDWR: mb(); break; default: panic("%s: bad barrier type %d", __func__, type); } } Index: head/sys/dev/vxge/vxge.c =================================================================== --- head/sys/dev/vxge/vxge.c (revision 263101) +++ head/sys/dev/vxge/vxge.c (revision 263102) @@ -1,4201 +1,4201 @@ /*- * Copyright(c) 2002-2011 Exar Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification are permitted provided the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Exar Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ #include static int vxge_pci_bd_no = -1; static u32 vxge_drv_copyright = 0; static u32 vxge_dev_ref_count = 0; static u32 vxge_dev_req_reboot = 0; static int vpath_selector[VXGE_HAL_MAX_VIRTUAL_PATHS] = \ {0, 1, 3, 3, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15, 31}; /* * vxge_probe * Probes for x3100 devices */ int vxge_probe(device_t ndev) { int err = ENXIO; u16 pci_bd_no = 0; u16 pci_vendor_id = 0; u16 pci_device_id = 0; char adapter_name[64]; pci_vendor_id = pci_get_vendor(ndev); if (pci_vendor_id != VXGE_PCI_VENDOR_ID) goto _exit0; pci_device_id = pci_get_device(ndev); if (pci_device_id == VXGE_PCI_DEVICE_ID_TITAN_1) { pci_bd_no = (pci_get_bus(ndev) | pci_get_slot(ndev)); snprintf(adapter_name, sizeof(adapter_name), VXGE_ADAPTER_NAME, pci_get_revid(ndev)); device_set_desc_copy(ndev, adapter_name); if (!vxge_drv_copyright) { device_printf(ndev, VXGE_COPYRIGHT); vxge_drv_copyright = 1; } if (vxge_dev_req_reboot == 0) { vxge_pci_bd_no = pci_bd_no; err = BUS_PROBE_DEFAULT; } else { if (pci_bd_no != vxge_pci_bd_no) { vxge_pci_bd_no = pci_bd_no; err = BUS_PROBE_DEFAULT; } } } _exit0: return (err); } /* * vxge_attach * Connects driver to the system if probe was success @ndev handle */ int vxge_attach(device_t ndev) { int err = 0; vxge_dev_t *vdev; vxge_hal_device_t *hldev = NULL; vxge_hal_device_attr_t device_attr; vxge_free_resources_e error_level = VXGE_FREE_NONE; vxge_hal_status_e status = VXGE_HAL_OK; /* Get per-ndev buffer */ vdev = (vxge_dev_t *) device_get_softc(ndev); if (!vdev) goto _exit0; bzero(vdev, sizeof(vxge_dev_t)); vdev->ndev = ndev; strlcpy(vdev->ndev_name, "vxge", sizeof(vdev->ndev_name)); err = vxge_driver_config(vdev); if (err != 0) goto _exit0; /* Initialize HAL driver */ status = vxge_driver_init(vdev); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "Failed to initialize driver\n"); goto _exit0; } /* Enable PCI bus-master */ pci_enable_busmaster(ndev); /* Allocate resources */ err = vxge_alloc_resources(vdev); if (err != 0) { device_printf(vdev->ndev, "resource allocation failed\n"); goto _exit0; } err = vxge_device_hw_info_get(vdev); if (err != 0) { error_level = VXGE_FREE_BAR2; goto _exit0; } /* Get firmware default values for Device Configuration */ vxge_hal_device_config_default_get(vdev->device_config); /* Customize Device Configuration based on User request */ vxge_vpath_config(vdev); /* Allocate ISR resources */ err = vxge_alloc_isr_resources(vdev); if (err != 0) { error_level = VXGE_FREE_ISR_RESOURCE; device_printf(vdev->ndev, "isr resource allocation failed\n"); goto _exit0; } /* HAL attributes */ device_attr.bar0 = (u8 *) vdev->pdev->bar_info[0]; device_attr.bar1 = (u8 *) vdev->pdev->bar_info[1]; device_attr.bar2 = (u8 *) vdev->pdev->bar_info[2]; device_attr.regh0 = (vxge_bus_res_t *) vdev->pdev->reg_map[0]; device_attr.regh1 = (vxge_bus_res_t *) vdev->pdev->reg_map[1]; device_attr.regh2 = (vxge_bus_res_t *) vdev->pdev->reg_map[2]; device_attr.irqh = (pci_irq_h) vdev->config.isr_info[0].irq_handle; device_attr.cfgh = vdev->pdev; device_attr.pdev = vdev->pdev; /* Initialize HAL Device */ status = vxge_hal_device_initialize((vxge_hal_device_h *) &hldev, &device_attr, vdev->device_config); if (status != VXGE_HAL_OK) { error_level = VXGE_FREE_ISR_RESOURCE; device_printf(vdev->ndev, "hal device initialization failed\n"); goto _exit0; } vdev->devh = hldev; vxge_hal_device_private_set(hldev, vdev); if (vdev->is_privilaged) { err = vxge_firmware_verify(vdev); if (err != 0) { vxge_dev_req_reboot = 1; error_level = VXGE_FREE_TERMINATE_DEVICE; goto _exit0; } } /* Allocate memory for vpath */ vdev->vpaths = (vxge_vpath_t *) vxge_mem_alloc(vdev->no_of_vpath * sizeof(vxge_vpath_t)); if (vdev->vpaths == NULL) { error_level = VXGE_FREE_TERMINATE_DEVICE; device_printf(vdev->ndev, "vpath memory allocation failed\n"); goto _exit0; } vdev->no_of_func = 1; if (vdev->is_privilaged) { vxge_hal_func_mode_count(vdev->devh, vdev->config.hw_info.function_mode, &vdev->no_of_func); vxge_bw_priority_config(vdev); } /* Initialize mutexes */ vxge_mutex_init(vdev); /* Initialize Media */ vxge_media_init(vdev); err = vxge_ifp_setup(ndev); if (err != 0) { error_level = VXGE_FREE_MEDIA; device_printf(vdev->ndev, "setting up interface failed\n"); goto _exit0; } err = vxge_isr_setup(vdev); if (err != 0) { error_level = VXGE_FREE_INTERFACE; device_printf(vdev->ndev, "failed to associate interrupt handler with device\n"); goto _exit0; } vxge_device_hw_info_print(vdev); vdev->is_active = TRUE; _exit0: if (error_level) { vxge_free_resources(ndev, error_level); err = ENXIO; } return (err); } /* * vxge_detach * Detaches driver from the Kernel subsystem */ int vxge_detach(device_t ndev) { vxge_dev_t *vdev; vdev = (vxge_dev_t *) device_get_softc(ndev); if (vdev->is_active) { vdev->is_active = FALSE; vxge_stop(vdev); vxge_free_resources(ndev, VXGE_FREE_ALL); } return (0); } /* * vxge_shutdown * To shutdown device before system shutdown */ int vxge_shutdown(device_t ndev) { vxge_dev_t *vdev = (vxge_dev_t *) device_get_softc(ndev); vxge_stop(vdev); return (0); } /* * vxge_init * Initialize the interface */ void vxge_init(void *vdev_ptr) { vxge_dev_t *vdev = (vxge_dev_t *) vdev_ptr; VXGE_DRV_LOCK(vdev); vxge_init_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_init_locked * Initialize the interface */ void vxge_init_locked(vxge_dev_t *vdev) { int i, err = EINVAL; vxge_hal_device_t *hldev = vdev->devh; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_vpath_h vpath_handle; ifnet_t ifp = vdev->ifp; /* If device is in running state, initializing is not required */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) goto _exit0; VXGE_DRV_LOCK_ASSERT(vdev); /* Opening vpaths */ err = vxge_vpath_open(vdev); if (err != 0) goto _exit1; if (vdev->config.rth_enable) { status = vxge_rth_config(vdev); if (status != VXGE_HAL_OK) goto _exit1; } for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; /* check initial mtu before enabling the device */ status = vxge_hal_device_mtu_check(vpath_handle, ifp->if_mtu); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, - "invalid mtu size %ld specified\n", ifp->if_mtu); + "invalid mtu size %u specified\n", ifp->if_mtu); goto _exit1; } status = vxge_hal_vpath_mtu_set(vpath_handle, ifp->if_mtu); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "setting mtu in device failed\n"); goto _exit1; } } /* Enable HAL device */ status = vxge_hal_device_enable(hldev); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to enable device\n"); goto _exit1; } if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) vxge_msix_enable(vdev); /* Checksum capability */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; /* Enabling mcast for all vpath */ vxge_hal_vpath_mcast_enable(vpath_handle); /* Enabling bcast for all vpath */ status = vxge_hal_vpath_bcast_enable(vpath_handle); if (status != VXGE_HAL_OK) device_printf(vdev->ndev, "can't enable bcast on vpath (%d)\n", i); } /* Enable interrupts */ vxge_hal_device_intr_enable(vdev->devh); for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; bzero(&(vdev->vpaths[i].driver_stats), sizeof(vxge_drv_stats_t)); status = vxge_hal_vpath_enable(vpath_handle); if (status != VXGE_HAL_OK) goto _exit2; } vxge_os_mdelay(1000); /* Device is initialized */ vdev->is_initialized = TRUE; /* Now inform the stack we're ready */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_RUNNING; goto _exit0; _exit2: vxge_hal_device_intr_disable(vdev->devh); vxge_hal_device_disable(hldev); _exit1: vxge_vpath_close(vdev); _exit0: return; } /* * vxge_driver_init * Initializes HAL driver */ vxge_hal_status_e vxge_driver_init(vxge_dev_t *vdev) { vxge_hal_uld_cbs_t uld_callbacks; vxge_hal_driver_config_t driver_config; vxge_hal_status_e status = VXGE_HAL_OK; /* Initialize HAL driver */ if (!vxge_dev_ref_count) { bzero(&uld_callbacks, sizeof(vxge_hal_uld_cbs_t)); bzero(&driver_config, sizeof(vxge_hal_driver_config_t)); uld_callbacks.link_up = vxge_link_up; uld_callbacks.link_down = vxge_link_down; uld_callbacks.crit_err = vxge_crit_error; uld_callbacks.sched_timer = NULL; uld_callbacks.xpak_alarm_log = NULL; status = vxge_hal_driver_initialize(&driver_config, &uld_callbacks); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to initialize driver\n"); goto _exit0; } } vxge_hal_driver_debug_set(VXGE_TRACE); vxge_dev_ref_count++; _exit0: return (status); } /* * vxge_driver_config */ int vxge_driver_config(vxge_dev_t *vdev) { int i, err = 0; char temp_buffer[30]; vxge_bw_info_t bw_info; VXGE_GET_PARAM("hint.vxge.0.no_of_vpath", vdev->config, no_of_vpath, VXGE_DEFAULT_USER_HARDCODED); if (vdev->config.no_of_vpath == VXGE_DEFAULT_USER_HARDCODED) vdev->config.no_of_vpath = mp_ncpus; if (vdev->config.no_of_vpath <= 0) { err = EINVAL; device_printf(vdev->ndev, "Failed to load driver, \ invalid config : \'no_of_vpath\'\n"); goto _exit0; } VXGE_GET_PARAM("hint.vxge.0.intr_coalesce", vdev->config, intr_coalesce, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.rth_enable", vdev->config, rth_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.rth_bkt_sz", vdev->config, rth_bkt_sz, VXGE_DEFAULT_RTH_BUCKET_SIZE); VXGE_GET_PARAM("hint.vxge.0.lro_enable", vdev->config, lro_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.tso_enable", vdev->config, tso_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.tx_steering", vdev->config, tx_steering, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.msix_enable", vdev->config, intr_mode, VXGE_HAL_INTR_MODE_MSIX); VXGE_GET_PARAM("hint.vxge.0.ifqmaxlen", vdev->config, ifq_maxlen, VXGE_DEFAULT_CONFIG_IFQ_MAXLEN); VXGE_GET_PARAM("hint.vxge.0.port_mode", vdev->config, port_mode, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.port_mode == VXGE_DEFAULT_USER_HARDCODED) vdev->config.port_mode = VXGE_DEFAULT_CONFIG_VALUE; VXGE_GET_PARAM("hint.vxge.0.l2_switch", vdev->config, l2_switch, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.l2_switch == VXGE_DEFAULT_USER_HARDCODED) vdev->config.l2_switch = VXGE_DEFAULT_CONFIG_VALUE; VXGE_GET_PARAM("hint.vxge.0.fw_upgrade", vdev->config, fw_option, VXGE_FW_UPGRADE_ALL); VXGE_GET_PARAM("hint.vxge.0.low_latency", vdev->config, low_latency, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.func_mode", vdev->config, function_mode, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.function_mode == VXGE_DEFAULT_USER_HARDCODED) vdev->config.function_mode = VXGE_DEFAULT_CONFIG_VALUE; if (!(is_multi_func(vdev->config.function_mode) || is_single_func(vdev->config.function_mode))) vdev->config.function_mode = VXGE_DEFAULT_CONFIG_VALUE; for (i = 0; i < VXGE_HAL_MAX_FUNCTIONS; i++) { bw_info.func_id = i; sprintf(temp_buffer, "hint.vxge.0.bandwidth_%d", i); VXGE_GET_PARAM(temp_buffer, bw_info, bandwidth, VXGE_DEFAULT_USER_HARDCODED); if (bw_info.bandwidth == VXGE_DEFAULT_USER_HARDCODED) bw_info.bandwidth = VXGE_HAL_VPATH_BW_LIMIT_DEFAULT; sprintf(temp_buffer, "hint.vxge.0.priority_%d", i); VXGE_GET_PARAM(temp_buffer, bw_info, priority, VXGE_DEFAULT_USER_HARDCODED); if (bw_info.priority == VXGE_DEFAULT_USER_HARDCODED) bw_info.priority = VXGE_HAL_VPATH_PRIORITY_DEFAULT; vxge_os_memcpy(&vdev->config.bw_info[i], &bw_info, sizeof(vxge_bw_info_t)); } _exit0: return (err); } /* * vxge_stop */ void vxge_stop(vxge_dev_t *vdev) { VXGE_DRV_LOCK(vdev); vxge_stop_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_stop_locked * Common code for both stop and part of reset. * disables device, interrupts and closes vpaths handle */ void vxge_stop_locked(vxge_dev_t *vdev) { u64 adapter_status = 0; vxge_hal_status_e status; vxge_hal_device_t *hldev = vdev->devh; ifnet_t ifp = vdev->ifp; VXGE_DRV_LOCK_ASSERT(vdev); /* If device is not in "Running" state, return */ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; /* Set appropriate flags */ vdev->is_initialized = FALSE; hldev->link_state = VXGE_HAL_LINK_NONE; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); /* Disable interrupts */ vxge_hal_device_intr_disable(hldev); /* Disable HAL device */ status = vxge_hal_device_disable(hldev); if (status != VXGE_HAL_OK) { vxge_hal_device_status(hldev, &adapter_status); device_printf(vdev->ndev, "adapter status: 0x%llx\n", adapter_status); } /* reset vpaths */ vxge_vpath_reset(vdev); vxge_os_mdelay(1000); /* Close Vpaths */ vxge_vpath_close(vdev); } void vxge_send(ifnet_t ifp) { vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; vpath = &(vdev->vpaths[0]); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (VXGE_TX_TRYLOCK(vpath)) { vxge_send_locked(ifp, vpath); VXGE_TX_UNLOCK(vpath); } } } static inline void vxge_send_locked(ifnet_t ifp, vxge_vpath_t *vpath) { mbuf_t m_head = NULL; vxge_dev_t *vdev = vpath->vdev; VXGE_TX_LOCK_ASSERT(vpath); if ((!vdev->is_initialized) || ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING)) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (vxge_xmit(ifp, vpath, &m_head)) { if (m_head == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); VXGE_DRV_STATS(vpath, tx_again); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } } #if __FreeBSD_version >= 800000 int vxge_mq_send(ifnet_t ifp, mbuf_t m_head) { int i = 0, err = 0; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; if (vdev->config.tx_steering) { i = vxge_vpath_get(vdev, m_head); } else if ((m_head->m_flags & M_FLOWID) != 0) { i = m_head->m_pkthdr.flowid % vdev->no_of_vpath; } vpath = &(vdev->vpaths[i]); if (VXGE_TX_TRYLOCK(vpath)) { err = vxge_mq_send_locked(ifp, vpath, m_head); VXGE_TX_UNLOCK(vpath); } else err = drbr_enqueue(ifp, vpath->br, m_head); return (err); } static inline int vxge_mq_send_locked(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t m_head) { int err = 0; mbuf_t next = NULL; vxge_dev_t *vdev = vpath->vdev; VXGE_TX_LOCK_ASSERT(vpath); if ((!vdev->is_initialized) || ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING)) { err = drbr_enqueue(ifp, vpath->br, m_head); goto _exit0; } if (m_head == NULL) { next = drbr_dequeue(ifp, vpath->br); } else if (drbr_needs_enqueue(ifp, vpath->br)) { if ((err = drbr_enqueue(ifp, vpath->br, m_head)) != 0) goto _exit0; next = drbr_dequeue(ifp, vpath->br); } else next = m_head; /* Process the queue */ while (next != NULL) { if ((err = vxge_xmit(ifp, vpath, &next)) != 0) { if (next == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; err = drbr_enqueue(ifp, vpath->br, next); VXGE_DRV_STATS(vpath, tx_again); break; } ifp->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) ifp->if_omcasts++; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; next = drbr_dequeue(ifp, vpath->br); } _exit0: return (err); } void vxge_mq_qflush(ifnet_t ifp) { int i; mbuf_t m_head; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); if (!vpath->handle) continue; VXGE_TX_LOCK(vpath); while ((m_head = buf_ring_dequeue_sc(vpath->br)) != NULL) vxge_free_packet(m_head); VXGE_TX_UNLOCK(vpath); } if_qflush(ifp); } #endif static inline int vxge_xmit(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t *m_headp) { int err, num_segs = 0; u32 txdl_avail, dma_index, tagged = 0; dma_addr_t dma_addr; bus_size_t dma_sizes; void *dtr_priv; vxge_txdl_priv_t *txdl_priv; vxge_hal_txdl_h txdlh; vxge_hal_status_e status; vxge_dev_t *vdev = vpath->vdev; VXGE_DRV_STATS(vpath, tx_xmit); txdl_avail = vxge_hal_fifo_free_txdl_count_get(vpath->handle); if (txdl_avail < VXGE_TX_LOW_THRESHOLD) { VXGE_DRV_STATS(vpath, tx_low_dtr_cnt); err = ENOBUFS; goto _exit0; } /* Reserve descriptors */ status = vxge_hal_fifo_txdl_reserve(vpath->handle, &txdlh, &dtr_priv); if (status != VXGE_HAL_OK) { VXGE_DRV_STATS(vpath, tx_reserve_failed); err = ENOBUFS; goto _exit0; } /* Update Tx private structure for this descriptor */ txdl_priv = (vxge_txdl_priv_t *) dtr_priv; /* * Map the packet for DMA. * Returns number of segments through num_segs. */ err = vxge_dma_mbuf_coalesce(vpath->dma_tag_tx, txdl_priv->dma_map, m_headp, txdl_priv->dma_buffers, &num_segs); if (vpath->driver_stats.tx_max_frags < num_segs) vpath->driver_stats.tx_max_frags = num_segs; if (err == ENOMEM) { VXGE_DRV_STATS(vpath, tx_no_dma_setup); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); goto _exit0; } else if (err != 0) { vxge_free_packet(*m_headp); VXGE_DRV_STATS(vpath, tx_no_dma_setup); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); goto _exit0; } txdl_priv->mbuf_pkt = *m_headp; /* Set VLAN tag in descriptor only if this packet has it */ if ((*m_headp)->m_flags & M_VLANTAG) vxge_hal_fifo_txdl_vlan_set(txdlh, (*m_headp)->m_pkthdr.ether_vtag); /* Set descriptor buffer for header and each fragment/segment */ for (dma_index = 0; dma_index < num_segs; dma_index++) { dma_sizes = txdl_priv->dma_buffers[dma_index].ds_len; dma_addr = htole64(txdl_priv->dma_buffers[dma_index].ds_addr); vxge_hal_fifo_txdl_buffer_set(vpath->handle, txdlh, dma_index, dma_addr, dma_sizes); } /* Pre-write Sync of mapping */ bus_dmamap_sync(vpath->dma_tag_tx, txdl_priv->dma_map, BUS_DMASYNC_PREWRITE); if ((*m_headp)->m_pkthdr.csum_flags & CSUM_TSO) { if ((*m_headp)->m_pkthdr.tso_segsz) { VXGE_DRV_STATS(vpath, tx_tso); vxge_hal_fifo_txdl_lso_set(txdlh, VXGE_HAL_FIFO_LSO_FRM_ENCAP_AUTO, (*m_headp)->m_pkthdr.tso_segsz); } } /* Checksum */ if (ifp->if_hwassist > 0) { vxge_hal_fifo_txdl_cksum_set_bits(txdlh, VXGE_HAL_FIFO_TXD_TX_CKO_IPV4_EN | VXGE_HAL_FIFO_TXD_TX_CKO_TCP_EN | VXGE_HAL_FIFO_TXD_TX_CKO_UDP_EN); } if ((vxge_hal_device_check_id(vdev->devh) == VXGE_HAL_CARD_TITAN_1A) && (vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 0))) tagged = 1; vxge_hal_fifo_txdl_post(vpath->handle, txdlh, tagged); VXGE_DRV_STATS(vpath, tx_posted); _exit0: return (err); } /* * vxge_tx_replenish * Allocate buffers and set them into descriptors for later use */ /* ARGSUSED */ vxge_hal_status_e vxge_tx_replenish(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, u32 dtr_index, void *userdata, vxge_hal_reopen_e reopen) { int err = 0; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_txdl_priv_t *txdl_priv = (vxge_txdl_priv_t *) dtr_priv; err = bus_dmamap_create(vpath->dma_tag_tx, BUS_DMA_NOWAIT, &txdl_priv->dma_map); return ((err == 0) ? VXGE_HAL_OK : VXGE_HAL_FAIL); } /* * vxge_tx_compl * If the interrupt is due to Tx completion, free the sent buffer */ vxge_hal_status_e vxge_tx_compl(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, vxge_hal_fifo_tcode_e t_code, void *userdata) { vxge_hal_status_e status = VXGE_HAL_OK; vxge_txdl_priv_t *txdl_priv; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_dev_t *vdev = vpath->vdev; ifnet_t ifp = vdev->ifp; VXGE_TX_LOCK(vpath); /* * For each completed descriptor * Get private structure, free buffer, do unmapping, and free descriptor */ do { VXGE_DRV_STATS(vpath, tx_compl); if (t_code != VXGE_HAL_FIFO_T_CODE_OK) { device_printf(vdev->ndev, "tx transfer code %d\n", t_code); ifp->if_oerrors++; VXGE_DRV_STATS(vpath, tx_tcode); vxge_hal_fifo_handle_tcode(vpath_handle, txdlh, t_code); } ifp->if_opackets++; txdl_priv = (vxge_txdl_priv_t *) dtr_priv; bus_dmamap_unload(vpath->dma_tag_tx, txdl_priv->dma_map); vxge_free_packet(txdl_priv->mbuf_pkt); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); } while (vxge_hal_fifo_txdl_next_completed(vpath_handle, &txdlh, &dtr_priv, &t_code) == VXGE_HAL_OK); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; VXGE_TX_UNLOCK(vpath); return (status); } /* ARGSUSED */ void vxge_tx_term(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, vxge_hal_txdl_state_e state, void *userdata, vxge_hal_reopen_e reopen) { vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_txdl_priv_t *txdl_priv = (vxge_txdl_priv_t *) dtr_priv; if (state != VXGE_HAL_TXDL_STATE_POSTED) return; if (txdl_priv != NULL) { bus_dmamap_sync(vpath->dma_tag_tx, txdl_priv->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(vpath->dma_tag_tx, txdl_priv->dma_map); bus_dmamap_destroy(vpath->dma_tag_tx, txdl_priv->dma_map); vxge_free_packet(txdl_priv->mbuf_pkt); } /* Free the descriptor */ vxge_hal_fifo_txdl_free(vpath->handle, txdlh); } /* * vxge_rx_replenish * Allocate buffers and set them into descriptors for later use */ /* ARGSUSED */ vxge_hal_status_e vxge_rx_replenish(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, u32 dtr_index, void *userdata, vxge_hal_reopen_e reopen) { int err = 0; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; /* Create DMA map for these descriptors */ err = bus_dmamap_create(vpath->dma_tag_rx, BUS_DMA_NOWAIT, &rxd_priv->dma_map); if (err == 0) { if (vxge_rx_rxd_1b_set(vpath, rxdh, dtr_priv)) { bus_dmamap_destroy(vpath->dma_tag_rx, rxd_priv->dma_map); status = VXGE_HAL_FAIL; } } return (status); } /* * vxge_rx_compl */ vxge_hal_status_e vxge_rx_compl(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, u8 t_code, void *userdata) { mbuf_t mbuf_up; vxge_rxd_priv_t *rxd_priv; vxge_hal_ring_rxd_info_t ext_info; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_dev_t *vdev = vpath->vdev; struct lro_entry *queued = NULL; struct lro_ctrl *lro = &vpath->lro; /* get the interface pointer */ ifnet_t ifp = vdev->ifp; do { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { vxge_hal_ring_rxd_post(vpath_handle, rxdh); status = VXGE_HAL_FAIL; break; } VXGE_DRV_STATS(vpath, rx_compl); rxd_priv = (vxge_rxd_priv_t *) dtr_priv; /* Gets details of mbuf i.e., packet length */ vxge_rx_rxd_1b_get(vpath, rxdh, dtr_priv); /* * Prepare one buffer to send it to upper layer Since upper * layer frees the buffer do not use rxd_priv->mbuf_pkt. * Meanwhile prepare a new buffer, do mapping, use with the * current descriptor and post descriptor back to ring vpath */ mbuf_up = rxd_priv->mbuf_pkt; if (t_code != VXGE_HAL_RING_RXD_T_CODE_OK) { ifp->if_ierrors++; VXGE_DRV_STATS(vpath, rx_tcode); status = vxge_hal_ring_handle_tcode(vpath_handle, rxdh, t_code); /* * If transfer code is not for unknown protocols and * vxge_hal_device_handle_tcode is NOT returned * VXGE_HAL_OK * drop this packet and increment rx_tcode stats */ if ((status != VXGE_HAL_OK) && (t_code != VXGE_HAL_RING_T_CODE_L3_PKT_ERR)) { vxge_free_packet(mbuf_up); vxge_hal_ring_rxd_post(vpath_handle, rxdh); continue; } } if (vxge_rx_rxd_1b_set(vpath, rxdh, dtr_priv)) { /* * If unable to allocate buffer, post descriptor back * to vpath for future processing of same packet. */ vxge_hal_ring_rxd_post(vpath_handle, rxdh); continue; } /* Get the extended information */ vxge_hal_ring_rxd_1b_info_get(vpath_handle, rxdh, &ext_info); /* post descriptor with newly allocated mbuf back to vpath */ vxge_hal_ring_rxd_post(vpath_handle, rxdh); vpath->rxd_posted++; if (vpath->rxd_posted % VXGE_RXD_REPLENISH_COUNT == 0) vxge_hal_ring_rxd_post_post_db(vpath_handle); /* * Set successfully computed checksums in the mbuf. * Leave the rest to the stack to be reverified. */ vxge_rx_checksum(ext_info, mbuf_up); #if __FreeBSD_version >= 800000 mbuf_up->m_flags |= M_FLOWID; mbuf_up->m_pkthdr.flowid = vpath->vp_index; #endif /* Post-Read sync for buffers */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); vxge_rx_input(ifp, mbuf_up, vpath); } while (vxge_hal_ring_rxd_next_completed(vpath_handle, &rxdh, &dtr_priv, &t_code) == VXGE_HAL_OK); /* Flush any outstanding LRO work */ if (vpath->lro_enable && vpath->lro.lro_cnt) { while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } } return (status); } static inline void vxge_rx_input(ifnet_t ifp, mbuf_t mbuf_up, vxge_vpath_t *vpath) { if (vpath->lro_enable && vpath->lro.lro_cnt) { if (tcp_lro_rx(&vpath->lro, mbuf_up, 0) == 0) return; } (*ifp->if_input) (ifp, mbuf_up); } static inline void vxge_rx_checksum(vxge_hal_ring_rxd_info_t ext_info, mbuf_t mbuf_up) { if (!(ext_info.proto & VXGE_HAL_FRAME_PROTO_IP_FRAG) && (ext_info.proto & VXGE_HAL_FRAME_PROTO_TCP_OR_UDP) && ext_info.l3_cksum_valid && ext_info.l4_cksum_valid) { mbuf_up->m_pkthdr.csum_data = htons(0xffff); mbuf_up->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mbuf_up->m_pkthdr.csum_flags |= CSUM_IP_VALID; mbuf_up->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } else { if (ext_info.vlan) { mbuf_up->m_pkthdr.ether_vtag = ext_info.vlan; mbuf_up->m_flags |= M_VLANTAG; } } } /* * vxge_rx_term During unload terminate and free all descriptors * @vpath_handle Rx vpath Handle @rxdh Rx Descriptor Handle @state Descriptor * State @userdata Per-adapter Data @reopen vpath open/reopen option */ /* ARGSUSED */ void vxge_rx_term(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, vxge_hal_rxd_state_e state, void *userdata, vxge_hal_reopen_e reopen) { vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; if (state != VXGE_HAL_RXD_STATE_POSTED) return; if (rxd_priv != NULL) { bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(vpath->dma_tag_rx, rxd_priv->dma_map); bus_dmamap_destroy(vpath->dma_tag_rx, rxd_priv->dma_map); vxge_free_packet(rxd_priv->mbuf_pkt); } /* Free the descriptor */ vxge_hal_ring_rxd_free(vpath_handle, rxdh); } /* * vxge_rx_rxd_1b_get * Get descriptors of packet to send up */ void vxge_rx_rxd_1b_get(vxge_vpath_t *vpath, vxge_hal_rxd_h rxdh, void *dtr_priv) { vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; mbuf_t mbuf_up = rxd_priv->mbuf_pkt; /* Retrieve data from completed descriptor */ vxge_hal_ring_rxd_1b_get(vpath->handle, rxdh, &rxd_priv->dma_addr[0], (u32 *) &rxd_priv->dma_sizes[0]); /* Update newly created buffer to be sent up with packet length */ mbuf_up->m_len = rxd_priv->dma_sizes[0]; mbuf_up->m_pkthdr.len = rxd_priv->dma_sizes[0]; mbuf_up->m_next = NULL; } /* * vxge_rx_rxd_1b_set * Allocates new mbufs to be placed into descriptors */ int vxge_rx_rxd_1b_set(vxge_vpath_t *vpath, vxge_hal_rxd_h rxdh, void *dtr_priv) { int num_segs, err = 0; mbuf_t mbuf_pkt; bus_dmamap_t dma_map; bus_dma_segment_t dma_buffers[1]; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; vxge_dev_t *vdev = vpath->vdev; mbuf_pkt = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, vdev->rx_mbuf_sz); if (!mbuf_pkt) { err = ENOBUFS; VXGE_DRV_STATS(vpath, rx_no_buf); device_printf(vdev->ndev, "out of memory to allocate mbuf\n"); goto _exit0; } /* Update mbuf's length, packet length and receive interface */ mbuf_pkt->m_len = vdev->rx_mbuf_sz; mbuf_pkt->m_pkthdr.len = vdev->rx_mbuf_sz; mbuf_pkt->m_pkthdr.rcvif = vdev->ifp; /* Load DMA map */ err = vxge_dma_mbuf_coalesce(vpath->dma_tag_rx, vpath->extra_dma_map, &mbuf_pkt, dma_buffers, &num_segs); if (err != 0) { VXGE_DRV_STATS(vpath, rx_map_fail); vxge_free_packet(mbuf_pkt); goto _exit0; } /* Unload DMA map of mbuf in current descriptor */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(vpath->dma_tag_rx, rxd_priv->dma_map); /* Update descriptor private data */ dma_map = rxd_priv->dma_map; rxd_priv->mbuf_pkt = mbuf_pkt; rxd_priv->dma_addr[0] = htole64(dma_buffers->ds_addr); rxd_priv->dma_map = vpath->extra_dma_map; vpath->extra_dma_map = dma_map; /* Pre-Read/Write sync */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Set descriptor buffer */ vxge_hal_ring_rxd_1b_set(rxdh, rxd_priv->dma_addr[0], vdev->rx_mbuf_sz); _exit0: return (err); } /* * vxge_link_up * Callback for Link-up indication from HAL */ /* ARGSUSED */ void vxge_link_up(vxge_hal_device_h devh, void *userdata) { int i; vxge_vpath_t *vpath; vxge_hal_device_hw_info_t *hw_info; vxge_dev_t *vdev = (vxge_dev_t *) userdata; hw_info = &vdev->config.hw_info; ifnet_t ifp = vdev->ifp; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_tti_ci_set(vpath->handle); vxge_hal_vpath_rti_ci_set(vpath->handle); } } if (vdev->is_privilaged && (hw_info->ports > 1)) { vxge_active_port_update(vdev); device_printf(vdev->ndev, "Active Port : %lld\n", vdev->active_port); } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } /* * vxge_link_down * Callback for Link-down indication from HAL */ /* ARGSUSED */ void vxge_link_down(vxge_hal_device_h devh, void *userdata) { int i; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) userdata; ifnet_t ifp = vdev->ifp; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_tti_ci_reset(vpath->handle); vxge_hal_vpath_rti_ci_reset(vpath->handle); } } ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_DOWN); } /* * vxge_reset */ void vxge_reset(vxge_dev_t *vdev) { if (!vdev->is_initialized) return; VXGE_DRV_LOCK(vdev); vxge_stop_locked(vdev); vxge_init_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_crit_error * Callback for Critical error indication from HAL */ /* ARGSUSED */ void vxge_crit_error(vxge_hal_device_h devh, void *userdata, vxge_hal_event_e type, u64 serr_data) { vxge_dev_t *vdev = (vxge_dev_t *) userdata; ifnet_t ifp = vdev->ifp; switch (type) { case VXGE_HAL_EVENT_SERR: case VXGE_HAL_EVENT_KDFCCTL: case VXGE_HAL_EVENT_CRITICAL: vxge_hal_device_intr_disable(vdev->devh); ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_DOWN); break; default: break; } } /* * vxge_ifp_setup */ int vxge_ifp_setup(device_t ndev) { ifnet_t ifp; int i, j, err = 0; vxge_dev_t *vdev = (vxge_dev_t *) device_get_softc(ndev); for (i = 0, j = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (!bVAL1(vdev->config.hw_info.vpath_mask, i)) continue; if (j >= vdev->no_of_vpath) break; vdev->vpaths[j].vp_id = i; vdev->vpaths[j].vp_index = j; vdev->vpaths[j].vdev = vdev; vdev->vpaths[j].is_configured = TRUE; vxge_os_memcpy((u8 *) vdev->vpaths[j].mac_addr, (u8 *) (vdev->config.hw_info.mac_addrs[i]), (size_t) ETHER_ADDR_LEN); j++; } /* Get interface ifnet structure for this Ether device */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(vdev->ndev, "memory allocation for ifnet failed\n"); err = ENXIO; goto _exit0; } vdev->ifp = ifp; /* Initialize interface ifnet structure */ if_initname(ifp, device_get_name(ndev), device_get_unit(ndev)); ifp->if_baudrate = VXGE_BAUDRATE; ifp->if_init = vxge_init; ifp->if_softc = vdev; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = vxge_ioctl; ifp->if_start = vxge_send; #if __FreeBSD_version >= 800000 ifp->if_transmit = vxge_mq_send; ifp->if_qflush = vxge_mq_qflush; #endif ifp->if_snd.ifq_drv_maxlen = max(vdev->config.ifq_maxlen, ifqmaxlen); IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); /* IFQ_SET_READY(&ifp->if_snd); */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_JUMBO_MTU; if (vdev->config.tso_enable) vxge_tso_config(vdev); if (vdev->config.lro_enable) ifp->if_capabilities |= IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; strlcpy(vdev->ndev_name, device_get_nameunit(ndev), sizeof(vdev->ndev_name)); /* Attach the interface */ ether_ifattach(ifp, vdev->vpaths[0].mac_addr); _exit0: return (err); } /* * vxge_isr_setup * Register isr functions */ int vxge_isr_setup(vxge_dev_t *vdev) { int i, irq_rid, err = 0; vxge_vpath_t *vpath; void *isr_func_arg; void (*isr_func_ptr) (void *); switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: err = bus_setup_intr(vdev->ndev, vdev->config.isr_info[0].irq_res, (INTR_TYPE_NET | INTR_MPSAFE), vxge_isr_filter, vxge_isr_line, vdev, &vdev->config.isr_info[0].irq_handle); break; case VXGE_HAL_INTR_MODE_MSIX: for (i = 0; i < vdev->intr_count; i++) { irq_rid = vdev->config.isr_info[i].irq_rid; vpath = &vdev->vpaths[irq_rid / 4]; if ((irq_rid % 4) == 2) { isr_func_ptr = vxge_isr_msix; isr_func_arg = (void *) vpath; } else if ((irq_rid % 4) == 3) { isr_func_ptr = vxge_isr_msix_alarm; isr_func_arg = (void *) vpath; } else break; err = bus_setup_intr(vdev->ndev, vdev->config.isr_info[i].irq_res, (INTR_TYPE_NET | INTR_MPSAFE), NULL, (void *) isr_func_ptr, (void *) isr_func_arg, &vdev->config.isr_info[i].irq_handle); if (err != 0) break; } if (err != 0) { /* Teardown interrupt handler */ while (--i > 0) bus_teardown_intr(vdev->ndev, vdev->config.isr_info[i].irq_res, vdev->config.isr_info[i].irq_handle); } break; } return (err); } /* * vxge_isr_filter * ISR filter function - filter interrupts from other shared devices */ int vxge_isr_filter(void *handle) { u64 val64 = 0; vxge_dev_t *vdev = (vxge_dev_t *) handle; __hal_device_t *hldev = (__hal_device_t *) vdev->devh; vxge_hal_common_reg_t *common_reg = (vxge_hal_common_reg_t *) (hldev->common_reg); val64 = vxge_os_pio_mem_read64(vdev->pdev, (vdev->devh)->regh0, &common_reg->titan_general_int_status); return ((val64) ? FILTER_SCHEDULE_THREAD : FILTER_STRAY); } /* * vxge_isr_line * Interrupt service routine for Line interrupts */ void vxge_isr_line(void *vdev_ptr) { vxge_dev_t *vdev = (vxge_dev_t *) vdev_ptr; vxge_hal_device_handle_irq(vdev->devh, 0); } void vxge_isr_msix(void *vpath_ptr) { u32 got_rx = 0; u32 got_tx = 0; __hal_virtualpath_t *hal_vpath; vxge_vpath_t *vpath = (vxge_vpath_t *) vpath_ptr; vxge_dev_t *vdev = vpath->vdev; hal_vpath = ((__hal_vpath_handle_t *) vpath->handle)->vpath; VXGE_DRV_STATS(vpath, isr_msix); VXGE_HAL_DEVICE_STATS_SW_INFO_TRAFFIC_INTR(vdev->devh); vxge_hal_vpath_mf_msix_mask(vpath->handle, vpath->msix_vec); /* processing rx */ vxge_hal_vpath_poll_rx(vpath->handle, &got_rx); /* processing tx */ if (hal_vpath->vp_config->fifo.enable) { vxge_intr_coalesce_tx(vpath); vxge_hal_vpath_poll_tx(vpath->handle, &got_tx); } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec); } void vxge_isr_msix_alarm(void *vpath_ptr) { int i; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) vpath_ptr; vxge_dev_t *vdev = vpath->vdev; VXGE_HAL_DEVICE_STATS_SW_INFO_NOT_TRAFFIC_INTR(vdev->devh); /* Process alarms in each vpath */ for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_mf_msix_mask(vpath->handle, vpath->msix_vec_alarm); status = vxge_hal_vpath_alarm_process(vpath->handle, 0); if ((status == VXGE_HAL_ERR_EVENT_SLOT_FREEZE) || (status == VXGE_HAL_ERR_EVENT_SERR)) { device_printf(vdev->ndev, "processing alarms urecoverable error %x\n", status); /* Stop the driver */ vdev->is_initialized = FALSE; break; } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec_alarm); } } /* * vxge_msix_enable */ vxge_hal_status_e vxge_msix_enable(vxge_dev_t *vdev) { int i, first_vp_id, msix_id; vxge_vpath_t *vpath; vxge_hal_status_e status = VXGE_HAL_OK; /* * Unmasking and Setting MSIX vectors before enabling interrupts * tim[] : 0 - Tx ## 1 - Rx ## 2 - UMQ-DMQ ## 0 - BITMAP */ int tim[4] = {0, 1, 0, 0}; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = vdev->vpaths + i; first_vp_id = vdev->vpaths[0].vp_id; msix_id = vpath->vp_id * VXGE_HAL_VPATH_MSIX_ACTIVE; tim[1] = vpath->msix_vec = msix_id + 1; vpath->msix_vec_alarm = first_vp_id * VXGE_HAL_VPATH_MSIX_ACTIVE + VXGE_HAL_VPATH_MSIX_ALARM_ID; status = vxge_hal_vpath_mf_msix_set(vpath->handle, tim, VXGE_HAL_VPATH_MSIX_ALARM_ID); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to set msix vectors to vpath\n"); break; } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec); vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec_alarm); } return (status); } /* * vxge_media_init * Initializes, adds and sets media */ void vxge_media_init(vxge_dev_t *vdev) { ifmedia_init(&vdev->media, IFM_IMASK, vxge_media_change, vxge_media_status); /* Add supported media */ ifmedia_add(&vdev->media, IFM_ETHER | vdev->ifm_optics | IFM_FDX, 0, NULL); /* Set media */ ifmedia_add(&vdev->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&vdev->media, IFM_ETHER | IFM_AUTO); } /* * vxge_media_status * Callback for interface media settings */ void vxge_media_status(ifnet_t ifp, struct ifmediareq *ifmr) { vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; vxge_hal_device_t *hldev = vdev->devh; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* set link state */ if (vxge_hal_device_link_state_get(hldev) == VXGE_HAL_LINK_UP) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= vdev->ifm_optics | IFM_FDX; if_link_state_change(ifp, LINK_STATE_UP); } } /* * vxge_media_change * Media change driver callback */ int vxge_media_change(ifnet_t ifp) { vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; struct ifmedia *ifmediap = &vdev->media; return (IFM_TYPE(ifmediap->ifm_media) != IFM_ETHER ? EINVAL : 0); } /* * Allocate PCI resources */ int vxge_alloc_resources(vxge_dev_t *vdev) { int err = 0; vxge_pci_info_t *pci_info = NULL; vxge_free_resources_e error_level = VXGE_FREE_NONE; device_t ndev = vdev->ndev; /* Allocate Buffer for HAL Device Configuration */ vdev->device_config = (vxge_hal_device_config_t *) vxge_mem_alloc(sizeof(vxge_hal_device_config_t)); if (!vdev->device_config) { err = ENOMEM; error_level = VXGE_DISABLE_PCI_BUSMASTER; device_printf(vdev->ndev, "failed to allocate memory for device config\n"); goto _exit0; } pci_info = (vxge_pci_info_t *) vxge_mem_alloc(sizeof(vxge_pci_info_t)); if (!pci_info) { error_level = VXGE_FREE_DEVICE_CONFIG; err = ENOMEM; device_printf(vdev->ndev, "failed to allocate memory for pci info\n"); goto _exit0; } pci_info->ndev = ndev; vdev->pdev = pci_info; err = vxge_alloc_bar_resources(vdev, 0); if (err != 0) { error_level = VXGE_FREE_BAR0; goto _exit0; } err = vxge_alloc_bar_resources(vdev, 1); if (err != 0) { error_level = VXGE_FREE_BAR1; goto _exit0; } err = vxge_alloc_bar_resources(vdev, 2); if (err != 0) error_level = VXGE_FREE_BAR2; _exit0: if (error_level) vxge_free_resources(ndev, error_level); return (err); } /* * vxge_alloc_bar_resources * Allocates BAR resources */ int vxge_alloc_bar_resources(vxge_dev_t *vdev, int i) { int err = 0; int res_id = 0; vxge_pci_info_t *pci_info = vdev->pdev; res_id = PCIR_BAR((i == 0) ? 0 : (i * 2)); pci_info->bar_info[i] = bus_alloc_resource_any(vdev->ndev, SYS_RES_MEMORY, &res_id, RF_ACTIVE); if (pci_info->bar_info[i] == NULL) { device_printf(vdev->ndev, "failed to allocate memory for bus resources\n"); err = ENOMEM; goto _exit0; } pci_info->reg_map[i] = (vxge_bus_res_t *) vxge_mem_alloc(sizeof(vxge_bus_res_t)); if (pci_info->reg_map[i] == NULL) { device_printf(vdev->ndev, "failed to allocate memory bar resources\n"); err = ENOMEM; goto _exit0; } ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_space_tag = rman_get_bustag(pci_info->bar_info[i]); ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_space_handle = rman_get_bushandle(pci_info->bar_info[i]); ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bar_start_addr = pci_info->bar_info[i]; ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_res_len = rman_get_size(pci_info->bar_info[i]); _exit0: return (err); } /* * vxge_alloc_isr_resources */ int vxge_alloc_isr_resources(vxge_dev_t *vdev) { int i, err = 0, irq_rid; int msix_vec_reqd, intr_count, msix_count; int intr_mode = VXGE_HAL_INTR_MODE_IRQLINE; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { /* MSI-X messages supported by device */ intr_count = pci_msix_count(vdev->ndev); if (intr_count) { msix_vec_reqd = 4 * vdev->no_of_vpath; if (intr_count >= msix_vec_reqd) { intr_count = msix_vec_reqd; err = pci_alloc_msix(vdev->ndev, &intr_count); if (err == 0) intr_mode = VXGE_HAL_INTR_MODE_MSIX; } if ((err != 0) || (intr_count < msix_vec_reqd)) { device_printf(vdev->ndev, "Unable to allocate " "msi/x vectors switching to INTA mode\n"); } } } err = 0; vdev->intr_count = 0; vdev->config.intr_mode = intr_mode; switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: vdev->config.isr_info[0].irq_rid = 0; vdev->config.isr_info[0].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[0].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[0].irq_res == NULL) { device_printf(vdev->ndev, "failed to allocate line interrupt resource\n"); err = ENOMEM; goto _exit0; } vdev->intr_count++; break; case VXGE_HAL_INTR_MODE_MSIX: msix_count = 0; for (i = 0; i < vdev->no_of_vpath; i++) { irq_rid = i * 4; vdev->config.isr_info[msix_count].irq_rid = irq_rid + 2; vdev->config.isr_info[msix_count].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[msix_count].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[msix_count].irq_res == NULL) { device_printf(vdev->ndev, "allocating bus resource (rid %d) failed\n", vdev->config.isr_info[msix_count].irq_rid); err = ENOMEM; goto _exit0; } vdev->intr_count++; err = bus_bind_intr(vdev->ndev, vdev->config.isr_info[msix_count].irq_res, (i % mp_ncpus)); if (err != 0) break; msix_count++; } vdev->config.isr_info[msix_count].irq_rid = 3; vdev->config.isr_info[msix_count].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[msix_count].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[msix_count].irq_res == NULL) { device_printf(vdev->ndev, "allocating bus resource (rid %d) failed\n", vdev->config.isr_info[msix_count].irq_rid); err = ENOMEM; goto _exit0; } vdev->intr_count++; err = bus_bind_intr(vdev->ndev, vdev->config.isr_info[msix_count].irq_res, (i % mp_ncpus)); break; } vdev->device_config->intr_mode = vdev->config.intr_mode; _exit0: return (err); } /* * vxge_free_resources * Undo what-all we did during load/attach */ void vxge_free_resources(device_t ndev, vxge_free_resources_e vxge_free_resource) { int i; vxge_dev_t *vdev; vdev = (vxge_dev_t *) device_get_softc(ndev); switch (vxge_free_resource) { case VXGE_FREE_ALL: for (i = 0; i < vdev->intr_count; i++) { bus_teardown_intr(ndev, vdev->config.isr_info[i].irq_res, vdev->config.isr_info[i].irq_handle); } /* FALLTHROUGH */ case VXGE_FREE_INTERFACE: ether_ifdetach(vdev->ifp); bus_generic_detach(ndev); if_free(vdev->ifp); /* FALLTHROUGH */ case VXGE_FREE_MEDIA: ifmedia_removeall(&vdev->media); /* FALLTHROUGH */ case VXGE_FREE_MUTEX: vxge_mutex_destroy(vdev); /* FALLTHROUGH */ case VXGE_FREE_VPATH: vxge_mem_free(vdev->vpaths, vdev->no_of_vpath * sizeof(vxge_vpath_t)); /* FALLTHROUGH */ case VXGE_FREE_TERMINATE_DEVICE: if (vdev->devh != NULL) { vxge_hal_device_private_set(vdev->devh, 0); vxge_hal_device_terminate(vdev->devh); } /* FALLTHROUGH */ case VXGE_FREE_ISR_RESOURCE: vxge_free_isr_resources(vdev); /* FALLTHROUGH */ case VXGE_FREE_BAR2: vxge_free_bar_resources(vdev, 2); /* FALLTHROUGH */ case VXGE_FREE_BAR1: vxge_free_bar_resources(vdev, 1); /* FALLTHROUGH */ case VXGE_FREE_BAR0: vxge_free_bar_resources(vdev, 0); /* FALLTHROUGH */ case VXGE_FREE_PCI_INFO: vxge_mem_free(vdev->pdev, sizeof(vxge_pci_info_t)); /* FALLTHROUGH */ case VXGE_FREE_DEVICE_CONFIG: vxge_mem_free(vdev->device_config, sizeof(vxge_hal_device_config_t)); /* FALLTHROUGH */ case VXGE_DISABLE_PCI_BUSMASTER: pci_disable_busmaster(ndev); /* FALLTHROUGH */ case VXGE_FREE_TERMINATE_DRIVER: if (vxge_dev_ref_count) { --vxge_dev_ref_count; if (0 == vxge_dev_ref_count) vxge_hal_driver_terminate(); } /* FALLTHROUGH */ default: case VXGE_FREE_NONE: break; /* NOTREACHED */ } } void vxge_free_isr_resources(vxge_dev_t *vdev) { int i; switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: if (vdev->config.isr_info[0].irq_res) { bus_release_resource(vdev->ndev, SYS_RES_IRQ, vdev->config.isr_info[0].irq_rid, vdev->config.isr_info[0].irq_res); vdev->config.isr_info[0].irq_res = NULL; } break; case VXGE_HAL_INTR_MODE_MSIX: for (i = 0; i < vdev->intr_count; i++) { if (vdev->config.isr_info[i].irq_res) { bus_release_resource(vdev->ndev, SYS_RES_IRQ, vdev->config.isr_info[i].irq_rid, vdev->config.isr_info[i].irq_res); vdev->config.isr_info[i].irq_res = NULL; } } if (vdev->intr_count) pci_release_msi(vdev->ndev); break; } } void vxge_free_bar_resources(vxge_dev_t *vdev, int i) { int res_id = 0; vxge_pci_info_t *pci_info = vdev->pdev; res_id = PCIR_BAR((i == 0) ? 0 : (i * 2)); if (pci_info->bar_info[i]) bus_release_resource(vdev->ndev, SYS_RES_MEMORY, res_id, pci_info->bar_info[i]); vxge_mem_free(pci_info->reg_map[i], sizeof(vxge_bus_res_t)); } /* * vxge_init_mutex * Initializes mutexes used in driver */ void vxge_mutex_init(vxge_dev_t *vdev) { int i; snprintf(vdev->mtx_drv_name, sizeof(vdev->mtx_drv_name), "%s_drv", vdev->ndev_name); mtx_init(&vdev->mtx_drv, vdev->mtx_drv_name, MTX_NETWORK_LOCK, MTX_DEF); for (i = 0; i < vdev->no_of_vpath; i++) { snprintf(vdev->vpaths[i].mtx_tx_name, sizeof(vdev->vpaths[i].mtx_tx_name), "%s_tx_%d", vdev->ndev_name, i); mtx_init(&vdev->vpaths[i].mtx_tx, vdev->vpaths[i].mtx_tx_name, NULL, MTX_DEF); } } /* * vxge_mutex_destroy * Destroys mutexes used in driver */ void vxge_mutex_destroy(vxge_dev_t *vdev) { int i; for (i = 0; i < vdev->no_of_vpath; i++) VXGE_TX_LOCK_DESTROY(&(vdev->vpaths[i])); VXGE_DRV_LOCK_DESTROY(vdev); } /* * vxge_rth_config */ vxge_hal_status_e vxge_rth_config(vxge_dev_t *vdev) { int i; vxge_hal_vpath_h vpath_handle; vxge_hal_rth_hash_types_t hash_types; vxge_hal_status_e status = VXGE_HAL_OK; u8 mtable[256] = {0}; /* Filling matable with bucket-to-vpath mapping */ vdev->config.rth_bkt_sz = VXGE_DEFAULT_RTH_BUCKET_SIZE; for (i = 0; i < (1 << vdev->config.rth_bkt_sz); i++) mtable[i] = i % vdev->no_of_vpath; /* Fill RTH hash types */ hash_types.hash_type_tcpipv4_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV4; hash_types.hash_type_tcpipv6_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV6; hash_types.hash_type_tcpipv6ex_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV6_EX; hash_types.hash_type_ipv4_en = VXGE_HAL_RING_HASH_TYPE_IPV4; hash_types.hash_type_ipv6_en = VXGE_HAL_RING_HASH_TYPE_IPV6; hash_types.hash_type_ipv6ex_en = VXGE_HAL_RING_HASH_TYPE_IPV6_EX; /* set indirection table, bucket-to-vpath mapping */ status = vxge_hal_vpath_rts_rth_itable_set(vdev->vpath_handles, vdev->no_of_vpath, mtable, ((u32) (1 << vdev->config.rth_bkt_sz))); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "rth configuration failed\n"); goto _exit0; } for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; status = vxge_hal_vpath_rts_rth_set(vpath_handle, RTH_ALG_JENKINS, &hash_types, vdev->config.rth_bkt_sz, TRUE); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "rth configuration failed for vpath (%d)\n", vdev->vpaths[i].vp_id); break; } } _exit0: return (status); } /* * vxge_vpath_config * Sets HAL parameter values from kenv */ void vxge_vpath_config(vxge_dev_t *vdev) { int i; u32 no_of_vpath = 0; vxge_hal_vp_config_t *vp_config; vxge_hal_device_config_t *device_config = vdev->device_config; device_config->debug_level = VXGE_TRACE; device_config->debug_mask = VXGE_COMPONENT_ALL; device_config->device_poll_millis = VXGE_DEFAULT_DEVICE_POLL_MILLIS; vdev->config.no_of_vpath = min(vdev->config.no_of_vpath, vdev->max_supported_vpath); for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { vp_config = &(device_config->vp_config[i]); vp_config->fifo.enable = VXGE_HAL_FIFO_DISABLE; vp_config->ring.enable = VXGE_HAL_RING_DISABLE; } for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (no_of_vpath >= vdev->config.no_of_vpath) break; if (!bVAL1(vdev->config.hw_info.vpath_mask, i)) continue; no_of_vpath++; vp_config = &(device_config->vp_config[i]); vp_config->mtu = VXGE_HAL_DEFAULT_MTU; vp_config->ring.enable = VXGE_HAL_RING_ENABLE; vp_config->ring.post_mode = VXGE_HAL_RING_POST_MODE_DOORBELL; vp_config->ring.buffer_mode = VXGE_HAL_RING_RXD_BUFFER_MODE_1; vp_config->ring.ring_length = vxge_ring_length_get(VXGE_HAL_RING_RXD_BUFFER_MODE_1); vp_config->ring.scatter_mode = VXGE_HAL_RING_SCATTER_MODE_A; vp_config->rpa_all_vid_en = VXGE_DEFAULT_ALL_VID_ENABLE; vp_config->rpa_strip_vlan_tag = VXGE_DEFAULT_STRIP_VLAN_TAG; vp_config->rpa_ucast_all_addr_en = VXGE_HAL_VPATH_RPA_UCAST_ALL_ADDR_DISABLE; vp_config->rti.intr_enable = VXGE_HAL_TIM_INTR_ENABLE; vp_config->rti.txfrm_cnt_en = VXGE_HAL_TXFRM_CNT_EN_ENABLE; vp_config->rti.util_sel = VXGE_HAL_TIM_UTIL_SEL_LEGACY_RX_NET_UTIL; vp_config->rti.uec_a = VXGE_DEFAULT_RTI_RX_UFC_A; vp_config->rti.uec_b = VXGE_DEFAULT_RTI_RX_UFC_B; vp_config->rti.uec_c = VXGE_DEFAULT_RTI_RX_UFC_C; vp_config->rti.uec_d = VXGE_DEFAULT_RTI_RX_UFC_D; vp_config->rti.urange_a = VXGE_DEFAULT_RTI_RX_URANGE_A; vp_config->rti.urange_b = VXGE_DEFAULT_RTI_RX_URANGE_B; vp_config->rti.urange_c = VXGE_DEFAULT_RTI_RX_URANGE_C; vp_config->rti.timer_ac_en = VXGE_HAL_TIM_TIMER_AC_ENABLE; vp_config->rti.timer_ci_en = VXGE_HAL_TIM_TIMER_CI_ENABLE; vp_config->rti.btimer_val = (VXGE_DEFAULT_RTI_BTIMER_VAL * 1000) / 272; vp_config->rti.rtimer_val = (VXGE_DEFAULT_RTI_RTIMER_VAL * 1000) / 272; vp_config->rti.ltimer_val = (VXGE_DEFAULT_RTI_LTIMER_VAL * 1000) / 272; if ((no_of_vpath > 1) && (VXGE_DEFAULT_CONFIG_MQ_ENABLE == 0)) continue; vp_config->fifo.enable = VXGE_HAL_FIFO_ENABLE; vp_config->fifo.max_aligned_frags = VXGE_DEFAULT_FIFO_ALIGNED_FRAGS; vp_config->tti.intr_enable = VXGE_HAL_TIM_INTR_ENABLE; vp_config->tti.txfrm_cnt_en = VXGE_HAL_TXFRM_CNT_EN_ENABLE; vp_config->tti.util_sel = VXGE_HAL_TIM_UTIL_SEL_LEGACY_TX_NET_UTIL; vp_config->tti.uec_a = VXGE_DEFAULT_TTI_TX_UFC_A; vp_config->tti.uec_b = VXGE_DEFAULT_TTI_TX_UFC_B; vp_config->tti.uec_c = VXGE_DEFAULT_TTI_TX_UFC_C; vp_config->tti.uec_d = VXGE_DEFAULT_TTI_TX_UFC_D; vp_config->tti.urange_a = VXGE_DEFAULT_TTI_TX_URANGE_A; vp_config->tti.urange_b = VXGE_DEFAULT_TTI_TX_URANGE_B; vp_config->tti.urange_c = VXGE_DEFAULT_TTI_TX_URANGE_C; vp_config->tti.timer_ac_en = VXGE_HAL_TIM_TIMER_AC_ENABLE; vp_config->tti.timer_ci_en = VXGE_HAL_TIM_TIMER_CI_ENABLE; vp_config->tti.btimer_val = (VXGE_DEFAULT_TTI_BTIMER_VAL * 1000) / 272; vp_config->tti.rtimer_val = (VXGE_DEFAULT_TTI_RTIMER_VAL * 1000) / 272; vp_config->tti.ltimer_val = (VXGE_DEFAULT_TTI_LTIMER_VAL * 1000) / 272; } vdev->no_of_vpath = no_of_vpath; if (vdev->no_of_vpath == 1) vdev->config.tx_steering = 0; if (vdev->config.rth_enable && (vdev->no_of_vpath > 1)) { device_config->rth_en = VXGE_HAL_RTH_ENABLE; device_config->rth_it_type = VXGE_HAL_RTH_IT_TYPE_MULTI_IT; } vdev->config.rth_enable = device_config->rth_en; } /* * vxge_vpath_cb_fn * Virtual path Callback function */ /* ARGSUSED */ static vxge_hal_status_e vxge_vpath_cb_fn(vxge_hal_client_h client_handle, vxge_hal_up_msg_h msgh, vxge_hal_message_type_e msg_type, vxge_hal_obj_id_t obj_id, vxge_hal_result_e result, vxge_hal_opaque_handle_t *opaque_handle) { return (VXGE_HAL_OK); } /* * vxge_vpath_open */ int vxge_vpath_open(vxge_dev_t *vdev) { int i, err = EINVAL; u64 func_id; vxge_vpath_t *vpath; vxge_hal_vpath_attr_t vpath_attr; vxge_hal_status_e status = VXGE_HAL_OK; struct lro_ctrl *lro = NULL; bzero(&vpath_attr, sizeof(vxge_hal_vpath_attr_t)); for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); lro = &vpath->lro; /* Vpath vpath_attr: FIFO */ vpath_attr.vp_id = vpath->vp_id; vpath_attr.fifo_attr.callback = vxge_tx_compl; vpath_attr.fifo_attr.txdl_init = vxge_tx_replenish; vpath_attr.fifo_attr.txdl_term = vxge_tx_term; vpath_attr.fifo_attr.userdata = vpath; vpath_attr.fifo_attr.per_txdl_space = sizeof(vxge_txdl_priv_t); /* Vpath vpath_attr: Ring */ vpath_attr.ring_attr.callback = vxge_rx_compl; vpath_attr.ring_attr.rxd_init = vxge_rx_replenish; vpath_attr.ring_attr.rxd_term = vxge_rx_term; vpath_attr.ring_attr.userdata = vpath; vpath_attr.ring_attr.per_rxd_space = sizeof(vxge_rxd_priv_t); err = vxge_dma_tags_create(vpath); if (err != 0) { device_printf(vdev->ndev, "failed to create dma tags\n"); break; } #if __FreeBSD_version >= 800000 vpath->br = buf_ring_alloc(VXGE_DEFAULT_BR_SIZE, M_DEVBUF, M_WAITOK, &vpath->mtx_tx); if (vpath->br == NULL) { err = ENOMEM; break; } #endif status = vxge_hal_vpath_open(vdev->devh, &vpath_attr, (vxge_hal_vpath_callback_f) vxge_vpath_cb_fn, NULL, &vpath->handle); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to open vpath (%d)\n", vpath->vp_id); err = EPERM; break; } vpath->is_open = TRUE; vdev->vpath_handles[i] = vpath->handle; vpath->tx_ticks = ticks; vpath->rx_ticks = ticks; vpath->tti_rtimer_val = VXGE_DEFAULT_TTI_RTIMER_VAL; vpath->tti_rtimer_val = VXGE_DEFAULT_TTI_RTIMER_VAL; vpath->tx_intr_coalesce = vdev->config.intr_coalesce; vpath->rx_intr_coalesce = vdev->config.intr_coalesce; func_id = vdev->config.hw_info.func_id; if (vdev->config.low_latency && (vdev->config.bw_info[func_id].priority == VXGE_DEFAULT_VPATH_PRIORITY_HIGH)) { vpath->tx_intr_coalesce = 0; } if (vdev->ifp->if_capenable & IFCAP_LRO) { err = tcp_lro_init(lro); if (err != 0) { device_printf(vdev->ndev, "LRO Initialization failed!\n"); break; } vpath->lro_enable = TRUE; lro->ifp = vdev->ifp; } } return (err); } void vxge_tso_config(vxge_dev_t *vdev) { u32 func_id, priority; vxge_hal_status_e status = VXGE_HAL_OK; vdev->ifp->if_capabilities |= IFCAP_TSO4; status = vxge_bw_priority_get(vdev, NULL); if (status == VXGE_HAL_OK) { func_id = vdev->config.hw_info.func_id; priority = vdev->config.bw_info[func_id].priority; if (priority != VXGE_DEFAULT_VPATH_PRIORITY_HIGH) vdev->ifp->if_capabilities &= ~IFCAP_TSO4; } #if __FreeBSD_version >= 800000 if (vdev->ifp->if_capabilities & IFCAP_TSO4) vdev->ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif } vxge_hal_status_e vxge_bw_priority_get(vxge_dev_t *vdev, vxge_bw_info_t *bw_info) { u32 priority, bandwidth; u32 vpath_count; u64 func_id, func_mode, vpath_list[VXGE_HAL_MAX_VIRTUAL_PATHS]; vxge_hal_status_e status = VXGE_HAL_OK; func_id = vdev->config.hw_info.func_id; if (bw_info) { func_id = bw_info->func_id; func_mode = vdev->config.hw_info.function_mode; if ((is_single_func(func_mode)) && (func_id > 0)) return (VXGE_HAL_FAIL); } if (vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 0)) { status = vxge_hal_vf_rx_bw_get(vdev->devh, func_id, &bandwidth, &priority); } else { status = vxge_hal_get_vpath_list(vdev->devh, func_id, vpath_list, &vpath_count); if (status == VXGE_HAL_OK) { status = vxge_hal_bw_priority_get(vdev->devh, vpath_list[0], &bandwidth, &priority); } } if (status == VXGE_HAL_OK) { if (bw_info) { bw_info->priority = priority; bw_info->bandwidth = bandwidth; } else { vdev->config.bw_info[func_id].priority = priority; vdev->config.bw_info[func_id].bandwidth = bandwidth; } } return (status); } /* * close vpaths */ void vxge_vpath_close(vxge_dev_t *vdev) { int i; vxge_vpath_t *vpath; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); if (vpath->handle) vxge_hal_vpath_close(vpath->handle); #if __FreeBSD_version >= 800000 if (vpath->br != NULL) buf_ring_free(vpath->br, M_DEVBUF); #endif /* Free LRO memory */ if (vpath->lro_enable) tcp_lro_free(&vpath->lro); if (vpath->dma_tag_rx) { bus_dmamap_destroy(vpath->dma_tag_rx, vpath->extra_dma_map); bus_dma_tag_destroy(vpath->dma_tag_rx); } if (vpath->dma_tag_tx) bus_dma_tag_destroy(vpath->dma_tag_tx); vpath->handle = NULL; vpath->is_open = FALSE; } } /* * reset vpaths */ void vxge_vpath_reset(vxge_dev_t *vdev) { int i; vxge_hal_vpath_h vpath_handle; vxge_hal_status_e status = VXGE_HAL_OK; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; status = vxge_hal_vpath_reset(vpath_handle); if (status != VXGE_HAL_OK) device_printf(vdev->ndev, "failed to reset vpath :%d\n", i); } } static inline int vxge_vpath_get(vxge_dev_t *vdev, mbuf_t mhead) { struct tcphdr *th = NULL; struct udphdr *uh = NULL; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; struct ether_vlan_header *eth = NULL; void *ulp = NULL; int ehdrlen, iphlen = 0; u8 ipproto = 0; u16 etype, src_port, dst_port; u16 queue_len, counter = 0; src_port = dst_port = 0; queue_len = vdev->no_of_vpath; eth = mtod(mhead, struct ether_vlan_header *); if (eth->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eth->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eth->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } switch (etype) { case ETHERTYPE_IP: ip = (struct ip *) (mhead->m_data + ehdrlen); iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; th = (struct tcphdr *) ((caddr_t)ip + iphlen); uh = (struct udphdr *) ((caddr_t)ip + iphlen); break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *) (mhead->m_data + ehdrlen); iphlen = sizeof(struct ip6_hdr); ipproto = ip6->ip6_nxt; ulp = mtod(mhead, char *) + iphlen; th = ((struct tcphdr *) (ulp)); uh = ((struct udphdr *) (ulp)); break; default: break; } switch (ipproto) { case IPPROTO_TCP: src_port = th->th_sport; dst_port = th->th_dport; break; case IPPROTO_UDP: src_port = uh->uh_sport; dst_port = uh->uh_dport; break; default: break; } counter = (ntohs(src_port) + ntohs(dst_port)) & vpath_selector[queue_len - 1]; if (counter >= queue_len) counter = queue_len - 1; return (counter); } static inline vxge_hal_vpath_h vxge_vpath_handle_get(vxge_dev_t *vdev, int i) { return (vdev->vpaths[i].is_open ? vdev->vpaths[i].handle : NULL); } int vxge_firmware_verify(vxge_dev_t *vdev) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; if (vdev->fw_upgrade) { status = vxge_firmware_upgrade(vdev); if (status == VXGE_HAL_OK) { err = ENXIO; goto _exit0; } } if ((vdev->config.function_mode != VXGE_DEFAULT_CONFIG_VALUE) && (vdev->config.hw_info.function_mode != (u64) vdev->config.function_mode)) { status = vxge_func_mode_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } /* l2_switch configuration */ active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_L2SwitchEnabled, &active_config); if (status == VXGE_HAL_OK) { vdev->l2_switch = active_config; if (vdev->config.l2_switch != VXGE_DEFAULT_CONFIG_VALUE) { if (vdev->config.l2_switch != active_config) { status = vxge_l2switch_mode_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } } } if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { if (vxge_port_mode_update(vdev) == ENXIO) err = ENXIO; } _exit0: if (err == ENXIO) device_printf(vdev->ndev, "PLEASE POWER CYCLE THE SYSTEM\n"); return (err); } vxge_hal_status_e vxge_firmware_upgrade(vxge_dev_t *vdev) { u8 *fw_buffer; u32 fw_size; vxge_hal_device_hw_info_t *hw_info; vxge_hal_status_e status = VXGE_HAL_OK; hw_info = &vdev->config.hw_info; fw_size = sizeof(VXGE_FW_ARRAY_NAME); fw_buffer = (u8 *) VXGE_FW_ARRAY_NAME; device_printf(vdev->ndev, "Current firmware version : %s (%s)\n", hw_info->fw_version.version, hw_info->fw_date.date); device_printf(vdev->ndev, "Upgrading firmware to %d.%d.%d\n", VXGE_MIN_FW_MAJOR_VERSION, VXGE_MIN_FW_MINOR_VERSION, VXGE_MIN_FW_BUILD_NUMBER); /* Call HAL API to upgrade firmware */ status = vxge_hal_mrpcim_fw_upgrade(vdev->pdev, (pci_reg_h) vdev->pdev->reg_map[0], (u8 *) vdev->pdev->bar_info[0], fw_buffer, fw_size); device_printf(vdev->ndev, "firmware upgrade %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); return (status); } vxge_hal_status_e vxge_func_mode_set(vxge_dev_t *vdev) { u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_mrpcim_pcie_func_mode_set(vdev->devh, vdev->config.function_mode); device_printf(vdev->ndev, "function mode change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) { vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); /* * If in MF + DP mode * if user changes to SF, change port_mode to single port mode */ if (((is_multi_func(vdev->config.hw_info.function_mode)) && is_single_func(vdev->config.function_mode)) && (active_config == VXGE_HAL_DP_NP_MODE_DUAL_PORT)) { vdev->config.port_mode = VXGE_HAL_DP_NP_MODE_SINGLE_PORT; status = vxge_port_mode_set(vdev); } } return (status); } vxge_hal_status_e vxge_port_mode_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_port_mode(vdev->devh, vdev->config.port_mode); device_printf(vdev->ndev, "port mode change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) { vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); /* Configure vpath_mapping for active-active mode only */ if (vdev->config.port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) { status = vxge_hal_config_vpath_map(vdev->devh, VXGE_DUAL_PORT_MAP); device_printf(vdev->ndev, "dual port map change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); } } return (status); } int vxge_port_mode_update(vxge_dev_t *vdev) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; if ((vdev->config.port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) && is_single_func(vdev->config.hw_info.function_mode)) { device_printf(vdev->ndev, "Adapter in SF mode, dual port mode is not allowed\n"); err = EPERM; goto _exit0; } active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } vdev->port_mode = active_config; if (vdev->config.port_mode != VXGE_DEFAULT_CONFIG_VALUE) { if (vdev->config.port_mode != vdev->port_mode) { status = vxge_port_mode_set(vdev); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } err = ENXIO; vdev->port_mode = vdev->config.port_mode; } } active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_BehaviourOnFail, &active_config); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } vdev->port_failure = active_config; /* * active/active mode : set to NoMove * active/passive mode: set to Failover-Failback */ if (vdev->port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) vdev->config.port_failure = VXGE_HAL_XMAC_NWIF_OnFailure_NoMove; else if (vdev->port_mode == VXGE_HAL_DP_NP_MODE_ACTIVE_PASSIVE) vdev->config.port_failure = VXGE_HAL_XMAC_NWIF_OnFailure_OtherPortBackOnRestore; if ((vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) && (vdev->config.port_failure != vdev->port_failure)) { status = vxge_port_behavior_on_failure_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } _exit0: return (err); } vxge_hal_status_e vxge_port_mode_get(vxge_dev_t *vdev, vxge_port_info_t *port_info) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); if (status != VXGE_HAL_OK) { err = ENXIO; goto _exit0; } port_info->port_mode = active_config; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_BehaviourOnFail, &active_config); if (status != VXGE_HAL_OK) { err = ENXIO; goto _exit0; } port_info->port_failure = active_config; _exit0: return (err); } vxge_hal_status_e vxge_port_behavior_on_failure_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_behavior_on_failure(vdev->devh, vdev->config.port_failure); device_printf(vdev->ndev, "port behaviour on failure change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); return (status); } void vxge_active_port_update(vxge_dev_t *vdev) { u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_ActivePort, &active_config); if (status == VXGE_HAL_OK) vdev->active_port = active_config; } vxge_hal_status_e vxge_l2switch_mode_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_l2switch_mode(vdev->devh, vdev->config.l2_switch); device_printf(vdev->ndev, "L2 switch %s\n", (status == VXGE_HAL_OK) ? (vdev->config.l2_switch) ? "enable" : "disable" : "change failed"); if (status == VXGE_HAL_OK) vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); return (status); } /* * vxge_promisc_set * Enable Promiscuous Mode */ void vxge_promisc_set(vxge_dev_t *vdev) { int i; ifnet_t ifp; vxge_hal_vpath_h vpath_handle; if (!vdev->is_initialized) return; ifp = vdev->ifp; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; if (ifp->if_flags & IFF_PROMISC) vxge_hal_vpath_promisc_enable(vpath_handle); else vxge_hal_vpath_promisc_disable(vpath_handle); } } /* * vxge_change_mtu * Change interface MTU to a requested valid size */ int vxge_change_mtu(vxge_dev_t *vdev, unsigned long new_mtu) { int err = EINVAL; if ((new_mtu < VXGE_HAL_MIN_MTU) || (new_mtu > VXGE_HAL_MAX_MTU)) goto _exit0; (vdev->ifp)->if_mtu = new_mtu; - device_printf(vdev->ndev, "MTU changed to %ld\n", (vdev->ifp)->if_mtu); + device_printf(vdev->ndev, "MTU changed to %u\n", (vdev->ifp)->if_mtu); if (vdev->is_initialized) { if_down(vdev->ifp); vxge_reset(vdev); if_up(vdev->ifp); } err = 0; _exit0: return (err); } /* * Creates DMA tags for both Tx and Rx */ int vxge_dma_tags_create(vxge_vpath_t *vpath) { int err = 0; bus_size_t max_size, boundary; vxge_dev_t *vdev = vpath->vdev; ifnet_t ifp = vdev->ifp; max_size = ifp->if_mtu + VXGE_HAL_MAC_HEADER_MAX_SIZE + VXGE_HAL_HEADER_ETHERNET_II_802_3_ALIGN; VXGE_BUFFER_ALIGN(max_size, 128) if (max_size <= MCLBYTES) vdev->rx_mbuf_sz = MCLBYTES; else vdev->rx_mbuf_sz = (max_size > MJUMPAGESIZE) ? MJUM9BYTES : MJUMPAGESIZE; boundary = (max_size > PAGE_SIZE) ? 0 : PAGE_SIZE; /* DMA tag for Tx */ err = bus_dma_tag_create( bus_get_dma_tag(vdev->ndev), 1, PAGE_SIZE, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, VXGE_TSO_SIZE, VXGE_MAX_SEGS, PAGE_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &(vpath->dma_tag_tx)); if (err != 0) goto _exit0; /* DMA tag for Rx */ err = bus_dma_tag_create( bus_get_dma_tag(vdev->ndev), 1, boundary, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, vdev->rx_mbuf_sz, 1, vdev->rx_mbuf_sz, BUS_DMA_ALLOCNOW, NULL, NULL, &(vpath->dma_tag_rx)); if (err != 0) goto _exit1; /* Create DMA map for this descriptor */ err = bus_dmamap_create(vpath->dma_tag_rx, BUS_DMA_NOWAIT, &vpath->extra_dma_map); if (err == 0) goto _exit0; bus_dma_tag_destroy(vpath->dma_tag_rx); _exit1: bus_dma_tag_destroy(vpath->dma_tag_tx); _exit0: return (err); } static inline int vxge_dma_mbuf_coalesce(bus_dma_tag_t dma_tag_tx, bus_dmamap_t dma_map, mbuf_t * m_headp, bus_dma_segment_t * dma_buffers, int *num_segs) { int err = 0; mbuf_t mbuf_pkt = NULL; retry: err = bus_dmamap_load_mbuf_sg(dma_tag_tx, dma_map, *m_headp, dma_buffers, num_segs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* try to defrag, too many segments */ mbuf_pkt = m_defrag(*m_headp, M_NOWAIT); if (mbuf_pkt == NULL) { err = ENOBUFS; goto _exit0; } *m_headp = mbuf_pkt; goto retry; } _exit0: return (err); } int vxge_device_hw_info_get(vxge_dev_t *vdev) { int i, err = ENXIO; u64 vpath_mask = 0; u32 max_supported_vpath = 0; u32 fw_ver_maj_min; vxge_firmware_upgrade_e fw_option; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_device_hw_info_t *hw_info; status = vxge_hal_device_hw_info_get(vdev->pdev, (pci_reg_h) vdev->pdev->reg_map[0], (u8 *) vdev->pdev->bar_info[0], &vdev->config.hw_info); if (status != VXGE_HAL_OK) goto _exit0; hw_info = &vdev->config.hw_info; vpath_mask = hw_info->vpath_mask; if (vpath_mask == 0) { device_printf(vdev->ndev, "No vpaths available in device\n"); goto _exit0; } fw_option = vdev->config.fw_option; /* Check how many vpaths are available */ for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (!((vpath_mask) & mBIT(i))) continue; max_supported_vpath++; } vdev->max_supported_vpath = max_supported_vpath; status = vxge_hal_device_is_privileged(hw_info->host_type, hw_info->func_id); vdev->is_privilaged = (status == VXGE_HAL_OK) ? TRUE : FALSE; vdev->hw_fw_version = VXGE_FW_VERSION( hw_info->fw_version.major, hw_info->fw_version.minor, hw_info->fw_version.build); fw_ver_maj_min = VXGE_FW_MAJ_MIN_VERSION(hw_info->fw_version.major, hw_info->fw_version.minor); if ((fw_option >= VXGE_FW_UPGRADE_FORCE) || (vdev->hw_fw_version != VXGE_DRV_FW_VERSION)) { /* For fw_ver 1.8.1 and above ignore build number. */ if ((fw_option == VXGE_FW_UPGRADE_ALL) && ((vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 1)) && (fw_ver_maj_min == VXGE_DRV_FW_MAJ_MIN_VERSION))) { goto _exit1; } if (vdev->hw_fw_version < VXGE_BASE_FW_VERSION) { device_printf(vdev->ndev, "Upgrade driver through vxge_update, " "Unable to load the driver.\n"); goto _exit0; } vdev->fw_upgrade = TRUE; } _exit1: err = 0; _exit0: return (err); } /* * vxge_device_hw_info_print * Print device and driver information */ void vxge_device_hw_info_print(vxge_dev_t *vdev) { u32 i; device_t ndev; struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; char pmd_type[2][VXGE_PMD_INFO_LEN]; vxge_hal_device_t *hldev; vxge_hal_device_hw_info_t *hw_info; vxge_hal_device_pmd_info_t *pmd_port; hldev = vdev->devh; ndev = vdev->ndev; ctx = device_get_sysctl_ctx(ndev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ndev)); hw_info = &(vdev->config.hw_info); snprintf(vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], sizeof(vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION]), "%d.%d.%d.%d", XGELL_VERSION_MAJOR, XGELL_VERSION_MINOR, XGELL_VERSION_FIX, XGELL_VERSION_BUILD); /* Print PCI-e bus type/speed/width info */ snprintf(vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], sizeof(vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO]), "x%d", hldev->link_width); if (hldev->link_width <= VXGE_HAL_PCI_E_LINK_WIDTH_X4) device_printf(ndev, "For optimal performance a x8 " "PCI-Express slot is required.\n"); vxge_null_terminate((char *) hw_info->serial_number, sizeof(hw_info->serial_number)); vxge_null_terminate((char *) hw_info->part_number, sizeof(hw_info->part_number)); snprintf(vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], sizeof(vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO]), "%s", hw_info->serial_number); snprintf(vdev->config.nic_attr[VXGE_PRINT_PART_NO], sizeof(vdev->config.nic_attr[VXGE_PRINT_PART_NO]), "%s", hw_info->part_number); snprintf(vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], sizeof(vdev->config.nic_attr[VXGE_PRINT_FW_VERSION]), "%s", hw_info->fw_version.version); snprintf(vdev->config.nic_attr[VXGE_PRINT_FW_DATE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FW_DATE]), "%s", hw_info->fw_date.date); pmd_port = &(hw_info->pmd_port0); for (i = 0; i < hw_info->ports; i++) { vxge_pmd_port_type_get(vdev, pmd_port->type, pmd_type[i], sizeof(pmd_type[i])); strncpy(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i], "vendor=??, sn=??, pn=??, type=??", sizeof(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i])); vxge_null_terminate(pmd_port->vendor, sizeof(pmd_port->vendor)); if (strlen(pmd_port->vendor) == 0) { pmd_port = &(hw_info->pmd_port1); continue; } vxge_null_terminate(pmd_port->ser_num, sizeof(pmd_port->ser_num)); vxge_null_terminate(pmd_port->part_num, sizeof(pmd_port->part_num)); snprintf(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i], sizeof(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i]), "vendor=%s, sn=%s, pn=%s, type=%s", pmd_port->vendor, pmd_port->ser_num, pmd_port->part_num, pmd_type[i]); pmd_port = &(hw_info->pmd_port1); } switch (hw_info->function_mode) { case VXGE_HAL_PCIE_FUNC_MODE_SF1_VP17: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Single Function - 1 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF2_VP8: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 2 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF4_VP4: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 4 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF8_VP2: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 8 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF8P_VP2: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function (DirectIO) - 8 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; } snprintf(vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_INTR_MODE]), "%s", ((vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) ? "MSI-X" : "INTA")); snprintf(vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], sizeof(vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT]), "%d", vdev->no_of_vpath); snprintf(vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE], sizeof(vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE]), - "%lu", vdev->ifp->if_mtu); + "%u", vdev->ifp->if_mtu); snprintf(vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_LRO_MODE]), "%s", ((vdev->config.lro_enable) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_RTH_MODE]), "%s", ((vdev->config.rth_enable) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_TSO_MODE]), "%s", ((vdev->ifp->if_capenable & IFCAP_TSO4) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], sizeof(vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE]), "%s", ((hw_info->ports == 1) ? "Single Port" : "Dual Port")); if (vdev->is_privilaged) { if (hw_info->ports > 1) { snprintf(vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_PORT_MODE]), "%s", vxge_port_mode[vdev->port_mode]); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) snprintf(vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], sizeof(vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE]), "%s", vxge_port_failure[vdev->port_failure]); vxge_active_port_update(vdev); snprintf(vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT], sizeof(vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT]), "%lld", vdev->active_port); } if (!is_single_func(hw_info->function_mode)) { snprintf(vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE]), "%s", ((vdev->l2_switch) ? "Enabled" : "Disabled")); } } device_printf(ndev, "Driver version\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION]); device_printf(ndev, "Serial number\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO]); device_printf(ndev, "Part number\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PART_NO]); device_printf(ndev, "Firmware version\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FW_VERSION]); device_printf(ndev, "Firmware date\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FW_DATE]); device_printf(ndev, "Link width\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO]); if (vdev->is_privilaged) { device_printf(ndev, "Function mode\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]); } device_printf(ndev, "Interrupt type\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_INTR_MODE]); device_printf(ndev, "VPath(s) opened\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT]); device_printf(ndev, "Adapter Type\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE]); device_printf(ndev, "PMD Port 0\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0]); if (hw_info->ports > 1) { device_printf(ndev, "PMD Port 1\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1]); if (vdev->is_privilaged) { device_printf(ndev, "Port Mode\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PORT_MODE]); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) device_printf(ndev, "Port Failure\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE]); device_printf(vdev->ndev, "Active Port\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT]); } } if (vdev->is_privilaged && !is_single_func(hw_info->function_mode)) { device_printf(vdev->ndev, "L2 Switch\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE]); } device_printf(ndev, "MTU is %s\n", vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE]); device_printf(ndev, "LRO %s\n", vdev->config.nic_attr[VXGE_PRINT_LRO_MODE]); device_printf(ndev, "RTH %s\n", vdev->config.nic_attr[VXGE_PRINT_RTH_MODE]); device_printf(ndev, "TSO %s\n", vdev->config.nic_attr[VXGE_PRINT_TSO_MODE]); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Driver version", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], 0, "Driver version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Serial number", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], 0, "Serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Part number", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_PART_NO], 0, "Part number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Firmware version", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], 0, "Firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Firmware date", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_FW_DATE], 0, "Firmware date"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Link width", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], 0, "Link width"); if (vdev->is_privilaged) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Function mode", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], 0, "Function mode"); } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Interrupt type", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], 0, "Interrupt type"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "VPath(s) opened", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], 0, "VPath(s) opened"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Adapter Type", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], 0, "Adapter Type"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pmd port 0", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0], 0, "pmd port"); if (hw_info->ports > 1) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pmd port 1", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1], 0, "pmd port"); if (vdev->is_privilaged) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Port Mode", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], 0, "Port Mode"); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Port Failure", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], 0, "Port Failure"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "L2 Switch", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], 0, "L2 Switch"); } } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "LRO mode", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], 0, "LRO mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "RTH mode", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], 0, "RTH mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "TSO mode", CTLFLAG_RD, &vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], 0, "TSO mode"); } void vxge_pmd_port_type_get(vxge_dev_t *vdev, u32 port_type, char *ifm_name, u8 ifm_len) { vdev->ifm_optics = IFM_UNKNOWN; switch (port_type) { case VXGE_HAL_DEVICE_PMD_TYPE_10G_SR: vdev->ifm_optics = IFM_10G_SR; strlcpy(ifm_name, "10GbE SR", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_LR: vdev->ifm_optics = IFM_10G_LR; strlcpy(ifm_name, "10GbE LR", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_LRM: vdev->ifm_optics = IFM_10G_LRM; strlcpy(ifm_name, "10GbE LRM", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_DIRECT: vdev->ifm_optics = IFM_10G_TWINAX; strlcpy(ifm_name, "10GbE DA (Direct Attached)", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_CX4: vdev->ifm_optics = IFM_10G_CX4; strlcpy(ifm_name, "10GbE CX4", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_BASE_T: #if __FreeBSD_version >= 800000 vdev->ifm_optics = IFM_10G_T; #endif strlcpy(ifm_name, "10GbE baseT", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_OTHER: strlcpy(ifm_name, "10GbE Other", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_SX: vdev->ifm_optics = IFM_1000_SX; strlcpy(ifm_name, "1GbE SX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_LX: vdev->ifm_optics = IFM_1000_LX; strlcpy(ifm_name, "1GbE LX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_CX: vdev->ifm_optics = IFM_1000_CX; strlcpy(ifm_name, "1GbE CX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_BASE_T: vdev->ifm_optics = IFM_1000_T; strlcpy(ifm_name, "1GbE baseT", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_DIRECT: strlcpy(ifm_name, "1GbE DA (Direct Attached)", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_CX4: strlcpy(ifm_name, "1GbE CX4", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_OTHER: strlcpy(ifm_name, "1GbE Other", ifm_len); break; default: case VXGE_HAL_DEVICE_PMD_TYPE_UNKNOWN: strlcpy(ifm_name, "UNSUP", ifm_len); break; } } u32 vxge_ring_length_get(u32 buffer_mode) { return (VXGE_DEFAULT_RING_BLOCK * vxge_hal_ring_rxds_per_block_get(buffer_mode)); } /* * Removes trailing spaces padded * and NULL terminates strings */ static inline void vxge_null_terminate(char *str, size_t len) { len--; while (*str && (*str != ' ') && (len != 0)) ++str; --len; if (*str) *str = '\0'; } /* * vxge_ioctl * Callback to control the device */ int vxge_ioctl(ifnet_t ifp, u_long command, caddr_t data) { int mask, err = 0; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; if (!vdev->is_active) return (EBUSY); switch (command) { /* Set/Get ifnet address */ case SIOCSIFADDR: case SIOCGIFADDR: ether_ioctl(ifp, command, data); break; /* Set Interface MTU */ case SIOCSIFMTU: err = vxge_change_mtu(vdev, (unsigned long)ifr->ifr_mtu); break; /* Set Interface Flags */ case SIOCSIFFLAGS: VXGE_DRV_LOCK(vdev); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ vdev->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) vxge_promisc_set(vdev); } else { vxge_init_locked(vdev); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vxge_stop_locked(vdev); } vdev->if_flags = ifp->if_flags; VXGE_DRV_UNLOCK(vdev); break; /* Add/delete multicast address */ case SIOCADDMULTI: case SIOCDELMULTI: break; /* Get/Set Interface Media */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: err = ifmedia_ioctl(ifp, ifr, &vdev->media, command); break; /* Set Capabilities */ case SIOCSIFCAP: VXGE_DRV_LOCK(vdev); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if ((ifp->if_capenable & IFCAP_TSO) && !(ifp->if_capenable & IFCAP_TXCSUM)) { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO Disabled\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO) { if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= CSUM_TSO; if_printf(ifp, "TSO Enabled\n"); } else { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "Enable tx checksum offload \ first.\n"); err = EAGAIN; } } else { ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO Disabled\n"); } } if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_MTU) ifp->if_capenable ^= IFCAP_VLAN_MTU; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #if __FreeBSD_version >= 800000 if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; #endif #if defined(VLAN_CAPABILITIES) VLAN_CAPABILITIES(ifp); #endif VXGE_DRV_UNLOCK(vdev); break; case SIOCGPRIVATE_0: VXGE_DRV_LOCK(vdev); err = vxge_ioctl_stats(vdev, ifr); VXGE_DRV_UNLOCK(vdev); break; case SIOCGPRIVATE_1: VXGE_DRV_LOCK(vdev); err = vxge_ioctl_regs(vdev, ifr); VXGE_DRV_UNLOCK(vdev); break; default: err = ether_ioctl(ifp, command, data); break; } return (err); } /* * vxge_ioctl_regs * IOCTL to get registers */ int vxge_ioctl_regs(vxge_dev_t *vdev, struct ifreq *ifr) { u64 value = 0x0; u32 vp_id = 0; u32 offset, reqd_size = 0; int i, err = EINVAL; char *command = (char *) ifr->ifr_data; void *reg_info = (void *) ifr->ifr_data; vxge_vpath_t *vpath; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_mgmt_reg_type_e regs_type; switch (*command) { case vxge_hal_mgmt_reg_type_pcicfgmgmt: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_pcicfgmgmt_reg_t); regs_type = vxge_hal_mgmt_reg_type_pcicfgmgmt; } break; case vxge_hal_mgmt_reg_type_mrpcim: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_mrpcim_reg_t); regs_type = vxge_hal_mgmt_reg_type_mrpcim; } break; case vxge_hal_mgmt_reg_type_srpcim: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_srpcim_reg_t); regs_type = vxge_hal_mgmt_reg_type_srpcim; } break; case vxge_hal_mgmt_reg_type_memrepair: if (vdev->is_privilaged) { /* reqd_size = sizeof(vxge_hal_memrepair_reg_t); */ regs_type = vxge_hal_mgmt_reg_type_memrepair; } break; case vxge_hal_mgmt_reg_type_legacy: reqd_size = sizeof(vxge_hal_legacy_reg_t); regs_type = vxge_hal_mgmt_reg_type_legacy; break; case vxge_hal_mgmt_reg_type_toc: reqd_size = sizeof(vxge_hal_toc_reg_t); regs_type = vxge_hal_mgmt_reg_type_toc; break; case vxge_hal_mgmt_reg_type_common: reqd_size = sizeof(vxge_hal_common_reg_t); regs_type = vxge_hal_mgmt_reg_type_common; break; case vxge_hal_mgmt_reg_type_vpmgmt: reqd_size = sizeof(vxge_hal_vpmgmt_reg_t); regs_type = vxge_hal_mgmt_reg_type_vpmgmt; vpath = &(vdev->vpaths[*((u32 *) reg_info + 1)]); vp_id = vpath->vp_id; break; case vxge_hal_mgmt_reg_type_vpath: reqd_size = sizeof(vxge_hal_vpath_reg_t); regs_type = vxge_hal_mgmt_reg_type_vpath; vpath = &(vdev->vpaths[*((u32 *) reg_info + 1)]); vp_id = vpath->vp_id; break; case VXGE_GET_VPATH_COUNT: *((u32 *) reg_info) = vdev->no_of_vpath; err = 0; break; default: reqd_size = 0; break; } if (reqd_size) { for (i = 0, offset = 0; offset < reqd_size; i++, offset += 0x0008) { value = 0x0; status = vxge_hal_mgmt_reg_read(vdev->devh, regs_type, vp_id, offset, &value); err = (status != VXGE_HAL_OK) ? EINVAL : 0; if (err == EINVAL) break; *((u64 *) ((u64 *) reg_info + i)) = value; } } return (err); } /* * vxge_ioctl_stats * IOCTL to get statistics */ int vxge_ioctl_stats(vxge_dev_t *vdev, struct ifreq *ifr) { int i, retsize, err = EINVAL; u32 bufsize; vxge_vpath_t *vpath; vxge_bw_info_t *bw_info; vxge_port_info_t *port_info; vxge_drv_stats_t *drv_stat; char *buffer = NULL; char *command = (char *) ifr->ifr_data; vxge_hal_status_e status = VXGE_HAL_OK; switch (*command) { case VXGE_GET_PCI_CONF: bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_pci_config_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) err = copyout(buffer, ifr->ifr_data, retsize); else device_printf(vdev->ndev, "failed pciconfig statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_MRPCIM_STATS: if (!vdev->is_privilaged) break; bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_stats_mrpcim_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) err = copyout(buffer, ifr->ifr_data, retsize); else device_printf(vdev->ndev, "failed mrpcim statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DEVICE_STATS: bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_stats_device_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) err = copyout(buffer, ifr->ifr_data, retsize); else device_printf(vdev->ndev, "failed device statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DEVICE_HWINFO: bufsize = sizeof(vxge_device_hw_info_t); buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { vxge_os_memcpy( &(((vxge_device_hw_info_t *) buffer)->hw_info), &vdev->config.hw_info, sizeof(vxge_hal_device_hw_info_t)); ((vxge_device_hw_info_t *) buffer)->port_mode = vdev->port_mode; ((vxge_device_hw_info_t *) buffer)->port_failure = vdev->port_failure; err = copyout(buffer, ifr->ifr_data, bufsize); if (err != 0) device_printf(vdev->ndev, "failed device hardware info query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DRIVER_STATS: bufsize = sizeof(vxge_drv_stats_t) * vdev->no_of_vpath; drv_stat = (vxge_drv_stats_t *) vxge_mem_alloc(bufsize); if (drv_stat != NULL) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vpath->driver_stats.rx_lro_queued += vpath->lro.lro_queued; vpath->driver_stats.rx_lro_flushed += vpath->lro.lro_flushed; vxge_os_memcpy(&drv_stat[i], &(vpath->driver_stats), sizeof(vxge_drv_stats_t)); } err = copyout(drv_stat, ifr->ifr_data, bufsize); if (err != 0) device_printf(vdev->ndev, "failed driver statistics query\n"); vxge_mem_free(drv_stat, bufsize); } break; case VXGE_GET_BANDWIDTH: bw_info = (vxge_bw_info_t *) ifr->ifr_data; if ((vdev->config.hw_info.func_id != 0) && (vdev->hw_fw_version < VXGE_FW_VERSION(1, 8, 0))) break; if (vdev->config.hw_info.func_id != 0) bw_info->func_id = vdev->config.hw_info.func_id; status = vxge_bw_priority_get(vdev, bw_info); if (status != VXGE_HAL_OK) break; err = copyout(bw_info, ifr->ifr_data, sizeof(vxge_bw_info_t)); break; case VXGE_SET_BANDWIDTH: if (vdev->is_privilaged) err = vxge_bw_priority_set(vdev, ifr); break; case VXGE_SET_PORT_MODE: if (vdev->is_privilaged) { if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { port_info = (vxge_port_info_t *) ifr->ifr_data; vdev->config.port_mode = port_info->port_mode; err = vxge_port_mode_update(vdev); if (err != ENXIO) err = VXGE_HAL_FAIL; else { err = VXGE_HAL_OK; device_printf(vdev->ndev, "PLEASE POWER CYCLE THE SYSTEM\n"); } } } break; case VXGE_GET_PORT_MODE: if (vdev->is_privilaged) { if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { port_info = (vxge_port_info_t *) ifr->ifr_data; err = vxge_port_mode_get(vdev, port_info); if (err == VXGE_HAL_OK) { err = copyout(port_info, ifr->ifr_data, sizeof(vxge_port_info_t)); } } } break; default: break; } return (err); } int vxge_bw_priority_config(vxge_dev_t *vdev) { u32 i; int err = EINVAL; for (i = 0; i < vdev->no_of_func; i++) { err = vxge_bw_priority_update(vdev, i, TRUE); if (err != 0) break; } return (err); } int vxge_bw_priority_set(vxge_dev_t *vdev, struct ifreq *ifr) { int err; u32 func_id; vxge_bw_info_t *bw_info; bw_info = (vxge_bw_info_t *) ifr->ifr_data; func_id = bw_info->func_id; vdev->config.bw_info[func_id].priority = bw_info->priority; vdev->config.bw_info[func_id].bandwidth = bw_info->bandwidth; err = vxge_bw_priority_update(vdev, func_id, FALSE); return (err); } int vxge_bw_priority_update(vxge_dev_t *vdev, u32 func_id, bool binit) { u32 i, set = 0; u32 bandwidth, priority, vpath_count; u64 vpath_list[VXGE_HAL_MAX_VIRTUAL_PATHS]; vxge_hal_device_t *hldev; vxge_hal_vp_config_t *vp_config; vxge_hal_status_e status = VXGE_HAL_OK; hldev = vdev->devh; status = vxge_hal_get_vpath_list(vdev->devh, func_id, vpath_list, &vpath_count); if (status != VXGE_HAL_OK) return (status); for (i = 0; i < vpath_count; i++) { vp_config = &(hldev->config.vp_config[vpath_list[i]]); /* Configure Bandwidth */ if (vdev->config.bw_info[func_id].bandwidth != VXGE_HAL_VPATH_BW_LIMIT_DEFAULT) { set = 1; bandwidth = vdev->config.bw_info[func_id].bandwidth; if (bandwidth < VXGE_HAL_VPATH_BW_LIMIT_MIN || bandwidth > VXGE_HAL_VPATH_BW_LIMIT_MAX) { bandwidth = VXGE_HAL_VPATH_BW_LIMIT_DEFAULT; } vp_config->bandwidth = bandwidth; } /* * If b/w limiting is enabled on any of the * VFs, then for remaining VFs set the priority to 3 * and b/w limiting to max i.e 10 Gb) */ if (vp_config->bandwidth == VXGE_HAL_VPATH_BW_LIMIT_DEFAULT) vp_config->bandwidth = VXGE_HAL_VPATH_BW_LIMIT_MAX; if (binit && vdev->config.low_latency) { if (func_id == 0) vdev->config.bw_info[func_id].priority = VXGE_DEFAULT_VPATH_PRIORITY_HIGH; } /* Configure Priority */ if (vdev->config.bw_info[func_id].priority != VXGE_HAL_VPATH_PRIORITY_DEFAULT) { set = 1; priority = vdev->config.bw_info[func_id].priority; if (priority < VXGE_HAL_VPATH_PRIORITY_MIN || priority > VXGE_HAL_VPATH_PRIORITY_MAX) { priority = VXGE_HAL_VPATH_PRIORITY_DEFAULT; } vp_config->priority = priority; } else if (vdev->config.low_latency) { set = 1; vp_config->priority = VXGE_DEFAULT_VPATH_PRIORITY_LOW; } if (set == 1) { status = vxge_hal_rx_bw_priority_set(vdev->devh, vpath_list[i]); if (status != VXGE_HAL_OK) break; if (vpath_list[i] < VXGE_HAL_TX_BW_VPATH_LIMIT) { status = vxge_hal_tx_bw_priority_set( vdev->devh, vpath_list[i]); if (status != VXGE_HAL_OK) break; } } } return ((status == VXGE_HAL_OK) ? 0 : EINVAL); } /* * vxge_intr_coalesce_tx * Changes interrupt coalescing if the interrupts are not within a range * Return Value: Nothing */ void vxge_intr_coalesce_tx(vxge_vpath_t *vpath) { u32 timer; if (!vpath->tx_intr_coalesce) return; vpath->tx_interrupts++; if (ticks > vpath->tx_ticks + hz/100) { vpath->tx_ticks = ticks; timer = vpath->tti_rtimer_val; if (vpath->tx_interrupts > VXGE_MAX_TX_INTERRUPT_COUNT) { if (timer != VXGE_TTI_RTIMER_ADAPT_VAL) { vpath->tti_rtimer_val = VXGE_TTI_RTIMER_ADAPT_VAL; vxge_hal_vpath_dynamic_tti_rtimer_set( vpath->handle, vpath->tti_rtimer_val); } } else { if (timer != 0) { vpath->tti_rtimer_val = 0; vxge_hal_vpath_dynamic_tti_rtimer_set( vpath->handle, vpath->tti_rtimer_val); } } vpath->tx_interrupts = 0; } } /* * vxge_intr_coalesce_rx * Changes interrupt coalescing if the interrupts are not within a range * Return Value: Nothing */ void vxge_intr_coalesce_rx(vxge_vpath_t *vpath) { u32 timer; if (!vpath->rx_intr_coalesce) return; vpath->rx_interrupts++; if (ticks > vpath->rx_ticks + hz/100) { vpath->rx_ticks = ticks; timer = vpath->rti_rtimer_val; if (vpath->rx_interrupts > VXGE_MAX_RX_INTERRUPT_COUNT) { if (timer != VXGE_RTI_RTIMER_ADAPT_VAL) { vpath->rti_rtimer_val = VXGE_RTI_RTIMER_ADAPT_VAL; vxge_hal_vpath_dynamic_rti_rtimer_set( vpath->handle, vpath->rti_rtimer_val); } } else { if (timer != 0) { vpath->rti_rtimer_val = 0; vxge_hal_vpath_dynamic_rti_rtimer_set( vpath->handle, vpath->rti_rtimer_val); } } vpath->rx_interrupts = 0; } } /* * vxge_methods FreeBSD device interface entry points */ static device_method_t vxge_methods[] = { DEVMETHOD(device_probe, vxge_probe), DEVMETHOD(device_attach, vxge_attach), DEVMETHOD(device_detach, vxge_detach), DEVMETHOD(device_shutdown, vxge_shutdown), DEVMETHOD_END }; static driver_t vxge_driver = { "vxge", vxge_methods, sizeof(vxge_dev_t), }; static devclass_t vxge_devclass; DRIVER_MODULE(vxge, pci, vxge_driver, vxge_devclass, 0, 0); Index: head/sys/mips/rmi/dev/nlge/if_nlge.c =================================================================== --- head/sys/mips/rmi/dev/nlge/if_nlge.c (revision 263101) +++ head/sys/mips/rmi/dev/nlge/if_nlge.c (revision 263102) @@ -1,2562 +1,2562 @@ /*- * Copyright (c) 2003-2009 RMI Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of RMI Corporation, nor the names of its contributors, * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RMI_BSD */ /* * The XLR device supports upto four 10/100/1000 Ethernet MACs and upto * two 10G Ethernet MACs (of XGMII). Alternatively, each 10G port can used * as a SPI-4 interface, with 8 ports per such interface. The MACs are * encapsulated in another hardware block referred to as network accelerator, * such that there are three instances of these in a XLR. One of them controls * the four 1G RGMII ports while one each of the others controls an XGMII port. * Enabling MACs requires configuring the corresponding network accelerator * and the individual port. * The XLS device supports upto 8 10/100/1000 Ethernet MACs or max 2 10G * Ethernet MACs. The 1G MACs are of SGMII and 10G MACs are of XAUI * interface. These ports are part of two network accelerators. * The nlge driver configures and initializes non-SPI4 Ethernet ports in the * XLR/XLS devices and enables data transfer on them. */ #include __FBSDID("$FreeBSD$"); #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define __RMAN_RESOURCE_VISIBLE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for DELAY */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #include "miibus_if.h" #include MODULE_DEPEND(nlna, nlge, 1, 1, 1); MODULE_DEPEND(nlge, ether, 1, 1, 1); MODULE_DEPEND(nlge, miibus, 1, 1, 1); /* Network accelarator entry points */ static int nlna_probe(device_t); static int nlna_attach(device_t); static int nlna_detach(device_t); static int nlna_suspend(device_t); static int nlna_resume(device_t); static int nlna_shutdown(device_t); /* GMAC port entry points */ static int nlge_probe(device_t); static int nlge_attach(device_t); static int nlge_detach(device_t); static int nlge_suspend(device_t); static int nlge_resume(device_t); static void nlge_init(void *); static int nlge_ioctl(struct ifnet *, u_long, caddr_t); static int nlge_tx(struct ifnet *ifp, struct mbuf *m); static void nlge_rx(struct nlge_softc *sc, vm_paddr_t paddr, int len); static int nlge_mii_write(struct device *, int, int, int); static int nlge_mii_read(struct device *, int, int); static void nlge_mac_mii_statchg(device_t); static int nlge_mediachange(struct ifnet *ifp); static void nlge_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr); /* Other internal/helper functions */ static void *get_buf(void); static void nlna_add_to_port_set(struct nlge_port_set *pset, struct nlge_softc *sc); static void nlna_config_pde(struct nlna_softc *); static void nlna_config_parser(struct nlna_softc *); static void nlna_config_classifier(struct nlna_softc *); static void nlna_config_fifo_spill_area(struct nlna_softc *sc); static void nlna_config_translate_table(struct nlna_softc *sc); static void nlna_config_common(struct nlna_softc *); static void nlna_disable_ports(struct nlna_softc *sc); static void nlna_enable_intr(struct nlna_softc *sc); static void nlna_disable_intr(struct nlna_softc *sc); static void nlna_enable_ports(struct nlna_softc *sc); static void nlna_get_all_softc(device_t iodi_dev, struct nlna_softc **sc_vec, uint32_t vec_sz); static void nlna_hw_init(struct nlna_softc *sc); static int nlna_is_last_active_na(struct nlna_softc *sc); static void nlna_media_specific_config(struct nlna_softc *sc); static void nlna_reset_ports(struct nlna_softc *sc, struct xlr_gmac_block_t *blk); static struct nlna_softc *nlna_sc_init(device_t dev, struct xlr_gmac_block_t *blk); static void nlna_setup_intr(struct nlna_softc *sc); static void nlna_smp_update_pde(void *dummy __unused); static void nlna_submit_rx_free_desc(struct nlna_softc *sc, uint32_t n_desc); static int nlge_gmac_config_speed(struct nlge_softc *, int quick); static void nlge_hw_init(struct nlge_softc *sc); static int nlge_if_init(struct nlge_softc *sc); static void nlge_intr(void *arg); static int nlge_irq_init(struct nlge_softc *sc); static void nlge_irq_fini(struct nlge_softc *sc); static void nlge_media_specific_init(struct nlge_softc *sc); static void nlge_mii_init(device_t dev, struct nlge_softc *sc); static int nlge_mii_read_internal(xlr_reg_t *mii_base, int phyaddr, int regidx); static void nlge_mii_write_internal(xlr_reg_t *mii_base, int phyaddr, int regidx, int regval); void nlge_msgring_handler(int bucket, int size, int code, int stid, struct msgrng_msg *msg, void *data); static void nlge_port_disable(struct nlge_softc *sc); static void nlge_port_enable(struct nlge_softc *sc); static void nlge_read_mac_addr(struct nlge_softc *sc); static void nlge_sc_init(struct nlge_softc *sc, device_t dev, struct xlr_gmac_port *port_info); static void nlge_set_mac_addr(struct nlge_softc *sc); static void nlge_set_port_attribs(struct nlge_softc *, struct xlr_gmac_port *); static void nlge_mac_set_rx_mode(struct nlge_softc *sc); static void nlge_sgmii_init(struct nlge_softc *sc); static int nlge_start_locked(struct ifnet *ifp, struct nlge_softc *sc, struct mbuf *m); static int prepare_fmn_message(struct nlge_softc *sc, struct msgrng_msg *msg, uint32_t *n_entries, struct mbuf *m_head, uint64_t fr_stid, struct nlge_tx_desc **tx_desc); static void release_tx_desc(vm_paddr_t phy_addr); static int send_fmn_msg_tx(struct nlge_softc *, struct msgrng_msg *, uint32_t n_entries); //#define DEBUG #ifdef DEBUG static int mac_debug = 1; #undef PDEBUG #define PDEBUG(fmt, args...) \ do {\ if (mac_debug) {\ printf("[%s@%d|%s]: cpu_%d: " fmt, \ __FILE__, __LINE__, __FUNCTION__, PCPU_GET(cpuid), ##args);\ }\ } while(0); /* Debug/dump functions */ static void dump_reg(xlr_reg_t *addr, uint32_t offset, char *name); static void dump_gmac_registers(struct nlge_softc *); static void dump_na_registers(xlr_reg_t *base, int port_id); static void dump_mac_stats(struct nlge_softc *sc); static void dump_mii_regs(struct nlge_softc *sc) __attribute__((used)); static void dump_mii_data(struct mii_data *mii) __attribute__((used)); static void dump_board_info(struct xlr_board_info *); static void dump_pcs_regs(struct nlge_softc *sc, int phy); #else #undef PDEBUG #define PDEBUG(fmt, args...) #define dump_reg(a, o, n) /* nop */ #define dump_gmac_registers(a) /* nop */ #define dump_na_registers(a, p) /* nop */ #define dump_board_info(b) /* nop */ #define dump_mac_stats(sc) /* nop */ #define dump_mii_regs(sc) /* nop */ #define dump_mii_data(mii) /* nop */ #define dump_pcs_regs(sc, phy) /* nop */ #endif /* Wrappers etc. to export the driver entry points. */ static device_method_t nlna_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nlna_probe), DEVMETHOD(device_attach, nlna_attach), DEVMETHOD(device_detach, nlna_detach), DEVMETHOD(device_shutdown, nlna_shutdown), DEVMETHOD(device_suspend, nlna_suspend), DEVMETHOD(device_resume, nlna_resume), /* bus interface : TBD : what are these for ? */ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD_END }; static driver_t nlna_driver = { "nlna", nlna_methods, sizeof(struct nlna_softc) }; static devclass_t nlna_devclass; static device_method_t nlge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nlge_probe), DEVMETHOD(device_attach, nlge_attach), DEVMETHOD(device_detach, nlge_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, nlge_suspend), DEVMETHOD(device_resume, nlge_resume), /* MII interface */ DEVMETHOD(miibus_readreg, nlge_mii_read), DEVMETHOD(miibus_writereg, nlge_mii_write), DEVMETHOD(miibus_statchg, nlge_mac_mii_statchg), {0, 0} }; static driver_t nlge_driver = { "nlge", nlge_methods, sizeof(struct nlge_softc) }; static devclass_t nlge_devclass; DRIVER_MODULE(nlna, iodi, nlna_driver, nlna_devclass, 0, 0); DRIVER_MODULE(nlge, nlna, nlge_driver, nlge_devclass, 0, 0); DRIVER_MODULE(miibus, nlge, miibus_driver, miibus_devclass, 0, 0); static uma_zone_t nl_tx_desc_zone; /* Tunables. */ static int flow_classification = 0; TUNABLE_INT("hw.nlge.flow_classification", &flow_classification); #define NLGE_HW_CHKSUM 1 static __inline void atomic_incr_long(unsigned long *addr) { /* XXX: fix for 64 bit */ unsigned int *iaddr = (unsigned int *)addr; xlr_ldaddwu(1, iaddr); } static int nlna_probe(device_t dev) { return (BUS_PROBE_DEFAULT); } /* * Add all attached GMAC/XGMAC ports to the device tree. Port * configuration is spread in two regions - common configuration * for all ports in the NA and per-port configuration in MAC-specific * region. This function does the following: * - adds the ports to the device tree * - reset the ports * - do all the common initialization * - invoke bus_generic_attach for per-port configuration * - supply initial free rx descriptors to ports * - initialize s/w data structures * - finally, enable interrupts (only in the last NA). * * For reference, sample address space for common and per-port * registers is given below. * * The address map for RNA0 is: (typical value) * * XLR_IO_BASE +--------------------------------------+ 0xbef0_0000 * | | * | | * | | * | | * | | * | | * GMAC0 ---> +--------------------------------------+ 0xbef0_c000 * | | * | | * (common) -> |......................................| 0xbef0_c400 * | | * | (RGMII/SGMII: common registers) | * | | * GMAC1 ---> |--------------------------------------| 0xbef0_d000 * | | * | | * (common) -> |......................................| 0xbef0_d400 * | | * | (RGMII/SGMII: common registers) | * | | * |......................................| * and so on .... * * Ref: Figure 14-3 and Table 14-1 of XLR PRM */ static int nlna_attach(device_t dev) { struct xlr_gmac_block_t *block_info; device_t gmac_dev; struct nlna_softc *sc; int error; int i; int id; id = device_get_unit(dev); block_info = device_get_ivars(dev); if (!block_info->enabled) { return 0; } #ifdef DEBUG dump_board_info(&xlr_board_info); #endif /* Initialize nlna state in softc structure */ sc = nlna_sc_init(dev, block_info); /* Add device's for the ports controlled by this NA. */ if (block_info->type == XLR_GMAC) { KASSERT(id < 2, ("No GMACs supported with this network" "accelerator: %d", id)); for (i = 0; i < sc->num_ports; i++) { gmac_dev = device_add_child(dev, "nlge", -1); device_set_ivars(gmac_dev, &block_info->gmac_port[i]); } } else if (block_info->type == XLR_XGMAC) { KASSERT(id > 0 && id <= 2, ("No XGMACs supported with this" "network accelerator: %d", id)); gmac_dev = device_add_child(dev, "nlge", -1); device_set_ivars(gmac_dev, &block_info->gmac_port[0]); } else if (block_info->type == XLR_SPI4) { /* SPI4 is not supported here */ device_printf(dev, "Unsupported: NA with SPI4 type"); return (ENOTSUP); } nlna_reset_ports(sc, block_info); /* Initialize Network Accelarator registers. */ nlna_hw_init(sc); error = bus_generic_attach(dev); if (error) { device_printf(dev, "failed to attach port(s)\n"); goto fail; } /* Send out the initial pool of free-descriptors for the rx path */ nlna_submit_rx_free_desc(sc, MAX_FRIN_SPILL); /* S/w data structure initializations shared by all NA's. */ if (nl_tx_desc_zone == NULL) { /* Create a zone for allocating tx descriptors */ nl_tx_desc_zone = uma_zcreate("NL Tx Desc", sizeof(struct nlge_tx_desc), NULL, NULL, NULL, NULL, XLR_CACHELINE_SIZE, 0); } /* Enable NA interrupts */ nlna_setup_intr(sc); return (0); fail: return (error); } static int nlna_detach(device_t dev) { struct nlna_softc *sc; sc = device_get_softc(dev); if (device_is_alive(dev)) { nlna_disable_intr(sc); /* This will make sure that per-port detach is complete * and all traffic on the ports has been stopped. */ bus_generic_detach(dev); uma_zdestroy(nl_tx_desc_zone); } return (0); } static int nlna_suspend(device_t dev) { return (0); } static int nlna_resume(device_t dev) { return (0); } static int nlna_shutdown(device_t dev) { return (0); } /* GMAC port entry points */ static int nlge_probe(device_t dev) { struct nlge_softc *sc; struct xlr_gmac_port *port_info; int index; char *desc[] = { "RGMII", "SGMII", "RGMII/SGMII", "XGMAC", "XAUI", "Unknown"}; port_info = device_get_ivars(dev); index = (port_info->type < XLR_RGMII || port_info->type > XLR_XAUI) ? 5 : port_info->type; device_set_desc_copy(dev, desc[index]); sc = device_get_softc(dev); nlge_sc_init(sc, dev, port_info); nlge_port_disable(sc); return (0); } static int nlge_attach(device_t dev) { struct nlge_softc *sc; struct nlna_softc *nsc; int error; sc = device_get_softc(dev); nlge_if_init(sc); nlge_mii_init(dev, sc); error = nlge_irq_init(sc); if (error) return error; nlge_hw_init(sc); nsc = (struct nlna_softc *)device_get_softc(device_get_parent(dev)); nsc->child_sc[sc->instance] = sc; return (0); } static int nlge_detach(device_t dev) { struct nlge_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->nlge_if; if (device_is_attached(dev)) { nlge_port_disable(sc); nlge_irq_fini(sc); ether_ifdetach(ifp); bus_generic_detach(dev); } if (ifp) if_free(ifp); return (0); } static int nlge_suspend(device_t dev) { return (0); } static int nlge_resume(device_t dev) { return (0); } static void nlge_init(void *addr) { struct nlge_softc *sc; struct ifnet *ifp; sc = (struct nlge_softc *)addr; ifp = sc->nlge_if; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; nlge_gmac_config_speed(sc, 1); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; nlge_port_enable(sc); if (sc->port_type == XLR_SGMII) { dump_pcs_regs(sc, 27); } dump_gmac_registers(sc); dump_mac_stats(sc); } static int nlge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct mii_data *mii; struct nlge_softc *sc; struct ifreq *ifr; int error; sc = ifp->if_softc; error = 0; ifr = (struct ifreq *)data; switch(command) { case SIOCSIFFLAGS: NLGE_LOCK(sc); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { nlge_init(sc); } if (ifp->if_flags & IFF_PROMISC && !(sc->if_flags & IFF_PROMISC)) { sc->if_flags |= IFF_PROMISC; nlge_mac_set_rx_mode(sc); } else if (!(ifp->if_flags & IFF_PROMISC) && sc->if_flags & IFF_PROMISC) { sc->if_flags &= IFF_PROMISC; nlge_mac_set_rx_mode(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { nlge_port_disable(sc); } } sc->if_flags = ifp->if_flags; NLGE_UNLOCK(sc); error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: if (sc->mii_bus != NULL) { mii = (struct mii_data *)device_get_softc(sc->mii_bus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* This function is called from an interrupt handler */ void nlge_msgring_handler(int bucket, int size, int code, int stid, struct msgrng_msg *msg, void *data) { struct nlna_softc *na_sc; struct nlge_softc *sc; struct ifnet *ifp; struct mbuf *m; vm_paddr_t phys_addr; uint32_t length; int ctrl; int tx_error; int port; int is_p2p; is_p2p = 0; tx_error = 0; length = (msg->msg0 >> 40) & 0x3fff; na_sc = (struct nlna_softc *)data; if (length == 0) { ctrl = CTRL_REG_FREE; phys_addr = msg->msg0 & 0xffffffffffULL; port = (msg->msg0 >> 54) & 0x0f; is_p2p = (msg->msg0 >> 62) & 0x1; tx_error = (msg->msg0 >> 58) & 0xf; } else { ctrl = CTRL_SNGL; phys_addr = msg->msg0 & 0xffffffffe0ULL; length = length - BYTE_OFFSET - MAC_CRC_LEN; port = msg->msg0 & 0x0f; } sc = na_sc->child_sc[port]; if (sc == NULL) { printf("Message (of %d len) with softc=NULL on %d port (type=%s)\n", length, port, (ctrl == CTRL_SNGL ? "Pkt rx" : "Freeback for tx packet")); return; } if (ctrl == CTRL_REG_FREE || ctrl == CTRL_JUMBO_FREE) { ifp = sc->nlge_if; if (!tx_error) { if (is_p2p) { release_tx_desc(phys_addr); } else { #ifdef __mips_n64 m = (struct mbuf *)(uintptr_t)xlr_paddr_ld(phys_addr); m->m_nextpkt = NULL; #else m = (struct mbuf *)(uintptr_t)phys_addr; #endif m_freem(m); } NLGE_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_OACTIVE){ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } NLGE_UNLOCK(sc); } else { printf("ERROR: Tx fb error (%d) on port %d\n", tx_error, port); } - atomic_incr_long((tx_error) ? &ifp->if_oerrors: &ifp->if_opackets); + tx_error ? ifp->if_oerrors++ : ifp->if_opackets++; } else if (ctrl == CTRL_SNGL || ctrl == CTRL_START) { /* Rx Packet */ nlge_rx(sc, phys_addr, length); nlna_submit_rx_free_desc(na_sc, 1); /* return free descr to NA */ } else { printf("[%s]: unrecognized ctrl=%d!\n", __func__, ctrl); } } static int nlge_tx(struct ifnet *ifp, struct mbuf *m) { return (nlge_start_locked(ifp, ifp->if_softc, m)); } static int nlge_start_locked(struct ifnet *ifp, struct nlge_softc *sc, struct mbuf *m) { struct msgrng_msg msg; struct nlge_tx_desc *tx_desc; uint64_t fr_stid; uint32_t cpu; uint32_t n_entries; uint32_t tid; int error, ret; if (m == NULL) return (0); tx_desc = NULL; error = 0; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING) || ifp->if_drv_flags & IFF_DRV_OACTIVE) { error = ENXIO; goto fail; // note: mbuf will get free'd } cpu = xlr_core_id(); tid = xlr_thr_id(); /* H/w threads [0, 2] --> bucket 6 and [1, 3] --> bucket 7 */ fr_stid = cpu * 8 + 6 + (tid % 2); /* * First, remove some freeback messages before transmitting * any new packets. However, cap the number of messages * drained to permit this thread to continue with its * transmission. * * Mask for buckets {6, 7} is 0xc0 */ xlr_msgring_handler(0xc0, 4); ret = prepare_fmn_message(sc, &msg, &n_entries, m, fr_stid, &tx_desc); if (ret) { error = (ret == 2) ? ENOBUFS : ENOTSUP; goto fail; } ret = send_fmn_msg_tx(sc, &msg, n_entries); if (ret != 0) { error = EBUSY; goto fail; } return (0); fail: if (tx_desc != NULL) { uma_zfree(nl_tx_desc_zone, tx_desc); } if (m != NULL) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NLGE_LOCK(sc); ifp->if_drv_flags |= IFF_DRV_OACTIVE; NLGE_UNLOCK(sc); } m_freem(m); - atomic_incr_long(&ifp->if_iqdrops); + ifp->if_iqdrops++; } return (error); } static void nlge_rx(struct nlge_softc *sc, vm_paddr_t paddr, int len) { struct ifnet *ifp; struct mbuf *m; uint64_t tm, mag; uint32_t sr; sr = xlr_enable_kx(); tm = xlr_paddr_ld(paddr - XLR_CACHELINE_SIZE); mag = xlr_paddr_ld(paddr - XLR_CACHELINE_SIZE + sizeof(uint64_t)); xlr_restore_kx(sr); m = (struct mbuf *)(intptr_t)tm; if (mag != 0xf00bad) { /* somebody else's packet. Error - FIXME in intialization */ printf("cpu %d: *ERROR* Not my packet paddr %jx\n", xlr_core_id(), (uintmax_t)paddr); return; } ifp = sc->nlge_if; #ifdef NLGE_HW_CHKSUM m->m_pkthdr.csum_flags = CSUM_IP_CHECKED; if (m->m_data[10] & 0x2) { m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if (m->m_data[10] & 0x1) { m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = htons(0xffff); } } m->m_data += NLGE_PREPAD_LEN; len -= NLGE_PREPAD_LEN; #else m->m_pkthdr.csum_flags = 0; #endif /* align the data */ m->m_data += BYTE_OFFSET ; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = ifp; - atomic_incr_long(&ifp->if_ipackets); + ifp->if_ipackets++; (*ifp->if_input)(ifp, m); } static int nlge_mii_write(struct device *dev, int phyaddr, int regidx, int regval) { struct nlge_softc *sc; sc = device_get_softc(dev); if (sc->port_type != XLR_XGMII) nlge_mii_write_internal(sc->mii_base, phyaddr, regidx, regval); return (0); } static int nlge_mii_read(struct device *dev, int phyaddr, int regidx) { struct nlge_softc *sc; int val; sc = device_get_softc(dev); val = (sc->port_type == XLR_XGMII) ? (0xffff) : nlge_mii_read_internal(sc->mii_base, phyaddr, regidx); return (val); } static void nlge_mac_mii_statchg(device_t dev) { } static int nlge_mediachange(struct ifnet *ifp) { return 0; } static void nlge_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct nlge_softc *sc; struct mii_data *md; md = NULL; sc = ifp->if_softc; if (sc->mii_bus) md = device_get_softc(sc->mii_bus); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->link == xlr_mac_link_down) return; if (md != NULL) ifmr->ifm_active = md->mii_media.ifm_cur->ifm_media; ifmr->ifm_status |= IFM_ACTIVE; } static struct nlna_softc * nlna_sc_init(device_t dev, struct xlr_gmac_block_t *blk) { struct nlna_softc *sc; sc = device_get_softc(dev); memset(sc, 0, sizeof(*sc)); sc->nlna_dev = dev; sc->base = xlr_io_mmio(blk->baseaddr); sc->rfrbucket = blk->station_rfr; sc->station_id = blk->station_id; sc->na_type = blk->type; sc->mac_type = blk->mode; sc->num_ports = blk->num_ports; sc->mdio_set.port_vec = sc->mdio_sc; sc->mdio_set.vec_sz = XLR_MAX_MACS; return (sc); } /* * Do: * - Initialize common GMAC registers (index range 0x100-0x3ff). */ static void nlna_hw_init(struct nlna_softc *sc) { /* * Register message ring handler for the NA block, messages from * the GMAC will have source station id to the first bucket of the * NA FMN station, so register just that station id. */ if (register_msgring_handler(sc->station_id, sc->station_id + 1, nlge_msgring_handler, sc)) { panic("Couldn't register msgring handler\n"); } nlna_config_fifo_spill_area(sc); nlna_config_pde(sc); nlna_config_common(sc); nlna_config_parser(sc); nlna_config_classifier(sc); } /* * Enable interrupts on all the ports controlled by this NA. For now, we * only care about the MII interrupt and this has to be enabled only * on the port id0. * * This function is not in-sync with the regular way of doing things - it * executes only in the context of the last active network accelerator (and * thereby has some ugly accesses in the device tree). Though inelegant, it * is necessary to do it this way as the per-port interrupts can be * setup/enabled only after all the network accelerators have been * initialized. */ static void nlna_setup_intr(struct nlna_softc *sc) { struct nlna_softc *na_sc[XLR_MAX_NLNA]; struct nlge_port_set *pset; struct xlr_gmac_port *port_info; device_t iodi_dev; int i, j; if (!nlna_is_last_active_na(sc)) return ; /* Collect all nlna softc pointers */ memset(na_sc, 0, sizeof(*na_sc) * XLR_MAX_NLNA); iodi_dev = device_get_parent(sc->nlna_dev); nlna_get_all_softc(iodi_dev, na_sc, XLR_MAX_NLNA); /* Setup the MDIO interrupt lists. */ /* * MDIO interrupts are coarse - a single interrupt line provides * information about one of many possible ports. To figure out the * exact port on which action is to be taken, all of the ports * linked to an MDIO interrupt should be read. To enable this, * ports need to add themselves to port sets. */ for (i = 0; i < XLR_MAX_NLNA; i++) { if (na_sc[i] == NULL) continue; for (j = 0; j < na_sc[i]->num_ports; j++) { /* processing j-th port on i-th NA */ port_info = device_get_ivars( na_sc[i]->child_sc[j]->nlge_dev); pset = &na_sc[port_info->mdint_id]->mdio_set; nlna_add_to_port_set(pset, na_sc[i]->child_sc[j]); } } /* Enable interrupts */ for (i = 0; i < XLR_MAX_NLNA; i++) { if (na_sc[i] != NULL && na_sc[i]->na_type != XLR_XGMAC) { nlna_enable_intr(na_sc[i]); } } } static void nlna_add_to_port_set(struct nlge_port_set *pset, struct nlge_softc *sc) { int i; /* step past the non-NULL elements */ for (i = 0; i < pset->vec_sz && pset->port_vec[i] != NULL; i++) ; if (i < pset->vec_sz) pset->port_vec[i] = sc; else printf("warning: internal error: out-of-bounds for MDIO array"); } static void nlna_enable_intr(struct nlna_softc *sc) { int i; for (i = 0; i < sc->num_ports; i++) { if (sc->child_sc[i]->instance == 0) NLGE_WRITE(sc->child_sc[i]->base, R_INTMASK, (1 << O_INTMASK__MDInt)); } } static void nlna_disable_intr(struct nlna_softc *sc) { int i; for (i = 0; i < sc->num_ports; i++) { if (sc->child_sc[i]->instance == 0) NLGE_WRITE(sc->child_sc[i]->base, R_INTMASK, 0); } } static int nlna_is_last_active_na(struct nlna_softc *sc) { int id; id = device_get_unit(sc->nlna_dev); return (id == 2 || xlr_board_info.gmac_block[id + 1].enabled == 0); } static void nlna_submit_rx_free_desc(struct nlna_softc *sc, uint32_t n_desc) { struct msgrng_msg msg; void *ptr; uint32_t msgrng_flags; int i, n, stid, ret, code; if (n_desc > 1) { PDEBUG("Sending %d free-in descriptors to station=%d\n", n_desc, sc->rfrbucket); } stid = sc->rfrbucket; code = (sc->na_type == XLR_XGMAC) ? MSGRNG_CODE_XGMAC : MSGRNG_CODE_MAC; memset(&msg, 0, sizeof(msg)); for (i = 0; i < n_desc; i++) { ptr = get_buf(); if (!ptr) { ret = -ENOMEM; device_printf(sc->nlna_dev, "Cannot allocate mbuf\n"); break; } /* Send the free Rx desc to the MAC */ msg.msg0 = vtophys(ptr) & 0xffffffffe0ULL; n = 0; do { msgrng_flags = msgrng_access_enable(); ret = message_send(1, code, stid, &msg); msgrng_restore(msgrng_flags); KASSERT(n++ < 100000, ("Too many credit fails in rx path\n")); } while (ret != 0); } } static __inline__ void * nlna_config_spill(xlr_reg_t *base, int reg_start_0, int reg_start_1, int reg_size, int size) { void *spill; uint64_t phys_addr; uint32_t spill_size; spill_size = size; spill = contigmalloc((spill_size + XLR_CACHELINE_SIZE), M_DEVBUF, M_NOWAIT | M_ZERO, 0, 0xffffffff, XLR_CACHELINE_SIZE, 0); if (spill == NULL || ((vm_offset_t) spill & (XLR_CACHELINE_SIZE - 1))) { panic("Unable to allocate memory for spill area!\n"); } phys_addr = vtophys(spill); PDEBUG("Allocated spill %d bytes at %llx\n", size, phys_addr); NLGE_WRITE(base, reg_start_0, (phys_addr >> 5) & 0xffffffff); NLGE_WRITE(base, reg_start_1, (phys_addr >> 37) & 0x07); NLGE_WRITE(base, reg_size, spill_size); return (spill); } /* * Configure the 6 FIFO's that are used by the network accelarator to * communicate with the rest of the XLx device. 4 of the FIFO's are for * packets from NA --> cpu (called Class FIFO's) and 2 are for feeding * the NA with free descriptors. */ static void nlna_config_fifo_spill_area(struct nlna_softc *sc) { sc->frin_spill = nlna_config_spill(sc->base, R_REG_FRIN_SPILL_MEM_START_0, R_REG_FRIN_SPILL_MEM_START_1, R_REG_FRIN_SPILL_MEM_SIZE, MAX_FRIN_SPILL * sizeof(struct fr_desc)); sc->frout_spill = nlna_config_spill(sc->base, R_FROUT_SPILL_MEM_START_0, R_FROUT_SPILL_MEM_START_1, R_FROUT_SPILL_MEM_SIZE, MAX_FROUT_SPILL * sizeof(struct fr_desc)); sc->class_0_spill = nlna_config_spill(sc->base, R_CLASS0_SPILL_MEM_START_0, R_CLASS0_SPILL_MEM_START_1, R_CLASS0_SPILL_MEM_SIZE, MAX_CLASS_0_SPILL * sizeof(union rx_tx_desc)); sc->class_1_spill = nlna_config_spill(sc->base, R_CLASS1_SPILL_MEM_START_0, R_CLASS1_SPILL_MEM_START_1, R_CLASS1_SPILL_MEM_SIZE, MAX_CLASS_1_SPILL * sizeof(union rx_tx_desc)); sc->class_2_spill = nlna_config_spill(sc->base, R_CLASS2_SPILL_MEM_START_0, R_CLASS2_SPILL_MEM_START_1, R_CLASS2_SPILL_MEM_SIZE, MAX_CLASS_2_SPILL * sizeof(union rx_tx_desc)); sc->class_3_spill = nlna_config_spill(sc->base, R_CLASS3_SPILL_MEM_START_0, R_CLASS3_SPILL_MEM_START_1, R_CLASS3_SPILL_MEM_SIZE, MAX_CLASS_3_SPILL * sizeof(union rx_tx_desc)); } /* Set the CPU buckets that receive packets from the NA class FIFOs. */ static void nlna_config_pde(struct nlna_softc *sc) { uint64_t bucket_map; uint32_t cpumask; int i, cpu, bucket; cpumask = 0x1; #ifdef SMP /* * rge may be called before SMP start in a BOOTP/NFSROOT * setup. we will distribute packets to other cpus only when * the SMP is started. */ if (smp_started) cpumask = xlr_hw_thread_mask; #endif bucket_map = 0; for (i = 0; i < 32; i++) { if (cpumask & (1 << i)) { cpu = i; /* use bucket 0 and 1 on every core for NA msgs */ bucket = cpu/4 * 8; bucket_map |= (3ULL << bucket); } } NLGE_WRITE(sc->base, R_PDE_CLASS_0, (bucket_map & 0xffffffff)); NLGE_WRITE(sc->base, R_PDE_CLASS_0 + 1, ((bucket_map >> 32) & 0xffffffff)); NLGE_WRITE(sc->base, R_PDE_CLASS_1, (bucket_map & 0xffffffff)); NLGE_WRITE(sc->base, R_PDE_CLASS_1 + 1, ((bucket_map >> 32) & 0xffffffff)); NLGE_WRITE(sc->base, R_PDE_CLASS_2, (bucket_map & 0xffffffff)); NLGE_WRITE(sc->base, R_PDE_CLASS_2 + 1, ((bucket_map >> 32) & 0xffffffff)); NLGE_WRITE(sc->base, R_PDE_CLASS_3, (bucket_map & 0xffffffff)); NLGE_WRITE(sc->base, R_PDE_CLASS_3 + 1, ((bucket_map >> 32) & 0xffffffff)); } /* * Update the network accelerator packet distribution engine for SMP. * On bootup, we have just the boot hw thread handling all packets, on SMP * start, we can start distributing packets across all the cores which are up. */ static void nlna_smp_update_pde(void *dummy __unused) { device_t iodi_dev; struct nlna_softc *na_sc[XLR_MAX_NLNA]; int i; printf("Updating packet distribution for SMP\n"); iodi_dev = devclass_get_device(devclass_find("iodi"), 0); nlna_get_all_softc(iodi_dev, na_sc, XLR_MAX_NLNA); for (i = 0; i < XLR_MAX_NLNA; i++) { if (na_sc[i] == NULL) continue; nlna_disable_ports(na_sc[i]); nlna_config_pde(na_sc[i]); nlna_config_translate_table(na_sc[i]); nlna_enable_ports(na_sc[i]); } } SYSINIT(nlna_smp_update_pde, SI_SUB_SMP, SI_ORDER_ANY, nlna_smp_update_pde, NULL); static void nlna_config_translate_table(struct nlna_softc *sc) { uint32_t cpu_mask; uint32_t val; int bkts[32]; /* one bucket is assumed for each cpu */ int b1, b2, c1, c2, i, j, k; int use_bkt; if (!flow_classification) return; use_bkt = 1; if (smp_started) cpu_mask = xlr_hw_thread_mask; else return; printf("Using %s-based distribution\n", (use_bkt) ? "bucket" : "class"); j = 0; for(i = 0; i < 32; i++) { if ((1 << i) & cpu_mask){ /* for each cpu, mark the 4+threadid bucket */ bkts[j] = ((i / 4) * 8) + (i % 4); j++; } } /*configure the 128 * 9 Translation table to send to available buckets*/ k = 0; c1 = 3; c2 = 0; for(i = 0; i < 64; i++) { /* Get the next 2 pairs of (class, bucket): (c1, b1), (c2, b2). c1, c2 limited to {0, 1, 2, 3} i.e, the 4 classes defined by h/w b1, b2 limited to { bkts[i], where 0 <= i < j} i.e, the set of buckets computed in the above loop. */ c1 = (c1 + 1) & 3; c2 = (c1 + 1) & 3; b1 = bkts[k]; k = (k + 1) % j; b2 = bkts[k]; k = (k + 1) % j; PDEBUG("Translation table[%d] b1=%d b2=%d c1=%d c2=%d\n", i, b1, b2, c1, c2); val = ((c1 << 23) | (b1 << 17) | (use_bkt << 16) | (c2 << 7) | (b2 << 1) | (use_bkt << 0)); NLGE_WRITE(sc->base, R_TRANSLATETABLE + i, val); c1 = c2; } } static void nlna_config_parser(struct nlna_softc *sc) { uint32_t val; /* * Mark it as ETHERNET type. */ NLGE_WRITE(sc->base, R_L2TYPE_0, 0x01); #ifndef NLGE_HW_CHKSUM if (!flow_classification) return; #endif /* Use 7bit CRChash for flow classification with 127 as CRC polynomial*/ NLGE_WRITE(sc->base, R_PARSERCONFIGREG, ((0x7f << 8) | (1 << 1))); /* configure the parser : L2 Type is configured in the bootloader */ /* extract IP: src, dest protocol */ NLGE_WRITE(sc->base, R_L3CTABLE, (9 << 20) | (1 << 19) | (1 << 18) | (0x01 << 16) | (0x0800 << 0)); NLGE_WRITE(sc->base, R_L3CTABLE + 1, (9 << 25) | (1 << 21) | (12 << 14) | (4 << 10) | (16 << 4) | 4); #ifdef NLGE_HW_CHKSUM device_printf(sc->nlna_dev, "Enabled h/w support to compute TCP/IP" " checksum\n"); #endif /* Configure to extract SRC port and Dest port for TCP and UDP pkts */ NLGE_WRITE(sc->base, R_L4CTABLE, 6); NLGE_WRITE(sc->base, R_L4CTABLE + 2, 17); val = ((0 << 21) | (2 << 17) | (2 << 11) | (2 << 7)); NLGE_WRITE(sc->base, R_L4CTABLE + 1, val); NLGE_WRITE(sc->base, R_L4CTABLE + 3, val); } static void nlna_config_classifier(struct nlna_softc *sc) { int i; if (sc->mac_type == XLR_XGMII) { /* TBD: XGMII init sequence */ /* xgmac translation table doesn't have sane values on reset */ for (i = 0; i < 64; i++) NLGE_WRITE(sc->base, R_TRANSLATETABLE + i, 0x0); /* * use upper 7 bits of the parser extract to index the * translate table */ NLGE_WRITE(sc->base, R_PARSERCONFIGREG, 0x0); } } /* * Complete a bunch of h/w register initializations that are common for all the * ports controlled by a NA. */ static void nlna_config_common(struct nlna_softc *sc) { struct xlr_gmac_block_t *block_info; struct stn_cc *gmac_cc_config; int i; block_info = device_get_ivars(sc->nlna_dev); gmac_cc_config = block_info->credit_config; for (i = 0; i < MAX_NUM_MSGRNG_STN_CC; i++) { NLGE_WRITE(sc->base, R_CC_CPU0_0 + i, gmac_cc_config->counters[i >> 3][i & 0x07]); } NLGE_WRITE(sc->base, R_MSG_TX_THRESHOLD, 3); NLGE_WRITE(sc->base, R_DMACR0, 0xffffffff); NLGE_WRITE(sc->base, R_DMACR1, 0xffffffff); NLGE_WRITE(sc->base, R_DMACR2, 0xffffffff); NLGE_WRITE(sc->base, R_DMACR3, 0xffffffff); NLGE_WRITE(sc->base, R_FREEQCARVE, 0); nlna_media_specific_config(sc); } static void nlna_media_specific_config(struct nlna_softc *sc) { struct bucket_size *bucket_sizes; bucket_sizes = xlr_board_info.bucket_sizes; switch (sc->mac_type) { case XLR_RGMII: case XLR_SGMII: case XLR_XAUI: NLGE_WRITE(sc->base, R_GMAC_JFR0_BUCKET_SIZE, bucket_sizes->bucket[MSGRNG_STNID_GMACJFR_0]); NLGE_WRITE(sc->base, R_GMAC_RFR0_BUCKET_SIZE, bucket_sizes->bucket[MSGRNG_STNID_GMACRFR_0]); NLGE_WRITE(sc->base, R_GMAC_JFR1_BUCKET_SIZE, bucket_sizes->bucket[MSGRNG_STNID_GMACJFR_1]); NLGE_WRITE(sc->base, R_GMAC_RFR1_BUCKET_SIZE, bucket_sizes->bucket[MSGRNG_STNID_GMACRFR_1]); if (sc->mac_type == XLR_XAUI) { NLGE_WRITE(sc->base, R_TXDATAFIFO0, (224 << 16)); } break; case XLR_XGMII: NLGE_WRITE(sc->base, R_XGS_RFR_BUCKET_SIZE, bucket_sizes->bucket[sc->rfrbucket]); default: break; } } static void nlna_reset_ports(struct nlna_softc *sc, struct xlr_gmac_block_t *blk) { xlr_reg_t *addr; int i; uint32_t rx_ctrl; /* Refer Section 13.9.3 in the PRM for the reset sequence */ for (i = 0; i < sc->num_ports; i++) { addr = xlr_io_mmio(blk->gmac_port[i].base_addr); /* 1. Reset RxEnable in MAC_CONFIG */ switch (sc->mac_type) { case XLR_RGMII: case XLR_SGMII: NLGE_UPDATE(addr, R_MAC_CONFIG_1, 0, (1 << O_MAC_CONFIG_1__rxen)); break; case XLR_XAUI: case XLR_XGMII: NLGE_UPDATE(addr, R_RX_CONTROL, 0, (1 << O_RX_CONTROL__RxEnable)); break; default: printf("Error: Unsupported port_type=%d\n", sc->mac_type); } /* 1.1 Wait for RxControl.RxHalt to be set */ do { rx_ctrl = NLGE_READ(addr, R_RX_CONTROL); } while (!(rx_ctrl & 0x2)); /* 2. Set the soft reset bit in RxControl */ NLGE_UPDATE(addr, R_RX_CONTROL, (1 << O_RX_CONTROL__SoftReset), (1 << O_RX_CONTROL__SoftReset)); /* 2.1 Wait for RxControl.SoftResetDone to be set */ do { rx_ctrl = NLGE_READ(addr, R_RX_CONTROL); } while (!(rx_ctrl & 0x8)); /* 3. Clear the soft reset bit in RxControl */ NLGE_UPDATE(addr, R_RX_CONTROL, 0, (1 << O_RX_CONTROL__SoftReset)); /* Turn off tx/rx on the port. */ NLGE_UPDATE(addr, R_RX_CONTROL, 0, (1 << O_RX_CONTROL__RxEnable)); NLGE_UPDATE(addr, R_TX_CONTROL, 0, (1 << O_TX_CONTROL__TxEnable)); } } static void nlna_disable_ports(struct nlna_softc *sc) { int i; for (i = 0; i < sc->num_ports; i++) { if (sc->child_sc[i] != NULL) nlge_port_disable(sc->child_sc[i]); } } static void nlna_enable_ports(struct nlna_softc *sc) { device_t nlge_dev, *devlist; struct nlge_softc *port_sc; int i, numdevs; device_get_children(sc->nlna_dev, &devlist, &numdevs); for (i = 0; i < numdevs; i++) { nlge_dev = devlist[i]; if (nlge_dev == NULL) continue; port_sc = device_get_softc(nlge_dev); if (port_sc->nlge_if->if_drv_flags & IFF_DRV_RUNNING) nlge_port_enable(port_sc); } free(devlist, M_TEMP); } static void nlna_get_all_softc(device_t iodi_dev, struct nlna_softc **sc_vec, uint32_t vec_sz) { device_t na_dev; int i; for (i = 0; i < vec_sz; i++) { sc_vec[i] = NULL; na_dev = device_find_child(iodi_dev, "nlna", i); if (na_dev != NULL) sc_vec[i] = device_get_softc(na_dev); } } static void nlge_port_disable(struct nlge_softc *sc) { struct ifnet *ifp; xlr_reg_t *base; uint32_t rd; int id, port_type; id = sc->id; port_type = sc->port_type; base = sc->base; ifp = sc->nlge_if; NLGE_UPDATE(base, R_RX_CONTROL, 0x0, 1 << O_RX_CONTROL__RxEnable); do { rd = NLGE_READ(base, R_RX_CONTROL); } while (!(rd & (1 << O_RX_CONTROL__RxHalt))); NLGE_UPDATE(base, R_TX_CONTROL, 0, 1 << O_TX_CONTROL__TxEnable); do { rd = NLGE_READ(base, R_TX_CONTROL); } while (!(rd & (1 << O_TX_CONTROL__TxIdle))); switch (port_type) { case XLR_RGMII: case XLR_SGMII: NLGE_UPDATE(base, R_MAC_CONFIG_1, 0, ((1 << O_MAC_CONFIG_1__rxen) | (1 << O_MAC_CONFIG_1__txen))); break; case XLR_XGMII: case XLR_XAUI: NLGE_UPDATE(base, R_XGMAC_CONFIG_1, 0, ((1 << O_XGMAC_CONFIG_1__hsttfen) | (1 << O_XGMAC_CONFIG_1__hstrfen))); break; default: panic("Unknown MAC type on port %d\n", id); } if (ifp) { ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } } static void nlge_port_enable(struct nlge_softc *sc) { struct xlr_gmac_port *self; xlr_reg_t *base; base = sc->base; self = device_get_ivars(sc->nlge_dev); if (xlr_board_info.is_xls && sc->port_type == XLR_RGMII) NLGE_UPDATE(base, R_RX_CONTROL, (1 << O_RX_CONTROL__RGMII), (1 << O_RX_CONTROL__RGMII)); NLGE_UPDATE(base, R_RX_CONTROL, (1 << O_RX_CONTROL__RxEnable), (1 << O_RX_CONTROL__RxEnable)); NLGE_UPDATE(base, R_TX_CONTROL, (1 << O_TX_CONTROL__TxEnable | RGE_TX_THRESHOLD_BYTES), (1 << O_TX_CONTROL__TxEnable | 0x3fff)); switch (sc->port_type) { case XLR_RGMII: case XLR_SGMII: NLGE_UPDATE(base, R_MAC_CONFIG_1, ((1 << O_MAC_CONFIG_1__rxen) | (1 << O_MAC_CONFIG_1__txen)), ((1 << O_MAC_CONFIG_1__rxen) | (1 << O_MAC_CONFIG_1__txen))); break; case XLR_XGMII: case XLR_XAUI: NLGE_UPDATE(base, R_XGMAC_CONFIG_1, ((1 << O_XGMAC_CONFIG_1__hsttfen) | (1 << O_XGMAC_CONFIG_1__hstrfen)), ((1 << O_XGMAC_CONFIG_1__hsttfen) | (1 << O_XGMAC_CONFIG_1__hstrfen))); break; default: panic("Unknown MAC type on port %d\n", sc->id); } } static void nlge_mac_set_rx_mode(struct nlge_softc *sc) { uint32_t regval; regval = NLGE_READ(sc->base, R_MAC_FILTER_CONFIG); if (sc->if_flags & IFF_PROMISC) { regval |= (1 << O_MAC_FILTER_CONFIG__BROADCAST_EN) | (1 << O_MAC_FILTER_CONFIG__PAUSE_FRAME_EN) | (1 << O_MAC_FILTER_CONFIG__ALL_MCAST_EN) | (1 << O_MAC_FILTER_CONFIG__ALL_UCAST_EN); } else { regval &= ~((1 << O_MAC_FILTER_CONFIG__PAUSE_FRAME_EN) | (1 << O_MAC_FILTER_CONFIG__ALL_UCAST_EN)); } NLGE_WRITE(sc->base, R_MAC_FILTER_CONFIG, regval); } static void nlge_sgmii_init(struct nlge_softc *sc) { xlr_reg_t *mmio_gpio; int phy; if (sc->port_type != XLR_SGMII) return; nlge_mii_write_internal(sc->serdes_addr, 26, 0, 0x6DB0); nlge_mii_write_internal(sc->serdes_addr, 26, 1, 0xFFFF); nlge_mii_write_internal(sc->serdes_addr, 26, 2, 0xB6D0); nlge_mii_write_internal(sc->serdes_addr, 26, 3, 0x00FF); nlge_mii_write_internal(sc->serdes_addr, 26, 4, 0x0000); nlge_mii_write_internal(sc->serdes_addr, 26, 5, 0x0000); nlge_mii_write_internal(sc->serdes_addr, 26, 6, 0x0005); nlge_mii_write_internal(sc->serdes_addr, 26, 7, 0x0001); nlge_mii_write_internal(sc->serdes_addr, 26, 8, 0x0000); nlge_mii_write_internal(sc->serdes_addr, 26, 9, 0x0000); nlge_mii_write_internal(sc->serdes_addr, 26,10, 0x0000); /* program GPIO values for serdes init parameters */ DELAY(100); mmio_gpio = xlr_io_mmio(XLR_IO_GPIO_OFFSET); xlr_write_reg(mmio_gpio, 0x20, 0x7e6802); xlr_write_reg(mmio_gpio, 0x10, 0x7104); DELAY(100); /* * This kludge is needed to setup serdes (?) clock correctly on some * XLS boards */ if ((xlr_boot1_info.board_major_version == RMI_XLR_BOARD_ARIZONA_XI || xlr_boot1_info.board_major_version == RMI_XLR_BOARD_ARIZONA_XII) && xlr_boot1_info.board_minor_version == 4) { /* use 125 Mhz instead of 156.25Mhz ref clock */ DELAY(100); xlr_write_reg(mmio_gpio, 0x10, 0x7103); xlr_write_reg(mmio_gpio, 0x21, 0x7103); DELAY(100); } /* enable autoneg - more magic */ phy = sc->phy_addr % 4 + 27; nlge_mii_write_internal(sc->pcs_addr, phy, 0, 0x1000); DELAY(100000); nlge_mii_write_internal(sc->pcs_addr, phy, 0, 0x0200); DELAY(100000); } static void nlge_intr(void *arg) { struct nlge_port_set *pset; struct nlge_softc *sc; struct nlge_softc *port_sc; xlr_reg_t *base; uint32_t intreg; uint32_t intr_status; int i; sc = arg; if (sc == NULL) { printf("warning: No port registered for interrupt\n"); return; } base = sc->base; intreg = NLGE_READ(base, R_INTREG); if (intreg & (1 << O_INTREG__MDInt)) { pset = sc->mdio_pset; if (pset == NULL) { printf("warning: No ports for MDIO interrupt\n"); return; } for (i = 0; i < pset->vec_sz; i++) { port_sc = pset->port_vec[i]; if (port_sc == NULL) continue; /* Ack phy interrupt - clear on read*/ intr_status = nlge_mii_read_internal(port_sc->mii_base, port_sc->phy_addr, 26); PDEBUG("Phy_%d: int_status=0x%08x\n", port_sc->phy_addr, intr_status); if (!(intr_status & 0x8000)) { /* no interrupt for this port */ continue; } if (intr_status & 0x2410) { /* update link status for port */ nlge_gmac_config_speed(port_sc, 1); } else { printf("%s: Unsupported phy interrupt" " (0x%08x)\n", device_get_nameunit(port_sc->nlge_dev), intr_status); } } } /* Clear the NA interrupt */ xlr_write_reg(base, R_INTREG, 0xffffffff); return; } static int nlge_irq_init(struct nlge_softc *sc) { struct resource irq_res; struct nlna_softc *na_sc; struct xlr_gmac_block_t *block_info; device_t na_dev; int ret; int irq_num; na_dev = device_get_parent(sc->nlge_dev); block_info = device_get_ivars(na_dev); irq_num = block_info->baseirq + sc->instance; irq_res.__r_i = (struct resource_i *)(intptr_t) (irq_num); ret = bus_setup_intr(sc->nlge_dev, &irq_res, INTR_TYPE_NET | INTR_MPSAFE, NULL, nlge_intr, sc, NULL); if (ret) { nlge_detach(sc->nlge_dev); device_printf(sc->nlge_dev, "couldn't set up irq: error=%d\n", ret); return (ENXIO); } PDEBUG("Setup intr for dev=%s, irq=%d\n", device_get_nameunit(sc->nlge_dev), irq_num); if (sc->instance == 0) { na_sc = device_get_softc(na_dev); sc->mdio_pset = &na_sc->mdio_set; } return (0); } static void nlge_irq_fini(struct nlge_softc *sc) { } static void nlge_hw_init(struct nlge_softc *sc) { struct xlr_gmac_port *port_info; xlr_reg_t *base; base = sc->base; port_info = device_get_ivars(sc->nlge_dev); sc->tx_bucket_id = port_info->tx_bucket_id; /* each packet buffer is 1536 bytes */ NLGE_WRITE(base, R_DESC_PACK_CTRL, (1 << O_DESC_PACK_CTRL__MaxEntry) | #ifdef NLGE_HW_CHKSUM (1 << O_DESC_PACK_CTRL__PrePadEnable) | #endif (MAX_FRAME_SIZE << O_DESC_PACK_CTRL__RegularSize)); NLGE_WRITE(base, R_STATCTRL, ((1 << O_STATCTRL__Sten) | (1 << O_STATCTRL__ClrCnt))); NLGE_WRITE(base, R_L2ALLOCCTRL, 0xffffffff); NLGE_WRITE(base, R_INTMASK, 0); nlge_set_mac_addr(sc); nlge_media_specific_init(sc); } static void nlge_sc_init(struct nlge_softc *sc, device_t dev, struct xlr_gmac_port *port_info) { memset(sc, 0, sizeof(*sc)); sc->nlge_dev = dev; sc->id = device_get_unit(dev); nlge_set_port_attribs(sc, port_info); } static void nlge_media_specific_init(struct nlge_softc *sc) { struct mii_data *media; struct bucket_size *bucket_sizes; bucket_sizes = xlr_board_info.bucket_sizes; switch (sc->port_type) { case XLR_RGMII: case XLR_SGMII: case XLR_XAUI: NLGE_UPDATE(sc->base, R_DESC_PACK_CTRL, (BYTE_OFFSET << O_DESC_PACK_CTRL__ByteOffset), (W_DESC_PACK_CTRL__ByteOffset << O_DESC_PACK_CTRL__ByteOffset)); NLGE_WRITE(sc->base, R_GMAC_TX0_BUCKET_SIZE + sc->instance, bucket_sizes->bucket[sc->tx_bucket_id]); if (sc->port_type != XLR_XAUI) { nlge_gmac_config_speed(sc, 1); if (sc->mii_bus) { media = (struct mii_data *)device_get_softc( sc->mii_bus); } } break; case XLR_XGMII: NLGE_WRITE(sc->base, R_BYTEOFFSET0, 0x2); NLGE_WRITE(sc->base, R_XGMACPADCALIBRATION, 0x30); NLGE_WRITE(sc->base, R_XGS_TX0_BUCKET_SIZE, bucket_sizes->bucket[sc->tx_bucket_id]); break; default: break; } } /* * Read the MAC address from the XLR boot registers. All port addresses * are identical except for the lowest octet. */ static void nlge_read_mac_addr(struct nlge_softc *sc) { int i, j; for (i = 0, j = 40; i < ETHER_ADDR_LEN && j >= 0; i++, j-= 8) sc->dev_addr[i] = (xlr_boot1_info.mac_addr >> j) & 0xff; sc->dev_addr[i - 1] += sc->id; /* last octet is port-specific */ } /* * Write the MAC address to the XLR MAC port. Also, set the address * masks and MAC filter configuration. */ static void nlge_set_mac_addr(struct nlge_softc *sc) { NLGE_WRITE(sc->base, R_MAC_ADDR0, ((sc->dev_addr[5] << 24) | (sc->dev_addr[4] << 16) | (sc->dev_addr[3] << 8) | (sc->dev_addr[2]))); NLGE_WRITE(sc->base, R_MAC_ADDR0 + 1, ((sc->dev_addr[1] << 24) | (sc-> dev_addr[0] << 16))); NLGE_WRITE(sc->base, R_MAC_ADDR_MASK2, 0xffffffff); NLGE_WRITE(sc->base, R_MAC_ADDR_MASK2 + 1, 0xffffffff); NLGE_WRITE(sc->base, R_MAC_ADDR_MASK3, 0xffffffff); NLGE_WRITE(sc->base, R_MAC_ADDR_MASK3 + 1, 0xffffffff); NLGE_WRITE(sc->base, R_MAC_FILTER_CONFIG, (1 << O_MAC_FILTER_CONFIG__BROADCAST_EN) | (1 << O_MAC_FILTER_CONFIG__ALL_MCAST_EN) | (1 << O_MAC_FILTER_CONFIG__MAC_ADDR0_VALID)); if (sc->port_type == XLR_RGMII || sc->port_type == XLR_SGMII) { NLGE_UPDATE(sc->base, R_IPG_IFG, MAC_B2B_IPG, 0x7f); } } static int nlge_if_init(struct nlge_softc *sc) { struct ifnet *ifp; device_t dev; int error; error = 0; dev = sc->nlge_dev; NLGE_LOCK_INIT(sc, device_get_nameunit(dev)); ifp = sc->nlge_if = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = 0; ifp->if_capenable = ifp->if_capabilities; ifp->if_ioctl = nlge_ioctl; ifp->if_init = nlge_init; ifp->if_hwassist = 0; ifp->if_snd.ifq_drv_maxlen = RGE_TX_Q_SIZE; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); ifmedia_init(&sc->nlge_mii.mii_media, 0, nlge_mediachange, nlge_mediastatus); ifmedia_add(&sc->nlge_mii.mii_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->nlge_mii.mii_media, IFM_ETHER | IFM_AUTO); sc->nlge_mii.mii_media.ifm_media = sc->nlge_mii.mii_media.ifm_cur->ifm_media; nlge_read_mac_addr(sc); ether_ifattach(ifp, sc->dev_addr); /* override if_transmit : per ifnet(9), do it after if_attach */ ifp->if_transmit = nlge_tx; fail: return (error); } static void nlge_mii_init(device_t dev, struct nlge_softc *sc) { int error; if (sc->port_type != XLR_XAUI && sc->port_type != XLR_XGMII) { NLGE_WRITE(sc->mii_base, R_MII_MGMT_CONFIG, 0x07); } error = mii_attach(dev, &sc->mii_bus, sc->nlge_if, nlge_mediachange, nlge_mediastatus, BMSR_DEFCAPMASK, sc->phy_addr, MII_OFFSET_ANY, 0); if (error) { device_printf(dev, "attaching PHYs failed\n"); sc->mii_bus = NULL; } if (sc->mii_bus != NULL) { /* * Enable all MDIO interrupts in the phy. RX_ER bit seems to get * set about every 1 sec in GigE mode, ignore it for now... */ nlge_mii_write_internal(sc->mii_base, sc->phy_addr, 25, 0xfffffffe); } } /* * Read a PHY register. * * Input parameters: * mii_base - Base address of MII * phyaddr - PHY's address * regidx = index of register to read * * Return value: * value read, or 0 if an error occurred. */ static int nlge_mii_read_internal(xlr_reg_t *mii_base, int phyaddr, int regidx) { int i, val; /* setup the phy reg to be used */ NLGE_WRITE(mii_base, R_MII_MGMT_ADDRESS, (phyaddr << 8) | (regidx << 0)); /* Issue the read command */ NLGE_WRITE(mii_base, R_MII_MGMT_COMMAND, (1 << O_MII_MGMT_COMMAND__rstat)); /* poll for the read cycle to complete */ for (i = 0; i < PHY_STATUS_RETRIES; i++) { if (NLGE_READ(mii_base, R_MII_MGMT_INDICATORS) == 0) break; } /* clear the read cycle */ NLGE_WRITE(mii_base, R_MII_MGMT_COMMAND, 0); if (i == PHY_STATUS_RETRIES) { return (0xffffffff); } val = NLGE_READ(mii_base, R_MII_MGMT_STATUS); return (val); } /* * Write a value to a PHY register. * * Input parameters: * mii_base - Base address of MII * phyaddr - PHY to use * regidx - register within the PHY * regval - data to write to register * * Return value: * nothing */ static void nlge_mii_write_internal(xlr_reg_t *mii_base, int phyaddr, int regidx, int regval) { int i; NLGE_WRITE(mii_base, R_MII_MGMT_ADDRESS, (phyaddr << 8) | (regidx << 0)); /* Write the data which starts the write cycle */ NLGE_WRITE(mii_base, R_MII_MGMT_WRITE_DATA, regval); /* poll for the write cycle to complete */ for (i = 0; i < PHY_STATUS_RETRIES; i++) { if (NLGE_READ(mii_base, R_MII_MGMT_INDICATORS) == 0) break; } } /* * Function to optimize the use of p2d descriptors for the given PDU. * As it is on the fast-path (called during packet transmission), it * described in more detail than the initialization functions. * * Input: mbuf chain (MC), pointer to fmn message * Input constraints: None * Output: FMN message to transmit the data in MC * Return values: 0 - success * 1 - MC cannot be handled (see Limitations below) * 2 - MC cannot be handled presently (maybe worth re-trying) * Other output: Number of entries filled in the FMN message * * Output structure/constraints: * 1. Max 3 p2d's + 1 zero-len (ZL) p2d with virtual address of MC. * 2. 3 p2d's + 1 p2p with max 14 p2d's (ZL p2d not required in this case). * 3. Each p2d points to physically contiguous chunk of data (subject to * entire MC requiring max 17 p2d's). * Limitations: * 1. MC's that require more than 17 p2d's are not handled. * Benefits: MC's that require <= 3 p2d's avoid the overhead of allocating * the p2p structure. Small packets (which typically give low * performance) are expected to have a small MC that takes * advantage of this. */ static int prepare_fmn_message(struct nlge_softc *sc, struct msgrng_msg *fmn_msg, uint32_t *n_entries, struct mbuf *mbuf_chain, uint64_t fb_stn_id, struct nlge_tx_desc **tx_desc) { struct mbuf *m; struct nlge_tx_desc *p2p; uint64_t *cur_p2d; uint64_t fbpaddr; vm_offset_t buf; vm_paddr_t paddr; int msg_sz, p2p_sz, len, frag_sz; /* Num entries per FMN msg is 4 for XLR/XLS */ const int FMN_SZ = sizeof(*fmn_msg) / sizeof(uint64_t); msg_sz = p2p_sz = 0; p2p = NULL; cur_p2d = &fmn_msg->msg0; for (m = mbuf_chain; m != NULL; m = m->m_next) { buf = (vm_offset_t) m->m_data; len = m->m_len; while (len) { if (msg_sz == (FMN_SZ - 1)) { p2p = uma_zalloc(nl_tx_desc_zone, M_NOWAIT); if (p2p == NULL) { return (2); } /* * Save the virtual address in the descriptor, * it makes freeing easy. */ p2p->frag[XLR_MAX_TX_FRAGS] = (uint64_t)(vm_offset_t)p2p; cur_p2d = &p2p->frag[0]; } else if (msg_sz == (FMN_SZ - 2 + XLR_MAX_TX_FRAGS)) { uma_zfree(nl_tx_desc_zone, p2p); return (1); } paddr = vtophys(buf); frag_sz = PAGE_SIZE - (buf & PAGE_MASK); if (len < frag_sz) frag_sz = len; *cur_p2d++ = (127ULL << 54) | ((uint64_t)frag_sz << 40) | paddr; msg_sz++; if (p2p != NULL) p2p_sz++; len -= frag_sz; buf += frag_sz; } } if (msg_sz == 0) { printf("Zero-length mbuf chain ??\n"); *n_entries = msg_sz ; return (0); } /* set eop in most-recent p2d */ cur_p2d[-1] |= (1ULL << 63); #ifdef __mips_n64 /* * On n64, we cannot store our mbuf pointer(64 bit) in the freeback * message (40bit available), so we put the mbuf in m_nextpkt and * use the physical addr of that in freeback message. */ mbuf_chain->m_nextpkt = mbuf_chain; fbpaddr = vtophys(&mbuf_chain->m_nextpkt); #else /* Careful, don't sign extend when going to 64bit */ fbpaddr = (uint64_t)(uintptr_t)mbuf_chain; #endif *cur_p2d = (1ULL << 63) | ((uint64_t)fb_stn_id << 54) | fbpaddr; *tx_desc = p2p; if (p2p != NULL) { paddr = vtophys(p2p); p2p_sz++; fmn_msg->msg3 = (1ULL << 62) | ((uint64_t)fb_stn_id << 54) | ((uint64_t)(p2p_sz * 8) << 40) | paddr; *n_entries = FMN_SZ; } else { *n_entries = msg_sz + 1; } return (0); } static int send_fmn_msg_tx(struct nlge_softc *sc, struct msgrng_msg *msg, uint32_t n_entries) { uint32_t msgrng_flags; int ret; int i = 0; do { msgrng_flags = msgrng_access_enable(); ret = message_send(n_entries, MSGRNG_CODE_MAC, sc->tx_bucket_id, msg); msgrng_restore(msgrng_flags); if (ret == 0) return (0); i++; } while (i < 100000); device_printf(sc->nlge_dev, "Too many credit fails in tx path\n"); return (1); } static void release_tx_desc(vm_paddr_t paddr) { struct nlge_tx_desc *tx_desc; uint32_t sr; uint64_t vaddr; paddr += (XLR_MAX_TX_FRAGS * sizeof(uint64_t)); sr = xlr_enable_kx(); vaddr = xlr_paddr_ld(paddr); xlr_restore_kx(sr); tx_desc = (struct nlge_tx_desc*)(intptr_t)vaddr; uma_zfree(nl_tx_desc_zone, tx_desc); } static void * get_buf(void) { struct mbuf *m_new; uint64_t *md; #ifdef INVARIANTS vm_paddr_t temp1, temp2; #endif if ((m_new = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR)) == NULL) return (NULL); m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; m_adj(m_new, XLR_CACHELINE_SIZE - ((uintptr_t)m_new->m_data & 0x1f)); md = (uint64_t *)m_new->m_data; md[0] = (intptr_t)m_new; /* Back Ptr */ md[1] = 0xf00bad; m_adj(m_new, XLR_CACHELINE_SIZE); #ifdef INVARIANTS temp1 = vtophys((vm_offset_t) m_new->m_data); temp2 = vtophys((vm_offset_t) m_new->m_data + 1536); if ((temp1 + 1536) != temp2) panic("ALLOCED BUFFER IS NOT CONTIGUOUS\n"); #endif return ((void *)m_new->m_data); } static int nlge_gmac_config_speed(struct nlge_softc *sc, int quick) { struct mii_data *md; xlr_reg_t *mmio; int bmsr, n_tries, max_tries; int core_ctl[] = { 0x2, 0x1, 0x0, 0x1 }; int sgmii_speed[] = { SGMII_SPEED_10, SGMII_SPEED_100, SGMII_SPEED_1000, SGMII_SPEED_100 }; /* default to 100Mbps */ char *speed_str[] = { "10", "100", "1000", "unknown, defaulting to 100" }; int link_state = LINK_STATE_DOWN; if (sc->port_type == XLR_XAUI || sc->port_type == XLR_XGMII) return 0; md = NULL; mmio = sc->base; if (sc->mii_base != NULL) { max_tries = (quick == 1) ? 100 : 4000; bmsr = 0; for (n_tries = 0; n_tries < max_tries; n_tries++) { bmsr = nlge_mii_read_internal(sc->mii_base, sc->phy_addr, MII_BMSR); if ((bmsr & BMSR_ACOMP) && (bmsr & BMSR_LINK)) break; /* Auto-negotiation is complete and link is up */ DELAY(1000); } bmsr &= BMSR_LINK; sc->link = (bmsr == 0) ? xlr_mac_link_down : xlr_mac_link_up; sc->speed = nlge_mii_read_internal(sc->mii_base, sc->phy_addr, 28); sc->speed = (sc->speed >> 3) & 0x03; if (sc->link == xlr_mac_link_up) { link_state = LINK_STATE_UP; nlge_sgmii_init(sc); } if (sc->mii_bus) md = (struct mii_data *)device_get_softc(sc->mii_bus); } if (sc->port_type != XLR_RGMII) NLGE_WRITE(mmio, R_INTERFACE_CONTROL, sgmii_speed[sc->speed]); if (sc->speed == xlr_mac_speed_10 || sc->speed == xlr_mac_speed_100 || sc->speed == xlr_mac_speed_rsvd) { NLGE_WRITE(mmio, R_MAC_CONFIG_2, 0x7117); } else if (sc->speed == xlr_mac_speed_1000) { NLGE_WRITE(mmio, R_MAC_CONFIG_2, 0x7217); if (md != NULL) { ifmedia_set(&md->mii_media, IFM_MAKEWORD(IFM_ETHER, IFM_1000_T, IFM_FDX, md->mii_instance)); } } NLGE_WRITE(mmio, R_CORECONTROL, core_ctl[sc->speed]); if_link_state_change(sc->nlge_if, link_state); printf("%s: [%sMbps]\n", device_get_nameunit(sc->nlge_dev), speed_str[sc->speed]); return (0); } /* * This function is called for each port that was added to the device tree * and it initializes the following port attributes: * - type * - base (base address to access port-specific registers) * - mii_base * - phy_addr */ static void nlge_set_port_attribs(struct nlge_softc *sc, struct xlr_gmac_port *port_info) { sc->instance = port_info->instance % 4; /* TBD: will not work for SPI-4 */ sc->port_type = port_info->type; sc->base = xlr_io_mmio(port_info->base_addr); sc->mii_base = xlr_io_mmio(port_info->mii_addr); if (port_info->pcs_addr != 0) sc->pcs_addr = xlr_io_mmio(port_info->pcs_addr); if (port_info->serdes_addr != 0) sc->serdes_addr = xlr_io_mmio(port_info->serdes_addr); sc->phy_addr = port_info->phy_addr; PDEBUG("Port%d: base=%p, mii_base=%p, phy_addr=%d\n", sc->id, sc->base, sc->mii_base, sc->phy_addr); } /* ------------------------------------------------------------------------ */ /* Debug dump functions */ #ifdef DEBUG static void dump_reg(xlr_reg_t *base, uint32_t offset, char *name) { int val; val = NLGE_READ(base, offset); printf("%-30s: 0x%8x 0x%8x\n", name, offset, val); } #define STRINGIFY(x) #x static void dump_na_registers(xlr_reg_t *base_addr, int port_id) { PDEBUG("Register dump for NA (of port=%d)\n", port_id); dump_reg(base_addr, R_PARSERCONFIGREG, STRINGIFY(R_PARSERCONFIGREG)); PDEBUG("Tx bucket sizes\n"); dump_reg(base_addr, R_GMAC_JFR0_BUCKET_SIZE, STRINGIFY(R_GMAC_JFR0_BUCKET_SIZE)); dump_reg(base_addr, R_GMAC_RFR0_BUCKET_SIZE, STRINGIFY(R_GMAC_RFR0_BUCKET_SIZE)); dump_reg(base_addr, R_GMAC_TX0_BUCKET_SIZE, STRINGIFY(R_GMAC_TX0_BUCKET_SIZE)); dump_reg(base_addr, R_GMAC_TX1_BUCKET_SIZE, STRINGIFY(R_GMAC_TX1_BUCKET_SIZE)); dump_reg(base_addr, R_GMAC_TX2_BUCKET_SIZE, STRINGIFY(R_GMAC_TX2_BUCKET_SIZE)); dump_reg(base_addr, R_GMAC_TX3_BUCKET_SIZE, STRINGIFY(R_GMAC_TX3_BUCKET_SIZE)); dump_reg(base_addr, R_GMAC_JFR1_BUCKET_SIZE, STRINGIFY(R_GMAC_JFR1_BUCKET_SIZE)); dump_reg(base_addr, R_GMAC_RFR1_BUCKET_SIZE, STRINGIFY(R_GMAC_RFR1_BUCKET_SIZE)); dump_reg(base_addr, R_TXDATAFIFO0, STRINGIFY(R_TXDATAFIFO0)); dump_reg(base_addr, R_TXDATAFIFO1, STRINGIFY(R_TXDATAFIFO1)); } static void dump_gmac_registers(struct nlge_softc *sc) { xlr_reg_t *base_addr = sc->base; int port_id = sc->instance; PDEBUG("Register dump for port=%d\n", port_id); if (sc->port_type == XLR_RGMII || sc->port_type == XLR_SGMII) { dump_reg(base_addr, R_MAC_CONFIG_1, STRINGIFY(R_MAC_CONFIG_1)); dump_reg(base_addr, R_MAC_CONFIG_2, STRINGIFY(R_MAC_CONFIG_2)); dump_reg(base_addr, R_IPG_IFG, STRINGIFY(R_IPG_IFG)); dump_reg(base_addr, R_HALF_DUPLEX, STRINGIFY(R_HALF_DUPLEX)); dump_reg(base_addr, R_MAXIMUM_FRAME_LENGTH, STRINGIFY(R_MAXIMUM_FRAME_LENGTH)); dump_reg(base_addr, R_TEST, STRINGIFY(R_TEST)); dump_reg(base_addr, R_MII_MGMT_CONFIG, STRINGIFY(R_MII_MGMT_CONFIG)); dump_reg(base_addr, R_MII_MGMT_COMMAND, STRINGIFY(R_MII_MGMT_COMMAND)); dump_reg(base_addr, R_MII_MGMT_ADDRESS, STRINGIFY(R_MII_MGMT_ADDRESS)); dump_reg(base_addr, R_MII_MGMT_WRITE_DATA, STRINGIFY(R_MII_MGMT_WRITE_DATA)); dump_reg(base_addr, R_MII_MGMT_STATUS, STRINGIFY(R_MII_MGMT_STATUS)); dump_reg(base_addr, R_MII_MGMT_INDICATORS, STRINGIFY(R_MII_MGMT_INDICATORS)); dump_reg(base_addr, R_INTERFACE_CONTROL, STRINGIFY(R_INTERFACE_CONTROL)); dump_reg(base_addr, R_INTERFACE_STATUS, STRINGIFY(R_INTERFACE_STATUS)); } else if (sc->port_type == XLR_XAUI || sc->port_type == XLR_XGMII) { dump_reg(base_addr, R_XGMAC_CONFIG_0, STRINGIFY(R_XGMAC_CONFIG_0)); dump_reg(base_addr, R_XGMAC_CONFIG_1, STRINGIFY(R_XGMAC_CONFIG_1)); dump_reg(base_addr, R_XGMAC_CONFIG_2, STRINGIFY(R_XGMAC_CONFIG_2)); dump_reg(base_addr, R_XGMAC_CONFIG_3, STRINGIFY(R_XGMAC_CONFIG_3)); dump_reg(base_addr, R_XGMAC_STATION_ADDRESS_LS, STRINGIFY(R_XGMAC_STATION_ADDRESS_LS)); dump_reg(base_addr, R_XGMAC_STATION_ADDRESS_MS, STRINGIFY(R_XGMAC_STATION_ADDRESS_MS)); dump_reg(base_addr, R_XGMAC_MAX_FRAME_LEN, STRINGIFY(R_XGMAC_MAX_FRAME_LEN)); dump_reg(base_addr, R_XGMAC_REV_LEVEL, STRINGIFY(R_XGMAC_REV_LEVEL)); dump_reg(base_addr, R_XGMAC_MIIM_COMMAND, STRINGIFY(R_XGMAC_MIIM_COMMAND)); dump_reg(base_addr, R_XGMAC_MIIM_FILED, STRINGIFY(R_XGMAC_MIIM_FILED)); dump_reg(base_addr, R_XGMAC_MIIM_CONFIG, STRINGIFY(R_XGMAC_MIIM_CONFIG)); dump_reg(base_addr, R_XGMAC_MIIM_LINK_FAIL_VECTOR, STRINGIFY(R_XGMAC_MIIM_LINK_FAIL_VECTOR)); dump_reg(base_addr, R_XGMAC_MIIM_INDICATOR, STRINGIFY(R_XGMAC_MIIM_INDICATOR)); } dump_reg(base_addr, R_MAC_ADDR0, STRINGIFY(R_MAC_ADDR0)); dump_reg(base_addr, R_MAC_ADDR0 + 1, STRINGIFY(R_MAC_ADDR0+1)); dump_reg(base_addr, R_MAC_ADDR1, STRINGIFY(R_MAC_ADDR1)); dump_reg(base_addr, R_MAC_ADDR2, STRINGIFY(R_MAC_ADDR2)); dump_reg(base_addr, R_MAC_ADDR3, STRINGIFY(R_MAC_ADDR3)); dump_reg(base_addr, R_MAC_ADDR_MASK2, STRINGIFY(R_MAC_ADDR_MASK2)); dump_reg(base_addr, R_MAC_ADDR_MASK3, STRINGIFY(R_MAC_ADDR_MASK3)); dump_reg(base_addr, R_MAC_FILTER_CONFIG, STRINGIFY(R_MAC_FILTER_CONFIG)); dump_reg(base_addr, R_TX_CONTROL, STRINGIFY(R_TX_CONTROL)); dump_reg(base_addr, R_RX_CONTROL, STRINGIFY(R_RX_CONTROL)); dump_reg(base_addr, R_DESC_PACK_CTRL, STRINGIFY(R_DESC_PACK_CTRL)); dump_reg(base_addr, R_STATCTRL, STRINGIFY(R_STATCTRL)); dump_reg(base_addr, R_L2ALLOCCTRL, STRINGIFY(R_L2ALLOCCTRL)); dump_reg(base_addr, R_INTMASK, STRINGIFY(R_INTMASK)); dump_reg(base_addr, R_INTREG, STRINGIFY(R_INTREG)); dump_reg(base_addr, R_TXRETRY, STRINGIFY(R_TXRETRY)); dump_reg(base_addr, R_CORECONTROL, STRINGIFY(R_CORECONTROL)); dump_reg(base_addr, R_BYTEOFFSET0, STRINGIFY(R_BYTEOFFSET0)); dump_reg(base_addr, R_BYTEOFFSET1, STRINGIFY(R_BYTEOFFSET1)); dump_reg(base_addr, R_L2TYPE_0, STRINGIFY(R_L2TYPE_0)); dump_na_registers(base_addr, port_id); } static void dump_fmn_cpu_credits_for_gmac(struct xlr_board_info *board, int gmac_id) { struct stn_cc *cc; int gmac_bucket_ids[] = { 97, 98, 99, 100, 101, 103 }; int j, k, r, c; int n_gmac_buckets; n_gmac_buckets = sizeof (gmac_bucket_ids) / sizeof (gmac_bucket_ids[0]); for (j = 0; j < 8; j++) { // for each cpu cc = board->credit_configs[j]; printf("Credits for Station CPU_%d ---> GMAC buckets (tx path)\n", j); for (k = 0; k < n_gmac_buckets; k++) { r = gmac_bucket_ids[k] / 8; c = gmac_bucket_ids[k] % 8; printf (" --> gmac%d_bucket_%-3d: credits=%d\n", gmac_id, gmac_bucket_ids[k], cc->counters[r][c]); } } } static void dump_fmn_gmac_credits(struct xlr_board_info *board, int gmac_id) { struct stn_cc *cc; int j, k; cc = board->gmac_block[gmac_id].credit_config; printf("Credits for Station: GMAC_%d ---> CPU buckets (rx path)\n", gmac_id); for (j = 0; j < 8; j++) { // for each cpu printf(" ---> cpu_%d\n", j); for (k = 0; k < 8; k++) { // for each bucket in cpu printf(" ---> bucket_%d: credits=%d\n", j * 8 + k, cc->counters[j][k]); } } } static void dump_board_info(struct xlr_board_info *board) { struct xlr_gmac_block_t *gm; int i, k; printf("cpu=%x ", xlr_revision()); printf("board_version: major=%llx, minor=%llx\n", xlr_boot1_info.board_major_version, xlr_boot1_info.board_minor_version); printf("is_xls=%d, nr_cpus=%d, usb=%s, cfi=%s, ata=%s\npci_irq=%d," "gmac_ports=%d\n", board->is_xls, board->nr_cpus, board->usb ? "Yes" : "No", board->cfi ? "Yes": "No", board->ata ? "Yes" : "No", board->pci_irq, board->gmacports); printf("FMN: Core-station bucket sizes\n"); for (i = 0; i < 128; i++) { if (i && ((i % 16) == 0)) printf("\n"); printf ("b[%d] = %d ", i, board->bucket_sizes->bucket[i]); } printf("\n"); for (i = 0; i < 3; i++) { gm = &board->gmac_block[i]; printf("RNA_%d: type=%d, enabled=%s, mode=%d, station_id=%d," "station_txbase=%d, station_rfr=%d ", i, gm->type, gm->enabled ? "Yes" : "No", gm->mode, gm->station_id, gm->station_txbase, gm->station_rfr); printf("n_ports=%d, baseaddr=%p, baseirq=%d, baseinst=%d\n", gm->num_ports, (xlr_reg_t *)gm->baseaddr, gm->baseirq, gm->baseinst); } for (k = 0; k < 3; k++) { // for each NA dump_fmn_cpu_credits_for_gmac(board, k); dump_fmn_gmac_credits(board, k); } } static void dump_mac_stats(struct nlge_softc *sc) { xlr_reg_t *addr; uint32_t pkts_tx, pkts_rx; addr = sc->base; pkts_rx = NLGE_READ(sc->base, R_RPKT); pkts_tx = NLGE_READ(sc->base, R_TPKT); printf("[nlge_%d mac stats]: pkts_tx=%u, pkts_rx=%u\n", sc->id, pkts_tx, pkts_rx); if (pkts_rx > 0) { uint32_t r; /* dump all rx counters. we need this because pkts_rx includes bad packets. */ for (r = R_RFCS; r <= R_ROVR; r++) printf("[nlge_%d mac stats]: [0x%x]=%u\n", sc->id, r, NLGE_READ(sc->base, r)); } if (pkts_tx > 0) { uint32_t r; /* dump all tx counters. might be useful for debugging. */ for (r = R_TMCA; r <= R_TFRG; r++) { if ((r == (R_TNCL + 1)) || (r == (R_TNCL + 2))) continue; printf("[nlge_%d mac stats]: [0x%x]=%u\n", sc->id, r, NLGE_READ(sc->base, r)); } } } static void dump_mii_regs(struct nlge_softc *sc) { uint32_t mii_regs[] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e}; int i, n_regs; if (sc->mii_base == NULL || sc->mii_bus == NULL) return; n_regs = sizeof (mii_regs) / sizeof (mii_regs[0]); for (i = 0; i < n_regs; i++) { printf("[mii_0x%x] = %x\n", mii_regs[i], nlge_mii_read_internal(sc->mii_base, sc->phy_addr, mii_regs[i])); } } static void dump_ifmedia(struct ifmedia *ifm) { printf("ifm_mask=%08x, ifm_media=%08x, cur=%p\n", ifm->ifm_mask, ifm->ifm_media, ifm->ifm_cur); if (ifm->ifm_cur != NULL) { printf("Cur attribs: ifmedia_entry.ifm_media=%08x," " ifmedia_entry.ifm_data=%08x\n", ifm->ifm_cur->ifm_media, ifm->ifm_cur->ifm_data); } } static void dump_mii_data(struct mii_data *mii) { dump_ifmedia(&mii->mii_media); printf("ifp=%p, mii_instance=%d, mii_media_status=%08x," " mii_media_active=%08x\n", mii->mii_ifp, mii->mii_instance, mii->mii_media_status, mii->mii_media_active); } static void dump_pcs_regs(struct nlge_softc *sc, int phy) { int i, val; printf("PCS regs from %p for phy=%d\n", sc->pcs_addr, phy); for (i = 0; i < 18; i++) { if (i == 2 || i == 3 || (i >= 9 && i <= 14)) continue; val = nlge_mii_read_internal(sc->pcs_addr, phy, i); printf("PHY:%d pcs[0x%x] is 0x%x\n", phy, i, val); } } #endif Index: head/sys/net/if.h =================================================================== --- head/sys/net/if.h (revision 263101) +++ head/sys/net/if.h (revision 263102) @@ -1,522 +1,535 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #include #if __BSD_VISIBLE /* * does not depend on on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) * The same holds for . (struct sockaddr ifru_addr) */ #ifndef _KERNEL #include #include #endif #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities. */ struct if_data { /* generic interface information */ - u_char ifi_type; /* ethernet, tokenring, etc */ - u_char ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ - u_char ifi_addrlen; /* media address length */ - u_char ifi_hdrlen; /* media header length */ - u_char ifi_link_state; /* current link state */ - u_char ifi_vhid; /* carp vhid */ - u_char ifi_baudrate_pf; /* baudrate power factor */ - u_char ifi_datalen; /* length of this data struct */ - u_long ifi_mtu; /* maximum transmission unit */ - u_long ifi_metric; /* routing metric (external only) */ - u_long ifi_baudrate; /* linespeed */ + uint8_t ifi_type; /* ethernet, tokenring, etc */ + uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ + uint8_t ifi_addrlen; /* media address length */ + uint8_t ifi_hdrlen; /* media header length */ + uint8_t ifi_link_state; /* current link state */ + uint8_t ifi_vhid; /* carp vhid */ + uint16_t ifi_datalen; /* length of this data struct */ + uint32_t ifi_mtu; /* maximum transmission unit */ + uint32_t ifi_metric; /* routing metric (external only) */ + uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ - u_long ifi_ipackets; /* packets received on interface */ - u_long ifi_ierrors; /* input errors on interface */ - u_long ifi_opackets; /* packets sent on interface */ - u_long ifi_oerrors; /* output errors on interface */ - u_long ifi_collisions; /* collisions on csma interfaces */ - u_long ifi_ibytes; /* total number of octets received */ - u_long ifi_obytes; /* total number of octets sent */ - u_long ifi_imcasts; /* packets received via multicast */ - u_long ifi_omcasts; /* packets sent via multicast */ - u_long ifi_iqdrops; /* dropped on input, this interface */ - u_long ifi_noproto; /* destined for unsupported protocol */ - uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ - time_t ifi_epoch; /* uptime at attach or stat reset */ - struct timeval ifi_lastchange; /* time of last administrative change */ + uint64_t ifi_ipackets; /* packets received on interface */ + uint64_t ifi_ierrors; /* input errors on interface */ + uint64_t ifi_opackets; /* packets sent on interface */ + uint64_t ifi_oerrors; /* output errors on interface */ + uint64_t ifi_collisions; /* collisions on csma interfaces */ + uint64_t ifi_ibytes; /* total number of octets received */ + uint64_t ifi_obytes; /* total number of octets sent */ + uint64_t ifi_imcasts; /* packets received via multicast */ + uint64_t ifi_omcasts; /* packets sent via multicast */ + uint64_t ifi_iqdrops; /* dropped on input */ + uint64_t ifi_oqdrops; /* dropped on output */ + uint64_t ifi_noproto; /* destined for unsupported protocol */ + uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ + + /* Unions are here to make sizes MI. */ + union { /* uptime at attach or stat reset */ + time_t tt; + uint64_t ph; + } __ifi_epoch; +#define ifi_epoch __ifi_epoch.tt + union { /* time of last administrative change */ + struct timeval tv; + struct { + uint64_t ph1; + uint64_t ph2; + } ph; + } __ifi_lastchange; +#define ifi_lastchange __ifi_lastchange.tv }; /*- * Interface flags are of two types: network stack owned flags, and driver * owned flags. Historically, these values were stored in the same ifnet * flags field, but with the advent of fine-grained locking, they have been * broken out such that the network stack is responsible for synchronizing * the stack-owned fields, and the device driver the device-owned fields. * Both halves can perform lockless reads of the other half's field, subject * to accepting the involved races. * * Both sets of flags come from the same number space, and should not be * permitted to conflict, as they are exposed to user space via a single * field. * * The following symbols identify read and write requirements for fields: * * (i) if_flags field set by device driver before attach, read-only there * after. * (n) if_flags field written only by the network stack, read by either the * stack or driver. * (d) if_drv_flags field written only by the device driver, read by either * the stack or driver. */ #define IFF_UP 0x1 /* (n) interface is up */ #define IFF_BROADCAST 0x2 /* (i) broadcast address valid */ #define IFF_DEBUG 0x4 /* (n) turn on debugging */ #define IFF_LOOPBACK 0x8 /* (i) is a loopback net */ #define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */ /* 0x20 was IFF_SMART */ #define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */ #define IFF_NOARP 0x80 /* (n) no address resolution protocol */ #define IFF_PROMISC 0x100 /* (n) receive all packets */ #define IFF_ALLMULTI 0x200 /* (n) receive all multicast packets */ #define IFF_DRV_OACTIVE 0x400 /* (d) tx hardware queue is full */ #define IFF_SIMPLEX 0x800 /* (i) can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* (i) supports multicast */ #define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */ #define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */ #define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ /* * Old names for driver flags so that user space tools can continue to use * the old (portable) names. */ #ifndef _KERNEL #define IFF_RUNNING IFF_DRV_RUNNING #define IFF_OACTIVE IFF_DRV_OACTIVE #endif /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\ IFF_DYING|IFF_CANTCONFIG) /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ #define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ /* * Capabilities that interfaces can advertise. * * struct ifnet.if_capabilities * contains the optional features & capabilities a particular interface * supports (not only the driver but also the detected hw revision). * Capabilities are defined by IFCAP_* below. * struct ifnet.if_capenable * contains the enabled (either by default or through ifconfig) optional * features & capabilities on this interface. * Capabilities are defined by IFCAP_* below. * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above * contains the enabled optional feature & capabilites that can be used * individually per packet and are specified in the mbuf pkthdr.csum_flags * field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differenciated than IFCAP_*. * Hwassist features are defined CSUM_* in sys/mbuf.h * * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE. * This is not strictly necessary because the common code never * changes capabilities, and it is left to the individual driver * to do the right thing. However, having the filter here * avoids replication of the same code in all individual drivers. */ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x00004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x00008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x00010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x00020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x00040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x00080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_TSO4 0x00100 /* can do TCP Segmentation Offload */ #define IFCAP_TSO6 0x00200 /* can do TCP6 Segmentation Offload */ #define IFCAP_LRO 0x00400 /* can do Large Receive Offload */ #define IFCAP_WOL_UCAST 0x00800 /* wake on any unicast frame */ #define IFCAP_WOL_MCAST 0x01000 /* wake on any multicast frame */ #define IFCAP_WOL_MAGIC 0x02000 /* wake on any Magic Packet */ #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ #define IFCAP_POLLING_NOCOUNT 0x20000 /* polling ticks cannot be fragmented */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket * For the new, extensible interface see struct if_msghdrl below. */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifm_data_off or within ifm_data. Both the if_msghdr and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct if_msghdrl given a pointer to struct if_msghdrl. */ #define IF_MSGHDRL_IFM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off) #define IF_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifm_len) struct if_msghdrl { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifm_len; /* length of if_msghdrl incl. if_data */ u_short ifm_data_off; /* offset of if_data from beginning */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket * For the new, extensible interface see struct ifa_msghdrl below. */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ int ifam_metric; /* value of ifa_metric */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct ifa_msghdrl given a pointer to struct ifa_msghdrl. */ #define IFA_MSGHDRL_IFAM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off) #define IFA_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifam_len) struct ifa_msghdrl { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ int ifam_metric; /* value of ifa_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests */ struct ifreq_buffer { size_t length; void *buffer; }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ #define ifr_data ifr_ifru.ifru_data /* for use by interface */ #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; int ifra_vhid; }; /* 9.x compat */ struct oifaliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * interface groups */ #define IFG_ALL "all" /* group contains all interfaces */ /* XXX: will we implement this? */ #define IFG_EGRESS "egress" /* if(s) default route(s) point to */ struct ifg_req { union { char ifgrqu_group[IFNAMSIZ]; char ifgrqu_member[IFNAMSIZ]; } ifgrq_ifgrqu; #define ifgrq_group ifgrq_ifgrqu.ifgrqu_group #define ifgrq_member ifgrq_ifgrqu.ifgrqu_member }; /* * Used to lookup groups for an interface */ struct ifgroupreq { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; struct ifg_req *ifgru_groups; } ifgr_ifgru; #define ifgr_group ifgr_ifgru.ifgru_group #define ifgr_groups ifgr_ifgru.ifgru_groups }; #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... */ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #endif /* !_NET_IF_H_ */ Index: head/sys/net/if_bridge.c =================================================================== --- head/sys/net/if_bridge.c (revision 263101) +++ head/sys/net/if_bridge.c (revision 263102) @@ -1,3515 +1,3515 @@ /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */ /* * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp */ /* * Network interface bridge support. * * TODO: * * - Currently only supports Ethernet-like interfaces (Ethernet, * 802.11, VLANs on Ethernet, etc.) Figure out a nice way * to bridge other types of interfaces (FDDI-FDDI, and maybe * consider heterogenous bridges). */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include /* for net/if.h */ #include #include /* string functions */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for struct arpcom */ #include #include #include #include #ifdef INET6 #include #include #include #endif #if defined(INET) || defined(INET6) #include #endif #include #include /* for struct arpcom */ #include #include #include #include #include /* * Size of the route hash table. Must be a power of two. */ #ifndef BRIDGE_RTHASH_SIZE #define BRIDGE_RTHASH_SIZE 1024 #endif #define BRIDGE_RTHASH_MASK (BRIDGE_RTHASH_SIZE - 1) /* * Default maximum number of addresses to cache. */ #ifndef BRIDGE_RTABLE_MAX #define BRIDGE_RTABLE_MAX 2000 #endif /* * Timeout (in seconds) for entries learned dynamically. */ #ifndef BRIDGE_RTABLE_TIMEOUT #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */ #endif /* * Number of seconds between walks of the route list. */ #ifndef BRIDGE_RTABLE_PRUNE_PERIOD #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60) #endif /* * List of capabilities to possibly mask on the member interface. */ #define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM) /* * List of capabilities to strip */ #define BRIDGE_IFCAPS_STRIP IFCAP_LRO /* * Bridge interface list entry. */ struct bridge_iflist { LIST_ENTRY(bridge_iflist) bif_next; struct ifnet *bif_ifp; /* member if */ struct bstp_port bif_stp; /* STP state */ uint32_t bif_flags; /* member if flags */ int bif_savedcaps; /* saved capabilities */ uint32_t bif_addrmax; /* max # of addresses */ uint32_t bif_addrcnt; /* cur. # of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ }; /* * Bridge route node. */ struct bridge_rtnode { LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */ LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */ struct bridge_iflist *brt_dst; /* destination if */ unsigned long brt_expire; /* expiration time */ uint8_t brt_flags; /* address flags */ uint8_t brt_addr[ETHER_ADDR_LEN]; uint16_t brt_vlan; /* vlan id */ }; #define brt_ifp brt_dst->bif_ifp /* * Software state for each bridge. */ struct bridge_softc { struct ifnet *sc_ifp; /* make this an interface */ LIST_ENTRY(bridge_softc) sc_list; struct mtx sc_mtx; struct cv sc_cv; uint32_t sc_brtmax; /* max # of addresses */ uint32_t sc_brtcnt; /* cur. # of addresses */ uint32_t sc_brttimeout; /* rt timeout in seconds */ struct callout sc_brcallout; /* bridge callout */ uint32_t sc_iflist_ref; /* refcount for sc_iflist */ uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */ LIST_HEAD(, bridge_iflist) sc_iflist; /* member interface list */ LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */ LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */ uint32_t sc_rthash_key; /* key for hash */ LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */ struct bstp_state sc_stp; /* STP state */ uint32_t sc_brtexceeded; /* # of cache drops */ struct ifnet *sc_ifaddr; /* member mac copied from */ u_char sc_defaddr[6]; /* Default MAC address */ }; static struct mtx bridge_list_mtx; eventhandler_tag bridge_detach_cookie = NULL; int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; uma_zone_t bridge_rtnode_zone; static int bridge_clone_create(struct if_clone *, int, caddr_t); static void bridge_clone_destroy(struct ifnet *); static int bridge_ioctl(struct ifnet *, u_long, caddr_t); static void bridge_mutecaps(struct bridge_softc *); static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_ifdetach(void *arg __unused, struct ifnet *); static void bridge_init(void *); static void bridge_dummynet(struct mbuf *, struct ifnet *); static void bridge_stop(struct ifnet *, int); static int bridge_transmit(struct ifnet *, struct mbuf *); static void bridge_qflush(struct ifnet *); static struct mbuf *bridge_input(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, struct mbuf *, int); static void bridge_span(struct bridge_softc *, struct mbuf *); static int bridge_rtupdate(struct bridge_softc *, const uint8_t *, uint16_t, struct bridge_iflist *, int, uint8_t); static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *, uint16_t); static void bridge_rttrim(struct bridge_softc *); static void bridge_rtage(struct bridge_softc *); static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, uint16_t); static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *); static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *, const uint8_t *, uint16_t); static int bridge_rtnode_insert(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtnode_destroy(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtable_expire(struct ifnet *, int); static void bridge_state_change(struct ifnet *, int); static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *, const char *name); static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *, struct ifnet *ifp); static void bridge_delete_member(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_delete_span(struct bridge_softc *, struct bridge_iflist *); static int bridge_ioctl_add(struct bridge_softc *, void *); static int bridge_ioctl_del(struct bridge_softc *, void *); static int bridge_ioctl_gifflags(struct bridge_softc *, void *); static int bridge_ioctl_sifflags(struct bridge_softc *, void *); static int bridge_ioctl_scache(struct bridge_softc *, void *); static int bridge_ioctl_gcache(struct bridge_softc *, void *); static int bridge_ioctl_gifs(struct bridge_softc *, void *); static int bridge_ioctl_rts(struct bridge_softc *, void *); static int bridge_ioctl_saddr(struct bridge_softc *, void *); static int bridge_ioctl_sto(struct bridge_softc *, void *); static int bridge_ioctl_gto(struct bridge_softc *, void *); static int bridge_ioctl_daddr(struct bridge_softc *, void *); static int bridge_ioctl_flush(struct bridge_softc *, void *); static int bridge_ioctl_gpri(struct bridge_softc *, void *); static int bridge_ioctl_spri(struct bridge_softc *, void *); static int bridge_ioctl_ght(struct bridge_softc *, void *); static int bridge_ioctl_sht(struct bridge_softc *, void *); static int bridge_ioctl_gfd(struct bridge_softc *, void *); static int bridge_ioctl_sfd(struct bridge_softc *, void *); static int bridge_ioctl_gma(struct bridge_softc *, void *); static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); static int bridge_ioctl_grte(struct bridge_softc *, void *); static int bridge_ioctl_gifsstp(struct bridge_softc *, void *); static int bridge_ioctl_sproto(struct bridge_softc *, void *); static int bridge_ioctl_stxhc(struct bridge_softc *, void *); static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *, int); static int bridge_ip_checkbasic(struct mbuf **mp); #ifdef INET6 static int bridge_ip6_checkbasic(struct mbuf **mp); #endif /* INET6 */ static int bridge_fragment(struct ifnet *, struct mbuf *, struct ether_header *, int, struct llc *); static void bridge_linkstate(struct ifnet *ifp); static void bridge_linkcheck(struct bridge_softc *sc); extern void (*bridge_linkstate_p)(struct ifnet *ifp); /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */ #define VLANTAGOF(_m) \ (_m->m_flags & M_VLANTAG) ? EVL_VLANOFTAG(_m->m_pkthdr.ether_vtag) : 1 static struct bstp_cb_ops bridge_ops = { .bcb_state = bridge_state_change, .bcb_rtage = bridge_rtable_expire }; SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge"); static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */ static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */ static int pfil_member = 1; /* run pfil hooks on the member interface */ static int pfil_ipfw = 0; /* layer2 filter with ipfw */ static int pfil_ipfw_arp = 0; /* layer2 filter with ipfw */ static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for locally destined packets */ static int log_stp = 0; /* log STP state changes */ static int bridge_inherit_mac = 0; /* share MAC with first bridge member */ TUNABLE_INT("net.link.bridge.pfil_onlyip", &pfil_onlyip); SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW, &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled"); TUNABLE_INT("net.link.bridge.ipfw_arp", &pfil_ipfw_arp); SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW, &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2"); TUNABLE_INT("net.link.bridge.pfil_bridge", &pfil_bridge); SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW, &pfil_bridge, 0, "Packet filter on the bridge interface"); TUNABLE_INT("net.link.bridge.pfil_member", &pfil_member); SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW, &pfil_member, 0, "Packet filter on the member interface"); TUNABLE_INT("net.link.bridge.pfil_local_phys", &pfil_local_phys); SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW, &pfil_local_phys, 0, "Packet filter on the physical interface for locally destined packets"); TUNABLE_INT("net.link.bridge.log_stp", &log_stp); SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW, &log_stp, 0, "Log STP state changes"); TUNABLE_INT("net.link.bridge.inherit_mac", &bridge_inherit_mac); SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW, &bridge_inherit_mac, 0, "Inherit MAC address from the first bridge member"); static VNET_DEFINE(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW, &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " "zones of a bridge interface and the member interfaces"); struct bridge_control { int (*bc_func)(struct bridge_softc *, void *); int bc_argsize; int bc_flags; }; #define BC_F_COPYIN 0x01 /* copy arguments in */ #define BC_F_COPYOUT 0x02 /* copy arguments out */ #define BC_F_SUSER 0x04 /* do super-user check */ const struct bridge_control bridge_control_table[] = { { bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_addspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_delspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gbparam, sizeof(struct ifbropreq), BC_F_COPYOUT }, { bridge_ioctl_grte, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifsstp, sizeof(struct ifbpstpconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sproto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_stxhc, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, }; const int bridge_control_table_size = sizeof(bridge_control_table) / sizeof(bridge_control_table[0]); LIST_HEAD(, bridge_softc) bridge_list; static struct if_clone *bridge_cloner; static const char bridge_name[] = "bridge"; static int bridge_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: mtx_init(&bridge_list_mtx, "if_bridge list", NULL, MTX_DEF); bridge_cloner = if_clone_simple(bridge_name, bridge_clone_create, bridge_clone_destroy, 0); bridge_rtnode_zone = uma_zcreate("bridge_rtnode", sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); LIST_INIT(&bridge_list); bridge_input_p = bridge_input; bridge_output_p = bridge_output; bridge_dn_p = bridge_dummynet; bridge_linkstate_p = bridge_linkstate; bridge_detach_cookie = EVENTHANDLER_REGISTER( ifnet_departure_event, bridge_ifdetach, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, bridge_detach_cookie); if_clone_detach(bridge_cloner); uma_zdestroy(bridge_rtnode_zone); bridge_input_p = NULL; bridge_output_p = NULL; bridge_dn_p = NULL; bridge_linkstate_p = NULL; mtx_destroy(&bridge_list_mtx); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t bridge_mod = { "if_bridge", bridge_modevent, 0 }; DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1); /* * handler for net.link.bridge.ipfw */ static int sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS) { int enable = pfil_ipfw; int error; error = sysctl_handle_int(oidp, &enable, 0, req); enable = (enable) ? 1 : 0; if (enable != pfil_ipfw) { pfil_ipfw = enable; /* * Disable pfil so that ipfw doesnt run twice, if the user * really wants both then they can re-enable pfil_bridge and/or * pfil_member. Also allow non-ip packets as ipfw can filter by * layer2 type. */ if (pfil_ipfw) { pfil_onlyip = 0; pfil_bridge = 0; pfil_member = 0; } } return (error); } SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW, &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW"); /* * bridge_clone_create: * * Create a new bridge instance. */ static int bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct bridge_softc *sc, *sc2; struct ifnet *bifp, *ifp; int fb, retry; unsigned long hostid; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { free(sc, M_DEVBUF); return (ENOSPC); } BRIDGE_LOCK_INIT(sc); sc->sc_brtmax = BRIDGE_RTABLE_MAX; sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT; /* Initialize our routing table. */ bridge_rtable_init(sc); callout_init_mtx(&sc->sc_brcallout, &sc->sc_mtx, 0); LIST_INIT(&sc->sc_iflist); LIST_INIT(&sc->sc_spanlist); ifp->if_softc = sc; if_initname(ifp, bridge_name, unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = bridge_ioctl; ifp->if_transmit = bridge_transmit; ifp->if_qflush = bridge_qflush; ifp->if_init = bridge_init; ifp->if_type = IFT_BRIDGE; /* * Generate an ethernet address with a locally administered address. * * Since we are using random ethernet addresses for the bridge, it is * possible that we might have address collisions, so make sure that * this hardware address isn't already in use on another bridge. * The first try uses the hostid and falls back to arc4rand(). */ fb = 0; getcredhostid(curthread->td_ucred, &hostid); do { if (fb || hostid == 0) { arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1); sc->sc_defaddr[0] &= ~1;/* clear multicast bit */ sc->sc_defaddr[0] |= 2; /* set the LAA bit */ } else { sc->sc_defaddr[0] = 0x2; sc->sc_defaddr[1] = (hostid >> 24) & 0xff; sc->sc_defaddr[2] = (hostid >> 16) & 0xff; sc->sc_defaddr[3] = (hostid >> 8 ) & 0xff; sc->sc_defaddr[4] = hostid & 0xff; sc->sc_defaddr[5] = ifp->if_dunit & 0xff; } fb = 1; retry = 0; mtx_lock(&bridge_list_mtx); LIST_FOREACH(sc2, &bridge_list, sc_list) { bifp = sc2->sc_ifp; if (memcmp(sc->sc_defaddr, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) { retry = 1; break; } } mtx_unlock(&bridge_list_mtx); } while (retry == 1); bstp_attach(&sc->sc_stp, &bridge_ops); ether_ifattach(ifp, sc->sc_defaddr); /* Now undo some of the damage... */ ifp->if_baudrate = 0; ifp->if_type = IFT_BRIDGE; mtx_lock(&bridge_list_mtx); LIST_INSERT_HEAD(&bridge_list, sc, sc_list); mtx_unlock(&bridge_list_mtx); return (0); } /* * bridge_clone_destroy: * * Destroy a bridge instance. */ static void bridge_clone_destroy(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_softc; struct bridge_iflist *bif; BRIDGE_LOCK(sc); bridge_stop(ifp, 1); ifp->if_flags &= ~IFF_UP; while ((bif = LIST_FIRST(&sc->sc_iflist)) != NULL) bridge_delete_member(sc, bif, 0); while ((bif = LIST_FIRST(&sc->sc_spanlist)) != NULL) { bridge_delete_span(sc, bif); } BRIDGE_UNLOCK(sc); callout_drain(&sc->sc_brcallout); mtx_lock(&bridge_list_mtx); LIST_REMOVE(sc, sc_list); mtx_unlock(&bridge_list_mtx); bstp_detach(&sc->sc_stp); ether_ifdetach(ifp); if_free(ifp); /* Tear down the routing table. */ bridge_rtable_fini(sc); BRIDGE_LOCK_DESTROY(sc); free(sc, M_DEVBUF); } /* * bridge_ioctl: * * Handle a control request from the operator. */ static int bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct bridge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct bridge_iflist *bif; struct thread *td = curthread; union { struct ifbreq ifbreq; struct ifbifconf ifbifconf; struct ifbareq ifbareq; struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; int error = 0; switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGDRVSPEC: case SIOCSDRVSPEC: if (ifd->ifd_cmd >= bridge_control_table_size) { error = EINVAL; break; } bc = &bridge_control_table[ifd->ifd_cmd]; if (cmd == SIOCGDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) == 0) { error = EINVAL; break; } else if (cmd == SIOCSDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) != 0) { error = EINVAL; break; } if (bc->bc_flags & BC_F_SUSER) { error = priv_check(td, PRIV_NET_BRIDGE); if (error) break; } if (ifd->ifd_len != bc->bc_argsize || ifd->ifd_len > sizeof(args)) { error = EINVAL; break; } bzero(&args, sizeof(args)); if (bc->bc_flags & BC_F_COPYIN) { error = copyin(ifd->ifd_data, &args, ifd->ifd_len); if (error) break; } BRIDGE_LOCK(sc); error = (*bc->bc_func)(sc, &args); BRIDGE_UNLOCK(sc); if (error) break; if (bc->bc_flags & BC_F_COPYOUT) error = copyout(&args, ifd->ifd_data, ifd->ifd_len); break; case SIOCSIFFLAGS: if (!(ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked down and it is running, * then stop and disable it. */ BRIDGE_LOCK(sc); bridge_stop(ifp, 1); BRIDGE_UNLOCK(sc); } else if ((ifp->if_flags & IFF_UP) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked up and it is stopped, then * start it. */ (*ifp->if_init)(sc); } break; case SIOCSIFMTU: if (ifr->ifr_mtu < 576) { error = EINVAL; break; } if (LIST_EMPTY(&sc->sc_iflist)) { sc->sc_ifp->if_mtu = ifr->ifr_mtu; break; } BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) { - log(LOG_NOTICE, "%s: invalid MTU: %lu(%s)" + log(LOG_NOTICE, "%s: invalid MTU: %u(%s)" " != %d\n", sc->sc_ifp->if_xname, bif->bif_ifp->if_mtu, bif->bif_ifp->if_xname, ifr->ifr_mtu); error = EINVAL; break; } } if (!error) sc->sc_ifp->if_mtu = ifr->ifr_mtu; BRIDGE_UNLOCK(sc); break; default: /* * drop the lock as ether_ioctl() will call bridge_start() and * cause the lock to be recursed. */ error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * bridge_mutecaps: * * Clear or restore unwanted capabilities on the member interface */ static void bridge_mutecaps(struct bridge_softc *sc) { struct bridge_iflist *bif; int enabled, mask; /* Initial bitmask of capabilities to test */ mask = BRIDGE_IFCAPS_MASK; LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { /* Every member must support it or its disabled */ mask &= bif->bif_savedcaps; } LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { enabled = bif->bif_ifp->if_capenable; enabled &= ~BRIDGE_IFCAPS_STRIP; /* strip off mask bits and enable them again if allowed */ enabled &= ~BRIDGE_IFCAPS_MASK; enabled |= mask; bridge_set_ifcap(sc, bif, enabled); } } static void bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set) { struct ifnet *ifp = bif->bif_ifp; struct ifreq ifr; int error; bzero(&ifr, sizeof(ifr)); ifr.ifr_reqcap = set; if (ifp->if_capenable != set) { error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr); if (error) if_printf(sc->sc_ifp, "error setting interface capabilities on %s\n", ifp->if_xname); } } /* * bridge_lookup_member: * * Lookup a bridge member interface. */ static struct bridge_iflist * bridge_lookup_member(struct bridge_softc *sc, const char *name) { struct bridge_iflist *bif; struct ifnet *ifp; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { ifp = bif->bif_ifp; if (strcmp(ifp->if_xname, name) == 0) return (bif); } return (NULL); } /* * bridge_lookup_member_if: * * Lookup a bridge member interface by ifnet*. */ static struct bridge_iflist * bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp) { struct bridge_iflist *bif; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp == member_ifp) return (bif); } return (NULL); } /* * bridge_delete_member: * * Delete the specified member interface. */ static void bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, int gone) { struct ifnet *ifs = bif->bif_ifp; struct ifnet *fif = NULL; BRIDGE_LOCK_ASSERT(sc); if (bif->bif_flags & IFBIF_STP) bstp_disable(&bif->bif_stp); ifs->if_bridge = NULL; BRIDGE_XLOCK(sc); LIST_REMOVE(bif, bif_next); BRIDGE_XDROP(sc); /* * If removing the interface that gave the bridge its mac address, set * the mac address of the bridge to the address of the next member, or * to its default address if no members are left. */ if (bridge_inherit_mac && sc->sc_ifaddr == ifs) { if (LIST_EMPTY(&sc->sc_iflist)) { bcopy(sc->sc_defaddr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = NULL; } else { fif = LIST_FIRST(&sc->sc_iflist)->bif_ifp; bcopy(IF_LLADDR(fif), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = fif; } EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } bridge_linkcheck(sc); bridge_mutecaps(sc); /* recalcuate now this interface is removed */ bridge_rtdelete(sc, ifs, IFBF_FLUSHALL); KASSERT(bif->bif_addrcnt == 0, ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt)); BRIDGE_UNLOCK(sc); if (!gone) { switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: /* * Take the interface out of promiscuous mode. */ (void) ifpromisc(ifs, 0); break; case IFT_GIF: break; default: #ifdef DIAGNOSTIC panic("bridge_delete_member: impossible"); #endif break; } /* reneable any interface capabilities */ bridge_set_ifcap(sc, bif, bif->bif_savedcaps); } bstp_destroy(&bif->bif_stp); /* prepare to free */ BRIDGE_LOCK(sc); free(bif, M_DEVBUF); } /* * bridge_delete_span: * * Delete the specified span interface. */ static void bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif) { BRIDGE_LOCK_ASSERT(sc); KASSERT(bif->bif_ifp->if_bridge == NULL, ("%s: not a span interface", __func__)); LIST_REMOVE(bif, bif_next); free(bif, M_DEVBUF); } static int bridge_ioctl_add(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; struct ifnet *ifs; int error = 0; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); if (ifs->if_ioctl == NULL) /* must be supported */ return (EINVAL); /* If it's in the span list, it can't be a member. */ LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) return (EBUSY); if (ifs->if_bridge == sc) return (EEXIST); if (ifs->if_bridge != NULL) return (EBUSY); switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: case IFT_GIF: /* permitted interface types */ break; default: return (EINVAL); } #ifdef INET6 /* * Two valid inet6 addresses with link-local scope must not be * on the parent interface and the member interfaces at the * same time. This restriction is needed to prevent violation * of link-local scope zone. Attempts to add a member * interface which has inet6 addresses when the parent has * inet6 triggers removal of all inet6 addresses on the member * interface. */ /* Check if the parent interface has a link-local scope addr. */ if (V_allow_llz_overlap == 0 && in6ifa_llaonifp(sc->sc_ifp) != NULL) { /* * If any, remove all inet6 addresses from the member * interfaces. */ BRIDGE_XLOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (in6ifa_llaonifp(bif->bif_ifp)) { BRIDGE_UNLOCK(sc); in6_ifdetach(bif->bif_ifp); BRIDGE_LOCK(sc); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", bif->bif_ifp->if_xname); } } BRIDGE_XDROP(sc); if (in6ifa_llaonifp(ifs)) { BRIDGE_UNLOCK(sc); in6_ifdetach(ifs); BRIDGE_LOCK(sc); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", ifs->if_xname); } } #endif /* Allow the first Ethernet member to define the MTU */ if (LIST_EMPTY(&sc->sc_iflist)) sc->sc_ifp->if_mtu = ifs->if_mtu; else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { - if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n", + if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n", ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu); return (EINVAL); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; bif->bif_savedcaps = ifs->if_capenable; /* * Assign the interface's MAC address to the bridge if it's the first * member and the MAC address of the bridge has not been changed from * the default randomly generated one. */ if (bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) && !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) { bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = ifs; EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } ifs->if_bridge = sc; bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp); /* * XXX: XLOCK HERE!?! * * NOTE: insert_***HEAD*** should be safe for the traversals. */ LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next); /* Set interface capabilities to the intersection set of all members */ bridge_mutecaps(sc); bridge_linkcheck(sc); /* Place the interface into promiscuous mode */ switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: BRIDGE_UNLOCK(sc); error = ifpromisc(ifs, 1); BRIDGE_LOCK(sc); break; } if (error) { bridge_delete_member(sc, bif, 0); free(bif, M_DEVBUF); } return (error); } static int bridge_ioctl_del(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bridge_delete_member(sc, bif, 0); return (0); } static int bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bp = &bif->bif_stp; req->ifbr_ifsflags = bif->bif_flags; req->ifbr_state = bp->bp_state; req->ifbr_priority = bp->bp_priority; req->ifbr_path_cost = bp->bp_path_cost; req->ifbr_portno = bif->bif_ifp->if_index & 0xfff; req->ifbr_proto = bp->bp_protover; req->ifbr_role = bp->bp_role; req->ifbr_stpflags = bp->bp_flags; req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; /* Copy STP state options as flags */ if (bp->bp_operedge) req->ifbr_ifsflags |= IFBIF_BSTP_EDGE; if (bp->bp_flags & BSTP_PORT_AUTOEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE; if (bp->bp_ptp_link) req->ifbr_ifsflags |= IFBIF_BSTP_PTP; if (bp->bp_flags & BSTP_PORT_AUTOPTP) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP; if (bp->bp_flags & BSTP_PORT_ADMEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE; if (bp->bp_flags & BSTP_PORT_ADMCOST) req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST; return (0); } static int bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; int error; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bp = &bif->bif_stp; if (req->ifbr_ifsflags & IFBIF_SPAN) /* SPAN is readonly */ return (EINVAL); if (req->ifbr_ifsflags & IFBIF_STP) { if ((bif->bif_flags & IFBIF_STP) == 0) { error = bstp_enable(&bif->bif_stp); if (error) return (error); } } else { if ((bif->bif_flags & IFBIF_STP) != 0) bstp_disable(&bif->bif_stp); } /* Pass on STP flags */ bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0); bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0); bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0); bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0); /* Save the bits relating to the bridge */ bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK; return (0); } static int bridge_ioctl_scache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brtmax = param->ifbrp_csize; bridge_rttrim(sc); return (0); } static int bridge_ioctl_gcache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_csize = sc->sc_brtmax; return (0); } static int bridge_ioctl_gifs(struct bridge_softc *sc, void *arg) { struct ifbifconf *bifc = arg; struct bridge_iflist *bif; struct ifbreq breq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; LIST_FOREACH(bif, &sc->sc_iflist, bif_next) count++; LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) count++; buflen = sizeof(breq) * count; if (bifc->ifbic_len == 0) { bifc->ifbic_len = buflen; return (0); } BRIDGE_UNLOCK(sc); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); BRIDGE_LOCK(sc); count = 0; buf = outbuf; len = min(bifc->ifbic_len, buflen); bzero(&breq, sizeof(breq)); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); /* Fill in the ifbreq structure */ error = bridge_ioctl_gifflags(sc, &breq); if (error) break; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); breq.ifbr_ifsflags = bif->bif_flags; breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } BRIDGE_UNLOCK(sc); bifc->ifbic_len = sizeof(breq) * count; error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); BRIDGE_LOCK(sc); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_rts(struct bridge_softc *sc, void *arg) { struct ifbaconf *bac = arg; struct bridge_rtnode *brt; struct ifbareq bareq; char *buf, *outbuf; int count, buflen, len, error = 0; if (bac->ifbac_len == 0) return (0); count = 0; LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) count++; buflen = sizeof(bareq) * count; BRIDGE_UNLOCK(sc); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); BRIDGE_LOCK(sc); count = 0; buf = outbuf; len = min(bac->ifbac_len, buflen); bzero(&bareq, sizeof(bareq)); LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { if (len < sizeof(bareq)) goto out; strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname, sizeof(bareq.ifba_ifsname)); memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); bareq.ifba_vlan = brt->brt_vlan; if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && time_uptime < brt->brt_expire) bareq.ifba_expire = brt->brt_expire - time_uptime; else bareq.ifba_expire = 0; bareq.ifba_flags = brt->brt_flags; memcpy(buf, &bareq, sizeof(bareq)); count++; buf += sizeof(bareq); len -= sizeof(bareq); } out: BRIDGE_UNLOCK(sc); bac->ifbac_len = sizeof(bareq) * count; error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); BRIDGE_LOCK(sc); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; struct bridge_iflist *bif; int error; bif = bridge_lookup_member(sc, req->ifba_ifsname); if (bif == NULL) return (ENOENT); error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1, req->ifba_flags); return (error); } static int bridge_ioctl_sto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brttimeout = param->ifbrp_ctime; return (0); } static int bridge_ioctl_gto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_ctime = sc->sc_brttimeout; return (0); } static int bridge_ioctl_daddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan)); } static int bridge_ioctl_flush(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; bridge_rtflush(sc, req->ifbr_ifsflags); return (0); } static int bridge_ioctl_gpri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_prio = bs->bs_bridge_priority; return (0); } static int bridge_ioctl_spri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio)); } static int bridge_ioctl_ght(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_hellotime = bs->bs_bridge_htime >> 8; return (0); } static int bridge_ioctl_sht(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime)); } static int bridge_ioctl_gfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8; return (0); } static int bridge_ioctl_sfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay)); } static int bridge_ioctl_gma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_maxage = bs->bs_bridge_max_age >> 8; return (0); } static int bridge_ioctl_sma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage)); } static int bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority)); } static int bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost)); } static int bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (ENOENT); bif->bif_addrmax = req->ifbr_addrmax; return (0); } static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) return (EBUSY); if (ifs->if_bridge != NULL) return (EBUSY); switch (ifs->if_type) { case IFT_ETHER: case IFT_GIF: case IFT_L2VLAN: break; default: return (EINVAL); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_SPAN; LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next); return (0); } static int bridge_ioctl_delspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (ENOENT); LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) break; if (bif == NULL) return (ENOENT); bridge_delete_span(sc, bif); return (0); } static int bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg) { struct ifbropreq *req = arg; struct bstp_state *bs = &sc->sc_stp; struct bstp_port *root_port; req->ifbop_maxage = bs->bs_bridge_max_age >> 8; req->ifbop_hellotime = bs->bs_bridge_htime >> 8; req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; root_port = bs->bs_root_port; if (root_port == NULL) req->ifbop_root_port = 0; else req->ifbop_root_port = root_port->bp_ifp->if_index; req->ifbop_holdcount = bs->bs_txholdcount; req->ifbop_priority = bs->bs_bridge_priority; req->ifbop_protocol = bs->bs_protover; req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; return (0); } static int bridge_ioctl_grte(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_cexceeded = sc->sc_brtexceeded; return (0); } static int bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg) { struct ifbpstpconf *bifstp = arg; struct bridge_iflist *bif; struct bstp_port *bp; struct ifbpstpreq bpreq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if ((bif->bif_flags & IFBIF_STP) != 0) count++; } buflen = sizeof(bpreq) * count; if (bifstp->ifbpstp_len == 0) { bifstp->ifbpstp_len = buflen; return (0); } BRIDGE_UNLOCK(sc); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); BRIDGE_LOCK(sc); count = 0; buf = outbuf; len = min(bifstp->ifbpstp_len, buflen); bzero(&bpreq, sizeof(bpreq)); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(bpreq)) break; if ((bif->bif_flags & IFBIF_STP) == 0) continue; bp = &bif->bif_stp; bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; memcpy(buf, &bpreq, sizeof(bpreq)); count++; buf += sizeof(bpreq); len -= sizeof(bpreq); } BRIDGE_UNLOCK(sc); bifstp->ifbpstp_len = sizeof(bpreq) * count; error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); BRIDGE_LOCK(sc); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_sproto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto)); } static int bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc)); } /* * bridge_ifdetach: * * Detach an interface from a bridge. Called when a member * interface is detaching. */ static void bridge_ifdetach(void *arg __unused, struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; if (ifp->if_flags & IFF_RENAMING) return; /* Check if the interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif != NULL) bridge_delete_member(sc, bif, 1); BRIDGE_UNLOCK(sc); return; } /* Check if the interface is a span port */ mtx_lock(&bridge_list_mtx); LIST_FOREACH(sc, &bridge_list, sc_list) { BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifp == bif->bif_ifp) { bridge_delete_span(sc, bif); break; } BRIDGE_UNLOCK(sc); } mtx_unlock(&bridge_list_mtx); } /* * bridge_init: * * Initialize a bridge interface. */ static void bridge_init(void *xsc) { struct bridge_softc *sc = (struct bridge_softc *)xsc; struct ifnet *ifp = sc->sc_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; BRIDGE_LOCK(sc); callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */ BRIDGE_UNLOCK(sc); } /* * bridge_stop: * * Stop the bridge interface. */ static void bridge_stop(struct ifnet *ifp, int disable) { struct bridge_softc *sc = ifp->if_softc; BRIDGE_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; callout_stop(&sc->sc_brcallout); bstp_stop(&sc->sc_stp); bridge_rtflush(sc, IFBF_FLUSHDYN); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } /* * bridge_enqueue: * * Enqueue a packet on a bridge member interface. * */ static int bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) { int len, err = 0; short mflags; struct mbuf *m0; /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = NULL; len = m->m_pkthdr.len; mflags = m->m_flags; /* * If underlying interface can not do VLAN tag insertion itself * then attach a packet tag that holds it. */ if ((m->m_flags & M_VLANTAG) && (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if (m == NULL) { if_printf(dst_ifp, "unable to prepend VLAN header\n"); dst_ifp->if_oerrors++; continue; } m->m_flags &= ~M_VLANTAG; } if ((err = dst_ifp->if_transmit(dst_ifp, m))) { m_freem(m0); sc->sc_ifp->if_oerrors++; break; } sc->sc_ifp->if_opackets++; sc->sc_ifp->if_obytes += len; if (mflags & M_MCAST) sc->sc_ifp->if_omcasts++; } return (err); } /* * bridge_dummynet: * * Receive a queued packet from dummynet and pass it on to the output * interface. * * The mbuf has the Ethernet header already attached. */ static void bridge_dummynet(struct mbuf *m, struct ifnet *ifp) { struct bridge_softc *sc; sc = ifp->if_bridge; /* * The packet didnt originate from a member interface. This should only * ever happen if a member interface is removed while packets are * queued for it. */ if (sc == NULL) { m_freem(m); return; } if (PFIL_HOOKED(&V_inet_pfil_hook) #ifdef INET6 || PFIL_HOOKED(&V_inet6_pfil_hook) #endif ) { if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) return; if (m == NULL) return; } bridge_enqueue(sc, ifp, m); } /* * bridge_output: * * Send output from a bridge member interface. This * performs the bridging function for locally originated * packets. * * The mbuf has the Ethernet header already attached. We must * enqueue or free the mbuf before returning. */ static int bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, struct rtentry *rt) { struct ether_header *eh; struct ifnet *dst_if; struct bridge_softc *sc; uint16_t vlan; if (m->m_len < ETHER_HDR_LEN) { m = m_pullup(m, ETHER_HDR_LEN); if (m == NULL) return (0); } eh = mtod(m, struct ether_header *); sc = ifp->if_bridge; vlan = VLANTAGOF(m); BRIDGE_LOCK(sc); /* * If bridge is down, but the original output interface is up, * go ahead and send out that interface. Otherwise, the packet * is dropped below. */ if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { dst_if = ifp; goto sendunicast; } /* * If the packet is a multicast, or we don't know a better way to * get there, send to all interfaces. */ if (ETHER_IS_MULTICAST(eh->ether_dhost)) dst_if = NULL; else dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan); if (dst_if == NULL) { struct bridge_iflist *bif; struct mbuf *mc; int error = 0, used = 0; bridge_span(sc, m); BRIDGE_LOCK2REF(sc, error); if (error) { m_freem(m); return (0); } LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { dst_if = bif->bif_ifp; if (dst_if->if_type == IFT_GIF) continue; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; /* * If this is not the original output interface, * and the interface is participating in spanning * tree, make sure the port is in a state that * allows forwarding. */ if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; if (LIST_NEXT(bif, bif_next) == NULL) { used = 1; mc = m; } else { mc = m_copypacket(m, M_NOWAIT); if (mc == NULL) { sc->sc_ifp->if_oerrors++; continue; } } bridge_enqueue(sc, dst_if, mc); } if (used == 0) m_freem(m); BRIDGE_UNREF(sc); return (0); } sendunicast: /* * XXX Spanning tree consideration here? */ bridge_span(sc, m); if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) { m_freem(m); BRIDGE_UNLOCK(sc); return (0); } BRIDGE_UNLOCK(sc); bridge_enqueue(sc, dst_if, m); return (0); } /* * bridge_transmit: * * Do output on a bridge. * */ static int bridge_transmit(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; struct ether_header *eh; struct ifnet *dst_if; int error = 0; sc = ifp->if_softc; ETHER_BPF_MTAP(ifp, m); eh = mtod(m, struct ether_header *); BRIDGE_LOCK(sc); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) { BRIDGE_UNLOCK(sc); error = bridge_enqueue(sc, dst_if, m); } else bridge_broadcast(sc, ifp, m, 0); return (error); } /* * The ifp->if_qflush entry point for if_bridge(4) is no-op. */ static void bridge_qflush(struct ifnet *ifp __unused) { } /* * bridge_forward: * * The forwarding function of the bridge. * * NOTE: Releases the lock on return. */ static void bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, struct mbuf *m) { struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; struct ether_header *eh; uint16_t vlan; uint8_t *dst; int error; src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; vlan = VLANTAGOF(m); if ((sbif->bif_flags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; eh = mtod(m, struct ether_header *); dst = eh->ether_dhost; /* If the interface is learning, record the address. */ if (sbif->bif_flags & IFBIF_LEARNING) { error = bridge_rtupdate(sc, eh->ether_shost, vlan, sbif, 0, IFBAF_DYNAMIC); /* * If the interface has addresses limits then deny any source * that is not in the cache. */ if (error && sbif->bif_addrmax) goto drop; } if ((sbif->bif_flags & IFBIF_STP) != 0 && sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) goto drop; /* * At this point, the port either doesn't participate * in spanning tree or it is in the forwarding state. */ /* * If the packet is unicast, destined for someone on * "this" side of the bridge, drop it. */ if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) { dst_if = bridge_rtlookup(sc, dst, vlan); if (src_if == dst_if) goto drop; } else { /* * Check if its a reserved multicast address, any address * listed in 802.1D section 7.12.6 may not be forwarded by the * bridge. * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F */ if (dst[0] == 0x01 && dst[1] == 0x80 && dst[2] == 0xc2 && dst[3] == 0x00 && dst[4] == 0x00 && dst[5] <= 0x0f) goto drop; /* ...forward it to all interfaces. */ ifp->if_imcasts++; dst_if = NULL; } /* * If we have a destination interface which is a member of our bridge, * OR this is a unicast packet, push it through the bpf(4) machinery. * For broadcast or multicast packets, don't bother because it will * be reinjected into ether_input. We do this before we pass the packets * through the pfil(9) framework, as it is possible that pfil(9) will * drop the packet, or possibly modify it, making it difficult to debug * firewall issues on the bridge. */ if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) ETHER_BPF_MTAP(ifp, m); /* run the packet filter */ if (PFIL_HOOKED(&V_inet_pfil_hook) #ifdef INET6 || PFIL_HOOKED(&V_inet6_pfil_hook) #endif ) { BRIDGE_UNLOCK(sc); if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0) return; if (m == NULL) return; BRIDGE_LOCK(sc); } if (dst_if == NULL) { bridge_broadcast(sc, src_if, m, 1); return; } /* * At this point, we're dealing with a unicast frame * going to a different interface. */ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) goto drop; dbif = bridge_lookup_member_if(sc, dst_if); if (dbif == NULL) /* Not a member of the bridge (anymore?) */ goto drop; /* Private segments can not talk to each other */ if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; BRIDGE_UNLOCK(sc); if (PFIL_HOOKED(&V_inet_pfil_hook) #ifdef INET6 || PFIL_HOOKED(&V_inet6_pfil_hook) #endif ) { if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) return; if (m == NULL) return; } bridge_enqueue(sc, dst_if, m); return; drop: BRIDGE_UNLOCK(sc); m_freem(m); } /* * bridge_input: * * Receive input from a member interface. Queue the packet for * bridging if it is not for us. */ static struct mbuf * bridge_input(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif, *bif2; struct ifnet *bifp; struct ether_header *eh; struct mbuf *mc, *mc2; uint16_t vlan; int error; if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (m); bifp = sc->sc_ifp; vlan = VLANTAGOF(m); /* * Implement support for bridge monitoring. If this flag has been * set on this interface, discard the packet once we push it through * the bpf(4) machinery, but before we do, increment the byte and * packet counters associated with this interface. */ if ((bifp->if_flags & IFF_MONITOR) != 0) { m->m_pkthdr.rcvif = bifp; ETHER_BPF_MTAP(bifp, m); bifp->if_ipackets++; bifp->if_ibytes += m->m_pkthdr.len; m_freem(m); return (NULL); } BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { BRIDGE_UNLOCK(sc); return (m); } eh = mtod(m, struct ether_header *); bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { /* Tap off 802.1D packets; they do not get forwarded. */ if (memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) { bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */ BRIDGE_UNLOCK(sc); return (NULL); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { BRIDGE_UNLOCK(sc); return (m); } /* * Make a deep copy of the packet and enqueue the copy * for bridge processing; return the original packet for * local processing. */ mc = m_dup(m, M_NOWAIT); if (mc == NULL) { BRIDGE_UNLOCK(sc); return (m); } /* Perform the bridge forwarding function with the copy. */ bridge_forward(sc, bif, mc); /* * Reinject the mbuf as arriving on the bridge so we have a * chance at claiming multicast packets. We can not loop back * here from ether_input as a bridge is never a member of a * bridge. */ KASSERT(bifp->if_bridge == NULL, ("loop created in bridge_input")); mc2 = m_dup(m, M_NOWAIT); if (mc2 != NULL) { /* Keep the layer3 header aligned */ int i = min(mc2->m_pkthdr.len, max_protohdr); mc2 = m_copyup(mc2, i, ETHER_ALIGN); } if (mc2 != NULL) { mc2->m_pkthdr.rcvif = bifp; (*bifp->if_input)(bifp, mc2); } /* Return the original packet for local processing. */ return (m); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { BRIDGE_UNLOCK(sc); return (m); } #if (defined(INET) || defined(INET6)) # define OR_CARP_CHECK_WE_ARE_DST(iface) \ || ((iface)->if_carp \ && (*carp_forus_p)((iface), eh->ether_dhost)) # define OR_CARP_CHECK_WE_ARE_SRC(iface) \ || ((iface)->if_carp \ && (*carp_forus_p)((iface), eh->ether_shost)) #else # define OR_CARP_CHECK_WE_ARE_DST(iface) # define OR_CARP_CHECK_WE_ARE_SRC(iface) #endif #ifdef INET6 # define OR_PFIL_HOOKED_INET6 \ || PFIL_HOOKED(&V_inet6_pfil_hook) #else # define OR_PFIL_HOOKED_INET6 #endif #define GRAB_OUR_PACKETS(iface) \ if ((iface)->if_type == IFT_GIF) \ continue; \ /* It is destined for us. */ \ if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, ETHER_ADDR_LEN) == 0 \ OR_CARP_CHECK_WE_ARE_DST((iface)) \ ) { \ if ((iface)->if_type == IFT_BRIDGE) { \ ETHER_BPF_MTAP(iface, m); \ iface->if_ipackets++; \ iface->if_ibytes += m->m_pkthdr.len; \ /* Filter on the physical interface. */ \ if (pfil_local_phys && \ (PFIL_HOOKED(&V_inet_pfil_hook) \ OR_PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ PFIL_IN) != 0 || m == NULL) { \ BRIDGE_UNLOCK(sc); \ return (NULL); \ } \ eh = mtod(m, struct ether_header *); \ } \ } \ if (bif->bif_flags & IFBIF_LEARNING) { \ error = bridge_rtupdate(sc, eh->ether_shost, \ vlan, bif, 0, IFBAF_DYNAMIC); \ if (error && bif->bif_addrmax) { \ BRIDGE_UNLOCK(sc); \ m_freem(m); \ return (NULL); \ } \ } \ m->m_pkthdr.rcvif = iface; \ BRIDGE_UNLOCK(sc); \ return (m); \ } \ \ /* We just received a packet that we sent out. */ \ if (memcmp(IF_LLADDR((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \ OR_CARP_CHECK_WE_ARE_SRC((iface)) \ ) { \ BRIDGE_UNLOCK(sc); \ m_freem(m); \ return (NULL); \ } /* * Unicast. Make sure it's not for the bridge. */ do { GRAB_OUR_PACKETS(bifp) } while (0); /* * Give a chance for ifp at first priority. This will help when the * packet comes through the interface like VLAN's with the same MACs * on several interfaces from the same bridge. This also will save * some CPU cycles in case the destination interface and the input * interface (eq ifp) are the same. */ do { GRAB_OUR_PACKETS(ifp) } while (0); /* Now check the all bridge members. */ LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) { GRAB_OUR_PACKETS(bif2->bif_ifp) } #undef OR_CARP_CHECK_WE_ARE_DST #undef OR_CARP_CHECK_WE_ARE_SRC #undef OR_PFIL_HOOKED_INET6 #undef GRAB_OUR_PACKETS /* Perform the bridge forwarding function. */ bridge_forward(sc, bif, m); return (NULL); } /* * bridge_broadcast: * * Send a frame to all interfaces that are members of * the bridge, except for the one on which the packet * arrived. * * NOTE: Releases the lock on return. */ static void bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, struct mbuf *m, int runfilt) { struct bridge_iflist *dbif, *sbif; struct mbuf *mc; struct ifnet *dst_if; int error = 0, used = 0, i; sbif = bridge_lookup_member_if(sc, src_if); BRIDGE_LOCK2REF(sc, error); if (error) { m_freem(m); return; } /* Filter on the bridge interface before broadcasting */ if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook) #ifdef INET6 || PFIL_HOOKED(&V_inet6_pfil_hook) #endif )) { if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) goto out; if (m == NULL) goto out; } LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) { dst_if = dbif->bif_ifp; if (dst_if == src_if) continue; /* Private segments can not talk to each other */ if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 && (m->m_flags & (M_BCAST|M_MCAST)) == 0) continue; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; if (LIST_NEXT(dbif, bif_next) == NULL) { mc = m; used = 1; } else { mc = m_dup(m, M_NOWAIT); if (mc == NULL) { sc->sc_ifp->if_oerrors++; continue; } } /* * Filter on the output interface. Pass a NULL bridge interface * pointer so we do not redundantly filter on the bridge for * each interface we broadcast on. */ if (runfilt && (PFIL_HOOKED(&V_inet_pfil_hook) #ifdef INET6 || PFIL_HOOKED(&V_inet6_pfil_hook) #endif )) { if (used == 0) { /* Keep the layer3 header aligned */ i = min(mc->m_pkthdr.len, max_protohdr); mc = m_copyup(mc, i, ETHER_ALIGN); if (mc == NULL) { sc->sc_ifp->if_oerrors++; continue; } } if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0) continue; if (mc == NULL) continue; } bridge_enqueue(sc, dst_if, mc); } if (used == 0) m_freem(m); out: BRIDGE_UNREF(sc); } /* * bridge_span: * * Duplicate a packet out one or more interfaces that are in span mode, * the original mbuf is unmodified. */ static void bridge_span(struct bridge_softc *sc, struct mbuf *m) { struct bridge_iflist *bif; struct ifnet *dst_if; struct mbuf *mc; if (LIST_EMPTY(&sc->sc_spanlist)) return; LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { dst_if = bif->bif_ifp; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; mc = m_copypacket(m, M_NOWAIT); if (mc == NULL) { sc->sc_ifp->if_oerrors++; continue; } bridge_enqueue(sc, dst_if, mc); } } /* * bridge_rtupdate: * * Add a bridge routing entry. */ static int bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, struct bridge_iflist *bif, int setflags, uint8_t flags) { struct bridge_rtnode *brt; int error; BRIDGE_LOCK_ASSERT(sc); /* Check the source address is valid and not multicast. */ if (ETHER_IS_MULTICAST(dst) || (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 && dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) return (EINVAL); /* 802.1p frames map to vlan 1 */ if (vlan == 0) vlan = 1; /* * A route for this destination might already exist. If so, * update it, otherwise create a new one. */ if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) { if (sc->sc_brtcnt >= sc->sc_brtmax) { sc->sc_brtexceeded++; return (ENOSPC); } /* Check per interface address limits (if enabled) */ if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) { bif->bif_addrexceeded++; return (ENOSPC); } /* * Allocate a new bridge forwarding node, and * initialize the expiration time and Ethernet * address. */ brt = uma_zalloc(bridge_rtnode_zone, M_NOWAIT | M_ZERO); if (brt == NULL) return (ENOMEM); if (bif->bif_flags & IFBIF_STICKY) brt->brt_flags = IFBAF_STICKY; else brt->brt_flags = IFBAF_DYNAMIC; memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN); brt->brt_vlan = vlan; if ((error = bridge_rtnode_insert(sc, brt)) != 0) { uma_zfree(bridge_rtnode_zone, brt); return (error); } brt->brt_dst = bif; bif->bif_addrcnt++; } if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && brt->brt_dst != bif) { brt->brt_dst->bif_addrcnt--; brt->brt_dst = bif; brt->brt_dst->bif_addrcnt++; } if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + sc->sc_brttimeout; if (setflags) brt->brt_flags = flags; return (0); } /* * bridge_rtlookup: * * Lookup the destination interface for an address. */ static struct ifnet * bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; BRIDGE_LOCK_ASSERT(sc); if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) return (NULL); return (brt->brt_ifp); } /* * bridge_rttrim: * * Trim the routine table so that we have a number * of routing entries less than or equal to the * maximum number. */ static void bridge_rttrim(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; BRIDGE_LOCK_ASSERT(sc); /* Make sure we actually need to do this. */ if (sc->sc_brtcnt <= sc->sc_brtmax) return; /* Force an aging cycle; this might trim enough addresses. */ bridge_rtage(sc); if (sc->sc_brtcnt <= sc->sc_brtmax) return; LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { bridge_rtnode_destroy(sc, brt); if (sc->sc_brtcnt <= sc->sc_brtmax) return; } } } /* * bridge_timer: * * Aging timer for the bridge. */ static void bridge_timer(void *arg) { struct bridge_softc *sc = arg; BRIDGE_LOCK_ASSERT(sc); bridge_rtage(sc); if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); } /* * bridge_rtage: * * Perform an aging cycle. */ static void bridge_rtage(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { if (time_uptime >= brt->brt_expire) bridge_rtnode_destroy(sc, brt); } } } /* * bridge_rtflush: * * Remove all dynamic addresses from the bridge. */ static void bridge_rtflush(struct bridge_softc *sc, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtdaddr: * * Remove an address from the table. */ static int bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; int found = 0; BRIDGE_LOCK_ASSERT(sc); /* * If vlan is zero then we want to delete for all vlans so the lookup * may return more than one. */ while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) { bridge_rtnode_destroy(sc, brt); found = 1; } return (found ? 0 : ENOENT); } /* * bridge_rtdelete: * * Delete routes to a speicifc member interface. */ static void bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_LOCK_ASSERT(sc); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (brt->brt_ifp == ifp && (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtable_init: * * Initialize the route table for this bridge. */ static void bridge_rtable_init(struct bridge_softc *sc) { int i; sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE, M_DEVBUF, M_WAITOK); for (i = 0; i < BRIDGE_RTHASH_SIZE; i++) LIST_INIT(&sc->sc_rthash[i]); sc->sc_rthash_key = arc4random(); LIST_INIT(&sc->sc_rtlist); } /* * bridge_rtable_fini: * * Deconstruct the route table for this bridge. */ static void bridge_rtable_fini(struct bridge_softc *sc) { KASSERT(sc->sc_brtcnt == 0, ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt)); free(sc->sc_rthash, M_DEVBUF); } /* * The following hash function is adapted from "Hash Functions" by Bob Jenkins * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). */ #define mix(a, b, c) \ do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (/*CONSTCOND*/0) static __inline uint32_t bridge_rthash(struct bridge_softc *sc, const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key; b += addr[5] << 8; b += addr[4]; a += addr[3] << 24; a += addr[2] << 16; a += addr[1] << 8; a += addr[0]; mix(a, b, c); return (c & BRIDGE_RTHASH_MASK); } #undef mix static int bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b) { int i, d; for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) { d = ((int)a[i]) - ((int)b[i]); } return (d); } /* * bridge_rtnode_lookup: * * Look up a bridge route node for the specified destination. Compare the * vlan id or if zero then just return the first match. */ static struct bridge_rtnode * bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; uint32_t hash; int dir; BRIDGE_LOCK_ASSERT(sc); hash = bridge_rthash(sc, addr); LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) { dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr); if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) return (brt); if (dir > 0) return (NULL); } return (NULL); } /* * bridge_rtnode_insert: * * Insert the specified bridge node into the route table. We * assume the entry is not already in the table. */ static int bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) { struct bridge_rtnode *lbrt; uint32_t hash; int dir; BRIDGE_LOCK_ASSERT(sc); hash = bridge_rthash(sc, brt->brt_addr); lbrt = LIST_FIRST(&sc->sc_rthash[hash]); if (lbrt == NULL) { LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash); goto out; } do { dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr); if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) return (EEXIST); if (dir > 0) { LIST_INSERT_BEFORE(lbrt, brt, brt_hash); goto out; } if (LIST_NEXT(lbrt, brt_hash) == NULL) { LIST_INSERT_AFTER(lbrt, brt, brt_hash); goto out; } lbrt = LIST_NEXT(lbrt, brt_hash); } while (lbrt != NULL); #ifdef DIAGNOSTIC panic("bridge_rtnode_insert: impossible"); #endif out: LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list); sc->sc_brtcnt++; return (0); } /* * bridge_rtnode_destroy: * * Destroy a bridge rtnode. */ static void bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt) { BRIDGE_LOCK_ASSERT(sc); LIST_REMOVE(brt, brt_hash); LIST_REMOVE(brt, brt_list); sc->sc_brtcnt--; brt->brt_dst->bif_addrcnt--; uma_zfree(bridge_rtnode_zone, brt); } /* * bridge_rtable_expire: * * Set the expiry time for all routes on an interface. */ static void bridge_rtable_expire(struct ifnet *ifp, int age) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_rtnode *brt; BRIDGE_LOCK(sc); /* * If the age is zero then flush, otherwise set all the expiry times to * age for the interface */ if (age == 0) bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN); else { LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { /* Cap the expiry time to 'age' */ if (brt->brt_ifp == ifp && brt->brt_expire > time_uptime + age && (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + age; } } BRIDGE_UNLOCK(sc); } /* * bridge_state_change: * * Callback from the bridgestp code when a port changes states. */ static void bridge_state_change(struct ifnet *ifp, int state) { struct bridge_softc *sc = ifp->if_bridge; static const char *stpstates[] = { "disabled", "listening", "learning", "forwarding", "blocking", "discarding" }; if (log_stp) log(LOG_NOTICE, "%s: state changed to %s on %s\n", sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname); } /* * Send bridge packets through pfil if they are one of the types pfil can deal * with, or if they are ARP or REVARP. (pfil will pass ARP and REVARP without * question.) If *bifp or *ifp are NULL then packet filtering is skipped for * that interface. */ static int bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) { int snap, error, i, hlen; struct ether_header *eh1, eh2; struct ip *ip; struct llc llc1; u_int16_t ether_type; snap = 0; error = -1; /* Default error if not error == 0 */ #if 0 /* we may return with the IP fields swapped, ensure its not shared */ KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__)); #endif if (pfil_bridge == 0 && pfil_member == 0 && pfil_ipfw == 0) return (0); /* filtering is disabled */ i = min((*mp)->m_pkthdr.len, max_protohdr); if ((*mp)->m_len < i) { *mp = m_pullup(*mp, i); if (*mp == NULL) { printf("%s: m_pullup failed\n", __func__); return (-1); } } eh1 = mtod(*mp, struct ether_header *); ether_type = ntohs(eh1->ether_type); /* * Check for SNAP/LLC. */ if (ether_type < ETHERMTU) { struct llc *llc2 = (struct llc *)(eh1 + 1); if ((*mp)->m_len >= ETHER_HDR_LEN + 8 && llc2->llc_dsap == LLC_SNAP_LSAP && llc2->llc_ssap == LLC_SNAP_LSAP && llc2->llc_control == LLC_UI) { ether_type = htons(llc2->llc_un.type_snap.ether_type); snap = 1; } } /* * If we're trying to filter bridge traffic, don't look at anything * other than IP and ARP traffic. If the filter doesn't understand * IPv6, don't allow IPv6 through the bridge either. This is lame * since if we really wanted, say, an AppleTalk filter, we are hosed, * but of course we don't have an AppleTalk filter to begin with. * (Note that since pfil doesn't understand ARP it will pass *ALL* * ARP traffic.) */ switch (ether_type) { case ETHERTYPE_ARP: case ETHERTYPE_REVARP: if (pfil_ipfw_arp == 0) return (0); /* Automatically pass */ break; case ETHERTYPE_IP: #ifdef INET6 case ETHERTYPE_IPV6: #endif /* INET6 */ break; default: /* * Check to see if the user wants to pass non-ip * packets, these will not be checked by pfil(9) and * passed unconditionally so the default is to drop. */ if (pfil_onlyip) goto bad; } /* Run the packet through pfil before stripping link headers */ if (PFIL_HOOKED(&V_link_pfil_hook) && pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) { error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, NULL); if (*mp == NULL || error != 0) /* packet consumed by filter */ return (error); } /* Strip off the Ethernet header and keep a copy. */ m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2); m_adj(*mp, ETHER_HDR_LEN); /* Strip off snap header, if present */ if (snap) { m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1); m_adj(*mp, sizeof(struct llc)); } /* * Check the IP header for alignment and errors */ if (dir == PFIL_IN) { switch (ether_type) { case ETHERTYPE_IP: error = bridge_ip_checkbasic(mp); break; #ifdef INET6 case ETHERTYPE_IPV6: error = bridge_ip6_checkbasic(mp); break; #endif /* INET6 */ default: error = 0; } if (error) goto bad; } error = 0; /* * Run the packet through pfil */ switch (ether_type) { case ETHERTYPE_IP: /* * Run pfil on the member interface and the bridge, both can * be skipped by clearing pfil_member or pfil_bridge. * * Keep the order: * in_if -> bridge_if -> out_if */ if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; if (pfil_member && ifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; if (pfil_bridge && dir == PFIL_IN && bifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; /* check if we need to fragment the packet */ if (pfil_member && ifp != NULL && dir == PFIL_OUT) { i = (*mp)->m_pkthdr.len; if (i > ifp->if_mtu) { error = bridge_fragment(ifp, *mp, &eh2, snap, &llc1); return (error); } } /* Recalculate the ip checksum. */ ip = mtod(*mp, struct ip *); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) goto bad; if (hlen > (*mp)->m_len) { if ((*mp = m_pullup(*mp, hlen)) == 0) goto bad; ip = mtod(*mp, struct ip *); if (ip == NULL) goto bad; } ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(*mp, hlen); break; #ifdef INET6 case ETHERTYPE_IPV6: if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; if (pfil_member && ifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; if (pfil_bridge && dir == PFIL_IN && bifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, dir, NULL); break; #endif default: error = 0; break; } if (*mp == NULL) return (error); if (error != 0) goto bad; error = -1; /* * Finally, put everything back the way it was and return */ if (snap) { M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT); if (*mp == NULL) return (error); bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc)); } M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT); if (*mp == NULL) return (error); bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN); return (0); bad: m_freem(*mp); *mp = NULL; return (error); } /* * Perform basic checks on header size since * pfil assumes ip_input has already processed * it for it. Cut-and-pasted from ip_input.c. * Given how simple the IPv6 version is, * does the IPv4 version really need to be * this complicated? * * XXX Should we update ipstat here, or not? * XXX Right now we update ipstat but not * XXX csum_counter. */ static int bridge_ip_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip *ip; int len, hlen; u_short sum; if (*mp == NULL) return (-1); if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { if ((m = m_copyup(m, sizeof(struct ip), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } else if (__predict_false(m->m_len < sizeof (struct ip))) { if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; if (ip->ip_v != IPVERSION) { KMOD_IPSTAT_INC(ips_badvers); goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ KMOD_IPSTAT_INC(ips_badhlen); goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == 0) { KMOD_IPSTAT_INC(ips_badhlen); goto bad; } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); } else { if (hlen == sizeof(struct ip)) { sum = in_cksum_hdr(ip); } else { sum = in_cksum(m, hlen); } } if (sum) { KMOD_IPSTAT_INC(ips_badsum); goto bad; } /* Retrieve the packet length. */ len = ntohs(ip->ip_len); /* * Check for additional length bogosity */ if (len < hlen) { KMOD_IPSTAT_INC(ips_badlen); goto bad; } /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < len) { KMOD_IPSTAT_INC(ips_tooshort); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #ifdef INET6 /* * Same as above, but for IPv6. * Cut-and-pasted from ip6_input.c. * XXX Should we update ip6stat, or not? */ static int bridge_ip6_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip6_hdr *ip6; /* * If the IPv6 header is not aligned, slurp it up into a new * mbuf with space for link headers, in the event we forward * it. Otherwise, if it is aligned, make sure the entire base * IPv6 header is in the first mbuf of the chain. */ if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_copyup(m, sizeof(struct ip6_hdr), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #endif /* INET6 */ /* * bridge_fragment: * * Return a fragmented mbuf chain. */ static int bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh, int snap, struct llc *llc) { struct mbuf *m0; struct ip *ip; int error = -1; if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) goto out; ip = mtod(m, struct ip *); m->m_pkthdr.csum_flags |= CSUM_IP; error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist); if (error) goto out; /* walk the chain and re-add the Ethernet header */ for (m0 = m; m0; m0 = m0->m_nextpkt) { if (error == 0) { if (snap) { M_PREPEND(m0, sizeof(struct llc), M_NOWAIT); if (m0 == NULL) { error = ENOBUFS; continue; } bcopy(llc, mtod(m0, caddr_t), sizeof(struct llc)); } M_PREPEND(m0, ETHER_HDR_LEN, M_NOWAIT); if (m0 == NULL) { error = ENOBUFS; continue; } bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN); } else m_freem(m); } if (error == 0) KMOD_IPSTAT_INC(ips_fragmented); return (error); out: if (m != NULL) m_freem(m); return (error); } static void bridge_linkstate(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif; BRIDGE_LOCK(sc); bif = bridge_lookup_member_if(sc, ifp); if (bif == NULL) { BRIDGE_UNLOCK(sc); return; } bridge_linkcheck(sc); BRIDGE_UNLOCK(sc); bstp_linkstate(&bif->bif_stp); } static void bridge_linkcheck(struct bridge_softc *sc) { struct bridge_iflist *bif; int new_link, hasls; BRIDGE_LOCK_ASSERT(sc); new_link = LINK_STATE_DOWN; hasls = 0; /* Our link is considered up if at least one of our ports is active */ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE) hasls++; if (bif->bif_ifp->if_link_state == LINK_STATE_UP) { new_link = LINK_STATE_UP; break; } } if (!LIST_EMPTY(&sc->sc_iflist) && !hasls) { /* If no interfaces support link-state then we default to up */ new_link = LINK_STATE_UP; } if_link_state_change(sc->sc_ifp, new_link); } Index: head/sys/net/if_epair.c =================================================================== --- head/sys/net/if_epair.c (revision 263101) +++ head/sys/net/if_epair.c (revision 263102) @@ -1,985 +1,985 @@ /*- * Copyright (c) 2008 The FreeBSD Foundation * Copyright (c) 2009-2010 Bjoern A. Zeeb * All rights reserved. * * This software was developed by CK Software GmbH under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * A pair of virtual back-to-back connected ethernet like interfaces * (``two interfaces with a virtual cross-over cable''). * * This is mostly intended to be used to provide connectivity between * different virtual network stack instances. */ /* * Things to re-think once we have more experience: * - ifp->if_reassign function once we can test with vimage. Depending on * how if_vmove() is going to be improved. * - Real random etheraddrs that are checked to be uniquish; we would need * to re-do them in case we move the interface between network stacks * in a private if_reassign function. * In case we bridge to a real interface/network or between indepedent * epairs on multiple stacks/machines, we may need this. * For now let the user handle that case. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); #ifdef EPAIR_DEBUG static int epair_debug = 0; SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, &epair_debug, 0, "if_epair(4) debugging."); #define DPRINTF(fmt, arg...) \ if (epair_debug) \ printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) #else #define DPRINTF(fmt, arg...) #endif static void epair_nh_sintr(struct mbuf *); static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); static void epair_nh_drainedcpu(u_int); static void epair_start_locked(struct ifnet *); static int epair_media_change(struct ifnet *); static void epair_media_status(struct ifnet *, struct ifmediareq *); static int epair_clone_match(struct if_clone *, const char *); static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); static int epair_clone_destroy(struct if_clone *, struct ifnet *); static const char epairname[] = "epair"; /* Netisr realted definitions and sysctl. */ static struct netisr_handler epair_nh = { .nh_name = epairname, .nh_proto = NETISR_EPAIR, .nh_policy = NETISR_POLICY_CPU, .nh_handler = epair_nh_sintr, .nh_m2cpuid = epair_nh_m2cpuid, .nh_drainedcpu = epair_nh_drainedcpu, }; static int sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&epair_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&epair_nh, qlimit)); } SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_epair_netisr_maxqlen, "I", "Maximum if_epair(4) netisr \"hw\" queue length"); struct epair_softc { struct ifnet *ifp; /* This ifp. */ struct ifnet *oifp; /* other ifp of pair. */ struct ifmedia media; /* Media config (fake). */ u_int refcount; /* # of mbufs in flight. */ u_int cpuid; /* CPU ID assigned upon creation. */ void (*if_qflush)(struct ifnet *); /* Original if_qflush routine. */ }; /* * Per-CPU list of ifps with data in the ifq that needs to be flushed * to the netisr ``hw'' queue before we allow any further direct queuing * to the ``hw'' queue. */ struct epair_ifp_drain { STAILQ_ENTRY(epair_ifp_drain) ifp_next; struct ifnet *ifp; }; STAILQ_HEAD(eid_list, epair_ifp_drain); #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ "if_epair", NULL, MTX_DEF) #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ MA_OWNED) #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) #ifdef INVARIANTS #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) #else #define EPAIR_REFCOUNT_INIT(r, v) #define EPAIR_REFCOUNT_AQUIRE(r) #define EPAIR_REFCOUNT_RELEASE(r) #define EPAIR_REFCOUNT_ASSERT(a, p) #endif static MALLOC_DEFINE(M_EPAIR, epairname, "Pair of virtual cross-over connected Ethernet-like interfaces"); static struct if_clone *epair_cloner; /* * DPCPU area and functions. */ struct epair_dpcpu { struct mtx if_epair_mtx; /* Per-CPU locking. */ int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with * data in the ifq. */ }; DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); static void epair_dpcpu_init(void) { struct epair_dpcpu *epair_dpcpu; struct eid_list *s; u_int cpuid; CPU_FOREACH(cpuid) { epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); /* Initialize per-cpu lock. */ EPAIR_LOCK_INIT(epair_dpcpu); /* Driver flags are per-cpu as are our netisr "hw" queues. */ epair_dpcpu->epair_drv_flags = 0; /* * Initialize per-cpu drain list. * Manually do what STAILQ_HEAD_INITIALIZER would do. */ s = &epair_dpcpu->epair_ifp_drain_list; s->stqh_first = NULL; s->stqh_last = &s->stqh_first; } } static void epair_dpcpu_detach(void) { struct epair_dpcpu *epair_dpcpu; u_int cpuid; CPU_FOREACH(cpuid) { epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); /* Destroy per-cpu lock. */ EPAIR_LOCK_DESTROY(epair_dpcpu); } } /* * Helper functions. */ static u_int cpuid_from_ifp(struct ifnet *ifp) { struct epair_softc *sc; if (ifp == NULL) return (0); sc = ifp->if_softc; return (sc->cpuid); } /* * Netisr handler functions. */ static void epair_nh_sintr(struct mbuf *m) { struct ifnet *ifp; struct epair_softc *sc; ifp = m->m_pkthdr.rcvif; (*ifp->if_input)(ifp, m); sc = ifp->if_softc; EPAIR_REFCOUNT_RELEASE(&sc->refcount); EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, ("%s: ifp=%p sc->refcount not >= 1: %d", __func__, ifp, sc->refcount)); DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); } static struct mbuf * epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) { *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); return (m); } static void epair_nh_drainedcpu(u_int cpuid) { struct epair_dpcpu *epair_dpcpu; struct epair_ifp_drain *elm, *tvar; struct ifnet *ifp; epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); EPAIR_LOCK(epair_dpcpu); /* * Assume our "hw" queue and possibly ifq will be emptied * again. In case we will overflow the "hw" queue while * draining, epair_start_locked will set IFF_DRV_OACTIVE * again and we will stop and return. */ STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next, tvar) { ifp = elm->ifp; epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; epair_start_locked(ifp); IFQ_LOCK(&ifp->if_snd); if (IFQ_IS_EMPTY(&ifp->if_snd)) { struct epair_softc *sc; STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, elm, epair_ifp_drain, ifp_next); /* The cached ifp goes off the list. */ sc = ifp->if_softc; EPAIR_REFCOUNT_RELEASE(&sc->refcount); EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, ("%s: ifp=%p sc->refcount not >= 1: %d", __func__, ifp, sc->refcount)); free(elm, M_EPAIR); } IFQ_UNLOCK(&ifp->if_snd); if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { /* Our "hw"q overflew again. */ epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; DPRINTF("hw queue length overflow at %u\n", epair_nh.nh_qlimit); break; } } EPAIR_UNLOCK(epair_dpcpu); } /* * Network interface (`if') related functions. */ static void epair_remove_ifp_from_draining(struct ifnet *ifp) { struct epair_dpcpu *epair_dpcpu; struct epair_ifp_drain *elm, *tvar; u_int cpuid; CPU_FOREACH(cpuid) { epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); EPAIR_LOCK(epair_dpcpu); STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next, tvar) { if (ifp == elm->ifp) { struct epair_softc *sc; STAILQ_REMOVE( &epair_dpcpu->epair_ifp_drain_list, elm, epair_ifp_drain, ifp_next); /* The cached ifp goes off the list. */ sc = ifp->if_softc; EPAIR_REFCOUNT_RELEASE(&sc->refcount); EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, ("%s: ifp=%p sc->refcount not >= 1: %d", __func__, ifp, sc->refcount)); free(elm, M_EPAIR); } } EPAIR_UNLOCK(epair_dpcpu); } } static int epair_add_ifp_for_draining(struct ifnet *ifp) { struct epair_dpcpu *epair_dpcpu; struct epair_softc *sc; struct epair_ifp_drain *elm = NULL; sc = ifp->if_softc; epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); EPAIR_LOCK_ASSERT(epair_dpcpu); STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) if (elm->ifp == ifp) break; /* If the ifp is there already, return success. */ if (elm != NULL) return (0); elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); if (elm == NULL) return (ENOMEM); elm->ifp = ifp; /* Add a reference for the ifp pointer on the list. */ EPAIR_REFCOUNT_AQUIRE(&sc->refcount); STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); return (0); } static void epair_start_locked(struct ifnet *ifp) { struct epair_dpcpu *epair_dpcpu; struct mbuf *m; struct epair_softc *sc; struct ifnet *oifp; int error; DPRINTF("ifp=%p\n", ifp); sc = ifp->if_softc; epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); EPAIR_LOCK_ASSERT(epair_dpcpu); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; if ((ifp->if_flags & IFF_UP) == 0) return; /* * We get patckets here from ether_output via if_handoff() * and ned to put them into the input queue of the oifp * and call oifp->if_input() via netisr/epair_sintr(). */ oifp = sc->oifp; sc = oifp->if_softc; for (;;) { IFQ_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; BPF_MTAP(ifp, m); /* * In case the outgoing interface is not usable, * drop the packet. */ if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (oifp->if_flags & IFF_UP) ==0) { ifp->if_oerrors++; m_freem(m); continue; } DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); /* * Add a reference so the interface cannot go while the * packet is in transit as we rely on rcvif to stay valid. */ EPAIR_REFCOUNT_AQUIRE(&sc->refcount); m->m_pkthdr.rcvif = oifp; CURVNET_SET_QUIET(oifp->if_vnet); error = netisr_queue(NETISR_EPAIR, m); CURVNET_RESTORE(); if (!error) { ifp->if_opackets++; /* Someone else received the packet. */ oifp->if_ipackets++; } else { /* The packet was freed already. */ epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_OACTIVE; (void) epair_add_ifp_for_draining(ifp); ifp->if_oerrors++; EPAIR_REFCOUNT_RELEASE(&sc->refcount); EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, ("%s: ifp=%p sc->refcount not >= 1: %d", __func__, oifp, sc->refcount)); } } } static void epair_start(struct ifnet *ifp) { struct epair_dpcpu *epair_dpcpu; epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); EPAIR_LOCK(epair_dpcpu); epair_start_locked(ifp); EPAIR_UNLOCK(epair_dpcpu); } static int epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) { struct epair_dpcpu *epair_dpcpu; struct epair_softc *sc; struct ifnet *oifp; int error, len; short mflags; DPRINTF("ifp=%p m=%p\n", ifp, m); sc = ifp->if_softc; epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); EPAIR_LOCK_ASSERT(epair_dpcpu); if (m == NULL) return (0); /* * We are not going to use the interface en/dequeue mechanism * on the TX side. We are called from ether_output_frame() * and will put the packet into the incoming queue of the * other interface of our pair via the netsir. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { m_freem(m); return (ENXIO); } if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return (ENETDOWN); } BPF_MTAP(ifp, m); /* * In case the outgoing interface is not usable, * drop the packet. */ oifp = sc->oifp; if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (oifp->if_flags & IFF_UP) ==0) { ifp->if_oerrors++; m_freem(m); return (0); } len = m->m_pkthdr.len; mflags = m->m_flags; DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); #ifdef ALTQ /* Support ALTQ via the clasic if_start() path. */ IF_LOCK(&ifp->if_snd); if (ALTQ_IS_ENABLED(&ifp->if_snd)) { ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); if (error) ifp->if_snd.ifq_drops++; IF_UNLOCK(&ifp->if_snd); if (!error) { ifp->if_obytes += len; if (mflags & (M_BCAST|M_MCAST)) ifp->if_omcasts++; if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) epair_start_locked(ifp); else (void)epair_add_ifp_for_draining(ifp); } return (error); } IF_UNLOCK(&ifp->if_snd); #endif if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { /* * Our hardware queue is full, try to fall back * queuing to the ifq but do not call ifp->if_start. * Either we are lucky or the packet is gone. */ IFQ_ENQUEUE(&ifp->if_snd, m, error); if (!error) (void)epair_add_ifp_for_draining(ifp); return (error); } sc = oifp->if_softc; /* * Add a reference so the interface cannot go while the * packet is in transit as we rely on rcvif to stay valid. */ EPAIR_REFCOUNT_AQUIRE(&sc->refcount); m->m_pkthdr.rcvif = oifp; CURVNET_SET_QUIET(oifp->if_vnet); error = netisr_queue(NETISR_EPAIR, m); CURVNET_RESTORE(); if (!error) { ifp->if_opackets++; /* * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, * but as we bypass all this we have to duplicate * the logic another time. */ ifp->if_obytes += len; if (mflags & (M_BCAST|M_MCAST)) ifp->if_omcasts++; /* Someone else received the packet. */ oifp->if_ipackets++; } else { /* The packet was freed already. */ epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_OACTIVE; ifp->if_oerrors++; EPAIR_REFCOUNT_RELEASE(&sc->refcount); EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, ("%s: ifp=%p sc->refcount not >= 1: %d", __func__, oifp, sc->refcount)); } return (error); } static int epair_transmit(struct ifnet *ifp, struct mbuf *m) { struct epair_dpcpu *epair_dpcpu; int error; epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); EPAIR_LOCK(epair_dpcpu); error = epair_transmit_locked(ifp, m); EPAIR_UNLOCK(epair_dpcpu); return (error); } static void epair_qflush(struct ifnet *ifp) { struct epair_softc *sc; sc = ifp->if_softc; KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", __func__, ifp, sc)); /* * Remove this ifp from all backpointer lists. The interface will not * usable for flushing anyway nor should it have anything to flush * after if_qflush(). */ epair_remove_ifp_from_draining(ifp); if (sc->if_qflush) sc->if_qflush(ifp); } static int epair_media_change(struct ifnet *ifp __unused) { /* Do nothing. */ return (0); } static void epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) { imr->ifm_status = IFM_AVALID | IFM_ACTIVE; imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; } static int epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct epair_softc *sc; struct ifreq *ifr; int error; ifr = (struct ifreq *)data; switch (cmd) { case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: sc = ifp->if_softc; error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); break; case SIOCSIFMTU: /* We basically allow all kinds of MTUs. */ ifp->if_mtu = ifr->ifr_mtu; error = 0; break; default: /* Let the common ethernet handler process this. */ error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void epair_init(void *dummy __unused) { } /* * Interface cloning functions. * We use our private ones so that we can create/destroy our secondary * device along with the primary one. */ static int epair_clone_match(struct if_clone *ifc, const char *name) { const char *cp; DPRINTF("name='%s'\n", name); /* * Our base name is epair. * Our interfaces will be named epair[ab]. * So accept anything of the following list: * - epair * - epair * but not the epair[ab] versions. */ if (strncmp(epairname, name, sizeof(epairname)-1) != 0) return (0); for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } return (1); } static int epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { struct epair_softc *sca, *scb; struct ifnet *ifp; char *dp; int error, unit, wildcard; uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ /* * We are abusing params to create our second interface. * Actually we already created it and called if_clone_create() * for it to do the official insertion procedure the moment we knew * it cannot fail anymore. So just do attach it here. */ if (params) { scb = (struct epair_softc *)params; ifp = scb->ifp; /* Assign a hopefully unique, locally administered etheraddr. */ eaddr[0] = 0x02; eaddr[3] = (ifp->if_index >> 8) & 0xff; eaddr[4] = ifp->if_index & 0xff; eaddr[5] = 0x0b; ether_ifattach(ifp, eaddr); /* Correctly set the name for the cloner list. */ strlcpy(name, scb->ifp->if_xname, len); return (0); } /* Try to see if a special unit was requested. */ error = ifc_name2unit(name, &unit); if (error != 0) return (error); wildcard = (unit < 0); error = ifc_alloc_unit(ifc, &unit); if (error != 0) return (error); /* * If no unit had been given, we need to adjust the ifName. * Also make sure there is space for our extra [ab] suffix. */ for (dp = name; *dp != '\0'; dp++); if (wildcard) { error = snprintf(dp, len - (dp - name), "%d", unit); if (error > len - (dp - name) - 1) { /* ifName too long. */ ifc_free_unit(ifc, unit); return (ENOSPC); } dp += error; } if (len - (dp - name) - 1 < 1) { /* No space left for our [ab] suffix. */ ifc_free_unit(ifc, unit); return (ENOSPC); } *dp = 'a'; /* Must not change dp so we can replace 'a' by 'b' later. */ *(dp+1) = '\0'; /* Allocate memory for both [ab] interfaces */ sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); EPAIR_REFCOUNT_INIT(&sca->refcount, 1); sca->ifp = if_alloc(IFT_ETHER); if (sca->ifp == NULL) { free(sca, M_EPAIR); ifc_free_unit(ifc, unit); return (ENOSPC); } scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); EPAIR_REFCOUNT_INIT(&scb->refcount, 1); scb->ifp = if_alloc(IFT_ETHER); if (scb->ifp == NULL) { free(scb, M_EPAIR); if_free(sca->ifp); free(sca, M_EPAIR); ifc_free_unit(ifc, unit); return (ENOSPC); } /* * Cross-reference the interfaces so we will be able to free both. */ sca->oifp = scb->ifp; scb->oifp = sca->ifp; /* * Calculate the cpuid for netisr queueing based on the * ifIndex of the interfaces. As long as we cannot configure * this or use cpuset information easily we cannot guarantee * cache locality but we can at least allow parallelism. */ sca->cpuid = netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); scb->cpuid = netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); /* Finish initialization of interface a. */ ifp = sca->ifp; ifp->if_softc = sca; strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = epairname; ifp->if_dunit = unit; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = IFCAP_VLAN_MTU; ifp->if_capenable = IFCAP_VLAN_MTU; ifp->if_start = epair_start; ifp->if_ioctl = epair_ioctl; ifp->if_init = epair_init; ifp->if_snd.ifq_maxlen = ifqmaxlen; /* Assign a hopefully unique, locally administered etheraddr. */ eaddr[0] = 0x02; eaddr[3] = (ifp->if_index >> 8) & 0xff; eaddr[4] = ifp->if_index & 0xff; eaddr[5] = 0x0a; ether_ifattach(ifp, eaddr); sca->if_qflush = ifp->if_qflush; ifp->if_qflush = epair_qflush; ifp->if_transmit = epair_transmit; - if_initbaudrate(ifp, IF_Gbps(10)); /* arbitrary maximum */ + ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ /* Swap the name and finish initialization of interface b. */ *dp = 'b'; ifp = scb->ifp; ifp->if_softc = scb; strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = epairname; ifp->if_dunit = unit; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = IFCAP_VLAN_MTU; ifp->if_capenable = IFCAP_VLAN_MTU; ifp->if_start = epair_start; ifp->if_ioctl = epair_ioctl; ifp->if_init = epair_init; ifp->if_snd.ifq_maxlen = ifqmaxlen; /* We need to play some tricks here for the second interface. */ strlcpy(name, epairname, len); error = if_clone_create(name, len, (caddr_t)scb); if (error) panic("%s: if_clone_create() for our 2nd iface failed: %d", __func__, error); scb->if_qflush = ifp->if_qflush; ifp->if_qflush = epair_qflush; ifp->if_transmit = epair_transmit; - if_initbaudrate(ifp, IF_Gbps(10)); /* arbitrary maximum */ + ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ /* * Restore name to a as the ifp for this will go into the * cloner list for the initial call. */ strlcpy(name, sca->ifp->if_xname, len); DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); /* Initialise pseudo media types. */ ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); /* Tell the world, that we are ready to rock. */ sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(sca->ifp, LINK_STATE_UP); if_link_state_change(scb->ifp, LINK_STATE_UP); return (0); } static int epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) { struct ifnet *oifp; struct epair_softc *sca, *scb; int unit, error; DPRINTF("ifp=%p\n", ifp); /* * In case we called into if_clone_destroyif() ourselves * again to remove the second interface, the softc will be * NULL. In that case so not do anything but return success. */ if (ifp->if_softc == NULL) return (0); unit = ifp->if_dunit; sca = ifp->if_softc; oifp = sca->oifp; scb = oifp->if_softc; DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); if_link_state_change(ifp, LINK_STATE_DOWN); if_link_state_change(oifp, LINK_STATE_DOWN); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; oifp->if_drv_flags &= ~IFF_DRV_RUNNING; /* * Get rid of our second half. As the other of the two * interfaces may reside in a different vnet, we need to * switch before freeing them. */ CURVNET_SET_QUIET(oifp->if_vnet); ether_ifdetach(oifp); /* * Wait for all packets to be dispatched to if_input. * The numbers can only go down as the interface is * detached so there is no need to use atomics. */ DPRINTF("scb refcnt=%u\n", scb->refcount); EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); oifp->if_softc = NULL; error = if_clone_destroyif(ifc, oifp); if (error) panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", __func__, error); if_free(oifp); ifmedia_removeall(&scb->media); free(scb, M_EPAIR); CURVNET_RESTORE(); ether_ifdetach(ifp); /* * Wait for all packets to be dispatched to if_input. */ DPRINTF("sca refcnt=%u\n", sca->refcount); EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); if_free(ifp); ifmedia_removeall(&sca->media); free(sca, M_EPAIR); ifc_free_unit(ifc, unit); return (0); } static int epair_modevent(module_t mod, int type, void *data) { int qlimit; switch (type) { case MOD_LOAD: /* For now limit us to one global mutex and one inq. */ epair_dpcpu_init(); epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) epair_nh.nh_qlimit = qlimit; netisr_register(&epair_nh); epair_cloner = if_clone_advanced(epairname, 0, epair_clone_match, epair_clone_create, epair_clone_destroy); if (bootverbose) printf("%s initialized.\n", epairname); break; case MOD_UNLOAD: if_clone_detach(epair_cloner); netisr_unregister(&epair_nh); epair_dpcpu_detach(); if (bootverbose) printf("%s unloaded.\n", epairname); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t epair_mod = { "if_epair", epair_modevent, 0 }; DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_epair, 1); Index: head/sys/net/if_var.h =================================================================== --- head/sys/net/if_var.h (revision 263101) +++ head/sys/net/if_var.h (revision 263102) @@ -1,528 +1,515 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, rt) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. These routines live in the files if.c and route.c */ struct rtentry; /* ifa_rtrequest */ struct rt_addrinfo; /* ifa_rtrequest */ struct socket; struct carp_if; struct carp_softc; struct ifvlantrunk; struct route; /* if_output */ struct vnet; #ifdef _KERNEL #include /* ifqueue only? */ #include #include #endif /* _KERNEL */ #include #include /* XXX */ #include /* struct ifqueue */ #include /* XXX */ #include /* XXX */ #include /* if_link_task */ #define IF_DUNIT_NONE -1 #include TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ TAILQ_HEAD(ifmultihead, ifmultiaddr); TAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */ #define V_link_pfil_hook VNET(link_pfil_hook) #endif /* _KERNEL */ /* * Structure defining a network interface. * * Size ILP32: 592 (approx) * LP64: 1048 (approx) */ struct ifnet { /* General book keeping of interface lists. */ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ /* protected by if_addr_lock */ u_char if_alloctype; /* if_type at time of allocation */ /* Driver and protocol specific information that remains stable. */ void *if_softc; /* pointer to driver state */ void *if_llsoftc; /* link layer softc */ void *if_l2com; /* pointer to protocol bits */ const char *if_dname; /* driver name */ int if_dunit; /* unit or IF_DUNIT_NONE */ u_short if_index; /* numeric abbreviation for this if */ short if_index_reserved; /* spare space to grow if_index */ char if_xname[IFNAMSIZ]; /* external name (name + unit) */ char *if_description; /* interface description */ /* Variable fields that are touched by the stack and drivers. */ int if_flags; /* up/down, broadcast, etc. */ int if_capabilities; /* interface features & capabilities */ int if_capenable; /* enabled features & capabilities */ void *if_linkmib; /* link-type-specific MIB data */ size_t if_linkmiblen; /* length of above data */ int if_drv_flags; /* driver-managed status flags */ u_int if_refcount; /* reference count */ struct ifaltq if_snd; /* output queue (includes altq) */ struct if_data if_data; /* type information and statistics */ struct task if_linktask; /* task for link change events */ /* Addresses of different protocol families assigned to this if. */ struct rwlock if_addr_lock; /* lock to protect address lists */ /* * if_addrhead is the list of all addresses associated to * an interface. * Some code in the kernel assumes that first element * of the list has type AF_LINK, and contains sockaddr_dl * addresses which store the link-level address and the name * of the interface. * However, access to the AF_LINK address through this * field is deprecated. Use if_addr or ifaddr_byindex() instead. */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ struct ifmultihead if_multiaddrs; /* multicast addresses configured */ int if_amcount; /* number of all-multicast requests */ struct ifaddr *if_addr; /* pointer to link-level address */ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */ struct rwlock if_afdata_lock; void *if_afdata[AF_MAX]; int if_afdata_initialized; /* Additional features hung off the interface. */ u_int if_fib; /* interface FIB */ struct vnet *if_vnet; /* pointer to network stack instance */ struct vnet *if_home_vnet; /* where this ifnet originates from */ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ struct bpf_if *if_bpf; /* packet filter structure */ int if_pcount; /* number of promiscuous listeners */ void *if_bridge; /* bridge glue */ void *if_lagg; /* lagg glue */ void *if_pf_kif; /* pf glue */ struct carp_if *if_carp; /* carp interface structure */ struct label *if_label; /* interface MAC label */ /* Various procedures of the layer2 encapsulation and drivers. */ int (*if_output) /* output routine (enqueue) */ (struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); void (*if_input) /* input routine (from h/w driver) */ (struct ifnet *, struct mbuf *); void (*if_start) /* initiate output routine */ (struct ifnet *); int (*if_ioctl) /* ioctl routine */ (struct ifnet *, u_long, caddr_t); void (*if_init) /* Init routine */ (void *); int (*if_resolvemulti) /* validate/resolve multicast */ (struct ifnet *, struct sockaddr **, struct sockaddr *); void (*if_qflush) /* flush any queues */ (struct ifnet *); int (*if_transmit) /* initiate output routine */ (struct ifnet *, struct mbuf *); void (*if_reassign) /* reassign to vnet routine */ (struct ifnet *, struct vnet *, char *); /* Stuff that's only temporary and doesn't belong here. */ u_int if_hw_tsomax; /* tso burst length limit, the minimum * is (IP_MAXPACKET / 8). * XXXAO: Have to find a better place * for it eventually. */ /* * Spare fields are added so that we can modify sensitive data * structures without changing the kernel binary interface, and must * be used with care where binary compatibility is required. */ char if_cspare[3]; int if_ispare[4]; void *if_unused[2]; void *if_pspare[8]; /* 1 netmap, 7 TDB */ }; #include /* XXXAO: temporary unconditional include */ /* * XXX These aliases are terribly dangerous because they could apply * to anything. */ #define if_mtu if_data.ifi_mtu #define if_type if_data.ifi_type #define if_physical if_data.ifi_physical #define if_addrlen if_data.ifi_addrlen #define if_hdrlen if_data.ifi_hdrlen #define if_metric if_data.ifi_metric #define if_link_state if_data.ifi_link_state #define if_baudrate if_data.ifi_baudrate -#define if_baudrate_pf if_data.ifi_baudrate_pf #define if_hwassist if_data.ifi_hwassist #define if_ipackets if_data.ifi_ipackets #define if_ierrors if_data.ifi_ierrors #define if_opackets if_data.ifi_opackets #define if_oerrors if_data.ifi_oerrors #define if_collisions if_data.ifi_collisions #define if_ibytes if_data.ifi_ibytes #define if_obytes if_data.ifi_obytes #define if_imcasts if_data.ifi_imcasts #define if_omcasts if_data.ifi_omcasts #define if_iqdrops if_data.ifi_iqdrops #define if_noproto if_data.ifi_noproto #define if_lastchange if_data.ifi_lastchange /* for compatibility with other BSDs */ #define if_addrlist if_addrhead #define if_list if_link #define if_name(ifp) ((ifp)->if_xname) /* * Locks for address lists on the network interface. */ #define IF_ADDR_LOCK_INIT(if) rw_init(&(if)->if_addr_lock, "if_addr_lock") #define IF_ADDR_LOCK_DESTROY(if) rw_destroy(&(if)->if_addr_lock) #define IF_ADDR_WLOCK(if) rw_wlock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) rw_wunlock(&(if)->if_addr_lock) #define IF_ADDR_RLOCK(if) rw_rlock(&(if)->if_addr_lock) #define IF_ADDR_RUNLOCK(if) rw_runlock(&(if)->if_addr_lock) #define IF_ADDR_LOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_LOCKED) #define IF_ADDR_WLOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_WLOCKED) /* * Function variations on locking macros intended to be used by loadable * kernel modules in order to divorce them from the internals of address list * locking. */ void if_addr_rlock(struct ifnet *ifp); /* if_addrhead */ void if_addr_runlock(struct ifnet *ifp); /* if_addrhead */ void if_maddr_rlock(struct ifnet *ifp); /* if_multiaddrs */ void if_maddr_runlock(struct ifnet *ifp); /* if_multiaddrs */ #ifdef _KERNEL #ifdef _SYS_EVENTHANDLER_H_ /* interface link layer address change event */ typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t); /* interface address change event */ typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); /* new interface arrival event */ typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t); /* interface departure event */ typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); /* Interface link state change event */ typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int); EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); #endif /* _SYS_EVENTHANDLER_H_ */ /* * interface groups */ struct ifg_group { char ifg_group[IFNAMSIZ]; u_int ifg_refcnt; void *ifg_pf_kif; TAILQ_HEAD(, ifg_member) ifg_members; TAILQ_ENTRY(ifg_group) ifg_next; }; struct ifg_member { TAILQ_ENTRY(ifg_member) ifgm_next; struct ifnet *ifgm_ifp; }; struct ifg_list { struct ifg_group *ifgl_group; TAILQ_ENTRY(ifg_list) ifgl_next; }; #ifdef _SYS_EVENTHANDLER_H_ /* group attach event */ typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t); /* group detach event */ typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t); /* group change event */ typedef void (*group_change_event_handler_t)(void *, const char *); EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #endif /* _SYS_EVENTHANDLER_H_ */ #define IF_AFDATA_LOCK_INIT(ifp) \ rw_init(&(ifp)->if_afdata_lock, "if_afdata") #define IF_AFDATA_WLOCK(ifp) rw_wlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_RLOCK(ifp) rw_rlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_WUNLOCK(ifp) rw_wunlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_RUNLOCK(ifp) rw_runlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_TRYLOCK(ifp) rw_try_wlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_DESTROY(ifp) rw_destroy(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED) #define IF_AFDATA_RLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_RLOCKED) #define IF_AFDATA_WLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED) - -static __inline void -if_initbaudrate(struct ifnet *ifp, uintmax_t baud) -{ - - ifp->if_baudrate_pf = 0; - while (baud > (u_long)(~0UL)) { - baud /= 10; - ifp->if_baudrate_pf++; - } - ifp->if_baudrate = baud; -} /* * 72 was chosen below because it is the size of a TCP/IP * header (40) + the minimum mss (32). */ #define IF_MINMTU 72 #define IF_MAXMTU 65535 #define TOEDEV(ifp) ((ifp)->if_llsoftc) #endif /* _KERNEL */ /* * The ifaddr structure contains information about one address * of an interface. They are maintained by the different address families, * are allocated and attached when an address is set, and are linked * together so all addresses for an interface can be located. * * NOTE: a 'struct ifaddr' is always at the beginning of a larger * chunk of malloc'ed memory, where we store the three addresses * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here. */ #if defined(_KERNEL) || defined(_WANT_IFADDR) struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ struct ifnet *ifa_ifp; /* back-pointer to interface */ struct carp_softc *ifa_carp; /* pointer to CARP data */ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ (int, struct rtentry *, struct rt_addrinfo *); u_short ifa_flags; /* mostly rt_flags for cloning */ u_int ifa_refcnt; /* references to this structure */ int ifa_metric; /* cost of going out this interface */ int (*ifa_claim_addr) /* check if an addr goes to this if */ (struct ifaddr *, struct sockaddr *); counter_u64_t ifa_ipackets; counter_u64_t ifa_opackets; counter_u64_t ifa_ibytes; counter_u64_t ifa_obytes; }; #endif #ifdef _KERNEL #define IFA_ROUTE RTF_UP /* route installed */ #define IFA_RTSELF RTF_HOST /* loopback route to self installed */ /* For compatibility with other BSDs. SCTP uses it. */ #define ifa_list ifa_link struct ifaddr * ifa_alloc(size_t size, int flags); void ifa_free(struct ifaddr *ifa); void ifa_ref(struct ifaddr *ifa); #endif /* _KERNEL */ /* * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. */ struct ifmultiaddr { TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ struct ifnet *ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ void *ifma_protospec; /* protocol-specific state, if any */ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ }; #ifdef _KERNEL extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; #define IFNET_LOCK_INIT() do { \ rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \ sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \ } while(0) #define IFNET_WLOCK() do { \ sx_xlock(&ifnet_sxlock); \ rw_wlock(&ifnet_rwlock); \ } while (0) #define IFNET_WUNLOCK() do { \ rw_wunlock(&ifnet_rwlock); \ sx_xunlock(&ifnet_sxlock); \ } while (0) /* * To assert the ifnet lock, you must know not only whether it's for read or * write, but also whether it was acquired with sleep support or not. */ #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_RLOCK_NOSLEEP_ASSERT() rw_assert(&ifnet_rwlock, RA_RLOCKED) #define IFNET_WLOCK_ASSERT() do { \ sx_assert(&ifnet_sxlock, SA_XLOCKED); \ rw_assert(&ifnet_rwlock, RA_WLOCKED); \ } while (0) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RLOCK_NOSLEEP() rw_rlock(&ifnet_rwlock) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) #define IFNET_RUNLOCK_NOSLEEP() rw_runlock(&ifnet_rwlock) /* * Look up an ifnet given its index; the _ref variant also acquires a * reference that must be freed using if_rele(). It is almost always a bug * to call ifnet_byindex() instead if ifnet_byindex_ref(). */ struct ifnet *ifnet_byindex(u_short idx); struct ifnet *ifnet_byindex_locked(u_short idx); struct ifnet *ifnet_byindex_ref(u_short idx); /* * Given the index, ifaddr_byindex() returns the one and only * link-level ifaddr for the interface. You are not supposed to use * it to traverse the list of addresses associated to the interface. */ struct ifaddr *ifaddr_byindex(u_short idx); VNET_DECLARE(struct ifnethead, ifnet); VNET_DECLARE(struct ifgrouphead, ifg_head); VNET_DECLARE(int, if_index); VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */ #define V_ifnet VNET(ifnet) #define V_ifg_head VNET(ifg_head) #define V_if_index VNET(if_index) #define V_loif VNET(loif) int if_addgroup(struct ifnet *, const char *); int if_delgroup(struct ifnet *, const char *); int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **); int if_allmulti(struct ifnet *, int); struct ifnet* if_alloc(u_char); void if_attach(struct ifnet *); void if_dead(struct ifnet *); int if_delmulti(struct ifnet *, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_detach(struct ifnet *); void if_vmove(struct ifnet *, struct vnet *); void if_purgeaddrs(struct ifnet *); void if_delallmulti(struct ifnet *); void if_down(struct ifnet *); struct ifmultiaddr * if_findmulti(struct ifnet *, struct sockaddr *); void if_free(struct ifnet *); void if_initname(struct ifnet *, const char *, int); void if_link_state_change(struct ifnet *, int); int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3); void if_ref(struct ifnet *); void if_rele(struct ifnet *); int if_setlladdr(struct ifnet *, const u_char *, int); void if_up(struct ifnet *); int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifnet *ifunit_ref(const char *); int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); struct ifaddr *ifa_ifwithaddr(struct sockaddr *); int ifa_ifwithaddr_check(struct sockaddr *); struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *); struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *); struct ifaddr *ifa_ifwithnet(struct sockaddr *, int); struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *); struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int); struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *); int ifa_preferred(struct ifaddr *, struct ifaddr *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp); typedef void if_com_free_t(void *com, u_char type); void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); #define IF_LLADDR(ifp) \ LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr)) #endif /* _KERNEL */ #endif /* !_NET_IF_VAR_H_ */ Index: head/sys/net/rtsock.c =================================================================== --- head/sys/net/rtsock.c (revision 263101) +++ head/sys/net/rtsock.c (revision 263102) @@ -1,2015 +1,1938 @@ /*- * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_mpath.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #ifdef COMPAT_FREEBSD32 #include #include -struct if_data32 { - uint8_t ifi_type; - uint8_t ifi_physical; - uint8_t ifi_addrlen; - uint8_t ifi_hdrlen; - uint8_t ifi_link_state; - uint8_t ifi_vhid; - uint8_t ifi_baudrate_pf; - uint8_t ifi_datalen; - uint32_t ifi_mtu; - uint32_t ifi_metric; - uint32_t ifi_baudrate; - uint32_t ifi_ipackets; - uint32_t ifi_ierrors; - uint32_t ifi_opackets; - uint32_t ifi_oerrors; - uint32_t ifi_collisions; - uint32_t ifi_ibytes; - uint32_t ifi_obytes; - uint32_t ifi_imcasts; - uint32_t ifi_omcasts; - uint32_t ifi_iqdrops; - uint32_t ifi_noproto; - uint32_t ifi_hwassist; - int32_t ifi_epoch; - struct timeval32 ifi_lastchange; -}; - struct if_msghdr32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; - struct if_data32 ifm_data; + struct if_data ifm_data; }; struct if_msghdrl32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; uint16_t _ifm_spare1; uint16_t ifm_len; uint16_t ifm_data_off; - struct if_data32 ifm_data; + struct if_data ifm_data; }; struct ifa_msghdrl32 { uint16_t ifam_msglen; uint8_t ifam_version; uint8_t ifam_type; int32_t ifam_addrs; int32_t ifam_flags; uint16_t ifam_index; uint16_t _ifam_spare1; uint16_t ifam_len; uint16_t ifam_data_off; int32_t ifam_metric; - struct if_data32 ifam_data; + struct if_data ifam_data; }; #endif /* COMPAT_FREEBSD32 */ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; /* These are external hooks for CARP. */ int (*carp_get_vhid_p)(struct ifaddr *); /* * Used by rtsock/raw_input callback code to decide whether to filter the update * notification to a socket bound to a particular FIB. */ #define RTS_FILTER_FIB M_PROTO8 typedef struct { int ip_count; /* attached w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ int ipx_count; /* attached w/ AF_IPX */ int any_count; /* total attached */ } route_cb_t; static VNET_DEFINE(route_cb_t, route_cb); #define V_route_cb VNET(route_cb) struct mtx rtsock_mtx; MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); struct walkarg { int w_tmemsize; int w_op, w_arg; caddr_t w_tmem; struct sysctl_req *w_req; }; static void rts_input(struct mbuf *m); static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo); static int rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w); static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo); static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_ifmalist(int af, struct walkarg *w); static int route_output(struct mbuf *m, struct socket *so); static void rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt); static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, sa_family_t); static struct netisr_handler rtsock_nh = { .nh_name = "rtsock", .nh_handler = rts_input, .nh_proto = NETISR_ROUTE, .nh_policy = NETISR_POLICY_SOURCE, }; static int sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&rtsock_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&rtsock_nh, qlimit)); } SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_route_netisr_maxqlen, "I", "maximum routing socket dispatch queue length"); static void rts_init(void) { int tmp; if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) rtsock_nh.nh_qlimit = tmp; netisr_register(&rtsock_nh); } SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0); static int raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src, struct rawcb *rp) { int fibnum; KASSERT(m != NULL, ("%s: m is NULL", __func__)); KASSERT(proto != NULL, ("%s: proto is NULL", __func__)); KASSERT(rp != NULL, ("%s: rp is NULL", __func__)); /* No filtering requested. */ if ((m->m_flags & RTS_FILTER_FIB) == 0) return (0); /* Check if it is a rts and the fib matches the one of the socket. */ fibnum = M_GETFIB(m); if (proto->sp_family != PF_ROUTE || rp->rcb_socket == NULL || rp->rcb_socket->so_fibnum == fibnum) return (0); /* Filtering requested and no match, the socket shall be skipped. */ return (1); } static void rts_input(struct mbuf *m) { struct sockproto route_proto; unsigned short *family; struct m_tag *tag; route_proto.sp_family = PF_ROUTE; tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL); if (tag != NULL) { family = (unsigned short *)(tag + 1); route_proto.sp_protocol = *family; m_tag_delete(m, tag); } else route_proto.sp_protocol = 0; raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb); } /* * It really doesn't make any sense at all for this code to share much * with raw_usrreq.c, since its functionality is so restricted. XXX */ static void rts_abort(struct socket *so) { raw_usrreqs.pru_abort(so); } static void rts_close(struct socket *so) { raw_usrreqs.pru_close(so); } /* pru_accept is EOPNOTSUPP */ static int rts_attach(struct socket *so, int proto, struct thread *td) { struct rawcb *rp; int error; KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL")); /* XXX */ rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO); if (rp == NULL) return ENOBUFS; so->so_pcb = (caddr_t)rp; so->so_fibnum = td->td_proc->p_fibnum; error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { so->so_pcb = NULL; free(rp, M_PCB); return error; } RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: V_route_cb.ip_count++; break; case AF_INET6: V_route_cb.ip6_count++; break; case AF_IPX: V_route_cb.ipx_count++; break; } V_route_cb.any_count++; RTSOCK_UNLOCK(); soisconnected(so); so->so_options |= SO_USELOOPBACK; return 0; } static int rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */ } static int rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */ } /* pru_connect2 is EOPNOTSUPP */ /* pru_control is EOPNOTSUPP */ static void rts_detach(struct socket *so) { struct rawcb *rp = sotorawcb(so); KASSERT(rp != NULL, ("rts_detach: rp == NULL")); RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: V_route_cb.ip_count--; break; case AF_INET6: V_route_cb.ip6_count--; break; case AF_IPX: V_route_cb.ipx_count--; break; } V_route_cb.any_count--; RTSOCK_UNLOCK(); raw_usrreqs.pru_detach(so); } static int rts_disconnect(struct socket *so) { return (raw_usrreqs.pru_disconnect(so)); } /* pru_listen is EOPNOTSUPP */ static int rts_peeraddr(struct socket *so, struct sockaddr **nam) { return (raw_usrreqs.pru_peeraddr(so, nam)); } /* pru_rcvd is EOPNOTSUPP */ /* pru_rcvoob is EOPNOTSUPP */ static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { return (raw_usrreqs.pru_send(so, flags, m, nam, control, td)); } /* pru_sense is null */ static int rts_shutdown(struct socket *so) { return (raw_usrreqs.pru_shutdown(so)); } static int rts_sockaddr(struct socket *so, struct sockaddr **nam) { return (raw_usrreqs.pru_sockaddr(so, nam)); } static struct pr_usrreqs route_usrreqs = { .pru_abort = rts_abort, .pru_attach = rts_attach, .pru_bind = rts_bind, .pru_connect = rts_connect, .pru_detach = rts_detach, .pru_disconnect = rts_disconnect, .pru_peeraddr = rts_peeraddr, .pru_send = rts_send, .pru_shutdown = rts_shutdown, .pru_sockaddr = rts_sockaddr, .pru_close = rts_close, }; #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED /* * The union of all possible address formats we handle. */ union sockaddr_union { struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ static int rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred) { /* First, see if the returned address is part of the jail. */ if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) { info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; return (0); } switch (info->rti_info[RTAX_DST]->sa_family) { #ifdef INET case AF_INET: { struct in_addr ia; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; ia = ((struct sockaddr_in *)sa)->sin_addr; if (prison_check_ip4(cred, &ia) == 0) { found = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!found) { /* * As a last resort return the 'default' jail address. */ ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)-> sin_addr; if (prison_get_ip4(cred, &ia) != 0) return (ESRCH); } bzero(&saun->sin, sizeof(struct sockaddr_in)); saun->sin.sin_len = sizeof(struct sockaddr_in); saun->sin.sin_family = AF_INET; saun->sin.sin_addr.s_addr = ia.s_addr; info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin; break; } #endif #ifdef INET6 case AF_INET6: { struct in6_addr ia6; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET6) continue; bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr, &ia6, sizeof(struct in6_addr)); if (prison_check_ip6(cred, &ia6) == 0) { found = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!found) { /* * As a last resort return the 'default' jail address. */ ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)-> sin6_addr; if (prison_get_ip6(cred, &ia6) != 0) return (ESRCH); } bzero(&saun->sin6, sizeof(struct sockaddr_in6)); saun->sin6.sin6_len = sizeof(struct sockaddr_in6); saun->sin6.sin6_family = AF_INET6; bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr)); if (sa6_recoverscope(&saun->sin6) != 0) return (ESRCH); info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6; break; } #endif default: return (ESRCH); } return (0); } /*ARGSUSED*/ static int route_output(struct mbuf *m, struct socket *so) { #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct radix_node_head *rnh; struct rt_addrinfo info; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; int i, rti_need_deembed = 0; #endif int len, error = 0; struct ifnet *ifp = NULL; union sockaddr_union saun; sa_family_t saf = AF_UNSPEC; #define senderr(e) { error = e; goto flush;} if (m == NULL || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == NULL)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { info.rti_info[RTAX_DST] = NULL; senderr(EINVAL); } R_Malloc(rtm, struct rt_msghdr *, len); if (rtm == NULL) { info.rti_info[RTAX_DST] = NULL; senderr(ENOBUFS); } m_copydata(m, 0, len, (caddr_t)rtm); if (rtm->rtm_version != RTM_VERSION) { info.rti_info[RTAX_DST] = NULL; senderr(EPROTONOSUPPORT); } rtm->rtm_pid = curproc->p_pid; bzero(&info, sizeof(info)); info.rti_addrs = rtm->rtm_addrs; /* * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6 * link-local address because rtrequest requires addresses with * embedded scope id. */ if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { info.rti_info[RTAX_DST] = NULL; senderr(EINVAL); } info.rti_flags = rtm->rtm_flags; if (info.rti_info[RTAX_DST] == NULL || info.rti_info[RTAX_DST]->sa_family >= AF_MAX || (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) senderr(EINVAL); saf = info.rti_info[RTAX_DST]->sa_family; /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET) { error = priv_check(curthread, PRIV_NET_ROUTE); if (error) senderr(error); } /* * The given gateway address may be an interface address. * For example, issuing a "route change" command on a route * entry that was created from a tunnel, and the gateway * address given is the local end point. In this case the * RTF_GATEWAY flag must be cleared or the destination will * not be reachable even though there is no error message. */ if (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) { struct route gw_ro; bzero(&gw_ro, sizeof(gw_ro)); gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY]; rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum); /* * A host route through the loopback interface is * installed for each interface adddress. In pre 8.0 * releases the interface address of a PPP link type * is not reachable locally. This behavior is fixed as * part of the new L2/L3 redesign and rewrite work. The * signature of this interface address route is the * AF_LINK sa_family type of the rt_gateway, and the * rt_ifp has the IFF_LOOPBACK flag set. */ if (gw_ro.ro_rt != NULL && gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK && gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) { info.rti_flags &= ~RTF_GATEWAY; info.rti_flags |= RTF_GWFLAG_COMPAT; } if (gw_ro.ro_rt != NULL) RTFREE(gw_ro.ro_rt); } switch (rtm->rtm_type) { struct rtentry *saved_nrt; case RTM_ADD: if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; /* support for new ARP code */ if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); #ifdef INET6 if (error == 0) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; } error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt, so->so_fibnum); if (error == 0 && saved_nrt) { #ifdef INET6 rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif RT_LOCK(saved_nrt); rt_setmetrics(rtm, saved_nrt); rtm->rtm_index = saved_nrt->rt_ifp->if_index; RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); } break; case RTM_DELETE: saved_nrt = NULL; /* support for new ARP code */ if (info.rti_info[RTAX_GATEWAY] && (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); #ifdef INET6 if (error == 0) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; } error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, so->so_fibnum); if (error == 0) { RT_LOCK(saved_nrt); rt = saved_nrt; goto report; } #ifdef INET6 /* rt_msg2() will not be used when RTM_DELETE fails. */ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; case RTM_GET: case RTM_CHANGE: case RTM_LOCK: rnh = rt_tables_get_rnh(so->so_fibnum, info.rti_info[RTAX_DST]->sa_family); if (rnh == NULL) senderr(EAFNOSUPPORT); RADIX_NODE_HEAD_RLOCK(rnh); if (info.rti_info[RTAX_NETMASK] == NULL && rtm->rtm_type == RTM_GET) { /* * Provide logest prefix match for * address lookup (no mask). * 'route -n get addr' */ rt = (struct rtentry *) rnh->rnh_matchaddr( info.rti_info[RTAX_DST], rnh); } else rt = (struct rtentry *) rnh->rnh_lookup( info.rti_info[RTAX_DST], info.rti_info[RTAX_NETMASK], rnh); if (rt == NULL) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } #ifdef RADIX_MPATH /* * for RTM_CHANGE/LOCK, if we got multipath routes, * we require users to specify a matching RTAX_GATEWAY. * * for RTM_GET, gate is optional even with multipath. * if gate == NULL the first match is returned. * (no need to call rt_mpath_matchgate if gate == NULL) */ if (rn_mpath_capable(rnh) && (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) { rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]); if (!rt) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } } #endif /* * If performing proxied L2 entry insertion, and * the actual PPP host entry is found, perform * another search to retrieve the prefix route of * the local end point of the PPP link. */ if (rtm->rtm_flags & RTF_ANNOUNCE) { struct sockaddr laddr; if (rt->rt_ifp != NULL && rt->rt_ifp->if_type == IFT_PROPVIRTUAL) { struct ifaddr *ifa; ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1); if (ifa != NULL) rt_maskedcopy(ifa->ifa_addr, &laddr, ifa->ifa_netmask); } else rt_maskedcopy(rt->rt_ifa->ifa_addr, &laddr, rt->rt_ifa->ifa_netmask); /* * refactor rt and no lock operation necessary */ rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh); if (rt == NULL) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } } RT_LOCK(rt); RT_ADDREF(rt); RADIX_NODE_HEAD_RUNLOCK(rnh); switch(rtm->rtm_type) { case RTM_GET: report: RT_LOCK_ASSERT(rt); if ((rt->rt_flags & RTF_HOST) == 0 ? jailed_without_vnet(curthread->td_ucred) : prison_if(curthread->td_ucred, rt_key(rt)) != 0) { RT_UNLOCK(rt); senderr(ESRCH); } info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = 0; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; error = rtm_get_jailed(&info, ifp, rt, &saun, curthread->td_ucred); if (error != 0) { RT_UNLOCK(rt); senderr(error); } if (ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; rtm->rtm_index = ifp->if_index; } else { info.rti_info[RTAX_IFP] = NULL; info.rti_info[RTAX_IFA] = NULL; } } else if ((ifp = rt->rt_ifp) != NULL) { rtm->rtm_index = ifp->if_index; } len = rt_msg2(rtm->rtm_type, &info, NULL, NULL); if (len > rtm->rtm_msglen) { struct rt_msghdr *new_rtm; R_Malloc(new_rtm, struct rt_msghdr *, len); if (new_rtm == NULL) { RT_UNLOCK(rt); senderr(ENOBUFS); } bcopy(rtm, new_rtm, rtm->rtm_msglen); Free(rtm); rtm = new_rtm; } (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL); if (rt->rt_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_addrs = info.rti_addrs; break; case RTM_CHANGE: /* * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ if (((rt->rt_flags & RTF_GATEWAY) && info.rti_info[RTAX_GATEWAY] != NULL) || info.rti_info[RTAX_IFP] != NULL || (info.rti_info[RTAX_IFA] != NULL && !sa_equal(info.rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { RT_UNLOCK(rt); RADIX_NODE_HEAD_LOCK(rnh); error = rt_getifa_fib(&info, rt->rt_fibnum); /* * XXXRW: Really we should release this * reference later, but this maintains * historical behavior. */ if (info.rti_ifa != NULL) ifa_free(info.rti_ifa); RADIX_NODE_HEAD_UNLOCK(rnh); if (error != 0) senderr(error); RT_LOCK(rt); } if (info.rti_ifa != NULL && info.rti_ifa != rt->rt_ifa && rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL) { rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, &info); ifa_free(rt->rt_ifa); } if (info.rti_info[RTAX_GATEWAY] != NULL) { RT_UNLOCK(rt); RADIX_NODE_HEAD_LOCK(rnh); RT_LOCK(rt); error = rt_setgate(rt, rt_key(rt), info.rti_info[RTAX_GATEWAY]); RADIX_NODE_HEAD_UNLOCK(rnh); if (error != 0) { RT_UNLOCK(rt); senderr(error); } rt->rt_flags &= ~RTF_GATEWAY; rt->rt_flags |= (RTF_GATEWAY & info.rti_flags); } if (info.rti_ifa != NULL && info.rti_ifa != rt->rt_ifa) { ifa_ref(info.rti_ifa); rt->rt_ifa = info.rti_ifa; rt->rt_ifp = info.rti_ifp; } /* Allow some flags to be toggled on change. */ rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) | (rtm->rtm_flags & RTF_FMASK); rt_setmetrics(rtm, rt); rtm->rtm_index = rt->rt_ifp->if_index; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); /* FALLTHROUGH */ case RTM_LOCK: /* We don't support locks anymore */ break; } RT_UNLOCK(rt); break; default: senderr(EOPNOTSUPP); } flush: if (rtm) { if (error) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; } if (rt) /* XXX can this be true? */ RTFREE(rt); { struct rawcb *rp = NULL; /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { if (V_route_cb.any_count <= 1) { if (rtm) Free(rtm); m_freem(m); return (error); } /* There is another listener, so construct message */ rp = sotorawcb(so); } if (rtm) { #ifdef INET6 if (rti_need_deembed) { /* sin6_scope_id is recovered before sending rtm. */ sin6 = (struct sockaddr_in6 *)&ss; for (i = 0; i < RTAX_MAX; i++) { if (info.rti_info[i] == NULL) continue; if (info.rti_info[i]->sa_family != AF_INET6) continue; bcopy(info.rti_info[i], sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) bcopy(sin6, info.rti_info[i], sizeof(*sin6)); } } #endif m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); } if (m) { M_SETFIB(m, so->so_fibnum); m->m_flags |= RTS_FILTER_FIB; if (rp) { /* * XXX insure we don't get a copy by * invalidating our protocol */ unsigned short family = rp->rcb_proto.sp_family; rp->rcb_proto.sp_family = 0; rt_dispatch(m, saf); rp->rcb_proto.sp_family = family; } else rt_dispatch(m, saf); } /* info.rti_info[RTAX_DST] (used above) can point inside of rtm */ if (rtm) Free(rtm); } return (error); #undef sa_equal } static void rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt) { if (rtm->rtm_inits & RTV_MTU) rt->rt_mtu = rtm->rtm_rmx.rmx_mtu; if (rtm->rtm_inits & RTV_WEIGHT) rt->rt_weight = rtm->rtm_rmx.rmx_weight; /* Kernel -> userland timebase conversion. */ if (rtm->rtm_inits & RTV_EXPIRE) rt->rt_expire = rtm->rtm_rmx.rmx_expire ? rtm->rtm_rmx.rmx_expire - time_second + time_uptime : 0; } static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out) { bzero(out, sizeof(*out)); out->rmx_mtu = rt->rt_mtu; out->rmx_weight = rt->rt_weight; out->rmx_pksent = counter_u64_fetch(rt->rt_pksent); /* Kernel -> userland timebase conversion. */ out->rmx_expire = rt->rt_expire ? rt->rt_expire - time_uptime + time_second : 0; } /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. * This data is derived straight from userland. */ static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) { struct sockaddr *sa; int i; for (i = 0; i < RTAX_MAX && cp < cplim; i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if (cp + sa->sa_len > cplim) return (EINVAL); /* * there are no more.. quit now * If there are more bits, they are in error. * I've seen this. route(1) can evidently generate these. * This causes kernel to core dump. * for compatibility, If we see this, point to a safe address. */ if (sa->sa_len == 0) { rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } /* accept it */ #ifdef INET6 if (sa->sa_family == AF_INET6) sa6_embedscope((struct sockaddr_in6 *)sa, V_ip6_use_defzone); #endif rtinfo->rti_info[i] = sa; cp += SA_SIZE(sa); } return (0); } /* * Used by the routing socket. */ static struct mbuf * rt_msg1(int type, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm; struct mbuf *m; int i; struct sockaddr *sa; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; #endif int len, dlen; switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; case RTM_IFANNOUNCE: case RTM_IEEE80211: len = sizeof(struct if_announcemsghdr); break; default: len = sizeof(struct rt_msghdr); } /* XXXGL: can we use MJUMPAGESIZE cluster here? */ KASSERT(len <= MCLBYTES, ("%s: message too big", __func__)); if (len > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (m); m->m_pkthdr.len = m->m_len = len; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); #ifdef INET6 if (V_deembed_scopeid && sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)&ss; bcopy(sa, sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) sa = (struct sockaddr *)sin6; } #endif m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } if (m->m_pkthdr.len != len) { m_freem(m); return (NULL); } rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } /* * Used by the sysctl code and routing socket. */ static int rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) { int i; int len, dlen, second_time = 0; caddr_t cp0; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; #endif rtinfo->rti_addrs = 0; again: switch (type) { case RTM_DELADDR: case RTM_NEWADDR: if (w != NULL && w->w_op == NET_RT_IFLISTL) { #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) len = sizeof(struct ifa_msghdrl32); else #endif len = sizeof(struct ifa_msghdrl); } else len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: #ifdef COMPAT_FREEBSD32 if (w != NULL && w->w_req->flags & SCTL_MASK32) { if (w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl32); else len = sizeof(struct if_msghdr32); break; } #endif if (w != NULL && w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl); else len = sizeof(struct if_msghdr); break; case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; default: len = sizeof(struct rt_msghdr); } cp0 = cp; if (cp0) cp += len; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); if (cp) { #ifdef INET6 if (V_deembed_scopeid && sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)&ss; bcopy(sa, sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) sa = (struct sockaddr *)sin6; } #endif bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; } len += dlen; } len = ALIGN(len); if (cp == NULL && w != NULL && !second_time) { struct walkarg *rw = w; if (rw->w_req) { if (rw->w_tmemsize < len) { if (rw->w_tmem) free(rw->w_tmem, M_RTABLE); rw->w_tmem = (caddr_t) malloc(len, M_RTABLE, M_NOWAIT); if (rw->w_tmem) rw->w_tmemsize = len; } if (rw->w_tmem) { cp = rw->w_tmem; second_time = 1; goto again; } } } if (cp) { struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } return (len); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occured, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error, int fibnum) { struct rt_msghdr *rtm; struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (V_route_cb.any_count == 0) return; m = rt_msg1(type, rtinfo); if (m == NULL) return; if (fibnum != RT_ALL_FIBS) { KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out " "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs)); M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); } void rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) { rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. */ void rt_ifmsg(struct ifnet *ifp) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rt_msg1(RTM_IFINFO, &info); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = 0; rt_dispatch(m, AF_UNSPEC); } /* * Announce interface address arrival/withdraw. * Please do not call directly, use rt_addrmsg(). * Assume input data to be valid. * Returns 0 on success. */ int rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { struct rt_addrinfo info; struct sockaddr *sa; int ncmd; struct mbuf *m; struct ifa_msghdr *ifam; struct ifnet *ifp = ifa->ifa_ifp; if (V_route_cb.any_count == 0) return (0); ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; if ((m = rt_msg1(ncmd, &info)) == NULL) return (ENOBUFS); ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; if (fibnum != RT_ALL_FIBS) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); return (0); } /* * Announce route addition/removal. * Please do not call directly, use rt_routemsg(). * Note that @rt data MAY be inconsistent/invalid: * if some userland app sends us "invalid" route message (invalid mask, * no dst, wrong address families, etc...) we need to pass it back * to app (and any other rtsock consumers) with rtm_errno field set to * non-zero value. * * Returns 0 on success. */ int rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, int fibnum) { struct rt_addrinfo info; struct sockaddr *sa; struct mbuf *m; struct rt_msghdr *rtm; if (V_route_cb.any_count == 0) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_DST] = sa = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; if ((m = rt_msg1(cmd, &info)) == NULL) return (ENOBUFS); rtm = mtod(m, struct rt_msghdr *); rtm->rtm_index = ifp->if_index; rtm->rtm_flags |= rt->rt_flags; rtm->rtm_errno = error; rtm->rtm_addrs = info.rti_addrs; if (fibnum != RT_ALL_FIBS) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); return (0); } /* * This is the analogue to the rt_newaddrmsg which performs the same * function but for multicast group memberhips. This is easier since * there is no route state to worry about. */ void rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) { struct rt_addrinfo info; struct mbuf *m = NULL; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL; /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). */ info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; m = rt_msg1(cmd, &info); if (m == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n", __func__)); ifmam->ifmam_index = ifp->if_index; ifmam->ifmam_addrs = info.rti_addrs; rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC); } static struct mbuf * rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, struct rt_addrinfo *info) { struct if_announcemsghdr *ifan; struct mbuf *m; if (V_route_cb.any_count == 0) return NULL; bzero((caddr_t)info, sizeof(*info)); m = rt_msg1(type, info); if (m != NULL) { ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); ifan->ifan_what = what; } return m; } /* * This is called to generate routing socket messages indicating * IEEE80211 wireless events. * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. */ void rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); if (m != NULL) { /* * Append the ieee80211 data. Try to stick it in the * mbuf containing the ifannounce msg; otherwise allocate * a new mbuf and append. * * NB: we assume m is a single mbuf. */ if (data_len > M_TRAILINGSPACE(m)) { struct mbuf *n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } bcopy(data, mtod(n, void *), data_len); n->m_len = data_len; m->m_next = n; } else if (data_len > 0) { bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); m->m_len += data_len; } if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += data_len; mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; rt_dispatch(m, AF_UNSPEC); } } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. */ void rt_ifannouncemsg(struct ifnet *ifp, int what) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info); if (m != NULL) rt_dispatch(m, AF_UNSPEC); } static void rt_dispatch(struct mbuf *m, sa_family_t saf) { struct m_tag *tag; /* * Preserve the family from the sockaddr, if any, in an m_tag for * use when injecting the mbuf into the routing socket buffer from * the netisr. */ if (saf != AF_UNSPEC) { tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short), M_NOWAIT); if (tag == NULL) { m_freem(m); return; } *(unsigned short *)(tag + 1) = saf; m_tag_prepend(m, tag); } #ifdef VIMAGE if (V_loif) m->m_pkthdr.rcvif = V_loif; else { m_freem(m); return; } #endif netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */ } /* * This is used in dumping the kernel table via sysctl(). */ static int sysctl_dumpentry(struct radix_node *rn, void *vw) { struct walkarg *w = vw; struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; if ((rt->rt_flags & RTF_HOST) == 0 ? jailed_without_vnet(w->w_req->td->td_ucred) : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = 0; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } size = rt_msg2(RTM_GET, &info, NULL, w); if (w->w_req && w->w_tmem) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; if (rt->rt_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); return (error); } return (error); } -#ifdef COMPAT_FREEBSD32 -static void -copy_ifdata32(struct if_data *src, struct if_data32 *dst) -{ - - bzero(dst, sizeof(*dst)); - CP(*src, *dst, ifi_type); - CP(*src, *dst, ifi_physical); - CP(*src, *dst, ifi_addrlen); - CP(*src, *dst, ifi_hdrlen); - CP(*src, *dst, ifi_link_state); - CP(*src, *dst, ifi_vhid); - CP(*src, *dst, ifi_baudrate_pf); - dst->ifi_datalen = sizeof(struct if_data32); - CP(*src, *dst, ifi_mtu); - CP(*src, *dst, ifi_metric); - CP(*src, *dst, ifi_baudrate); - CP(*src, *dst, ifi_ipackets); - CP(*src, *dst, ifi_ierrors); - CP(*src, *dst, ifi_opackets); - CP(*src, *dst, ifi_oerrors); - CP(*src, *dst, ifi_collisions); - CP(*src, *dst, ifi_ibytes); - CP(*src, *dst, ifi_obytes); - CP(*src, *dst, ifi_imcasts); - CP(*src, *dst, ifi_omcasts); - CP(*src, *dst, ifi_iqdrops); - CP(*src, *dst, ifi_noproto); - CP(*src, *dst, ifi_hwassist); - CP(*src, *dst, ifi_epoch); - TV_CP(*src, *dst, ifi_lastchange); -} -#endif - static int sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdrl *ifm; + struct if_data *ifd; + ifm = (struct if_msghdrl *)w->w_tmem; + #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdrl32 *ifm32; - ifm32 = (struct if_msghdrl32 *)w->w_tmem; + ifm32 = (struct if_msghdrl32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifm32->_ifm_spare1 = 0; ifm32->ifm_len = sizeof(*ifm32); ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data); - - copy_ifdata32(&ifp->if_data, &ifm32->ifm_data); - /* Fixup if_data carp(4) vhid. */ - if (carp_get_vhid_p != NULL) - ifm32->ifm_data.ifi_vhid = - (*carp_get_vhid_p)(ifp->if_addr); - - return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len)); - } + ifd = &ifm32->ifm_data; + } else #endif - ifm = (struct if_msghdrl *)w->w_tmem; - ifm->ifm_addrs = info->rti_addrs; - ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; - ifm->ifm_index = ifp->if_index; - ifm->_ifm_spare1 = 0; - ifm->ifm_len = sizeof(*ifm); - ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data); + { + ifm->ifm_addrs = info->rti_addrs; + ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; + ifm->ifm_index = ifp->if_index; + ifm->_ifm_spare1 = 0; + ifm->ifm_len = sizeof(*ifm); + ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data); + ifd = &ifm->ifm_data; + } - ifm->ifm_data = ifp->if_data; + *ifd = ifp->if_data; + /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) - ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr); + ifd->ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdr *ifm; + struct if_data *ifd; + ifm = (struct if_msghdr *)w->w_tmem; + #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdr32 *ifm32; - ifm32 = (struct if_msghdr32 *)w->w_tmem; + ifm32 = (struct if_msghdr32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; - - copy_ifdata32(&ifp->if_data, &ifm32->ifm_data); - /* Fixup if_data carp(4) vhid. */ - if (carp_get_vhid_p != NULL) - ifm32->ifm_data.ifi_vhid = - (*carp_get_vhid_p)(ifp->if_addr); - - return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len)); - } + ifd = &ifm32->ifm_data; + } else #endif - ifm = (struct if_msghdr *)w->w_tmem; - ifm->ifm_addrs = info->rti_addrs; - ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; - ifm->ifm_index = ifp->if_index; + { + ifm->ifm_addrs = info->rti_addrs; + ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; + ifm->ifm_index = ifp->if_index; + ifd = &ifm->ifm_data; + } - ifm->ifm_data = ifp->if_data; + *ifd = ifp->if_data; /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) - ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr); + ifd->ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdrl *ifam; + struct if_data *ifd; + ifam = (struct ifa_msghdrl *)w->w_tmem; + #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct ifa_msghdrl32 *ifam32; - ifam32 = (struct ifa_msghdrl32 *)w->w_tmem; + ifam32 = (struct ifa_msghdrl32 *)ifam; ifam32->ifam_addrs = info->rti_addrs; ifam32->ifam_flags = ifa->ifa_flags; ifam32->ifam_index = ifa->ifa_ifp->if_index; ifam32->_ifam_spare1 = 0; ifam32->ifam_len = sizeof(*ifam32); ifam32->ifam_data_off = offsetof(struct ifa_msghdrl32, ifam_data); ifam32->ifam_metric = ifa->ifa_metric; - - bzero(&ifam32->ifam_data, sizeof(ifam32->ifam_data)); - ifam32->ifam_data.ifi_datalen = sizeof(struct if_data32); - ifam32->ifam_data.ifi_ipackets = - counter_u64_fetch(ifa->ifa_ipackets); - ifam32->ifam_data.ifi_opackets = - counter_u64_fetch(ifa->ifa_opackets); - ifam32->ifam_data.ifi_ibytes = - counter_u64_fetch(ifa->ifa_ibytes); - ifam32->ifam_data.ifi_obytes = - counter_u64_fetch(ifa->ifa_obytes); - - /* Fixup if_data carp(4) vhid. */ - if (carp_get_vhid_p != NULL) - ifam32->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa); - - return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len)); - } + ifd = &ifam32->ifam_data; + } else #endif + { + ifam->ifam_addrs = info->rti_addrs; + ifam->ifam_flags = ifa->ifa_flags; + ifam->ifam_index = ifa->ifa_ifp->if_index; + ifam->_ifam_spare1 = 0; + ifam->ifam_len = sizeof(*ifam); + ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); + ifam->ifam_metric = ifa->ifa_metric; + ifd = &ifam->ifam_data; + } - ifam = (struct ifa_msghdrl *)w->w_tmem; - ifam->ifam_addrs = info->rti_addrs; - ifam->ifam_flags = ifa->ifa_flags; - ifam->ifam_index = ifa->ifa_ifp->if_index; - ifam->_ifam_spare1 = 0; - ifam->ifam_len = sizeof(*ifam); - ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); - ifam->ifam_metric = ifa->ifa_metric; + bzero(ifd, sizeof(*ifd)); + ifd->ifi_datalen = sizeof(struct if_data); + ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); + ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); + ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); + ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); - bzero(&ifam->ifam_data, sizeof(ifam->ifam_data)); - ifam->ifam_data.ifi_datalen = sizeof(struct if_data); - ifam->ifam_data.ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); - ifam->ifam_data.ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); - ifam->ifam_data.ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); - ifam->ifam_data.ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); - /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) - ifam->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa); + ifd->ifi_vhid = (*carp_get_vhid_p)(ifa); return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_metric = ifa->ifa_metric; return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; bzero((caddr_t)&info, sizeof(info)); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; len = rt_msg2(RTM_IFINFO, &info, NULL, w); info.rti_info[RTAX_IFP] = NULL; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifml(ifp, &info, w, len); else error = sysctl_iflist_ifm(ifp, &info, w, len); if (error) goto done; } while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) { if (af && af != ifa->ifa_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifa->ifa_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; len = rt_msg2(RTM_NEWADDR, &info, NULL, w); if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifaml(ifa, &info, w, len); else error = sysctl_iflist_ifam(ifa, &info, w, len); if (error) goto done; } } IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = info.rti_info[RTAX_BRD] = NULL; } done: if (ifp != NULL) IF_ADDR_RUNLOCK(ifp); IFNET_RUNLOCK_NOSLEEP(); return (error); } static int sysctl_ifmalist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifmultiaddr *ifma; struct rt_addrinfo info; int len, error = 0; struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (af && af != ifma->ifma_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifma->ifma_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_GATEWAY] = (ifma->ifma_addr->sa_family != AF_LINK) ? ifma->ifma_lladdr : NULL; len = rt_msg2(RTM_NEWMADDR, &info, NULL, w); if (w->w_req && w->w_tmem) { struct ifma_msghdr *ifmam; ifmam = (struct ifma_msghdr *)w->w_tmem; ifmam->ifmam_index = ifma->ifma_ifp->if_index; ifmam->ifmam_flags = 0; ifmam->ifmam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) { IF_ADDR_RUNLOCK(ifp); goto done; } } } IF_ADDR_RUNLOCK(ifp); } done: IFNET_RUNLOCK_NOSLEEP(); return (error); } static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct radix_node_head *rnh = NULL; /* silence compiler. */ int i, lim, error = EINVAL; int fib = 0; u_char af; struct walkarg w; name ++; namelen--; if (req->newptr) return (EPERM); if (name[1] == NET_RT_DUMP) { if (namelen == 3) fib = req->td->td_proc->p_fibnum; else if (namelen == 4) fib = (name[3] == RT_ALL_FIBS) ? req->td->td_proc->p_fibnum : name[3]; else return ((namelen < 3) ? EISDIR : ENOTDIR); if (fib < 0 || fib >= rt_numfibs) return (EINVAL); } else if (namelen != 3) return ((namelen < 3) ? EISDIR : ENOTDIR); af = name[0]; if (af > AF_MAX) return (EINVAL); bzero(&w, sizeof(w)); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: if (af == 0) { /* dump all tables */ i = 1; lim = AF_MAX; } else /* dump only one table */ i = lim = af; /* * take care of llinfo entries, the caller must * specify an AF */ if (w.w_op == NET_RT_FLAGS && (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) { if (af != 0) error = lltable_sysctl_dumparp(af, w.w_req); else error = EINVAL; break; } /* * take care of routing entries */ for (error = 0; error == 0 && i <= lim; i++) { rnh = rt_tables_get_rnh(fib, i); if (rnh != NULL) { RADIX_NODE_HEAD_RLOCK(rnh); error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w); RADIX_NODE_HEAD_RUNLOCK(rnh); } else if (af != 0) error = EAFNOSUPPORT; } break; case NET_RT_IFLIST: case NET_RT_IFLISTL: error = sysctl_iflist(af, &w); break; case NET_RT_IFMALIST: error = sysctl_ifmalist(af, &w); break; } if (w.w_tmem) free(w.w_tmem, M_RTABLE); return (error); } static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. */ static struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { .pr_type = SOCK_RAW, .pr_domain = &routedomain, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_output = route_output, .pr_ctlinput = raw_ctlinput, .pr_init = raw_init, .pr_usrreqs = &route_usrreqs } }; static struct domain routedomain = { .dom_family = PF_ROUTE, .dom_name = "route", .dom_protosw = routesw, .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])] }; VNET_DOMAIN_SET(route); Index: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c (revision 263101) +++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c (revision 263102) @@ -1,1544 +1,1544 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "ipoib.h" static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); #include #include #include #include #include #include /* For ARPHRD_xxx */ #include #include #include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); int ipoib_sendq_size = IPOIB_TX_RING_SIZE; int ipoib_recvq_size = IPOIB_RX_RING_SIZE; module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level = 1; module_param_named(debug_level, ipoib_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif struct ipoib_path_iter { struct ipoib_dev_priv *priv; struct ipoib_path path; }; static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff }; struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); static void ipoib_start(struct ifnet *dev); static int ipoib_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void ipoib_input(struct ifnet *ifp, struct mbuf *m); #define IPOIB_MTAP(_ifp, _m) \ do { \ if (bpf_peers_present((_ifp)->if_bpf)) { \ M_ASSERTVALID(_m); \ ipoib_mtap_mb((_ifp), (_m)); \ } \ } while (0) /* * This is for clients that have an ipoib_header in the mbuf. */ static void ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb) { struct ipoib_header *ih; struct ether_header eh; ih = mtod(mb, struct ipoib_header *); eh.ether_type = ih->proto; bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN); bzero(&eh.ether_shost, ETHER_ADDR_LEN); mb->m_data += sizeof(struct ipoib_header); mb->m_len -= sizeof(struct ipoib_header); bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); mb->m_data -= sizeof(struct ipoib_header); mb->m_len += sizeof(struct ipoib_header); } void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto) { struct ether_header eh; eh.ether_type = proto; bzero(&eh.ether_shost, ETHER_ADDR_LEN); bzero(&eh.ether_dhost, ETHER_ADDR_LEN); bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); } static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, .remove = ipoib_remove_one }; int ipoib_open(struct ipoib_dev_priv *priv) { struct ifnet *dev = priv->dev; ipoib_dbg(priv, "bringing up interface\n"); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(priv)) return 0; if (ipoib_ib_dev_open(priv)) goto err_disable; if (ipoib_ib_dev_up(priv)) goto err_stop; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0) ipoib_open(cpriv); mutex_unlock(&priv->vlan_mutex); } dev->if_drv_flags |= IFF_DRV_RUNNING; dev->if_drv_flags &= ~IFF_DRV_OACTIVE; return 0; err_stop: ipoib_ib_dev_stop(priv, 1); err_disable: clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); return -EINVAL; } static void ipoib_init(void *arg) { struct ifnet *dev; struct ipoib_dev_priv *priv; priv = arg; dev = priv->dev; if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) ipoib_open(priv); queue_work(ipoib_workqueue, &priv->flush_light); } static int ipoib_stop(struct ipoib_dev_priv *priv) { struct ifnet *dev = priv->dev; ipoib_dbg(priv, "stopping interface\n"); clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); ipoib_ib_dev_down(priv, 0); ipoib_ib_dev_stop(priv, 0); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0) ipoib_stop(cpriv); mutex_unlock(&priv->vlan_mutex); } return 0; } int ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu) { struct ifnet *dev = priv->dev; /* dev->if_mtu > 2K ==> connected mode */ if (ipoib_cm_admin_enabled(priv)) { if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv))) return -EINVAL; if (new_mtu > priv->mcast_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); dev->if_mtu = new_mtu; return 0; } if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; dev->if_mtu = min(priv->mcast_mtu, priv->admin_mtu); queue_work(ipoib_workqueue, &priv->flush_light); return 0; } static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ipoib_dev_priv *priv = ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) error = -ipoib_open(priv); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ipoib_stop(priv); break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) queue_work(ipoib_workqueue, &priv->restart_task); break; case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, INFINIBAND_ALEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ error = -ipoib_change_mtu(priv, ifr->ifr_mtu); break; default: error = EINVAL; break; } return (error); } static struct ipoib_path * __path_find(struct ipoib_dev_priv *priv, void *gid) { struct rb_node *n = priv->path_tree.rb_node; struct ipoib_path *path; int ret; while (n) { path = rb_entry(n, struct ipoib_path, rb_node); ret = memcmp(gid, path->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = n->rb_left; else if (ret > 0) n = n->rb_right; else return path; } return NULL; } static int __path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path) { struct rb_node **n = &priv->path_tree.rb_node; struct rb_node *pn = NULL; struct ipoib_path *tpath; int ret; while (*n) { pn = *n; tpath = rb_entry(pn, struct ipoib_path, rb_node); ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = &pn->rb_left; else if (ret > 0) n = &pn->rb_right; else return -EEXIST; } rb_link_node(&path->rb_node, pn, n); rb_insert_color(&path->rb_node, &priv->path_tree); list_add_tail(&path->list, &priv->path_list); return 0; } void ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path) { _IF_DRAIN(&path->queue); if (path->ah) ipoib_put_ah(path->ah); if (ipoib_cm_get(path)) ipoib_cm_destroy_tx(ipoib_cm_get(path)); kfree(path); } #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_path_iter * ipoib_path_iter_init(struct ipoib_dev_priv *priv) { struct ipoib_path_iter *iter; iter = kmalloc(sizeof *iter, GFP_KERNEL); if (!iter) return NULL; iter->priv = priv; memset(iter->path.pathrec.dgid.raw, 0, 16); if (ipoib_path_iter_next(iter)) { kfree(iter); return NULL; } return iter; } int ipoib_path_iter_next(struct ipoib_path_iter *iter) { struct ipoib_dev_priv *priv = iter->priv; struct rb_node *n; struct ipoib_path *path; int ret = 1; spin_lock_irq(&priv->lock); n = rb_first(&priv->path_tree); while (n) { path = rb_entry(n, struct ipoib_path, rb_node); if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, sizeof (union ib_gid)) < 0) { iter->path = *path; ret = 0; break; } n = rb_next(n); } spin_unlock_irq(&priv->lock); return ret; } void ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path) { *path = iter->path; } #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ void ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv) { struct ipoib_path *path, *tp; spin_lock_irq(&priv->lock); list_for_each_entry_safe(path, tp, &priv->path_list, list) { ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n", be16_to_cpu(path->pathrec.dlid), path->pathrec.dgid.raw, ":"); path->valid = 0; } spin_unlock_irq(&priv->lock); } void ipoib_flush_paths(struct ipoib_dev_priv *priv) { struct ipoib_path *path, *tp; LIST_HEAD(remove_list); unsigned long flags; spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->path_list, &remove_list); list_for_each_entry(path, &remove_list, list) rb_erase(&path->rb_node, &priv->path_tree); list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); spin_unlock_irqrestore(&priv->lock, flags); wait_for_completion(&path->done); ipoib_path_free(priv, path); spin_lock_irqsave(&priv->lock, flags); } spin_unlock_irqrestore(&priv->lock, flags); } static void path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr) { struct ipoib_path *path = path_ptr; struct ipoib_dev_priv *priv = path->priv; struct ifnet *dev = priv->dev; struct ipoib_ah *ah = NULL; struct ipoib_ah *old_ah = NULL; struct ifqueue mbqueue; struct mbuf *mb; unsigned long flags; if (!status) ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n", be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":"); else ipoib_dbg(priv, "PathRec status %d for GID %16D\n", status, path->pathrec.dgid.raw, ":"); bzero(&mbqueue, sizeof(mbqueue)); if (!status) { struct ib_ah_attr av; if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) ah = ipoib_create_ah(priv, priv->pd, &av); } spin_lock_irqsave(&priv->lock, flags); if (ah) { path->pathrec = *pathrec; old_ah = path->ah; path->ah = ah; ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", ah, be16_to_cpu(pathrec->dlid), pathrec->sl); for (;;) { _IF_DEQUEUE(&path->queue, mb); if (mb == NULL) break; _IF_ENQUEUE(&mbqueue, mb); } #ifdef CONFIG_INFINIBAND_IPOIB_CM if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path)) ipoib_cm_set(path, ipoib_cm_create_tx(priv, path)); #endif path->valid = 1; } path->query = NULL; complete(&path->done); spin_unlock_irqrestore(&priv->lock, flags); if (old_ah) ipoib_put_ah(old_ah); for (;;) { _IF_DEQUEUE(&mbqueue, mb); if (mb == NULL) break; mb->m_pkthdr.rcvif = dev; if (dev->if_transmit(dev, mb)) ipoib_warn(priv, "dev_queue_xmit failed " "to requeue packet\n"); } } static struct ipoib_path * path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr) { struct ipoib_path *path; if (!priv->broadcast) return NULL; path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; path->priv = priv; bzero(&path->queue, sizeof(path->queue)); #ifdef CONFIG_INFINIBAND_IPOIB_CM memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN); #endif memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid)); path->pathrec.sgid = priv->local_gid; path->pathrec.pkey = cpu_to_be16(priv->pkey); path->pathrec.numb_path = 1; path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; return path; } static int path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path) { struct ifnet *dev = priv->dev; ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU; struct ib_sa_path_rec p_rec; p_rec = path->pathrec; p_rec.mtu_selector = IB_SA_GT; switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) { case 512: p_rec.mtu = IB_MTU_256; break; case 1024: p_rec.mtu = IB_MTU_512; break; case 2048: p_rec.mtu = IB_MTU_1024; break; case 4096: p_rec.mtu = IB_MTU_2048; break; default: /* Wildcard everything */ comp_mask = 0; p_rec.mtu = 0; p_rec.mtu_selector = 0; } ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n", p_rec.dgid.raw, ":", comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0); init_completion(&path->done); path->query_id = ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, &p_rec, comp_mask | IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_TRAFFIC_CLASS | IB_SA_PATH_REC_PKEY, 1000, GFP_ATOMIC, path_rec_completion, path, &path->query); if (path->query_id < 0) { ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); path->query = NULL; complete(&path->done); return path->query_id; } return 0; } static void ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh) { struct ipoib_path *path; path = __path_find(priv, eh->hwaddr + 4); if (!path || !path->valid) { int new_path = 0; if (!path) { path = path_rec_create(priv, eh->hwaddr); new_path = 1; } if (path) { _IF_ENQUEUE(&path->queue, mb); if (!path->query && path_rec_start(priv, path)) { spin_unlock_irqrestore(&priv->lock, flags); if (new_path) ipoib_path_free(priv, path); return; } else __path_add(priv, path); } else { ++priv->dev->if_oerrors; m_freem(mb); } return; } if (ipoib_cm_get(path) && ipoib_cm_up(path)) { ipoib_cm_send(priv, mb, ipoib_cm_get(path)); } else if (path->ah) { ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr)); } else if ((path->query || !path_rec_start(priv, path)) && path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) { _IF_ENQUEUE(&path->queue, mb); } else { ++priv->dev->if_oerrors; m_freem(mb); } } static int ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb) { struct ipoib_header *eh; eh = mtod(mb, struct ipoib_header *); if (IPOIB_IS_MULTICAST(eh->hwaddr)) { /* Add in the P_Key for multicast*/ eh->hwaddr[8] = (priv->pkey >> 8) & 0xff; eh->hwaddr[9] = priv->pkey & 0xff; ipoib_mcast_send(priv, eh->hwaddr + 4, mb); } else ipoib_unicast_send(mb, priv, eh); return 0; } static void _ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) { struct mbuf *mb; if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; spin_lock(&priv->lock); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) && (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; IPOIB_MTAP(dev, mb); ipoib_send_one(priv, mb); } spin_unlock(&priv->lock); } static void ipoib_start(struct ifnet *dev) { _ipoib_start(dev, dev->if_softc); } static void ipoib_vlan_start(struct ifnet *dev) { struct ipoib_dev_priv *priv; struct mbuf *mb; priv = VLAN_COOKIE(dev); if (priv != NULL) return _ipoib_start(dev, priv); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; m_freem(mb); dev->if_oerrors++; } } int ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port) { /* Allocate RX/TX "rings" to hold queued mbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, ipoib_recvq_size); goto out; } priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ if (ipoib_ib_dev_init(priv, ca, port)) goto out_tx_ring_cleanup; return 0; out_tx_ring_cleanup: kfree(priv->tx_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); out: return -ENOMEM; } static void ipoib_detach(struct ipoib_dev_priv *priv) { struct ifnet *dev; dev = priv->dev; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { bpfdetach(dev); if_detach(dev); if_free(dev); } else VLAN_SETCOOKIE(priv->dev, NULL); free(priv, M_TEMP); } void ipoib_dev_cleanup(struct ipoib_dev_priv *priv) { struct ipoib_dev_priv *cpriv, *tcpriv; /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { ipoib_dev_cleanup(cpriv); ipoib_detach(cpriv); } ipoib_ib_dev_cleanup(priv); kfree(priv->rx_ring); kfree(priv->tx_ring); priv->rx_ring = NULL; priv->tx_ring = NULL; } static volatile int ipoib_unit; static struct ipoib_dev_priv * ipoib_priv_alloc(void) { struct ipoib_dev_priv *priv; priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK); spin_lock_init(&priv->lock); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN); return (priv); } struct ipoib_dev_priv * ipoib_intf_alloc(const char *name) { struct ipoib_dev_priv *priv; struct sockaddr_dl *sdl; struct ifnet *dev; priv = ipoib_priv_alloc(); dev = priv->dev = if_alloc(IFT_INFINIBAND); if (!dev) { free(priv, M_TEMP); return NULL; } dev->if_softc = priv; if_initname(dev, name, atomic_fetchadd_int(&ipoib_unit, 1)); dev->if_flags = IFF_BROADCAST | IFF_MULTICAST; dev->if_addrlen = INFINIBAND_ALEN; dev->if_hdrlen = IPOIB_HEADER_LEN; if_attach(dev); dev->if_init = ipoib_init; dev->if_ioctl = ipoib_ioctl; dev->if_start = ipoib_start; dev->if_output = ipoib_output; dev->if_input = ipoib_input; dev->if_resolvemulti = ipoib_resolvemulti; - if_initbaudrate(dev, IF_Gbps(10)); + dev->if_baudrate = IF_Gbps(10); dev->if_broadcastaddr = priv->broadcastaddr; dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2; sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr; sdl->sdl_type = IFT_INFINIBAND; sdl->sdl_alen = dev->if_addrlen; priv->dev = dev; if_link_state_change(dev, LINK_STATE_DOWN); bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN); return dev->if_softc; } int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) { struct ib_device_attr *device_attr; int result = -ENOMEM; device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); if (!device_attr) { printk(KERN_WARNING "%s: allocation of %zu bytes failed\n", hca->name, sizeof *device_attr); return result; } result = ib_query_device(hca, device_attr); if (result) { printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n", hca->name, result); kfree(device_attr); return result; } priv->hca_caps = device_attr->device_cap_flags; kfree(device_attr); priv->dev->if_hwassist = 0; priv->dev->if_capabilities = 0; #ifndef CONFIG_INFINIBAND_IPOIB_CM if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { set_bit(IPOIB_FLAG_CSUM, &priv->flags); priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; } #if 0 if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) { priv->dev->if_capabilities |= IFCAP_TSO4; priv->dev->if_hwassist |= CSUM_TSO; } #endif #endif priv->dev->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE; priv->dev->if_capenable = priv->dev->if_capabilities; return 0; } static struct ifnet * ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); if (!priv) goto alloc_mem_failed; if (!ib_query_port(hca, port, &attr)) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); else { printk(KERN_WARNING "%s: ib_query_port %d failed\n", hca->name, port); goto device_init_failed; } /* MTU will be reset when mcast join happens */ priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu; result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } if (ipoib_set_dev_features(priv, hca)) goto device_init_failed; /* * Set the full membership bit, so that we join the right * broadcast group, etc. */ priv->pkey |= 0x8000; priv->broadcastaddr[8] = priv->pkey >> 8; priv->broadcastaddr[9] = priv->pkey & 0xff; result = ib_query_gid(hca, port, 0, &priv->local_gid); if (result) { printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid)); result = ipoib_dev_init(priv, hca, port); if (result < 0) { printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", hca->name, port, result); goto device_init_failed; } if (ipoib_cm_admin_enabled(priv)) priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)); INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); result = ib_register_event_handler(&priv->event_handler); if (result < 0) { printk(KERN_WARNING "%s: ib_register_event_handler failed for " "port %d (ret = %d)\n", hca->name, port, result); goto event_failed; } if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port); return priv->dev; event_failed: ipoib_dev_cleanup(priv); device_init_failed: ipoib_detach(priv); alloc_mem_failed: return ERR_PTR(result); } static void ipoib_add_one(struct ib_device *device) { struct list_head *dev_list; struct ifnet *dev; struct ipoib_dev_priv *priv; int s, e, p; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) return; INIT_LIST_HEAD(dev_list); if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { s = 1; e = device->phys_port_cnt; } for (p = s; p <= e; ++p) { if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) continue; dev = ipoib_add_port("ib", device, p); if (!IS_ERR(dev)) { priv = dev->if_softc; list_add_tail(&priv->list, dev_list); } } ib_set_client_data(device, &ipoib_client, dev_list); } static void ipoib_remove_one(struct ib_device *device) { struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; dev_list = ib_get_client_data(device, &ipoib_client); list_for_each_entry_safe(priv, tmp, dev_list, list) { if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND) continue; ipoib_stop(priv); ib_unregister_event_handler(&priv->event_handler); /* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */ flush_workqueue(ipoib_workqueue); ipoib_dev_cleanup(priv); ipoib_detach(priv); } kfree(dev_list); } static void ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct ipoib_dev_priv *parent; struct ipoib_dev_priv *priv; struct ifnet *dev; uint16_t pkey; int error; if (ifp->if_type != IFT_INFINIBAND) return; dev = VLAN_DEVAT(ifp, vtag); if (dev == NULL) return; priv = NULL; error = 0; parent = ifp->if_softc; /* We only support 15 bits of pkey. */ if (vtag & 0x8000) return; pkey = vtag | 0x8000; /* Set full membership bit. */ if (pkey == parent->pkey) return; /* Check for dups */ mutex_lock(&parent->vlan_mutex); list_for_each_entry(priv, &parent->child_intfs, list) { if (priv->pkey == pkey) { priv = NULL; error = EBUSY; goto out; } } priv = ipoib_priv_alloc(); priv->dev = dev; priv->max_ib_mtu = parent->max_ib_mtu; priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); error = ipoib_set_dev_features(priv, parent->ca); if (error) goto out; priv->pkey = pkey; priv->broadcastaddr[8] = pkey >> 8; priv->broadcastaddr[9] = pkey & 0xff; dev->if_broadcastaddr = priv->broadcastaddr; error = ipoib_dev_init(priv, parent->ca, parent->port); if (error) goto out; priv->parent = parent->dev; list_add_tail(&priv->list, &parent->child_intfs); VLAN_SETCOOKIE(dev, priv); dev->if_start = ipoib_vlan_start; dev->if_drv_flags &= ~IFF_DRV_RUNNING; dev->if_hdrlen = IPOIB_HEADER_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ipoib_open(priv); mutex_unlock(&parent->vlan_mutex); return; out: mutex_unlock(&parent->vlan_mutex); if (priv) free(priv, M_TEMP); if (error) ipoib_warn(parent, "failed to initialize subinterface: device %s, port %d vtag 0x%X", parent->ca->name, parent->port, vtag); return; } static void ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct ipoib_dev_priv *parent; struct ipoib_dev_priv *priv; struct ifnet *dev; uint16_t pkey; if (ifp->if_type != IFT_INFINIBAND) return; dev = VLAN_DEVAT(ifp, vtag); if (dev) VLAN_SETCOOKIE(dev, NULL); pkey = vtag | 0x8000; parent = ifp->if_softc; mutex_lock(&parent->vlan_mutex); list_for_each_entry(priv, &parent->child_intfs, list) { if (priv->pkey == pkey) { ipoib_dev_cleanup(priv); list_del(&priv->list); break; } } mutex_unlock(&parent->vlan_mutex); } eventhandler_tag ipoib_vlan_attach; eventhandler_tag ipoib_vlan_detach; static int __init ipoib_init_module(void) { int ret; ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE)); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST); ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST); /* * We create our own workqueue mainly because we want to be * able to flush it when devices are being removed. We can't * use schedule_work()/flush_scheduled_work() because both * unregister_netdev() and linkwatch_event take the rtnl lock, * so flush_scheduled_work() can deadlock during device * removal. */ ipoib_workqueue = create_singlethread_workqueue("ipoib"); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; } ib_sa_register_client(&ipoib_sa_client); ret = ib_register_client(&ipoib_client); if (ret) goto err_sa; return 0; err_sa: ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); err_fs: return ret; } static void __exit ipoib_cleanup_module(void) { EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach); EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach); ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); } /* * Infiniband output routine. */ static int ipoib_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_char edst[INFINIBAND_ALEN]; struct llentry *lle = NULL; struct rtentry *rt0 = NULL; struct ipoib_header *eh; int error = 0; short type; if (ro != NULL) { if (!(m->m_flags & (M_BCAST | M_MCAST))) lle = ro->ro_lle; rt0 = ro->ro_rt; } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) goto bad; #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) { error = ENETDOWN; goto bad; } if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) { error = ENETDOWN; goto bad; } switch (dst->sa_family) { #ifdef INET case AF_INET: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, &lle->ll_addr.mac8, sizeof(edst)); else if (m->m_flags & M_MCAST) ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst); else error = arpresolve(ifp, rt0, m, dst, edst, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_INFINIBAND); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN); else bcopy(ar_tha(ah), edst, INFINIBAND_ALEN); } break; #endif #ifdef INET6 case AF_INET6: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, &lle->ll_addr.mac8, sizeof(edst)); else if (m->m_flags & M_MCAST) ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst); else error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle); if (error) return error; type = htons(ETHERTYPE_IPV6); break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); error = EAFNOSUPPORT; goto bad; } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } eh = mtod(m, struct ipoib_header *); (void)memcpy(&eh->proto, &type, sizeof(eh->proto)); (void)memcpy(&eh->hwaddr, edst, sizeof (edst)); /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. */ return ((ifp->if_transmit)(ifp, m)); bad: if (m != NULL) m_freem(m); return (error); } /* * Upper layer processing for a received Infiniband packet. */ void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto) { int isr; #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { if_printf(ifp, "discard frame at IFF_MONITOR\n"); m_freem(m); return; } /* * Dispatch frame to upper layer. */ switch (proto) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } netisr_dispatch(isr, m); return; discard: m_freem(m); } /* * Process a received Infiniband packet. */ static void ipoib_input(struct ifnet *ifp, struct mbuf *m) { struct ipoib_header *eh; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } CURVNET_SET_QUIET(ifp->if_vnet); /* Let BPF have it before we strip the header. */ IPOIB_MTAP(ifp, m); eh = mtod(m, struct ipoib_header *); /* * Reset layer specific mbuf flags to avoid confusing upper layers. * Strip off Infiniband header. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); m_adj(m, IPOIB_HEADER_LEN); if (IPOIB_IS_MULTICAST(eh->hwaddr)) { if (memcmp(eh->hwaddr, ifp->if_broadcastaddr, ifp->if_addrlen) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; ifp->if_imcasts++; } ipoib_demux(ifp, m, ntohs(eh->proto)); CURVNET_RESTORE(); } static int ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!IPOIB_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); sdl->sdl_alen = INFINIBAND_ALEN; e_addr = LLADDR(sdl); ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; /* * An IP6 address of 0 means listen to all * of the multicast address used for IP6. * This has no meaning in ipoib. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) return EADDRNOTAVAIL; if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); sdl->sdl_alen = INFINIBAND_ALEN; e_addr = LLADDR(sdl); ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: return EAFNOSUPPORT; } } module_init(ipoib_init_module); module_exit(ipoib_cleanup_module); #undef MODULE_VERSION #include static int ipoib_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t ipoib_mod = { .name = "ipoib", .evhand = ipoib_evhand, }; DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY); MODULE_DEPEND(ipoib, ibcore, 1, 1, 1); Index: head/sys/ofed/drivers/net/mlx4/en_netdev.c =================================================================== --- head/sys/ofed/drivers/net/mlx4/en_netdev.c (revision 263101) +++ head/sys/ofed/drivers/net/mlx4/en_netdev.c (revision 263102) @@ -1,1666 +1,1666 @@ /* * Copyright (c) 2007 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include "mlx4_en.h" #include #include #include #include #include #include #include #include static void mlx4_en_init_locked(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); int idx; u8 field; if (arg != priv) return; if ((vid == 0) || (vid > 4095)) /* Invalid */ return; en_dbg(HW, priv, "adding VLAN:%d\n", vid); idx = vid >> 5; field = 1 << (vid & 0x1f); spin_lock(&priv->vlan_lock); priv->vlgrp_modified = true; if (priv->vlan_unregister[idx] & field) priv->vlan_unregister[idx] &= ~field; else priv->vlan_register[idx] |= field; priv->vlans[idx] |= field; spin_unlock(&priv->vlan_lock); } static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); int idx; u8 field; if (arg != priv) return; if ((vid == 0) || (vid > 4095)) /* Invalid */ return; en_dbg(HW, priv, "Killing VID:%d\n", vid); idx = vid >> 5; field = 1 << (vid & 0x1f); spin_lock(&priv->vlan_lock); priv->vlgrp_modified = true; if (priv->vlan_register[idx] & field) priv->vlan_register[idx] &= ~field; else priv->vlan_unregister[idx] |= field; priv->vlans[idx] &= ~field; spin_unlock(&priv->vlan_lock); } u64 mlx4_en_mac_to_u64(u8 *addr) { u64 mac = 0; int i; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac <<= 8; mac |= addr[i]; } return mac; } static int mlx4_en_cache_mclist(struct net_device *dev, u64 **mcaddrp) { struct ifmultiaddr *ifma; u64 *mcaddr; int cnt; int i; *mcaddrp = NULL; restart: cnt = 0; if_maddr_rlock(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; cnt++; } if_maddr_runlock(dev); if (cnt == 0) return (0); mcaddr = kmalloc(sizeof(u64) * cnt, GFP_KERNEL); if (mcaddr == NULL) return (0); i = 0; if_maddr_rlock(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; /* Make sure the list didn't grow. */ if (i == cnt) { if_maddr_runlock(dev); kfree(mcaddr); goto restart; } mcaddr[i++] = mlx4_en_mac_to_u64( LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); } if_maddr_runlock(dev); *mcaddrp = mcaddr; return (i); } static void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); queue_work(priv->mdev->workqueue, &priv->stop_port_task); } static void mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); queue_work(priv->mdev->workqueue, &priv->start_port_task); } static void mlx4_en_set_multicast(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); if (!priv->port_up) return; queue_work(priv->mdev->workqueue, &priv->mcast_task); } static void mlx4_en_do_set_multicast(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, mcast_task); struct net_device *dev = priv->dev; struct mlx4_en_dev *mdev = priv->mdev; int err; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_dbg(HW, priv, "Card is not up, " "ignoring multicast change.\n"); goto out; } if (!priv->port_up) { en_dbg(HW, priv, "Port is down, " "ignoring multicast change.\n"); goto out; } /* * Promsicuous mode: disable all filters */ if (dev->if_flags & IFF_PROMISC) { if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { priv->flags |= MLX4_EN_FLAG_PROMISC; /* Enable promiscouos mode */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 1); if (err) en_err(priv, "Failed enabling " "promiscous mode\n"); /* Disable port multicast filter (unconditionally) */ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling " "multicast filter\n"); /* Disable port VLAN filter */ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL); if (err) en_err(priv, "Failed disabling VLAN filter\n"); } goto out; } /* * Not in promiscous mode */ if (priv->flags & MLX4_EN_FLAG_PROMISC) { priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) en_err(priv, "Failed disabling promiscous mode\n"); /* Enable port VLAN filter */ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlans); if (err) en_err(priv, "Failed enabling VLAN filter\n"); } /* Enable/disable the multicast filter according to IFF_ALLMULTI */ if (dev->if_flags & IFF_ALLMULTI) { err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); } else { u64 *mcaddr; int mccount; int i; err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Flush mcast filter and init it with broadcast address */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, 1, MLX4_MCAST_CONFIG); /* Update multicast list - we cache all addresses so they won't * change while HW is updated holding the command semaphor */ mccount = mlx4_en_cache_mclist(dev, &mcaddr); for (i = 0; i < mccount; i++) mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcaddr[i], 0, MLX4_MCAST_CONFIG); err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); kfree(mcaddr); } out: mutex_unlock(&mdev->state_lock); } #ifdef CONFIG_NET_POLL_CONTROLLER static void mlx4_en_netpoll(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_cq *cq; unsigned long flags; int i; for (i = 0; i < priv->rx_ring_num; i++) { cq = &priv->rx_cq[i]; spin_lock_irqsave(&cq->lock, flags); napi_synchronize(&cq->napi); mlx4_en_process_rx_cq(dev, cq, 0); spin_unlock_irqrestore(&cq->lock, flags); } } #endif static void mlx4_en_watchdog_timeout(void *arg) { struct mlx4_en_priv *priv = arg; struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Scheduling watchdog\n"); queue_work(mdev->workqueue, &priv->watchdog_task); if (priv->port_up) callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); } /* XXX This clears user settings in too many cases. */ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) { struct mlx4_en_cq *cq; int i; /* If we haven't received a specific coalescing setting * (module param), we set the moderation paramters as follows: * - moder_cnt is set to the number of mtu sized packets to * satisfy our coelsing target. * - moder_time is set to a fixed value. */ priv->rx_frames = MLX4_EN_RX_COAL_TARGET / priv->dev->if_mtu + 1; priv->rx_usecs = MLX4_EN_RX_COAL_TIME; - en_dbg(INTR, priv, "Default coalesing params for mtu:%ld - " + en_dbg(INTR, priv, "Default coalesing params for mtu:%u - " "rx_frames:%d rx_usecs:%d\n", priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs); /* Setup cq moderation params */ for (i = 0; i < priv->rx_ring_num; i++) { cq = &priv->rx_cq[i]; cq->moder_cnt = priv->rx_frames; cq->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; priv->last_moder_packets[i] = 0; priv->last_moder_bytes[i] = 0; } for (i = 0; i < priv->tx_ring_num; i++) { cq = &priv->tx_cq[i]; cq->moder_cnt = MLX4_EN_TX_COAL_PKTS; cq->moder_time = MLX4_EN_TX_COAL_TIME; } /* Reset auto-moderation params */ priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW; priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW; priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH; priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH; priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL; priv->adaptive_rx_coal = 1; priv->last_moder_jiffies = 0; priv->last_moder_tx_packets = 0; } static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) { unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); struct mlx4_en_cq *cq; unsigned long packets; unsigned long rate; unsigned long avg_pkt_size; unsigned long rx_packets; unsigned long rx_bytes; unsigned long rx_pkt_diff; int moder_time; int ring, err; if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { spin_lock(&priv->stats_lock); rx_packets = priv->rx_ring[ring].packets; rx_bytes = priv->rx_ring[ring].bytes; spin_unlock(&priv->stats_lock); rx_pkt_diff = ((unsigned long) (rx_packets - priv->last_moder_packets[ring])); packets = rx_pkt_diff; rate = packets * HZ / period; avg_pkt_size = packets ? ((unsigned long) (rx_bytes - priv->last_moder_bytes[ring])) / packets : 0; /* Apply auto-moderation only when packet rate * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { if (rate < priv->pkt_rate_low || avg_pkt_size < MLX4_EN_AVG_PKT_SMALL) moder_time = priv->rx_usecs_low; else if (rate > priv->pkt_rate_high) moder_time = priv->rx_usecs_high; else moder_time = (rate - priv->pkt_rate_low) * (priv->rx_usecs_high - priv->rx_usecs_low) / (priv->pkt_rate_high - priv->pkt_rate_low) + priv->rx_usecs_low; } else { moder_time = priv->rx_usecs_low; } if (moder_time != priv->last_moder_time[ring]) { priv->last_moder_time[ring] = moder_time; cq = &priv->rx_cq[ring]; cq->moder_time = moder_time; err = mlx4_en_set_cq_moder(priv, cq); if (err) en_err(priv, "Failed modifying moderation " "for cq:%d\n", ring); } priv->last_moder_packets[ring] = rx_packets; priv->last_moder_bytes[ring] = rx_bytes; } priv->last_moder_jiffies = jiffies; } static void mlx4_en_handle_vlans(struct mlx4_en_priv *priv) { u8 vlan_register[VLAN_FLTR_SIZE]; u8 vlan_unregister[VLAN_FLTR_SIZE]; int i, j, idx; u16 vid; /* cache the vlan data for processing * done under lock to avoid changes during work */ spin_lock(&priv->vlan_lock); for (i = 0; i < VLAN_FLTR_SIZE; i++) { vlan_register[i] = priv->vlan_register[i]; priv->vlan_register[i] = 0; vlan_unregister[i] = priv->vlan_unregister[i]; priv->vlan_unregister[i] = 0; } priv->vlgrp_modified = false; spin_unlock(&priv->vlan_lock); /* Configure the vlan filter * The vlgrp is updated with all the vids that need to be allowed */ if (mlx4_SET_VLAN_FLTR(priv->mdev->dev, priv->port, priv->vlans)) en_err(priv, "Failed configuring VLAN filter\n"); /* Configure the VLAN table */ for (i = 0; i < VLAN_FLTR_SIZE; i++) { for (j = 0; j < 32; j++) { vid = (i << 5) + j; if (vlan_register[i] & (1 << j)) if (mlx4_register_vlan(priv->mdev->dev, priv->port, vid, &idx)) en_dbg(HW, priv, "failed registering vlan %d\n", vid); if (vlan_unregister[i] & (1 << j)) { if (!mlx4_find_cached_vlan(priv->mdev->dev, priv->port, vid, &idx)) mlx4_unregister_vlan(priv->mdev->dev, priv->port, idx); else en_dbg(HW, priv, "could not find vid %d in cache\n", vid); } } } } static void mlx4_en_do_get_stats(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, stats_task); struct mlx4_en_dev *mdev = priv->mdev; int err; err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); if (err) en_dbg(HW, priv, "Could not update stats \n"); mutex_lock(&mdev->state_lock); if (mdev->device_up) { if (priv->port_up) { if (priv->vlgrp_modified) mlx4_en_handle_vlans(priv); mlx4_en_auto_moderation(priv); } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } mlx4_en_QUERY_PORT(priv->mdev, priv->port); mutex_unlock(&mdev->state_lock); } static void mlx4_en_linkstate(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, linkstate_task); struct mlx4_en_dev *mdev = priv->mdev; int linkstate = priv->link_state; mutex_lock(&mdev->state_lock); /* If observable port state changed set carrier state and * report to system log */ if (priv->last_link_state != linkstate) { if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { if_link_state_change(priv->dev, LINK_STATE_DOWN); } else { en_info(priv, "Link Up\n"); if_link_state_change(priv->dev, LINK_STATE_UP); } } priv->last_link_state = linkstate; mutex_unlock(&mdev->state_lock); } static void mlx4_en_lock_and_stop_port(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, stop_port_task); struct net_device *dev = priv->dev; struct mlx4_en_dev *mdev = priv->mdev; mutex_lock(&mdev->state_lock); mlx4_en_do_stop_port(dev); mutex_unlock(&mdev->state_lock); } static void mlx4_en_lock_and_start_port(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, start_port_task); struct net_device *dev = priv->dev; struct mlx4_en_dev *mdev = priv->mdev; mutex_lock(&mdev->state_lock); mlx4_en_do_start_port(dev); mutex_unlock(&mdev->state_lock); } int mlx4_en_do_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; u64 config; int rx_index = 0; int tx_index = 0; int err = 0; int i; int j; if (priv->port_up) { en_dbg(DRV, priv, "start port called while port already up\n"); return 0; } /* Calculate Rx buf size */ dev->if_mtu = min(dev->if_mtu, priv->max_mtu); mlx4_en_calc_rx_buf(dev); en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size); /* Configure rx cq's and rings */ err = mlx4_en_activate_rx_rings(priv); if (err) { en_err(priv, "Failed to activate RX rings\n"); return err; } for (i = 0; i < priv->rx_ring_num; i++) { cq = &priv->rx_cq[i]; err = mlx4_en_activate_cq(priv, cq); if (err) { en_err(priv, "Failed activating Rx CQ\n"); goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto cq_err; } mlx4_en_arm_cq(priv, cq); priv->rx_ring[i].cqn = cq->mcq.cqn; ++rx_index; } err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); goto cq_err; } /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ cq = &priv->tx_cq[i]; err = mlx4_en_activate_cq(priv, cq); if (err) { en_err(priv, "Failed allocating Tx CQ\n"); goto tx_err; } err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i); cq->buf->wqe_index = cpu_to_be16(0xffff); /* Configure ring */ tx_ring = &priv->tx_ring[i]; err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn); if (err) { en_err(priv, "Failed allocating Tx ring\n"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *) (tx_ring->buf + j)) = 0xffffffff; ++tx_index; } /* Configure port */ err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations " "for port %d, with error %d\n", priv->port, err); goto tx_err; } /* Set default qp number */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) { en_err(priv, "Failed setting default qp numbers\n"); goto tx_err; } /* Set port mac number */ en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port); err = mlx4_register_mac(mdev->dev, priv->port, mlx4_en_mac_to_u64(IF_LLADDR(dev))); if (err < 0) { en_err(priv, "Failed setting port mac err=%d\n", err); goto tx_err; } mdev->mac_removed[priv->port] = 0; /* Init port */ en_dbg(HW, priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto mac_err; } /* Set the various hardware offload abilities */ dev->if_hwassist = 0; if (dev->if_capenable & IFCAP_TSO4) dev->if_hwassist |= CSUM_TSO; if (dev->if_capenable & IFCAP_TXCSUM) dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (dev->if_capenable & IFCAP_RXCSUM) priv->rx_csum = 1; else priv->rx_csum = 0; err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL info, unable to modify\n"); goto wol_err; } if (dev->if_capenable & IFCAP_WOL_MAGIC) { config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC; } else { config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC); config |= MLX4_EN_WOL_DO_MODIFY; } err = mlx4_wol_write(priv->mdev->dev, config, priv->port); if (err) { en_err(priv, "Failed to set WoL information\n"); goto wol_err; } priv->port_up = true; /* Populate multicast list */ mlx4_en_set_multicast(dev); /* Enable the queues. */ dev->if_drv_flags &= ~IFF_DRV_OACTIVE; dev->if_drv_flags |= IFF_DRV_RUNNING; callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); return 0; wol_err: /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); mac_err: mlx4_unregister_mac(mdev->dev, priv->port, priv->mac); tx_err: while (tx_index--) { mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]); } mlx4_en_release_rss_steer(priv); cq_err: while (rx_index--) mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]); for (i = 0; i < priv->rx_ring_num; i++) mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); return err; /* need to close devices */ } void mlx4_en_do_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int i; if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); return; } /* Set port as not active */ priv->port_up = false; /* Unregister Mac address for the port */ mlx4_unregister_mac(mdev->dev, priv->port, priv->mac); mdev->mac_removed[priv->port] = 1; /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]); mlx4_en_deactivate_cq(priv, &priv->tx_cq[i]); } msleep(10); for (i = 0; i < priv->tx_ring_num; i++) mlx4_en_free_tx_buf(dev, &priv->tx_ring[i]); /* Free RSS qps */ mlx4_en_release_rss_steer(priv); /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, &priv->rx_cq[i]); } /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); callout_stop(&priv->watchdog_timer); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, watchdog_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; struct mlx4_en_tx_ring *ring; int i; if (priv->blocked == 0 || priv->port_up == 0) return; for (i = 0; i < priv->tx_ring_num; i++) { ring = &priv->tx_ring[i]; if (ring->blocked && ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) goto reset; } return; reset: priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_do_stop_port(dev); if (mlx4_en_do_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_init(void *arg) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; priv = arg; mdev = priv->mdev; mutex_lock(&mdev->state_lock); mlx4_en_init_locked(priv); mutex_unlock(&mdev->state_lock); } static void mlx4_en_init_locked(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev; struct ifnet *dev; int i; dev = priv->dev; mdev = priv->mdev; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_do_stop_port(dev); if (!mdev->device_up) { en_err(priv, "Cannot open - device down/disabled\n"); return; } /* Reset HW statistics and performance counters */ if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i].bytes = 0; priv->tx_ring[i].packets = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i].bytes = 0; priv->rx_ring[i].packets = 0; } mlx4_en_set_default_moderation(priv); if (mlx4_en_do_start_port(dev)) en_err(priv, "Failed starting port:%d\n", priv->port); } void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring[i].tx_info) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq[i].buf) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i].rx_info) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i]); if (priv->rx_cq[i].buf) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } /* Free the stats tree when we resize the rings. */ if (priv->sysctl) sysctl_ctx_free(&priv->stat_ctx); } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->tx_cq[i], prof->tx_ring_size, i, TX)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], prof->tx_ring_size, TXBB_SIZE)) goto err; } /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->rx_cq[i], prof->rx_ring_size, i, RX)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size)) goto err; } /* Re-create stat sysctls in case the number of rings changed. */ mlx4_en_sysctl_stat(priv); /* Populate Tx priority mappings */ mlx4_en_set_prio_map(priv, priv->tx_prio_map, priv->tx_ring_num - MLX4_EN_NUM_HASH_RINGS); return 0; err: en_err(priv, "Failed to allocate NIC resources\n"); return -ENOMEM; } void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* Unregister device - this will close the port if it was up */ if (priv->registered) ether_ifdetach(dev); if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); mutex_lock(&mdev->state_lock); mlx4_en_do_stop_port(dev); mutex_unlock(&mdev->state_lock); cancel_delayed_work(&priv->stats_task); /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); callout_drain(&priv->watchdog_timer); /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; mutex_unlock(&mdev->state_lock); mlx4_en_free_resources(priv); if (priv->sysctl) sysctl_ctx_free(&priv->conf_ctx); mtx_destroy(&priv->stats_lock.m); mtx_destroy(&priv->vlan_lock.m); kfree(priv); if_free(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err = 0; - en_dbg(DRV, priv, "Change MTU called - current:%ld new:%d\n", + en_dbg(DRV, priv, "Change MTU called - current:%u new:%d\n", dev->if_mtu, new_mtu); if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { en_err(priv, "Bad MTU size:%d.\n", new_mtu); return -EPERM; } mutex_lock(&mdev->state_lock); dev->if_mtu = new_mtu; if (dev->if_drv_flags & IFF_DRV_RUNNING) { if (!mdev->device_up) { /* NIC is probably restarting - let watchdog task reset * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { mlx4_en_do_stop_port(dev); mlx4_en_set_default_moderation(priv); err = mlx4_en_do_start_port(dev); if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); queue_work(mdev->workqueue, &priv->watchdog_task); } } } mutex_unlock(&mdev->state_lock); return 0; } static int mlx4_en_calc_media(struct mlx4_en_priv *priv) { int trans_type; int active; active = IFM_ETHER; if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN) return (active); /* * [ShaharK] mlx4_en_QUERY_PORT sleeps and cannot be called under a * non-sleepable lock. * I moved it to the periodic mlx4_en_do_get_stats. if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return (active); */ active |= IFM_FDX; trans_type = priv->port_state.transciver; /* XXX I don't know all of the transceiver values. */ switch (priv->port_state.link_speed) { case 1000: active |= IFM_1000_T; break; case 10000: if (trans_type > 0 && trans_type <= 0xC) active |= IFM_10G_SR; else if (trans_type == 0x80 || trans_type == 0) active |= IFM_10G_CX4; break; case 40000: active |= IFM_40G_CR4; break; } if (priv->prof->tx_pause) active |= IFM_ETH_TXPAUSE; if (priv->prof->rx_pause) active |= IFM_ETH_RXPAUSE; return (active); } static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx4_en_priv *priv; priv = dev->if_softc; ifmr->ifm_status = IFM_AVALID; if (priv->last_link_state != MLX4_DEV_EVENT_PORT_DOWN) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active = mlx4_en_calc_media(priv); return; } static int mlx4_en_media_change(struct ifnet *dev) { struct mlx4_en_priv *priv; struct ifmedia *ifm; int rxpause; int txpause; int error; priv = dev->if_softc; ifm = &priv->media; rxpause = txpause = 0; error = 0; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_SR: case IFM_10G_CX4: case IFM_1000_T: if (IFM_SUBTYPE(ifm->ifm_media) == IFM_SUBTYPE(mlx4_en_calc_media(priv)) && (ifm->ifm_media & IFM_FDX)) break; /* Fallthrough */ default: printf("%s: Only auto media type\n", if_name(dev)); return (EINVAL); } /* Allow user to set/clear pause */ if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE) rxpause = 1; if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE) txpause = 1; if (priv->prof->tx_pause != txpause || priv->prof->rx_pause != rxpause) { priv->prof->tx_pause = txpause; priv->prof->rx_pause = rxpause; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); } return (error); } static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct ifreq *ifr; int error; int mask; error = 0; mask = 0; priv = dev->if_softc; mdev = priv->mdev; ifr = (struct ifreq *) data; switch (command) { case SIOCSIFMTU: error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu); break; case SIOCSIFFLAGS: if (dev->if_flags & IFF_UP) { if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) mlx4_en_start_port(dev); else mlx4_en_set_multicast(dev); } else { if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_stop_port(dev); if_link_state_change(dev, LINK_STATE_DOWN); /* * Since mlx4_en_stop_port is defered we * have to wait till it's finished. */ for (int count=0; count<10; count++) { if (dev->if_drv_flags & IFF_DRV_RUNNING) { DELAY(20000); } else { break; } } } } break; case SIOCADDMULTI: case SIOCDELMULTI: mlx4_en_set_multicast(dev); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(dev, ifr, &priv->media, command); break; case SIOCSIFCAP: mutex_lock(&mdev->state_lock); mask = ifr->ifr_reqcap ^ dev->if_capenable; if (mask & IFCAP_HWCSUM) dev->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) dev->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_LRO) dev->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_WOL_MAGIC) dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_init_locked(priv); mutex_unlock(&mdev->state_lock); VLAN_CAPABILITIES(dev); break; default: error = ether_ioctl(dev, command, data); break; } return (error); } static int mlx4_en_set_ring_size(struct net_device *dev, int rx_size, int tx_size) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; rx_size = roundup_pow_of_two(rx_size); rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(tx_size); tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? priv->rx_ring[0].actual_size : priv->rx_ring[0].size) && tx_size == priv->tx_ring[0].size) return 0; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_do_stop_port(dev); } mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; priv->prof->rx_ring_size = rx_size; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } if (port_up) { err = mlx4_en_do_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); } out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->rx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, size, priv->prof->tx_ring_size); return (error); } static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->tx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, size); return (error); } static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int ppp; int error; priv = arg1; ppp = priv->prof->tx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); priv->prof->tx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; int tx_ring_num; int ppp; int error; int port_up; port_up = 0; priv = arg1; mdev = priv->mdev; ppp = priv->prof->rx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); /* See if we have to change the number of tx queues. */ if (!ppp != !priv->prof->rx_ppp) { tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 + (!!ppp) * MLX4_EN_NUM_PPP_RINGS; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_do_stop_port(priv->dev); } mlx4_en_free_resources(priv); priv->tx_ring_num = tx_ring_num; priv->prof->rx_ppp = ppp; error = -mlx4_en_alloc_resources(priv); if (error) en_err(priv, "Failed reallocating port resources\n"); if (error == 0 && port_up) { error = -mlx4_en_do_start_port(priv->dev); if (error) en_err(priv, "Failed starting port\n"); } mutex_unlock(&mdev->state_lock); return (error); } priv->prof->rx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *coal; struct sysctl_oid_list *coal_list; dev = priv->dev; ctx = &priv->conf_ctx; sysctl_ctx_init(ctx); priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, "conf", CTLFLAG_RD, NULL, "Configuration"); node_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", CTLFLAG_RW, &priv->msg_enable, 0, "Driver message enable bitfield"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", CTLTYPE_INT | CTLFLAG_RD, &priv->rx_ring_num, 0, "Number of receive rings"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", CTLTYPE_INT | CTLFLAG_RD, &priv->tx_ring_num, 0, "Number of transmit rings"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ring_size, "I", "Receive ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "ip_reasm", CTLFLAG_RW, &priv->ip_reasm, 0, "Allow reassembly of IP fragments."); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); /* Add coalescer configuration. */ coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); coal_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", CTLFLAG_RW, &priv->pkt_rate_low, 0, "Packets per-second for minimum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", CTLFLAG_RW, &priv->rx_usecs_low, 0, "Minimum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", CTLFLAG_RW, &priv->pkt_rate_high, 0, "Packets per-second for maximum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", CTLFLAG_RW, &priv->rx_usecs_high, 0, "Maximum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", CTLFLAG_RW, &priv->sample_interval, 0, "adaptive frequency in units of HZ ticks"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", CTLFLAG_RW, &priv->adaptive_rx_coal, 0, "Enable adaptive rx coalescing"); } static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *ring_node; struct sysctl_oid_list *ring_list; struct mlx4_en_tx_ring *tx_ring; struct mlx4_en_rx_ring *rx_ring; char namebuf[128]; int i; dev = priv->dev; ctx = &priv->stat_ctx; sysctl_ctx_init(ctx); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, "stat", CTLFLAG_RD, NULL, "Statistics"); node_list = SYSCTL_CHILDREN(node); #ifdef MLX4_EN_PERF_STAT SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_poll", CTLFLAG_RD, &priv->pstats.tx_poll, "TX Poll calls"); SYSCTL_ADD_QUAD(ctx, node_list, OID_AUTO, "tx_pktsz_avg", CTLFLAG_RD, &priv->pstats.tx_pktsz_avg, "TX average packet size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "inflight_avg", CTLFLAG_RD, &priv->pstats.inflight_avg, "TX average packets in-flight"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_coal_avg", CTLFLAG_RD, &priv->pstats.tx_coal_avg, "TX average coalesced completions"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_coal_avg", CTLFLAG_RD, &priv->pstats.rx_coal_avg, "RX average coalesced completions"); #endif SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &priv->port_stats.tso_packets, "TSO packets sent"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "queue_stopped", CTLFLAG_RD, &priv->port_stats.queue_stopped, "Queue full"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "wake_queue", CTLFLAG_RD, &priv->port_stats.wake_queue, "Queue resumed after full"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD, &priv->port_stats.tx_timeout, "Transmit timeouts"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD, &priv->port_stats.rx_alloc_failed, "RX failed to allocate mbuf"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD, &priv->port_stats.rx_chksum_good, "RX checksum offload success"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_none", CTLFLAG_RD, &priv->port_stats.rx_chksum_none, "RX without checksum offload"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_chksum_offload", CTLFLAG_RD, &priv->port_stats.tx_chksum_offload, "TX checksum offloads"); /* Could strdup the names and add in a loop. This is simpler. */ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "broadcast", CTLFLAG_RD, &priv->pkstats.broadcast, "Broadcast packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio0", CTLFLAG_RD, &priv->pkstats.tx_prio[0], "TX Priority 0 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio1", CTLFLAG_RD, &priv->pkstats.tx_prio[1], "TX Priority 1 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio2", CTLFLAG_RD, &priv->pkstats.tx_prio[2], "TX Priority 2 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio3", CTLFLAG_RD, &priv->pkstats.tx_prio[3], "TX Priority 3 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio4", CTLFLAG_RD, &priv->pkstats.tx_prio[4], "TX Priority 4 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio5", CTLFLAG_RD, &priv->pkstats.tx_prio[5], "TX Priority 5 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio6", CTLFLAG_RD, &priv->pkstats.tx_prio[6], "TX Priority 6 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio7", CTLFLAG_RD, &priv->pkstats.tx_prio[7], "TX Priority 7 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio0", CTLFLAG_RD, &priv->pkstats.rx_prio[0], "RX Priority 0 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio1", CTLFLAG_RD, &priv->pkstats.rx_prio[1], "RX Priority 1 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio2", CTLFLAG_RD, &priv->pkstats.rx_prio[2], "RX Priority 2 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio3", CTLFLAG_RD, &priv->pkstats.rx_prio[3], "RX Priority 3 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio4", CTLFLAG_RD, &priv->pkstats.rx_prio[4], "RX Priority 4 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio5", CTLFLAG_RD, &priv->pkstats.rx_prio[5], "RX Priority 5 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio6", CTLFLAG_RD, &priv->pkstats.rx_prio[6], "RX Priority 6 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio7", CTLFLAG_RD, &priv->pkstats.rx_prio[7], "RX Priority 7 packets"); for (i = 0; i < priv->tx_ring_num; i++) { tx_ring = &priv->tx_ring[i]; snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &tx_ring->packets, "TX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_ring->bytes, "TX bytes"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &tx_ring->errors, "TX soft errors"); } for (i = 0; i < priv->rx_ring_num; i++) { rx_ring = &priv->rx_ring[i]; snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &rx_ring->packets, "RX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &rx_ring->bytes, "RX bytes"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &rx_ring->errors, "RX soft errors"); SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &rx_ring->lro.lro_queued, 0, "LRO Queued"); SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &rx_ring->lro.lro_flushed, 0, "LRO Flushed"); } } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { static volatile int mlx4_en_unit; struct net_device *dev; struct mlx4_en_priv *priv; uint8_t dev_addr[ETHER_ADDR_LEN]; int err; int i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); dev = priv->dev = if_alloc(IFT_ETHER); if (dev == NULL) { mlx4_err(mdev, "Net device allocation failed\n"); kfree(priv); return -ENOMEM; } dev->if_softc = priv; if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1)); dev->if_mtu = ETHERMTU; dev->if_baudrate = 1000000000; dev->if_init = mlx4_en_init; dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; dev->if_ioctl = mlx4_en_ioctl; dev->if_transmit = mlx4_en_transmit; dev->if_qflush = mlx4_en_qflush; dev->if_snd.ifq_maxlen = prof->tx_ring_size; /* * Initialize driver private data */ priv->dev = dev; priv->mdev = mdev; priv->prof = prof; priv->port = port; priv->port_up = false; priv->rx_csum = 1; priv->flags = prof->flags; priv->tx_ring_num = prof->tx_ring_num; priv->rx_ring_num = prof->rx_ring_num; priv->mac_index = -1; priv->msg_enable = MLX4_EN_MSG_LEVEL; priv->ip_reasm = priv->mdev->profile.ip_reasm; mtx_init(&priv->stats_lock.m, "mlx4 stats", NULL, MTX_DEF); mtx_init(&priv->vlan_lock.m, "mlx4 vlan", NULL, MTX_DEF); INIT_WORK(&priv->start_port_task, mlx4_en_lock_and_start_port); INIT_WORK(&priv->stop_port_task, mlx4_en_lock_and_stop_port); INIT_WORK(&priv->mcast_task, mlx4_en_do_set_multicast); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); callout_init(&priv->watchdog_timer, 1); /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; priv->mac = mdev->dev->caps.def_mac[priv->port]; if (ILLEGAL_MAC(priv->mac)) { en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", priv->port, priv->mac); err = -EINVAL; goto out; } mlx4_en_sysctl_conf(priv); err = mlx4_en_alloc_resources(priv); if (err) goto out; /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; } priv->allocated = 1; /* * Set driver features */ dev->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO; if (mdev->profile.num_lro) dev->if_capabilities |= IFCAP_LRO; dev->if_capenable = dev->if_capabilities; /* * Setup wake-on-lan. */ #if 0 if (priv->mdev->dev->caps.wol) { u64 config; if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) { if (config & MLX4_EN_WOL_MAGIC) dev->if_capabilities |= IFCAP_WOL_MAGIC; if (config & MLX4_EN_WOL_ENABLED) dev->if_capenable |= IFCAP_WOL_MAGIC; } } #endif /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); mdev->pndev[priv->port] = dev; priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; if_link_state_change(dev, LINK_STATE_DOWN); /* Set default MAC */ for (i = 0; i < ETHER_ADDR_LEN; i++) dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); ether_ifattach(dev, dev_addr); ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx4_en_media_change, mlx4_en_media_status); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->registered = 1; queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); return 0; out: mlx4_en_destroy_netdev(dev); return err; } Index: head/sys/ofed/include/rdma/ib_addr.h =================================================================== --- head/sys/ofed/include/rdma/ib_addr.h (revision 263101) +++ head/sys/ofed/include/rdma/ib_addr.h (revision 263102) @@ -1,318 +1,312 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #if !defined(IB_ADDR_H) #define IB_ADDR_H #include #include #include #include #include #include #include #include #include struct rdma_addr_client { atomic_t refcount; struct completion comp; }; /** * rdma_addr_register_client - Register an address client. */ void rdma_addr_register_client(struct rdma_addr_client *client); /** * rdma_addr_unregister_client - Deregister an address client. * @client: Client object to deregister. */ void rdma_addr_unregister_client(struct rdma_addr_client *client); struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; }; /** * rdma_translate_ip - Translate a local IP address to an RDMA hardware * address. */ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr); /** * rdma_resolve_ip - Resolve source and destination IP addresses to * RDMA hardware addresses. * @client: Address client associated with request. * @src_addr: An optional source address to use in the resolution. If a * source address is not provided, a usable address will be returned via * the callback. * @dst_addr: The destination address to resolve. * @addr: A reference to a data location that will receive the resolved * addresses. The data location must remain valid until the callback has * been invoked. * @timeout_ms: Amount of time to wait for the address resolution to complete. * @callback: Call invoked once address resolution has completed, timed out, * or been canceled. A status of 0 indicates success. * @context: User-specified context associated with the call. */ int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), void *context); void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr); static inline int ip_addr_size(struct sockaddr *addr) { return addr->sa_family == AF_INET6 ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); } static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; } static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) { dev_addr->broadcast[8] = pkey >> 8; dev_addr->broadcast[9] = (unsigned char) pkey; } static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid) { memset(gid->raw, 0, 16); *((u32 *)gid->raw) = cpu_to_be32(0xfe800000); if (vid < 0x1000) { gid->raw[12] = vid & 0xff; gid->raw[11] = vid >> 8; } else { gid->raw[12] = 0xfe; gid->raw[11] = 0xff; } memcpy(gid->raw + 13, mac + 3, 3); memcpy(gid->raw + 8, mac, 3); gid->raw[8] ^= 2; } static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) { #ifdef __linux__ return dev->priv_flags & IFF_802_1Q_VLAN ? vlan_dev_vlan_id(dev) : 0xffff; #else uint16_t tag; if (VLAN_TAG(__DECONST(struct ifnet *, dev), &tag) != 0) return 0xffff; return tag; #endif } static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { struct net_device *dev; u16 vid = 0xffff; dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (dev) { vid = rdma_vlan_dev_vlan_id(dev); dev_put(dev); } iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid); } static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { if (dev_addr->transport == RDMA_TRANSPORT_IB && dev_addr->dev_type != ARPHRD_INFINIBAND) iboe_addr_get_sgid(dev_addr, gid); else memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline enum ib_mtu iboe_get_mtu(int mtu) { /* * reduce IB headers from effective IBoE MTU. 28 stands for * atomic header which is the biggest possible header after BTH */ mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) return IB_MTU_2048; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) return IB_MTU_1024; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) return IB_MTU_512; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) return IB_MTU_256; else return 0; } #ifdef __linux__ static inline int iboe_get_rate(struct net_device *dev) { struct ethtool_cmd cmd; if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings || dev->ethtool_ops->get_settings(dev, &cmd)) return IB_RATE_PORT_CURRENT; if (cmd.speed >= 40000) return IB_RATE_40_GBPS; else if (cmd.speed >= 30000) return IB_RATE_30_GBPS; else if (cmd.speed >= 20000) return IB_RATE_20_GBPS; else if (cmd.speed >= 10000) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; } #else static inline int iboe_get_rate(struct net_device *dev) { - uintmax_t baudrate; - int exp; - - baudrate = dev->if_baudrate; - for (exp = dev->if_baudrate_pf; exp > 0; exp--) - baudrate *= 10; - if (baudrate >= IF_Gbps(40)) + if (dev->if_baudrate >= IF_Gbps(40)) return IB_RATE_40_GBPS; - else if (baudrate >= IF_Gbps(30)) + else if (dev->if_baudrate >= IF_Gbps(30)) return IB_RATE_30_GBPS; - else if (baudrate >= IF_Gbps(20)) + else if (dev->if_baudrate >= IF_Gbps(20)) return IB_RATE_20_GBPS; - else if (baudrate >= IF_Gbps(10)) + else if (dev->if_baudrate >= IF_Gbps(10)) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; } #endif static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) && addr->s6_addr32[1] == 0) return 1; return 0; } static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) { memcpy(mac, &addr->s6_addr[8], 3); memcpy(mac + 3, &addr->s6_addr[13], 3); mac[0] ^= 2; } static inline int rdma_is_multicast_addr(struct in6_addr *addr) { return addr->s6_addr[0] == 0xff; } static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) { int i; mac[0] = 0x33; mac[1] = 0x33; for (i = 2; i < 6; ++i) mac[i] = addr->s6_addr[i + 10]; } static inline u16 rdma_get_vlan_id(union ib_gid *dgid) { u16 vid; vid = dgid->raw[11] << 8 | dgid->raw[12]; return vid < 0x1000 ? vid : 0xffff; } static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) { #ifdef __linux__ return dev->priv_flags & IFF_802_1Q_VLAN ? vlan_dev_real_dev(dev) : 0; #else return VLAN_TRUNKDEV(__DECONST(struct ifnet *, dev)); #endif } #endif /* IB_ADDR_H */ Index: head/sys/sys/param.h =================================================================== --- head/sys/sys/param.h (revision 263101) +++ head/sys/sys/param.h (revision 263102) @@ -1,347 +1,347 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)param.h 8.3 (Berkeley) 4/4/95 * $FreeBSD$ */ #ifndef _SYS_PARAM_H_ #define _SYS_PARAM_H_ #include #define BSD 199506 /* System version (year & month). */ #define BSD4_3 1 #define BSD4_4 1 /* * __FreeBSD_version numbers are documented in the Porter's Handbook. * If you bump the version for any reason, you should update the documentation * there. * Currently this lives here in the doc/ repository: * * head/en_US.ISO8859-1/books/porters-handbook/book.xml * * scheme is: Rxx * 'R' is in the range 0 to 4 if this is a release branch or * x.0-CURRENT before RELENG_*_0 is created, otherwise 'R' is * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1100010 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100011 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, * which by definition is always true on FreeBSD. This macro is also defined * on other systems that use the kernel of FreeBSD, such as GNU/kFreeBSD. * * It is tempting to use this macro in userland code when we want to enable * kernel-specific routines, and in fact it's fine to do this in code that * is part of FreeBSD itself. However, be aware that as presence of this * macro is still not widespread (e.g. older FreeBSD versions, 3rd party * compilers, etc), it is STRONGLY DISCOURAGED to check for this macro in * external applications without also checking for __FreeBSD__ as an * alternative. */ #undef __FreeBSD_kernel__ #define __FreeBSD_kernel__ #ifdef _KERNEL #define P_OSREL_SIGWAIT 700000 #define P_OSREL_SIGSEGV 700004 #define P_OSREL_MAP_ANON 800104 #define P_OSREL_MAJOR(x) ((x) / 100000) #endif #ifndef LOCORE #include #endif /* * Machine-independent constants (some used in following include files). * Redefined constants are from POSIX 1003.1 limits file. * * MAXCOMLEN should be >= sizeof(ac_comm) (see ) */ #include #define MAXCOMLEN 19 /* max command name remembered */ #define MAXINTERP PATH_MAX /* max interpreter file name length */ #define MAXLOGNAME 33 /* max login name length (incl. NUL) */ #define MAXUPRC CHILD_MAX /* max simultaneous processes */ #define NCARGS ARG_MAX /* max bytes for an exec function */ #define NGROUPS (NGROUPS_MAX+1) /* max number groups */ #define NOFILE OPEN_MAX /* max open files per process */ #define NOGROUP 65535 /* marker for empty group set member */ #define MAXHOSTNAMELEN 256 /* max hostname size */ #define SPECNAMELEN 63 /* max length of devicename */ /* More types and definitions used throughout the kernel. */ #ifdef _KERNEL #include #include #ifndef LOCORE #include #include #endif #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE 1 #endif #endif #ifndef _KERNEL /* Signals. */ #include #endif /* Machine type dependent parameters. */ #include #ifndef _KERNEL #include #endif #ifndef DEV_BSHIFT #define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ #endif #define DEV_BSIZE (1<>PAGE_SHIFT) #endif /* * btodb() is messy and perhaps slow because `bytes' may be an off_t. We * want to shift an unsigned type to avoid sign extension and we don't * want to widen `bytes' unnecessarily. Assume that the result fits in * a daddr_t. */ #ifndef btodb #define btodb(bytes) /* calculates (bytes / DEV_BSIZE) */ \ (sizeof (bytes) > sizeof(long) \ ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \ : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT)) #endif #ifndef dbtob #define dbtob(db) /* calculates (db * DEV_BSIZE) */ \ ((off_t)(db) << DEV_BSHIFT) #endif #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PDROP 0x200 /* OR'd with pri to stop re-entry of interlock mutex */ #define NZERO 0 /* default "nice" */ #define NBBY 8 /* number of bits in a byte */ #define NBPW sizeof(int) /* number of bytes per word (integer) */ #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ #define NODEV (dev_t)(-1) /* non-existent device */ /* * File system parameters and macros. * * MAXBSIZE - Filesystems are made out of blocks of at most MAXBSIZE bytes * per block. MAXBSIZE may be made larger without effecting * any existing filesystems as long as it does not exceed MAXPHYS, * and may be made smaller at the risk of not being able to use * filesystems which require a block size exceeding MAXBSIZE. * * BKVASIZE - Nominal buffer space per buffer, in bytes. BKVASIZE is the * minimum KVM memory reservation the kernel is willing to make. * Filesystems can of course request smaller chunks. Actual * backing memory uses a chunk size of a page (PAGE_SIZE). * * If you make BKVASIZE too small you risk seriously fragmenting * the buffer KVM map which may slow things down a bit. If you * make it too big the kernel will not be able to optimally use * the KVM memory reserved for the buffer cache and will wind * up with too-few buffers. * * The default is 16384, roughly 2x the block size used by a * normal UFS filesystem. */ #define MAXBSIZE 65536 /* must be power of 2 */ #define BKVASIZE 16384 /* must be power of 2 */ #define BKVAMASK (BKVASIZE-1) /* * MAXPATHLEN defines the longest permissible path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer * pool in which to do the name expansion, hence should be a power of two, * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the * maximum number of symbolic links that may be expanded in a path name. * It should be set high enough to allow all legitimate uses, but halt * infinite loops reasonably quickly. */ #define MAXPATHLEN PATH_MAX #define MAXSYMLINKS 32 /* Bit map related macros. */ #define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY)) #define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY))) #define isset(a,i) \ (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) #define isclr(a,i) \ ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0) /* Macros for counting and rounding. */ #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) #endif #define nitems(x) (sizeof((x)) / sizeof((x)[0])) #define rounddown(x, y) (((x)/(y))*(y)) #define rounddown2(x, y) ((x)&(~((y)-1))) /* if y is power of two */ #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */ #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ #define powerof2(x) ((((x)-1)&(x))==0) /* Macros for min/max. */ #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) #ifdef _KERNEL /* * Basic byte order function prototypes for non-inline functions. */ #ifndef LOCORE #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED __BEGIN_DECLS __uint32_t htonl(__uint32_t); __uint16_t htons(__uint16_t); __uint32_t ntohl(__uint32_t); __uint16_t ntohs(__uint16_t); __END_DECLS #endif #endif #ifndef lint #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif /* !_BYTEORDER_FUNC_DEFINED */ #endif /* lint */ #endif /* _KERNEL */ /* * Scale factor for scaled integers used to count %cpu time and load avgs. * * The number of CPU `tick's that map to a unique `%age' can be expressed * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that * can be calculated (assuming 32 bits) can be closely approximated using * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). * * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024. */ #define FSHIFT 11 /* bits to right of fixed binary point */ #define FSCALE (1<> (PAGE_SHIFT - DEV_BSHIFT)) #define ctodb(db) /* calculates pages to devblks */ \ ((db) << (PAGE_SHIFT - DEV_BSHIFT)) /* * Old spelling of __containerof(). */ #define member2struct(s, m, x) \ ((struct s *)(void *)((char *)(x) - offsetof(struct s, m))) /* * Access a variable length array that has been declared as a fixed * length array. */ #define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset]) #endif /* _SYS_PARAM_H_ */ Index: head/tools/tools/ifinfo/ifinfo.c =================================================================== --- head/tools/tools/ifinfo/ifinfo.c (revision 263101) +++ head/tools/tools/ifinfo/ifinfo.c (revision 263102) @@ -1,310 +1,308 @@ /* * Copyright 1996 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include /* for PF_LINK */ #include #include #include #include #include #include #include #include #include #include #include #include #include "ifinfo.h" static void printit(const struct ifmibdata *, const char *); static const char *iftype(int); static const char *ifphys(int, int); static int isit(int, char **, const char *); static printfcn findlink(int); static void usage(const char *argv0) { fprintf(stderr, "%s: usage:\n\t%s [-l]\n", argv0, argv0); exit(EX_USAGE); } int main(int argc, char **argv) { int i, maxifno, retval; struct ifmibdata ifmd; int name[6]; size_t len; int c; int dolink = 0; void *linkmib; size_t linkmiblen; printfcn pf; char *dname; while ((c = getopt(argc, argv, "l")) != -1) { switch(c) { case 'l': dolink = 1; break; default: usage(argv[0]); } } retval = 1; name[0] = CTL_NET; name[1] = PF_LINK; name[2] = NETLINK_GENERIC; name[3] = IFMIB_SYSTEM; name[4] = IFMIB_IFCOUNT; len = sizeof maxifno; if (sysctl(name, 5, &maxifno, &len, 0, 0) < 0) err(EX_OSERR, "sysctl(net.link.generic.system.ifcount)"); for (i = 1; i <= maxifno; i++) { len = sizeof ifmd; name[3] = IFMIB_IFDATA; name[4] = i; name[5] = IFDATA_GENERAL; if (sysctl(name, 6, &ifmd, &len, 0, 0) < 0) { if (errno == ENOENT) continue; err(EX_OSERR, "sysctl(net.link.ifdata.%d.general)", i); } if (!isit(argc - optind, argv + optind, ifmd.ifmd_name)) continue; dname = NULL; len = 0; name[5] = IFDATA_DRIVERNAME; if (sysctl(name, 6, NULL, &len, 0, 0) < 0) { warn("sysctl(net.link.ifdata.%d.drivername)", i); } else { if ((dname = malloc(len)) == NULL) err(EX_OSERR, NULL); if (sysctl(name, 6, dname, &len, 0, 0) < 0) { warn("sysctl(net.link.ifdata.%d.drivername)", i); free(dname); dname = NULL; } } printit(&ifmd, dname); free(dname); if (dolink && (pf = findlink(ifmd.ifmd_data.ifi_type))) { name[5] = IFDATA_LINKSPECIFIC; if (sysctl(name, 6, 0, &linkmiblen, 0, 0) < 0) err(EX_OSERR, "sysctl(net.link.ifdata.%d.linkspec) size", i); linkmib = malloc(linkmiblen); if (!linkmib) err(EX_OSERR, "malloc(%lu)", (u_long)linkmiblen); if (sysctl(name, 6, linkmib, &linkmiblen, 0, 0) < 0) err(EX_OSERR, "sysctl(net.link.ifdata.%d.linkspec)", i); pf(linkmib, linkmiblen); free(linkmib); } retval = 0; } return retval; } static void printit(const struct ifmibdata *ifmd, const char *dname) { printf("Interface %.*s", IFNAMSIZ, ifmd->ifmd_name); if (dname != NULL) printf(" (%s)", dname); printf(":\n"); printf("\tflags: %x\n", ifmd->ifmd_flags); printf("\tpromiscuous listeners: %d\n", ifmd->ifmd_pcount); printf("\tsend queue length: %d\n", ifmd->ifmd_snd_len); printf("\tsend queue max length: %d\n", ifmd->ifmd_snd_maxlen); printf("\tsend queue drops: %d\n", ifmd->ifmd_snd_drops); printf("\ttype: %s\n", iftype(ifmd->ifmd_data.ifi_type)); printf("\tphysical: %s\n", ifphys(ifmd->ifmd_data.ifi_type, ifmd->ifmd_data.ifi_physical)); printf("\taddress length: %d\n", ifmd->ifmd_data.ifi_addrlen); printf("\theader length: %d\n", ifmd->ifmd_data.ifi_hdrlen); printf("\tlink state: %u\n", ifmd->ifmd_data.ifi_link_state); printf("\tvhid: %u\n", ifmd->ifmd_data.ifi_vhid); - printf("\tbaudrate power factor: %u\n", - ifmd->ifmd_data.ifi_baudrate_pf); printf("\tdatalen: %u\n", ifmd->ifmd_data.ifi_datalen); printf("\tmtu: %lu\n", ifmd->ifmd_data.ifi_mtu); printf("\tmetric: %lu\n", ifmd->ifmd_data.ifi_metric); printf("\tline rate: %lu bit/s\n", ifmd->ifmd_data.ifi_baudrate); printf("\tpackets received: %lu\n", ifmd->ifmd_data.ifi_ipackets); printf("\tinput errors: %lu\n", ifmd->ifmd_data.ifi_ierrors); printf("\tpackets transmitted: %lu\n", ifmd->ifmd_data.ifi_opackets); printf("\toutput errors: %lu\n", ifmd->ifmd_data.ifi_oerrors); printf("\tcollisions: %lu\n", ifmd->ifmd_data.ifi_collisions); printf("\tbytes received: %lu\n", ifmd->ifmd_data.ifi_ibytes); printf("\tbytes transmitted: %lu\n", ifmd->ifmd_data.ifi_obytes); printf("\tmulticasts received: %lu\n", ifmd->ifmd_data.ifi_imcasts); printf("\tmulticasts transmitted: %lu\n", ifmd->ifmd_data.ifi_omcasts); printf("\tinput queue drops: %lu\n", ifmd->ifmd_data.ifi_iqdrops); printf("\tpackets for unknown protocol: %lu\n", ifmd->ifmd_data.ifi_noproto); printf("\tHW offload capabilities: 0x%lx\n", ifmd->ifmd_data.ifi_hwassist); printf("\tuptime at attach or stat reset: %lu\n", ifmd->ifmd_data.ifi_epoch); #ifdef notdef printf("\treceive timing: %lu usec\n", ifmd->ifmd_data.ifi_recvtiming); printf("\ttransmit timing: %lu usec\n", ifmd->ifmd_data.ifi_xmittiming); #endif } static const char *const if_types[] = { "reserved", "other", "BBN 1822", "HDH 1822", "X.25 DDN", "X.25", "Ethernet", "ISO 8802-3 CSMA/CD", "ISO 8802-4 Token Bus", "ISO 8802-5 Token Ring", "ISO 8802-6 DQDB MAN", "StarLAN", "Proteon proNET-10", "Proteon proNET-80", "HyperChannel", "FDDI", "LAP-B", "SDLC", "T-1", "CEPT", "Basic rate ISDN", "Primary rate ISDN", "Proprietary P2P", "PPP", "Loopback", "ISO CLNP over IP", "Experimental Ethernet", "XNS over IP", "SLIP", "Ultra Technologies", "DS-3", "SMDS", "Frame Relay", "RS-232 serial", "Parallel printer port", "ARCNET", "ARCNET+", "ATM", "MIOX25", "SONET/SDH", "X25PLE", "ISO 8802-2 LLC", "LocalTalk", "SMDSDXI", "Frame Relay DCE", "V.35", "HSSI", "HIPPI", "Generic Modem", "ATM AAL5", "SONETPATH", "SONETVT", "SMDS InterCarrier Interface", "Proprietary virtual interface", "Proprietary multiplexing", "Generic tunnel interface", "IPv6-to-IPv4 TCP relay capturing interface", "6to4 tunnel interface" }; #define NIFTYPES ((sizeof if_types)/(sizeof if_types[0])) static const char * iftype(int type) { static char buf[256]; if (type <= 0 || type >= NIFTYPES) { sprintf(buf, "unknown type %d", type); return buf; } return if_types[type]; } static const char * ifphys(int type, int phys) { static char buf[256]; sprintf(buf, "unknown physical %d", phys); return buf; } static int isit(int argc, char **argv, const char *name) { if (argc == 0) return 1; for (argc = 0; argv[argc]; argc++) { if (strncmp(argv[argc], name, IFNAMSIZ) == 0) return 1; } return 0; } static printfcn findlink(int type) { switch(type) { case IFT_ETHER: case IFT_ISO88023: case IFT_STARLAN: return print_1650; } return 0; } Index: head/usr.bin/netstat/if.c =================================================================== --- head/usr.bin/netstat/if.c (revision 263101) +++ head/usr.bin/netstat/if.c (revision 263102) @@ -1,594 +1,596 @@ /*- * Copyright (c) 2013 Gleb Smirnoff * Copyright (c) 1983, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)if.c 8.3 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PF #include #include #endif #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include "netstat.h" static void sidewaysintpr(int); #ifdef INET6 static char addr_buf[NI_MAXHOST]; /* for getnameinfo() */ #endif #ifdef PF static const char* pfsyncacts[] = { /* PFSYNC_ACT_CLR */ "clear all request", /* PFSYNC_ACT_INS */ "state insert", /* PFSYNC_ACT_INS_ACK */ "state inserted ack", /* PFSYNC_ACT_UPD */ "state update", /* PFSYNC_ACT_UPD_C */ "compressed state update", /* PFSYNC_ACT_UPD_REQ */ "uncompressed state request", /* PFSYNC_ACT_DEL */ "state delete", /* PFSYNC_ACT_DEL_C */ "compressed state delete", /* PFSYNC_ACT_INS_F */ "fragment insert", /* PFSYNC_ACT_DEL_F */ "fragment delete", /* PFSYNC_ACT_BUS */ "bulk update mark", /* PFSYNC_ACT_TDB */ "TDB replay counter update", /* PFSYNC_ACT_EOF */ "end of frame mark", }; static void pfsync_acts_stats(const char *fmt, uint64_t *a) { int i; for (i = 0; i < PFSYNC_ACT_MAX; i++, a++) if (*a || sflag <= 1) printf(fmt, *a, pfsyncacts[i], plural(*a)); } /* * Dump pfsync statistics structure. */ void pfsync_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct pfsyncstats pfsyncstat, zerostat; size_t len = sizeof(struct pfsyncstats); if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.pfsync.stats", &pfsyncstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { if (errno != ENOENT) warn("sysctl: net.pfsync.stats"); return; } } else kread(off, &pfsyncstat, len); printf("%s:\n", name); #define p(f, m) if (pfsyncstat.f || sflag <= 1) \ printf(m, (uintmax_t)pfsyncstat.f, plural(pfsyncstat.f)) p(pfsyncs_ipackets, "\t%ju packet%s received (IPv4)\n"); p(pfsyncs_ipackets6, "\t%ju packet%s received (IPv6)\n"); pfsync_acts_stats("\t %ju %s%s received\n", &pfsyncstat.pfsyncs_iacts[0]); p(pfsyncs_badif, "\t\t%ju packet%s discarded for bad interface\n"); p(pfsyncs_badttl, "\t\t%ju packet%s discarded for bad ttl\n"); p(pfsyncs_hdrops, "\t\t%ju packet%s shorter than header\n"); p(pfsyncs_badver, "\t\t%ju packet%s discarded for bad version\n"); p(pfsyncs_badauth, "\t\t%ju packet%s discarded for bad HMAC\n"); p(pfsyncs_badact,"\t\t%ju packet%s discarded for bad action\n"); p(pfsyncs_badlen, "\t\t%ju packet%s discarded for short packet\n"); p(pfsyncs_badval, "\t\t%ju state%s discarded for bad values\n"); p(pfsyncs_stale, "\t\t%ju stale state%s\n"); p(pfsyncs_badstate, "\t\t%ju failed state lookup/insert%s\n"); p(pfsyncs_opackets, "\t%ju packet%s sent (IPv4)\n"); p(pfsyncs_opackets6, "\t%ju packet%s sent (IPv6)\n"); pfsync_acts_stats("\t %ju %s%s sent\n", &pfsyncstat.pfsyncs_oacts[0]); p(pfsyncs_onomem, "\t\t%ju failure%s due to mbuf memory error\n"); p(pfsyncs_oerrors, "\t\t%ju send error%s\n"); #undef p } #endif /* PF */ /* * Display a formatted value, or a '-' in the same space. */ static void show_stat(const char *fmt, int width, u_long value, short showvalue) { const char *lsep, *rsep; char newfmt[32]; lsep = ""; if (strncmp(fmt, "LS", 2) == 0) { lsep = " "; fmt += 2; } rsep = " "; if (strncmp(fmt, "NRS", 3) == 0) { rsep = ""; fmt += 3; } if (showvalue == 0) { /* Print just dash. */ sprintf(newfmt, "%s%%%ds%s", lsep, width, rsep); printf(newfmt, "-"); return; } if (hflag) { char buf[5]; /* Format in human readable form. */ humanize_number(buf, sizeof(buf), (int64_t)value, "", HN_AUTOSCALE, HN_NOSPACE | HN_DECIMAL); sprintf(newfmt, "%s%%%ds%s", lsep, width, rsep); printf(newfmt, buf); } else { /* Construct the format string. */ sprintf(newfmt, "%s%%%d%s%s", lsep, width, fmt, rsep); printf(newfmt, value); } } /* * Find next multiaddr for a given interface name. */ static struct ifmaddrs * next_ifma(struct ifmaddrs *ifma, const char *name, const sa_family_t family) { for(; ifma != NULL; ifma = ifma->ifma_next) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)ifma->ifma_name; if (ifma->ifma_addr->sa_family == family && strcmp(sdl->sdl_data, name) == 0) break; } return (ifma); } /* * Print a description of the network interfaces. */ void intpr(int interval, void (*pfunc)(char *), int af) { struct ifaddrs *ifap, *ifa; struct ifmaddrs *ifmap, *ifma; if (interval) return sidewaysintpr(interval); if (getifaddrs(&ifap) != 0) err(EX_OSERR, "getifaddrs"); if (aflag && getifmaddrs(&ifmap) != 0) err(EX_OSERR, "getifmaddrs"); if (!pfunc) { if (Wflag) printf("%-7.7s", "Name"); else printf("%-5.5s", "Name"); printf(" %5.5s %-13.13s %-17.17s %8.8s %5.5s %5.5s", "Mtu", "Network", "Address", "Ipkts", "Ierrs", "Idrop"); if (bflag) printf(" %10.10s","Ibytes"); printf(" %8.8s %5.5s", "Opkts", "Oerrs"); if (bflag) printf(" %10.10s","Obytes"); printf(" %5s", "Coll"); if (dflag) printf(" %s", "Drop"); putchar('\n'); } for (ifa = ifap; ifa; ifa = ifa->ifa_next) { bool network = false, link = false; if (interface != NULL && strcmp(ifa->ifa_name, interface) != 0) continue; if (pfunc) { char *name; name = ifa->ifa_name; (*pfunc)(name); /* * Skip all ifaddrs belonging to same interface. */ while(ifa->ifa_next != NULL && (strcmp(ifa->ifa_next->ifa_name, name) == 0)) { ifa = ifa->ifa_next; } continue; } if (af != AF_UNSPEC && ifa->ifa_addr->sa_family != af) continue; if (Wflag) printf("%-7.7s", ifa->ifa_name); else printf("%-5.5s", ifa->ifa_name); #define IFA_MTU(ifa) (((struct if_data *)(ifa)->ifa_data)->ifi_mtu) show_stat("lu", 6, IFA_MTU(ifa), IFA_MTU(ifa)); #undef IFA_MTU switch (ifa->ifa_addr->sa_family) { case AF_UNSPEC: printf("%-13.13s ", "none"); printf("%-15.15s ", "none"); break; case AF_INET: { struct sockaddr_in *sin, *mask; sin = (struct sockaddr_in *)ifa->ifa_addr; mask = (struct sockaddr_in *)ifa->ifa_netmask; printf("%-13.13s ", netname(sin->sin_addr.s_addr, mask->sin_addr.s_addr)); printf("%-17.17s ", routename(sin->sin_addr.s_addr)); network = true; break; } #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *mask; sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; mask = (struct sockaddr_in6 *)ifa->ifa_netmask; printf("%-13.13s ", netname6(sin6, &mask->sin6_addr)); getnameinfo(ifa->ifa_addr, ifa->ifa_addr->sa_len, addr_buf, sizeof(addr_buf), 0, 0, NI_NUMERICHOST); printf("%-17.17s ", addr_buf); network = 1; break; } #endif /* INET6 */ case AF_IPX: { struct sockaddr_ipx *sipx; u_long net; char netnum[10]; sipx = (struct sockaddr_ipx *)ifa->ifa_addr; *(union ipx_net *) &net = sipx->sipx_addr.x_net; sprintf(netnum, "%lx", (u_long)ntohl(net)); printf("ipx:%-8s ", netnum); printf("%-17s ", ipx_phost((struct sockaddr *)sipx)); network = 1; break; } case AF_APPLETALK: printf("atalk:%-12.12s ", atalk_print(ifa->ifa_addr, 0x10)); printf("%-11.11s ", atalk_print(ifa->ifa_addr, 0x0b)); break; case AF_LINK: { struct sockaddr_dl *sdl; char *cp, linknum[10]; int n, m; sdl = (struct sockaddr_dl *)ifa->ifa_addr; cp = (char *)LLADDR(sdl); n = sdl->sdl_alen; sprintf(linknum, "", sdl->sdl_index); m = printf("%-13.13s ", linknum); while ((--n >= 0) && (m < 30)) m += printf("%02x%c", *cp++ & 0xff, n > 0 ? ':' : ' '); m = 32 - m; while (m-- > 0) putchar(' '); link = 1; break; } } #define IFA_STAT(s) (((struct if_data *)ifa->ifa_data)->ifi_ ## s) show_stat("lu", 8, IFA_STAT(ipackets), link|network); show_stat("lu", 5, IFA_STAT(ierrors), link); show_stat("lu", 5, IFA_STAT(iqdrops), link); if (bflag) show_stat("lu", 10, IFA_STAT(ibytes), link|network); show_stat("lu", 8, IFA_STAT(opackets), link|network); show_stat("lu", 5, IFA_STAT(oerrors), link); if (bflag) show_stat("lu", 10, IFA_STAT(obytes), link|network); show_stat("NRSlu", 5, IFA_STAT(collisions), link); /* XXXGL: output queue drops */ putchar('\n'); if (!aflag) continue; /* * Print family's multicast addresses. */ for (ifma = next_ifma(ifmap, ifa->ifa_name, ifa->ifa_addr->sa_family); ifma != NULL; ifma = next_ifma(ifma, ifa->ifa_name, ifa->ifa_addr->sa_family)) { const char *fmt = NULL; switch (ifma->ifma_addr->sa_family) { case AF_INET: { struct sockaddr_in *sin; sin = (struct sockaddr_in *)ifma->ifma_addr; fmt = routename(sin->sin_addr.s_addr); break; } #ifdef INET6 case AF_INET6: /* in6_fillscopeid(&msa.in6); */ getnameinfo(ifma->ifma_addr, ifma->ifma_addr->sa_len, addr_buf, sizeof(addr_buf), 0, 0, NI_NUMERICHOST); printf("%*s %s\n", Wflag ? 27 : 25, "", addr_buf); break; #endif /* INET6 */ case AF_LINK: { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)ifma->ifma_addr; switch (sdl->sdl_type) { case IFT_ETHER: case IFT_FDDI: fmt = ether_ntoa( (struct ether_addr *)LLADDR(sdl)); break; } break; } } if (fmt) { printf("%*s %-17.17s", Wflag ? 27 : 25, "", fmt); if (ifma->ifma_addr->sa_family == AF_LINK) { - printf(" %8lu", IFA_STAT(imcasts)); + printf(" %8ju", + (uintmax_t )IFA_STAT(imcasts)); printf("%*s", bflag ? 17 : 6, ""); - printf(" %8lu", IFA_STAT(omcasts)); + printf(" %8ju", + (uintmax_t )IFA_STAT(omcasts)); } putchar('\n'); } ifma = ifma->ifma_next; } } freeifaddrs(ifap); if (aflag) freeifmaddrs(ifmap); } struct iftot { u_long ift_ip; /* input packets */ u_long ift_ie; /* input errors */ u_long ift_id; /* input drops */ u_long ift_op; /* output packets */ u_long ift_oe; /* output errors */ u_long ift_co; /* collisions */ u_long ift_ib; /* input bytes */ u_long ift_ob; /* output bytes */ }; /* * Obtain stats for interface(s). */ static void fill_iftot(struct iftot *st) { struct ifaddrs *ifap, *ifa; bool found = false; if (getifaddrs(&ifap) != 0) err(EX_OSERR, "getifaddrs"); bzero(st, sizeof(*st)); for (ifa = ifap; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; if (interface) { if (strcmp(ifa->ifa_name, interface) == 0) found = true; else continue; } st->ift_ip += IFA_STAT(ipackets); st->ift_ie += IFA_STAT(ierrors); st->ift_id += IFA_STAT(iqdrops); st->ift_ib += IFA_STAT(ibytes); st->ift_op += IFA_STAT(opackets); st->ift_oe += IFA_STAT(oerrors); st->ift_ob += IFA_STAT(obytes); st->ift_co += IFA_STAT(collisions); } if (interface && found == false) err(EX_DATAERR, "interface %s not found", interface); freeifaddrs(ifap); } /* * Set a flag to indicate that a signal from the periodic itimer has been * caught. */ static sig_atomic_t signalled; static void catchalarm(int signo __unused) { signalled = true; } /* * Print a running summary of interface statistics. * Repeat display every interval seconds, showing statistics * collected over that interval. Assumes that interval is non-zero. * First line printed at top of screen is always cumulative. */ static void sidewaysintpr(int interval) { struct iftot ift[2], *new, *old; struct itimerval interval_it; int oldmask, line; new = &ift[0]; old = &ift[1]; fill_iftot(old); (void)signal(SIGALRM, catchalarm); signalled = false; interval_it.it_interval.tv_sec = interval; interval_it.it_interval.tv_usec = 0; interval_it.it_value = interval_it.it_interval; setitimer(ITIMER_REAL, &interval_it, NULL); banner: printf("%17s %14s %16s", "input", interface != NULL ? interface : "(Total)", "output"); putchar('\n'); printf("%10s %5s %5s %10s %10s %5s %10s %5s", "packets", "errs", "idrops", "bytes", "packets", "errs", "bytes", "colls"); if (dflag) printf(" %5.5s", "drops"); putchar('\n'); fflush(stdout); line = 0; loop: if ((noutputs != 0) && (--noutputs == 0)) exit(0); oldmask = sigblock(sigmask(SIGALRM)); while (!signalled) sigpause(0); signalled = false; sigsetmask(oldmask); line++; fill_iftot(new); show_stat("lu", 10, new->ift_ip - old->ift_ip, 1); show_stat("lu", 5, new->ift_ie - old->ift_ie, 1); show_stat("lu", 5, new->ift_id - old->ift_id, 1); show_stat("lu", 10, new->ift_ib - old->ift_ib, 1); show_stat("lu", 10, new->ift_op - old->ift_op, 1); show_stat("lu", 5, new->ift_oe - old->ift_oe, 1); show_stat("lu", 10, new->ift_ob - old->ift_ob, 1); show_stat("NRSlu", 5, new->ift_co - old->ift_co, 1); /* XXXGL: output queue drops */ putchar('\n'); fflush(stdout); if (new == &ift[0]) { new = &ift[1]; old = &ift[0]; } else { new = &ift[0]; old = &ift[1]; } if (line == 21) goto banner; else goto loop; /* NOTREACHED */ }