Index: head/sys/dev/lmc/if_lmc.c =================================================================== --- head/sys/dev/lmc/if_lmc.c (revision 170034) +++ head/sys/dev/lmc/if_lmc.c (revision 170035) @@ -1,7031 +1,7031 @@ /* * $FreeBSD$ * * Copyright (c) 2002-2004 David Boggs. * All rights reserved. * * BSD License: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * GNU General Public License: * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 * Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Description: * * This is an open-source Unix device driver for PCI-bus WAN interface cards. * It sends and receives packets in HDLC frames over synchronous links. * A generic PC plus Unix plus some SBE/LMC cards makes an OPEN router. * This driver works with FreeBSD, NetBSD, OpenBSD, BSD/OS and Linux. * It has been tested on i386 (32-bit little-end), Sparc (64-bit big-end), * and Alpha (64-bit little-end) architectures. * * History and Authors: * * Ron Crane had the neat idea to use a Fast Ethernet chip as a PCI * interface and add an Ethernet-to-HDLC gate array to make a WAN card. * David Boggs designed the Ethernet-to-HDLC gate arrays and PC cards. * We did this at our company, LAN Media Corporation (LMC). - * SBE Corp aquired LMC and continues to make the cards. + * SBE Corp acquired LMC and continues to make the cards. * * Since the cards use Tulip Ethernet chips, we started with Matt Thomas' * ubiquitous "de" driver. Michael Graff stripped out the Ethernet stuff * and added HSSI stuff. Basil Gunn ported it to Solaris (lost) and * Rob Braun ported it to Linux. Andrew Stanley-Jones added support * for three more cards and wrote the first version of lmcconfig. 
* During 2002-5 David Boggs rewrote it and now feels responsible for it. * * Responsible Individual: * * Send bug reports and improvements to . */ #ifdef __FreeBSD__ # include /* OS version */ # define IFNET 1 # include "opt_inet.h" /* INET */ # include "opt_inet6.h" /* INET6 */ # include "opt_netgraph.h" /* NETGRAPH */ # ifdef HAVE_KERNEL_OPTION_HEADERS # include "opt_device_polling.h" /* DEVICE_POLLING */ # endif # ifndef NETGRAPH # define NETGRAPH 0 # endif # define P2P 0 /* not in FreeBSD */ # if (__FreeBSD_version >= 500000) # define NSPPP 1 /* No count devices in FreeBSD 5 */ # include "opt_bpf.h" /* DEV_BPF */ # define NBPFILTER DEV_BPF # else /* FreeBSD-4 */ # include "sppp.h" /* NSPPP */ # include "bpf.h" /* NBPF */ # define NBPFILTER NBPF # endif # define GEN_HDLC 0 /* not in FreeBSD */ # # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # if (__FreeBSD_version >= 700000) # include # endif # if (__FreeBSD_version >= 500000) # include # include # else /* FreeBSD-4 */ # include # include # endif # if NETGRAPH # include # include # endif # if (INET || INET6) # include # include # endif # if NSPPP # include # endif # if NBPFILTER # include # endif /* and finally... */ # include #endif /*__FreeBSD__*/ #ifdef __NetBSD__ # include /* OS version */ # define IFNET 1 # include "opt_inet.h" /* INET6, INET */ # define NETGRAPH 0 /* not in NetBSD */ # include "sppp.h" /* NSPPP */ # define P2P 0 /* not in NetBSD */ # include "opt_altq_enabled.h" /* ALTQ */ # include "bpfilter.h" /* NBPFILTER */ # define GEN_HDLC 0 /* not in NetBSD */ # # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # if (__NetBSD_Version__ >= 106000000) # include # else # include # endif # if (INET || INET6) # include # include # endif # if NSPPP # if (__NetBSD_Version__ >= 106000000) # include # else # include # endif # endif # if NBPFILTER # include # endif /* and finally... */ # include "if_lmc.h" #endif /*__NetBSD__*/ #ifdef __OpenBSD__ # include /* OS version */ # define IFNET 1 /* -DINET is passed on the compiler command line */ /* -DINET6 is passed on the compiler command line */ # define NETGRAPH 0 /* not in OpenBSD */ # include "sppp.h" /* NSPPP */ # define P2P 0 /* not in OpenBSD */ /* -DALTQ is passed on the compiler command line */ # include "bpfilter.h" /* NBPFILTER */ # define GEN_HDLC 0 /* not in OpenBSD */ # # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # if (OpenBSD >= 200206) # include # else # include # endif # if (INET || INET6) # include # include # endif # if NSPPP # include # endif # if NBPFILTER # include # endif /* and finally... 
*/ # include "if_lmc.h" #endif /*__OpenBSD__*/ #ifdef __bsdi__ # include /* OS version */ # define IFNET 1 /* -DINET is passed on the compiler command line */ /* -DINET6 is passed on the compiler command line */ # define NETGRAPH 0 /* not in BSD/OS */ # define NSPPP 0 /* not in BSD/OS */ /* -DPPP is passed on the compiler command line */ /* -DCISCO_HDLC is passed on the compiler command line */ /* -DFR is passed on the compiler command line */ # if (PPP || CISCO_HDLC || FR) # define P2P 1 # else # define P2P 0 # endif # define ALTQ 0 /* not in BSD/OS */ # include "bpfilter.h" /* NBPFILTER */ # define GEN_HDLC 0 /* not in BSD/OS */ # # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # if (INET || INET6) # include # include # endif # if P2P # include # include # endif # if NBPFILTER # include # endif /* and finally... */ # include "if_lmc.h" #endif /*__bsdi__*/ #ifdef __linux__ # include # if (CONFIG_HDLC || CONFIG_HDLC_MODULE) # define GEN_HDLC 1 # else # define GEN_HDLC 0 # endif # define IFNET 0 /* different in Linux */ # define NETGRAPH 0 /* not in Linux */ # define NSPPP 0 /* different in Linux */ # define P2P 0 /* not in Linux */ # define ALTQ 0 /* different in Linux */ # define NBPFILTER 0 /* different in Linux */ # # include # include # include # include # if GEN_HDLC # include # endif /* and finally... */ # include "if_lmc.h" #endif /* __linux__ */ /* The SROM is a generic 93C46 serial EEPROM (64 words by 16 bits). */ /* Data is set up before the RISING edge of CLK; CLK is parked low. */ static void shift_srom_bits(softc_t *sc, u_int32_t data, u_int32_t len) { u_int32_t csr = READ_CSR(TLP_SROM_MII); for (; len>0; len--) { /* MSB first */ if (data & (1<<(len-1))) csr |= TLP_SROM_DIN; /* DIN setup */ else csr &= ~TLP_SROM_DIN; /* DIN setup */ WRITE_CSR(TLP_SROM_MII, csr); csr |= TLP_SROM_CLK; /* CLK rising edge */ WRITE_CSR(TLP_SROM_MII, csr); csr &= ~TLP_SROM_CLK; /* CLK falling edge */ WRITE_CSR(TLP_SROM_MII, csr); } } /* Data is sampled on the RISING edge of CLK; CLK is parked low. */ static u_int16_t read_srom(softc_t *sc, u_int8_t addr) { int i; u_int32_t csr; u_int16_t data; /* Enable SROM access. */ csr = (TLP_SROM_SEL | TLP_SROM_RD | TLP_MII_MDOE); WRITE_CSR(TLP_SROM_MII, csr); /* CS rising edge prepares SROM for a new cycle. */ csr |= TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* assert CS */ shift_srom_bits(sc, 6, 4); /* issue read cmd */ shift_srom_bits(sc, addr, 6); /* issue address */ for (data=0, i=16; i>=0; i--) /* read ->17<- bits of data */ { /* MSB first */ csr = READ_CSR(TLP_SROM_MII); /* DOUT sampled */ data = (data<<1) | ((csr & TLP_SROM_DOUT) ? 1:0); csr |= TLP_SROM_CLK; /* CLK rising edge */ WRITE_CSR(TLP_SROM_MII, csr); csr &= ~TLP_SROM_CLK; /* CLK falling edge */ WRITE_CSR(TLP_SROM_MII, csr); } /* Disable SROM access. */ WRITE_CSR(TLP_SROM_MII, TLP_MII_MDOE); return data; } /* The SROM is formatted by the mfgr and should NOT be written! */ /* But lmcconfig can rewrite it in case it gets overwritten somehow. */ /* IOCTL SYSCALL: can sleep. */ static void write_srom(softc_t *sc, u_int8_t addr, u_int16_t data) { u_int32_t csr; int i; /* Enable SROM access. */ csr = (TLP_SROM_SEL | TLP_SROM_RD | TLP_MII_MDOE); WRITE_CSR(TLP_SROM_MII, csr); /* Issue write-enable command. 
*/ csr |= TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* assert CS */ shift_srom_bits(sc, 4, 4); /* issue write enable cmd */ shift_srom_bits(sc, 63, 6); /* issue address */ csr &= ~TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* deassert CS */ /* Issue erase command. */ csr |= TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* assert CS */ shift_srom_bits(sc, 7, 4); /* issue erase cmd */ shift_srom_bits(sc, addr, 6); /* issue address */ csr &= ~TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* deassert CS */ /* Issue write command. */ csr |= TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* assert CS */ for (i=0; i<10; i++) /* 100 ms max wait */ if ((READ_CSR(TLP_SROM_MII) & TLP_SROM_DOUT)==0) SLEEP(10000); shift_srom_bits(sc, 5, 4); /* issue write cmd */ shift_srom_bits(sc, addr, 6); /* issue address */ shift_srom_bits(sc, data, 16); /* issue data */ csr &= ~TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* deassert CS */ /* Issue write-disable command. */ csr |= TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* assert CS */ for (i=0; i<10; i++) /* 100 ms max wait */ if ((READ_CSR(TLP_SROM_MII) & TLP_SROM_DOUT)==0) SLEEP(10000); shift_srom_bits(sc, 4, 4); /* issue write disable cmd */ shift_srom_bits(sc, 0, 6); /* issue address */ csr &= ~TLP_SROM_CS; WRITE_CSR(TLP_SROM_MII, csr); /* deassert CS */ /* Disable SROM access. */ WRITE_CSR(TLP_SROM_MII, TLP_MII_MDOE); } /* Not all boards have BIOS roms. */ /* The BIOS ROM is an AMD 29F010 1Mbit (128K by 8) EEPROM. */ static u_int8_t read_bios(softc_t *sc, u_int32_t addr) { u_int32_t srom_mii; /* Load the BIOS rom address register. */ WRITE_CSR(TLP_BIOS_ROM, addr); /* Enable the BIOS rom. */ srom_mii = TLP_BIOS_SEL | TLP_BIOS_RD | TLP_MII_MDOE; WRITE_CSR(TLP_SROM_MII, srom_mii); /* Wait at least 20 PCI cycles. */ DELAY(20); /* Read the BIOS rom data. */ srom_mii = READ_CSR(TLP_SROM_MII); /* Disable the BIOS rom. */ WRITE_CSR(TLP_SROM_MII, TLP_MII_MDOE); return (u_int8_t)srom_mii & 0xFF; } static void write_bios_phys(softc_t *sc, u_int32_t addr, u_int8_t data) { u_int32_t srom_mii; /* Load the BIOS rom address register. */ WRITE_CSR(TLP_BIOS_ROM, addr); /* Enable the BIOS rom. */ srom_mii = TLP_BIOS_SEL | TLP_BIOS_WR | TLP_MII_MDOE; /* Load the data into the data register. */ srom_mii = (srom_mii & 0xFFFFFF00) | (data & 0xFF); WRITE_CSR(TLP_SROM_MII, srom_mii); /* Wait at least 20 PCI cycles. */ DELAY(20); /* Disable the BIOS rom. */ WRITE_CSR(TLP_SROM_MII, TLP_MII_MDOE); } /* IOCTL SYSCALL: can sleep. */ static void write_bios(softc_t *sc, u_int32_t addr, u_int8_t data) { u_int8_t read_data; /* this sequence enables writing */ write_bios_phys(sc, 0x5555, 0xAA); write_bios_phys(sc, 0x2AAA, 0x55); write_bios_phys(sc, 0x5555, 0xA0); write_bios_phys(sc, addr, data); /* Wait for the write operation to complete. */ for (;;) /* interruptible syscall */ { for (;;) { read_data = read_bios(sc, addr); if ((read_data & 0x80) == (data & 0x80)) break; if (read_data & 0x20) { /* Data sheet says read it again. */ read_data = read_bios(sc, addr); if ((read_data & 0x80) == (data & 0x80)) break; if (DRIVER_DEBUG) printf("%s: write_bios() failed; rom addr=0x%x\n", NAME_UNIT, addr); return; } } read_data = read_bios(sc, addr); if (read_data == data) break; } } /* IOCTL SYSCALL: can sleep. 
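This note describes erase_bios(), which follows the illustrative sketch below. */

/* A minimal sketch, NOT part of the driver: reprogram the first 'len'
 * bytes of the 29F010 with write_bios() and erase_bios().  The helper
 * name is hypothetical, and since erase_bios() erases the whole chip,
 * bytes beyond 'len' are left erased (0xFF). */
static void erase_bios(softc_t *sc); /* forward decl; defined just below */
static void write_bios_buf(softc_t *sc, const u_int8_t *buf, u_int32_t len)
{
  u_int32_t addr;
  erase_bios(sc);                    /* chip erase: all bytes -> 0xFF */
  for (addr = 0; addr < len; addr++)
    write_bios(sc, addr, buf[addr]); /* program one byte at a time */
}

/*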
*/ static void erase_bios(softc_t *sc) { unsigned char read_data; /* This sequence enables erasing: */ write_bios_phys(sc, 0x5555, 0xAA); write_bios_phys(sc, 0x2AAA, 0x55); write_bios_phys(sc, 0x5555, 0x80); write_bios_phys(sc, 0x5555, 0xAA); write_bios_phys(sc, 0x2AAA, 0x55); write_bios_phys(sc, 0x5555, 0x10); /* Wait for the erase operation to complete. */ for (;;) /* interruptible syscall */ { for (;;) { read_data = read_bios(sc, 0); if (read_data & 0x80) break; if (read_data & 0x20) { /* Data sheet says read it again. */ read_data = read_bios(sc, 0); if (read_data & 0x80) break; if (DRIVER_DEBUG) printf("%s: erase_bios() failed\n", NAME_UNIT); return; } } read_data = read_bios(sc, 0); if (read_data == 0xFF) break; } } /* MDIO is 3-stated between transactions. */ /* MDIO is set up before the RISING edge of MDC; MDC is parked low. */ static void shift_mii_bits(softc_t *sc, u_int32_t data, u_int32_t len) { u_int32_t csr = READ_CSR(TLP_SROM_MII); for (; len>0; len--) { /* MSB first */ if (data & (1<<(len-1))) csr |= TLP_MII_MDOUT; /* MDOUT setup */ else csr &= ~TLP_MII_MDOUT; /* MDOUT setup */ WRITE_CSR(TLP_SROM_MII, csr); csr |= TLP_MII_MDC; /* MDC rising edge */ WRITE_CSR(TLP_SROM_MII, csr); csr &= ~TLP_MII_MDC; /* MDC falling edge */ WRITE_CSR(TLP_SROM_MII, csr); } } /* The specification for the MII is IEEE Std 802.3 clause 22. */ /* MDIO is sampled on the RISING edge of MDC; MDC is parked low. */ static u_int16_t read_mii(softc_t *sc, u_int8_t regad) { int i; u_int32_t csr; u_int16_t data = 0; WRITE_CSR(TLP_SROM_MII, TLP_MII_MDOUT); shift_mii_bits(sc, 0xFFFFF, 20); /* preamble */ shift_mii_bits(sc, 0xFFFFF, 20); /* preamble */ shift_mii_bits(sc, 1, 2); /* start symbol */ shift_mii_bits(sc, 2, 2); /* read op */ shift_mii_bits(sc, 0, 5); /* phyad=0 */ shift_mii_bits(sc, regad, 5); /* regad */ csr = READ_CSR(TLP_SROM_MII); csr |= TLP_MII_MDOE; WRITE_CSR(TLP_SROM_MII, csr); shift_mii_bits(sc, 0, 2); /* turn-around */ for (i=15; i>=0; i--) /* data */ { /* MSB first */ csr = READ_CSR(TLP_SROM_MII); /* MDIN sampled */ data = (data<<1) | ((csr & TLP_MII_MDIN) ? 1:0); csr |= TLP_MII_MDC; /* MDC rising edge */ WRITE_CSR(TLP_SROM_MII, csr); csr &= ~TLP_MII_MDC; /* MDC falling edge */ WRITE_CSR(TLP_SROM_MII, csr); } return data; } static void write_mii(softc_t *sc, u_int8_t regad, u_int16_t data) { WRITE_CSR(TLP_SROM_MII, TLP_MII_MDOUT); shift_mii_bits(sc, 0xFFFFF, 20); /* preamble */ shift_mii_bits(sc, 0xFFFFF, 20); /* preamble */ shift_mii_bits(sc, 1, 2); /* start symbol */ shift_mii_bits(sc, 1, 2); /* write op */ shift_mii_bits(sc, 0, 5); /* phyad=0 */ shift_mii_bits(sc, regad, 5); /* regad */ shift_mii_bits(sc, 2, 2); /* turn-around */ shift_mii_bits(sc, data, 16); /* data */ WRITE_CSR(TLP_SROM_MII, TLP_MII_MDOE); if (regad == 16) sc->led_state = data; /* a small optimization */ } static void set_mii16_bits(softc_t *sc, u_int16_t bits) { u_int16_t mii16 = read_mii(sc, 16); mii16 |= bits; write_mii(sc, 16, mii16); } static void clr_mii16_bits(softc_t *sc, u_int16_t bits) { u_int16_t mii16 = read_mii(sc, 16); mii16 &= ~bits; write_mii(sc, 16, mii16); } static void set_mii17_bits(softc_t *sc, u_int16_t bits) { u_int16_t mii17 = read_mii(sc, 17); mii17 |= bits; write_mii(sc, 17, mii17); } static void clr_mii17_bits(softc_t *sc, u_int16_t bits) { u_int16_t mii17 = read_mii(sc, 17); mii17 &= ~bits; write_mii(sc, 17, mii17); } /* * Watchdog code is more readable if it refreshes LEDs * once a second whether they need it or not. 
* But MII refs take 150 uSecs each, so remember the last value * written to MII16 and avoid LED writes that do nothing. */ static void led_off(softc_t *sc, u_int16_t led) { if ((led & sc->led_state) == led) return; set_mii16_bits(sc, led); } static void led_on(softc_t *sc, u_int16_t led) { if ((led & sc->led_state) == 0) return; clr_mii16_bits(sc, led); } static void led_inv(softc_t *sc, u_int16_t led) { u_int16_t mii16 = read_mii(sc, 16); mii16 ^= led; write_mii(sc, 16, mii16); } /* * T1 & T3 framer registers are accessed through MII regs 17 & 18. * Write the address to MII reg 17 then R/W data through MII reg 18. * The hardware interface is an Intel-style 8-bit muxed A/D bus. */ static void write_framer(softc_t *sc, u_int16_t addr, u_int8_t data) { write_mii(sc, 17, addr); write_mii(sc, 18, data); } static u_int8_t read_framer(softc_t *sc, u_int16_t addr) { write_mii(sc, 17, addr); return (u_int8_t)read_mii(sc, 18); } /* Tulip's hardware implementation of General Purpose IO * (GPIO) pins makes life difficult for software. * Bits 7-0 in the Tulip GPIO CSR are used for two purposes * depending on the state of bit 8. * If bit 8 is 0 then bits 7-0 are "data" bits. * If bit 8 is 1 then bits 7-0 are "direction" bits. * If a direction bit is one, the data bit is an output. * The problem is that the direction bits are WRITE-ONLY. * Software must remember the direction bits in a shadow copy * (sc->gpio_dir) in order to change some but not all of the bits. * All accesses to the Tulip GPIO register use these five procedures. */ static void make_gpio_input(softc_t *sc, u_int32_t bits) { sc->gpio_dir &= ~bits; WRITE_CSR(TLP_GPIO, TLP_GPIO_DIR | (sc->gpio_dir)); } static void make_gpio_output(softc_t *sc, u_int32_t bits) { sc->gpio_dir |= bits; WRITE_CSR(TLP_GPIO, TLP_GPIO_DIR | (sc->gpio_dir)); } static u_int32_t read_gpio(softc_t *sc) { return READ_CSR(TLP_GPIO); } static void set_gpio_bits(softc_t *sc, u_int32_t bits) { WRITE_CSR(TLP_GPIO, (read_gpio(sc) | bits) & 0xFF); } static void clr_gpio_bits(softc_t *sc, u_int32_t bits) { WRITE_CSR(TLP_GPIO, (read_gpio(sc) & ~bits) & 0xFF); } /* Reset ALL of the flip-flops in the gate array to zero. */ /* This does NOT change the gate array programming. */ /* Called during initialization so it must not sleep. */ static void reset_xilinx(softc_t *sc) { /* Drive RESET low to force initialization. */ clr_gpio_bits(sc, GPIO_RESET); make_gpio_output(sc, GPIO_RESET); /* Hold RESET low for more than 10 uSec. */ DELAY(50); /* Done with RESET; make it an input. */ make_gpio_input(sc, GPIO_RESET); } /* Load Xilinx gate array program from on-board rom. */ /* This changes the gate array programming. */ /* IOCTL SYSCALL: can sleep. */ static void load_xilinx_from_rom(softc_t *sc) { int i; /* Drive MODE low to load from ROM rather than GPIO. */ clr_gpio_bits(sc, GPIO_MODE); make_gpio_output(sc, GPIO_MODE); /* Drive DP & RESET low to force configuration. */ clr_gpio_bits(sc, GPIO_RESET | GPIO_DP); make_gpio_output(sc, GPIO_RESET | GPIO_DP); /* Hold RESET & DP low for more than 10 uSec. */ DELAY(50); /* Done with RESET & DP; make them inputs. */ make_gpio_input(sc, GPIO_DP | GPIO_RESET); /* BUSY-WAIT for Xilinx chip to configure itself from ROM bits. */ for (i=0; i<100; i++) /* 1 sec max delay */ if ((read_gpio(sc) & GPIO_DP) == 0) SLEEP(10000); /* Done with MODE; make it an input. */ make_gpio_input(sc, GPIO_MODE); } /* Load the Xilinx gate array program from userland bits. */ /* This changes the gate array programming. */ /* IOCTL SYSCALL: can sleep. 
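This note describes load_xilinx_from_file(), which follows the illustrative sketch below. */

/* A minimal sketch, NOT part of the driver, of why the sc->gpio_dir
 * shadow copy matters: the direction bits are write-only, and every
 * write of TLP_GPIO_DIR sets the direction of ALL eight pins at once.
 * Writing only the newest bit would silently turn the other outputs
 * back into inputs; make_gpio_output() merges into the shadow instead.
 * The helper name is hypothetical. */
static void gpio_two_outputs_example(softc_t *sc)
{
  make_gpio_output(sc, GPIO_MODE);  /* sc->gpio_dir |= GPIO_MODE  */
  make_gpio_output(sc, GPIO_RESET); /* GPIO_MODE remains an output */
}

/*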
*/ static int load_xilinx_from_file(softc_t *sc, char *addr, u_int32_t len) { char *data; int i, j, error; /* Get some pages to hold the Xilinx bits; biggest file is < 6 KB. */ if (len > 8192) return EFBIG; /* too big */ data = malloc(len, M_DEVBUF, M_WAITOK); if (data == NULL) return ENOMEM; /* Copy the Xilinx bits from userland. */ if ((error = copyin(addr, data, len))) { free(data, M_DEVBUF); return error; } /* Drive MODE high to load from GPIO rather than ROM. */ set_gpio_bits(sc, GPIO_MODE); make_gpio_output(sc, GPIO_MODE); /* Drive DP & RESET low to force configuration. */ clr_gpio_bits(sc, GPIO_RESET | GPIO_DP); make_gpio_output(sc, GPIO_RESET | GPIO_DP); /* Hold RESET & DP low for more than 10 uSec. */ DELAY(50); /* Done with RESET & DP; make them inputs. */ make_gpio_input(sc, GPIO_RESET | GPIO_DP); /* BUSY-WAIT for Xilinx chip to clear its config memory. */ make_gpio_input(sc, GPIO_INIT); for (i=0; i<10000; i++) /* 1 sec max delay */ if ((read_gpio(sc) & GPIO_INIT)==0) SLEEP(10000); /* Configure CLK and DATA as outputs. */ set_gpio_bits(sc, GPIO_CLK); /* park CLK high */ make_gpio_output(sc, GPIO_CLK | GPIO_DATA); /* Write bits to Xilinx; CLK is parked HIGH. */ /* DATA is set up before the RISING edge of CLK. */ for (i=0; istatus.card_type == TLP_CSID_SSI) { if (synth->prescale == 9) /* divide by 512 */ set_mii17_bits(sc, MII17_SSI_PRESCALE); else /* divide by 32 */ clr_mii17_bits(sc, MII17_SSI_PRESCALE); } clr_gpio_bits(sc, GPIO_DATA | GPIO_CLK); make_gpio_output(sc, GPIO_DATA | GPIO_CLK); /* SYNTH is a low-true chip enable for the AV9110 chip. */ set_gpio_bits(sc, GPIO_SSI_SYNTH); make_gpio_output(sc, GPIO_SSI_SYNTH); clr_gpio_bits(sc, GPIO_SSI_SYNTH); /* Serially shift the command into the AV9110 chip. */ shift_synth_bits(sc, synth->n, 7); shift_synth_bits(sc, synth->m, 7); shift_synth_bits(sc, synth->v, 1); shift_synth_bits(sc, synth->x, 2); shift_synth_bits(sc, synth->r, 2); shift_synth_bits(sc, 0x16, 5); /* enable clk/x output */ /* SYNTH (chip enable) going high ends the command. */ set_gpio_bits(sc, GPIO_SSI_SYNTH); make_gpio_input(sc, GPIO_SSI_SYNTH); /* Stop driving serial-related signals; pullups/pulldowns take over. */ make_gpio_input(sc, GPIO_DATA | GPIO_CLK); /* remember the new synthesizer parameters */ if (&sc->config.synth != synth) sc->config.synth = *synth; } /* Write a command to the DAC controlling the VCXO on some T3 adapters. */ /* The DAC is a TI-TLV5636: 12-bit resolution and a serial interface. */ /* DATA is set up before the FALLING edge of CLK. CLK is parked HIGH. */ static void write_dac(softc_t *sc, u_int16_t data) { int i; /* Prepare to use DATA and CLK. */ set_gpio_bits(sc, GPIO_DATA | GPIO_CLK); make_gpio_output(sc, GPIO_DATA | GPIO_CLK); /* High-to-low transition prepares DAC for new value. */ set_gpio_bits(sc, GPIO_T3_DAC); make_gpio_output(sc, GPIO_T3_DAC); clr_gpio_bits(sc, GPIO_T3_DAC); /* Serially shift command bits into DAC. */ for (i=0; i<16; i++) { /* MSB first */ if ((data & (1<<(15-i))) != 0) set_gpio_bits(sc, GPIO_DATA); /* DATA setup */ else clr_gpio_bits(sc, GPIO_DATA); /* DATA setup */ clr_gpio_bits(sc, GPIO_CLK); /* CLK falling edge */ set_gpio_bits(sc, GPIO_CLK); /* CLK rising edge */ } /* Done with DAC; make it an input; loads new value into DAC. */ set_gpio_bits(sc, GPIO_T3_DAC); make_gpio_input(sc, GPIO_T3_DAC); /* Stop driving serial-related signals; pullups/pulldowns take over. */ make_gpio_input(sc, GPIO_DATA | GPIO_CLK); } /* begin HSSI card code */ /* Must not sleep. 
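This note describes hssi_config(), which follows the illustrative sketch below. */

/* A minimal sketch, NOT part of the driver: program the AV9110 to the
 * 52.000 Mb/s HSSI default that hssi_config() below establishes.  It
 * assumes the 'struct synth' layout used by sc->config.synth; the
 * helper name is hypothetical. */
static void synth_hssi_default_example(softc_t *sc)
{
  struct synth s;
  s.n = 52; s.m = 5;           /* divider values from hssi_config() */
  s.v = 0; s.x = 0; s.r = 0;
  s.prescale = 2;
  write_synth(sc, &s);         /* serially shifts n,m,v,x,r into the chip */
}

/*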
*/ static void hssi_config(softc_t *sc) { if (sc->status.card_type == 0) { /* defaults */ sc->status.card_type = READ_PCI_CFG(sc, TLP_CSID); sc->config.crc_len = CFG_CRC_16; sc->config.loop_back = CFG_LOOP_NONE; sc->config.tx_clk_src = CFG_CLKMUX_ST; sc->config.dte_dce = CFG_DTE; sc->config.synth.n = 52; /* 52.000 Mbs */ sc->config.synth.m = 5; sc->config.synth.v = 0; sc->config.synth.x = 0; sc->config.synth.r = 0; sc->config.synth.prescale = 2; } /* set CRC length */ if (sc->config.crc_len == CFG_CRC_32) set_mii16_bits(sc, MII16_HSSI_CRC32); else clr_mii16_bits(sc, MII16_HSSI_CRC32); /* Assert pin LA in HSSI conn: ask modem for local loop. */ if (sc->config.loop_back == CFG_LOOP_LL) set_mii16_bits(sc, MII16_HSSI_LA); else clr_mii16_bits(sc, MII16_HSSI_LA); /* Assert pin LB in HSSI conn: ask modem for remote loop. */ if (sc->config.loop_back == CFG_LOOP_RL) set_mii16_bits(sc, MII16_HSSI_LB); else clr_mii16_bits(sc, MII16_HSSI_LB); if (sc->status.card_type == TLP_CSID_HSSI) { /* set TXCLK src */ if (sc->config.tx_clk_src == CFG_CLKMUX_ST) set_gpio_bits(sc, GPIO_HSSI_TXCLK); else clr_gpio_bits(sc, GPIO_HSSI_TXCLK); make_gpio_output(sc, GPIO_HSSI_TXCLK); } else if (sc->status.card_type == TLP_CSID_HSSIc) { /* cPCI HSSI rev C has extra features */ /* Set TXCLK source. */ u_int16_t mii16 = read_mii(sc, 16); mii16 &= ~MII16_HSSI_CLKMUX; mii16 |= (sc->config.tx_clk_src&3)<<13; write_mii(sc, 16, mii16); /* cPCI HSSI implements loopback towards the net. */ if (sc->config.loop_back == CFG_LOOP_LINE) set_mii16_bits(sc, MII16_HSSI_LOOP); else clr_mii16_bits(sc, MII16_HSSI_LOOP); /* Set DTE/DCE mode. */ if (sc->config.dte_dce == CFG_DCE) set_gpio_bits(sc, GPIO_HSSI_DCE); else clr_gpio_bits(sc, GPIO_HSSI_DCE); make_gpio_output(sc, GPIO_HSSI_DCE); /* Program the synthesized oscillator. */ write_synth(sc, &sc->config.synth); } } static void hssi_ident(softc_t *sc) { } /* Called once a second; must not sleep. */ static int hssi_watchdog(softc_t *sc) { u_int16_t mii16 = read_mii(sc, 16) & MII16_HSSI_MODEM; int link_status = STATUS_UP; led_inv(sc, MII16_HSSI_LED_UL); /* Software is alive. */ led_on(sc, MII16_HSSI_LED_LL); /* always on (SSI cable) */ /* Check the transmit clock. */ if (sc->status.tx_speed == 0) { led_on(sc, MII16_HSSI_LED_UR); link_status = STATUS_DOWN; } else led_off(sc, MII16_HSSI_LED_UR); /* Is the modem ready? */ if ((mii16 & MII16_HSSI_CA) == 0) { led_off(sc, MII16_HSSI_LED_LR); link_status = STATUS_DOWN; } else led_on(sc, MII16_HSSI_LED_LR); /* Print the modem control signals if they changed. */ if ((DRIVER_DEBUG) && (mii16 != sc->last_mii16)) { char *on = "ON ", *off = "OFF"; printf("%s: TA=%s CA=%s LA=%s LB=%s LC=%s TM=%s\n", NAME_UNIT, (mii16 & MII16_HSSI_TA) ? on : off, (mii16 & MII16_HSSI_CA) ? on : off, (mii16 & MII16_HSSI_LA) ? on : off, (mii16 & MII16_HSSI_LB) ? on : off, (mii16 & MII16_HSSI_LC) ? on : off, (mii16 & MII16_HSSI_TM) ? on : off); } /* SNMP one-second-report */ sc->status.snmp.hssi.sigs = mii16 & MII16_HSSI_MODEM; /* Remember this state until next time. */ sc->last_mii16 = mii16; /* If a loop back is in effect, link status is UP */ if (sc->config.loop_back != CFG_LOOP_NONE) link_status = STATUS_UP; return link_status; } /* IOCTL SYSCALL: can sleep (but doesn't). 
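This note describes hssi_ioctl(), which follows the illustrative sketch below. */

/* A minimal sketch, NOT part of the driver: read-modify-write helpers
 * for framer registers in the style of set_mii16_bits()/clr_mii16_bits().
 * The card code below open-codes this pattern, e.g.
 * write_framer(sc, T3CSR_CTL1, read_framer(sc, T3CSR_CTL1) | CTL1_3LOOP).
 * The helper names are hypothetical. */
static void set_framer_bits(softc_t *sc, u_int16_t addr, u_int8_t bits)
{
  write_framer(sc, addr, read_framer(sc, addr) | bits);
}

static void clr_framer_bits(softc_t *sc, u_int16_t addr, u_int8_t bits)
{
  write_framer(sc, addr, read_framer(sc, addr) & ~bits);
}

/*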
*/ static int hssi_ioctl(softc_t *sc, struct ioctl *ioctl) { int error = 0; if (ioctl->cmd == IOCTL_SNMP_SIGS) { u_int16_t mii16 = read_mii(sc, 16); mii16 &= ~MII16_HSSI_MODEM; mii16 |= (MII16_HSSI_MODEM & ioctl->data); write_mii(sc, 16, mii16); } else if (ioctl->cmd == IOCTL_SET_STATUS) { if (ioctl->data != 0) set_mii16_bits(sc, MII16_HSSI_TA); else clr_mii16_bits(sc, MII16_HSSI_TA); } else error = EINVAL; return error; } /* begin DS3 card code */ /* Must not sleep. */ static void t3_config(softc_t *sc) { int i; u_int8_t ctl1; if (sc->status.card_type == 0) { /* defaults */ sc->status.card_type = TLP_CSID_T3; sc->config.crc_len = CFG_CRC_16; sc->config.loop_back = CFG_LOOP_NONE; sc->config.format = CFG_FORMAT_T3CPAR; sc->config.cable_len = 10; /* meters */ sc->config.scrambler = CFG_SCRAM_DL_KEN; sc->config.tx_clk_src = CFG_CLKMUX_INT; /* Center the VCXO -- get within 20 PPM of 44736000. */ write_dac(sc, 0x9002); /* set Vref = 2.048 volts */ write_dac(sc, 2048); /* range is 0..4095 */ } /* Set cable length. */ if (sc->config.cable_len > 30) clr_mii16_bits(sc, MII16_DS3_ZERO); else set_mii16_bits(sc, MII16_DS3_ZERO); /* Set payload scrambler polynomial. */ if (sc->config.scrambler == CFG_SCRAM_LARS) set_mii16_bits(sc, MII16_DS3_POLY); else clr_mii16_bits(sc, MII16_DS3_POLY); /* Set payload scrambler on/off. */ if (sc->config.scrambler == CFG_SCRAM_OFF) clr_mii16_bits(sc, MII16_DS3_SCRAM); else set_mii16_bits(sc, MII16_DS3_SCRAM); /* Set CRC length. */ if (sc->config.crc_len == CFG_CRC_32) set_mii16_bits(sc, MII16_DS3_CRC32); else clr_mii16_bits(sc, MII16_DS3_CRC32); /* Loopback towards host thru the line interface. */ if (sc->config.loop_back == CFG_LOOP_OTHER) set_mii16_bits(sc, MII16_DS3_TRLBK); else clr_mii16_bits(sc, MII16_DS3_TRLBK); /* Loopback towards network thru the line interface. */ if (sc->config.loop_back == CFG_LOOP_LINE) set_mii16_bits(sc, MII16_DS3_LNLBK); else if (sc->config.loop_back == CFG_LOOP_DUAL) set_mii16_bits(sc, MII16_DS3_LNLBK); else clr_mii16_bits(sc, MII16_DS3_LNLBK); /* Configure T3 framer chip; write EVERY writeable register. */ ctl1 = CTL1_SER | CTL1_XTX; if (sc->config.loop_back == CFG_LOOP_INWARD) ctl1 |= CTL1_3LOOP; if (sc->config.loop_back == CFG_LOOP_DUAL) ctl1 |= CTL1_3LOOP; if (sc->config.format == CFG_FORMAT_T3M13) ctl1 |= CTL1_M13MODE; write_framer(sc, T3CSR_CTL1, ctl1); write_framer(sc, T3CSR_TX_FEAC, CTL5_EMODE); write_framer(sc, T3CSR_CTL8, CTL8_FBEC); write_framer(sc, T3CSR_CTL12, CTL12_DLCB1 | CTL12_C21 | CTL12_MCB1); write_framer(sc, T3CSR_DBL_FEAC, 0); write_framer(sc, T3CSR_CTL14, CTL14_RGCEN | CTL14_TGCEN); write_framer(sc, T3CSR_INTEN, 0); write_framer(sc, T3CSR_CTL20, CTL20_CVEN); /* Clear error counters and latched error bits */ /* that may have happened while initializing. */ for (i=0; i<21; i++) read_framer(sc, i); } static void t3_ident(softc_t *sc) { printf(", TXC03401 rev B"); } /* Called once a second; must not sleep. */ static int t3_watchdog(softc_t *sc) { u_int16_t CV; u_int8_t CERR, PERR, MERR, FERR, FEBE; u_int8_t ctl1, stat16, feac; int link_status = STATUS_UP; u_int16_t mii16; /* Read the alarm registers. */ ctl1 = read_framer(sc, T3CSR_CTL1); stat16 = read_framer(sc, T3CSR_STAT16); mii16 = read_mii(sc, 16); /* Always ignore the RTLOC alarm bit. */ stat16 &= ~STAT16_RTLOC; /* Software is alive. */ led_inv(sc, MII16_DS3_LED_GRN); /* Receiving Alarm Indication Signal (AIS). 
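LED convention for the AIS and RAI checks below: solid on means the alarm is being received; blinking, via led_inv() once per second, means it is being transmitted.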
*/ if ((stat16 & STAT16_RAIS) != 0) /* receiving ais */ led_on(sc, MII16_DS3_LED_BLU); else if (ctl1 & CTL1_TXAIS) /* sending ais */ led_inv(sc, MII16_DS3_LED_BLU); else led_off(sc, MII16_DS3_LED_BLU); /* Receiving Remote Alarm Indication (RAI). */ if ((stat16 & STAT16_XERR) != 0) /* receiving rai */ led_on(sc, MII16_DS3_LED_YEL); else if ((ctl1 & CTL1_XTX) == 0) /* sending rai */ led_inv(sc, MII16_DS3_LED_YEL); else led_off(sc, MII16_DS3_LED_YEL); /* If certain status bits are set then the link is 'down'. */ /* The bad bits are: rxlos rxoof rxais rxidl xerr. */ if ((stat16 & ~(STAT16_FEAC | STAT16_SEF)) != 0) link_status = STATUS_DOWN; /* Declare local Red Alarm if the link is down. */ if (link_status == STATUS_DOWN) led_on(sc, MII16_DS3_LED_RED); else if (sc->loop_timer != 0) /* loopback is active */ led_inv(sc, MII16_DS3_LED_RED); else led_off(sc, MII16_DS3_LED_RED); /* Print latched error bits if they changed. */ if ((DRIVER_DEBUG) && ((stat16 & ~STAT16_FEAC) != sc->last_stat16)) { char *on = "ON ", *off = "OFF"; printf("%s: RLOS=%s ROOF=%s RAIS=%s RIDL=%s SEF=%s XERR=%s\n", NAME_UNIT, (stat16 & STAT16_RLOS) ? on : off, (stat16 & STAT16_ROOF) ? on : off, (stat16 & STAT16_RAIS) ? on : off, (stat16 & STAT16_RIDL) ? on : off, (stat16 & STAT16_SEF) ? on : off, (stat16 & STAT16_XERR) ? on : off); } /* Check and print error counters if non-zero. */ CV = read_framer(sc, T3CSR_CVHI)<<8; CV += read_framer(sc, T3CSR_CVLO); PERR = read_framer(sc, T3CSR_PERR); CERR = read_framer(sc, T3CSR_CERR); FERR = read_framer(sc, T3CSR_FERR); MERR = read_framer(sc, T3CSR_MERR); FEBE = read_framer(sc, T3CSR_FEBE); /* CV is invalid during LOS. */ if ((stat16 & STAT16_RLOS)!=0) CV = 0; /* CERR & FEBE are invalid in M13 mode */ if (sc->config.format == CFG_FORMAT_T3M13) CERR = FEBE = 0; /* FEBE is invalid during AIS. */ if ((stat16 & STAT16_RAIS)!=0) FEBE = 0; if (DRIVER_DEBUG && (CV || PERR || CERR || FERR || MERR || FEBE)) printf("%s: CV=%u PERR=%u CERR=%u FERR=%u MERR=%u FEBE=%u\n", NAME_UNIT, CV, PERR, CERR, FERR, MERR, FEBE); /* Driver keeps crude link-level error counters (SNMP is better). */ sc->status.cntrs.lcv_errs += CV; sc->status.cntrs.par_errs += PERR; sc->status.cntrs.cpar_errs += CERR; sc->status.cntrs.frm_errs += FERR; sc->status.cntrs.mfrm_errs += MERR; sc->status.cntrs.febe_errs += FEBE; /* Check for FEAC messages (FEAC not defined in M13 mode). */ if (FORMAT_T3CPAR && (stat16 & STAT16_FEAC)) do { feac = read_framer(sc, T3CSR_FEAC_STK); if ((feac & FEAC_STK_VALID)==0) break; /* Ignore RxFEACs while a far end loopback has been requested. 
*/ if ((sc->status.snmp.t3.line & TLOOP_FAR_LINE)!=0) continue; switch (feac & FEAC_STK_FEAC) { case T3BOP_LINE_UP: break; case T3BOP_LINE_DOWN: break; case T3BOP_LOOP_DS3: { if (sc->last_FEAC == T3BOP_LINE_DOWN) { if (DRIVER_DEBUG) printf("%s: Received a 'line loopback deactivate' FEAC msg\n", NAME_UNIT); clr_mii16_bits(sc, MII16_DS3_LNLBK); sc->loop_timer = 0; } if (sc->last_FEAC == T3BOP_LINE_UP) { if (DRIVER_DEBUG) printf("%s: Received a 'line loopback activate' FEAC msg\n", NAME_UNIT); set_mii16_bits(sc, MII16_DS3_LNLBK); sc->loop_timer = 300; } break; } case T3BOP_OOF: { if (DRIVER_DEBUG) printf("%s: Received a 'far end LOF' FEAC msg\n", NAME_UNIT); break; } case T3BOP_IDLE: { if (DRIVER_DEBUG) printf("%s: Received a 'far end IDL' FEAC msg\n", NAME_UNIT); break; } case T3BOP_AIS: { if (DRIVER_DEBUG) printf("%s: Received a 'far end AIS' FEAC msg\n", NAME_UNIT); break; } case T3BOP_LOS: { if (DRIVER_DEBUG) printf("%s: Received a 'far end LOS' FEAC msg\n", NAME_UNIT); break; } default: { if (DRIVER_DEBUG) printf("%s: Received a 'type 0x%02X' FEAC msg\n", NAME_UNIT, feac & FEAC_STK_FEAC); break; } } sc->last_FEAC = feac & FEAC_STK_FEAC; } while ((feac & FEAC_STK_MORE) != 0); stat16 &= ~STAT16_FEAC; /* Send Service-Affecting priority FEAC messages */ if (((sc->last_stat16 ^ stat16) & 0xF0) && (FORMAT_T3CPAR)) { /* Transmit continuous FEACs */ write_framer(sc, T3CSR_CTL14, read_framer(sc, T3CSR_CTL14) & ~CTL14_FEAC10); if ((stat16 & STAT16_RLOS)!=0) write_framer(sc, T3CSR_TX_FEAC, 0xC0 + T3BOP_LOS); else if ((stat16 & STAT16_ROOF)!=0) write_framer(sc, T3CSR_TX_FEAC, 0xC0 + T3BOP_OOF); else if ((stat16 & STAT16_RAIS)!=0) write_framer(sc, T3CSR_TX_FEAC, 0xC0 + T3BOP_AIS); else if ((stat16 & STAT16_RIDL)!=0) write_framer(sc, T3CSR_TX_FEAC, 0xC0 + T3BOP_IDLE); else write_framer(sc, T3CSR_TX_FEAC, CTL5_EMODE); } /* Start sending RAI, Remote Alarm Indication. */ if (((stat16 & STAT16_ROOF)!=0) && ((stat16 & STAT16_RLOS)==0) && ((sc->last_stat16 & STAT16_ROOF)==0)) write_framer(sc, T3CSR_CTL1, ctl1 &= ~CTL1_XTX); /* Stop sending RAI, Remote Alarm Indication. */ else if (((stat16 & STAT16_ROOF)==0) && ((sc->last_stat16 & STAT16_ROOF)!=0)) write_framer(sc, T3CSR_CTL1, ctl1 |= CTL1_XTX); /* Start sending AIS, Alarm Indication Signal */ if (((stat16 & STAT16_RLOS)!=0) && ((sc->last_stat16 & STAT16_RLOS)==0)) { set_mii16_bits(sc, MII16_DS3_FRAME); write_framer(sc, T3CSR_CTL1, ctl1 | CTL1_TXAIS); } /* Stop sending AIS, Alarm Indication Signal */ else if (((stat16 & STAT16_RLOS)==0) && ((sc->last_stat16 & STAT16_RLOS)!=0)) { clr_mii16_bits(sc, MII16_DS3_FRAME); write_framer(sc, T3CSR_CTL1, ctl1 & ~CTL1_TXAIS); } /* Time out loopback requests. 
*/ if (sc->loop_timer != 0) if (--sc->loop_timer == 0) if ((mii16 & MII16_DS3_LNLBK)!=0) { if (DRIVER_DEBUG) printf("%s: Timeout: Loop Down after 300 seconds\n", NAME_UNIT); clr_mii16_bits(sc, MII16_DS3_LNLBK); /* line loopback off */ } /* SNMP error counters */ sc->status.snmp.t3.lcv = CV; sc->status.snmp.t3.pcv = PERR; sc->status.snmp.t3.ccv = CERR; sc->status.snmp.t3.febe = FEBE; /* SNMP Line Status */ sc->status.snmp.t3.line = 0; if ((ctl1 & CTL1_XTX)==0) sc->status.snmp.t3.line |= TLINE_TX_RAI; if (stat16 & STAT16_XERR) sc->status.snmp.t3.line |= TLINE_RX_RAI; if (ctl1 & CTL1_TXAIS) sc->status.snmp.t3.line |= TLINE_TX_AIS; if (stat16 & STAT16_RAIS) sc->status.snmp.t3.line |= TLINE_RX_AIS; if (stat16 & STAT16_ROOF) sc->status.snmp.t3.line |= TLINE_LOF; if (stat16 & STAT16_RLOS) sc->status.snmp.t3.line |= TLINE_LOS; if (stat16 & STAT16_SEF) sc->status.snmp.t3.line |= T3LINE_SEF; /* SNMP Loopback Status */ sc->status.snmp.t3.loop &= ~TLOOP_FAR_LINE; if (sc->config.loop_back == CFG_LOOP_TULIP) sc->status.snmp.t3.loop |= TLOOP_NEAR_OTHER; if (ctl1 & CTL1_3LOOP) sc->status.snmp.t3.loop |= TLOOP_NEAR_INWARD; if (mii16 & MII16_DS3_TRLBK) sc->status.snmp.t3.loop |= TLOOP_NEAR_OTHER; if (mii16 & MII16_DS3_LNLBK) sc->status.snmp.t3.loop |= TLOOP_NEAR_LINE; /*if (ctl12 & CTL12_RTPLOOP) sc->status.snmp.t3.loop |= TLOOP_NEAR_PAYLOAD; */ /* Remember this state until next time. */ sc->last_stat16 = stat16; /* If an INWARD loopback is in effect, link status is UP */ if (sc->config.loop_back != CFG_LOOP_NONE) /* XXX INWARD ONLY */ link_status = STATUS_UP; return link_status; } /* IOCTL SYSCALL: can sleep. */ static void t3_send_dbl_feac(softc_t *sc, int feac1, int feac2) { u_int8_t tx_feac; int i; /* The FEAC transmitter could be sending a continuous */ /* FEAC msg when told to send a double FEAC message. */ /* So save the current state of the FEAC transmitter. */ tx_feac = read_framer(sc, T3CSR_TX_FEAC); /* Load second FEAC code and stop FEAC transmitter. */ write_framer(sc, T3CSR_TX_FEAC, CTL5_EMODE + feac2); /* FEAC transmitter sends 10 more FEACs and then stops. */ SLEEP(20000); /* sending one FEAC takes 1700 uSecs */ /* Load first FEAC code and start FEAC transmitter. */ write_framer(sc, T3CSR_DBL_FEAC, CTL13_DFEXEC + feac1); /* Wait for double FEAC sequence to complete -- about 70 ms. */ for (i=0; i<10; i++) /* max delay 100 ms */ if (read_framer(sc, T3CSR_DBL_FEAC) & CTL13_DFEXEC) SLEEP(10000); /* Flush received FEACs; don't respond to our own loop cmd! */ while (read_framer(sc, T3CSR_FEAC_STK) & FEAC_STK_VALID) DELAY(1); /* XXX HANG */ /* Restore previous state of the FEAC transmitter. */ /* If it was sending a continuous FEAC, it will resume. */ write_framer(sc, T3CSR_TX_FEAC, tx_feac); } /* IOCTL SYSCALL: can sleep. */ static int t3_ioctl(softc_t *sc, struct ioctl *ioctl) { int error = 0; switch (ioctl->cmd) { case IOCTL_SNMP_SEND: /* set opstatus? */ { if (sc->config.format != CFG_FORMAT_T3CPAR) error = EINVAL; else if (ioctl->data == TSEND_LINE) { sc->status.snmp.t3.loop |= TLOOP_FAR_LINE; t3_send_dbl_feac(sc, T3BOP_LINE_UP, T3BOP_LOOP_DS3); } else if (ioctl->data == TSEND_RESET) { t3_send_dbl_feac(sc, T3BOP_LINE_DOWN, T3BOP_LOOP_DS3); sc->status.snmp.t3.loop &= ~TLOOP_FAR_LINE; } else error = EINVAL; break; } case IOCTL_SNMP_LOOP: /* set opstatus = test? 
*/ { if (ioctl->data == CFG_LOOP_NONE) { clr_mii16_bits(sc, MII16_DS3_FRAME); clr_mii16_bits(sc, MII16_DS3_TRLBK); clr_mii16_bits(sc, MII16_DS3_LNLBK); write_framer(sc, T3CSR_CTL1, read_framer(sc, T3CSR_CTL1) & ~CTL1_3LOOP); write_framer(sc, T3CSR_CTL12, read_framer(sc, T3CSR_CTL12) & ~(CTL12_RTPLOOP | CTL12_RTPLLEN)); } else if (ioctl->data == CFG_LOOP_LINE) set_mii16_bits(sc, MII16_DS3_LNLBK); else if (ioctl->data == CFG_LOOP_OTHER) set_mii16_bits(sc, MII16_DS3_TRLBK); else if (ioctl->data == CFG_LOOP_INWARD) write_framer(sc, T3CSR_CTL1, read_framer(sc, T3CSR_CTL1) | CTL1_3LOOP); else if (ioctl->data == CFG_LOOP_DUAL) { set_mii16_bits(sc, MII16_DS3_LNLBK); write_framer(sc, T3CSR_CTL1, read_framer(sc, T3CSR_CTL1) | CTL1_3LOOP); } else if (ioctl->data == CFG_LOOP_PAYLOAD) { set_mii16_bits(sc, MII16_DS3_FRAME); write_framer(sc, T3CSR_CTL12, read_framer(sc, T3CSR_CTL12) | CTL12_RTPLOOP); write_framer(sc, T3CSR_CTL12, read_framer(sc, T3CSR_CTL12) | CTL12_RTPLLEN); DELAY(25); /* at least two frames (22 uS) */ write_framer(sc, T3CSR_CTL12, read_framer(sc, T3CSR_CTL12) & ~CTL12_RTPLLEN); } else error = EINVAL; break; } default: error = EINVAL; break; } return error; } /* begin SSI card code */ /* Must not sleep. */ static void ssi_config(softc_t *sc) { if (sc->status.card_type == 0) { /* defaults */ sc->status.card_type = TLP_CSID_SSI; sc->config.crc_len = CFG_CRC_16; sc->config.loop_back = CFG_LOOP_NONE; sc->config.tx_clk_src = CFG_CLKMUX_ST; sc->config.dte_dce = CFG_DTE; sc->config.synth.n = 51; /* 1.536 MHz */ sc->config.synth.m = 83; sc->config.synth.v = 1; sc->config.synth.x = 1; sc->config.synth.r = 1; sc->config.synth.prescale = 4; } /* Disable the TX clock driver while programming the oscillator. */ clr_gpio_bits(sc, GPIO_SSI_DCE); make_gpio_output(sc, GPIO_SSI_DCE); /* Program the synthesized oscillator. */ write_synth(sc, &sc->config.synth); /* Set DTE/DCE mode. */ /* If DTE mode then DCD & TXC are received. */ /* If DCE mode then DCD & TXC are driven. */ /* Boards with MII rev=4.0 don't drive DCD. */ if (sc->config.dte_dce == CFG_DCE) set_gpio_bits(sc, GPIO_SSI_DCE); else clr_gpio_bits(sc, GPIO_SSI_DCE); make_gpio_output(sc, GPIO_SSI_DCE); /* Set CRC length. */ if (sc->config.crc_len == CFG_CRC_32) set_mii16_bits(sc, MII16_SSI_CRC32); else clr_mii16_bits(sc, MII16_SSI_CRC32); /* Loop towards host thru cable drivers and receivers. */ /* Asserts DCD at the far end of a null modem cable. */ if (sc->config.loop_back == CFG_LOOP_PINS) set_mii16_bits(sc, MII16_SSI_LOOP); else clr_mii16_bits(sc, MII16_SSI_LOOP); /* Assert pin LL in modem conn: ask modem for local loop. */ /* Asserts TM at the far end of a null modem cable. */ if (sc->config.loop_back == CFG_LOOP_LL) set_mii16_bits(sc, MII16_SSI_LL); else clr_mii16_bits(sc, MII16_SSI_LL); /* Assert pin RL in modem conn: ask modem for remote loop. */ if (sc->config.loop_back == CFG_LOOP_RL) set_mii16_bits(sc, MII16_SSI_RL); else clr_mii16_bits(sc, MII16_SSI_RL); } static void ssi_ident(softc_t *sc) { printf(", LTC1343/44"); } /* Called once a second; must not sleep. */ static int ssi_watchdog(softc_t *sc) { u_int16_t cable; u_int16_t mii16 = read_mii(sc, 16) & MII16_SSI_MODEM; int link_status = STATUS_UP; /* Software is alive. */ led_inv(sc, MII16_SSI_LED_UL); /* Check the transmit clock. */ if (sc->status.tx_speed == 0) { led_on(sc, MII16_SSI_LED_UR); link_status = STATUS_DOWN; } else led_off(sc, MII16_SSI_LED_UR); /* Check the external cable. 
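The LTC1343/44 reports a 3-bit cable-type code in MII register 17; code 7 means no cable is plugged in, and the code also indexes the ssi_cables[] name table printed below.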
*/ cable = read_mii(sc, 17); cable = cable & MII17_SSI_CABLE_MASK; cable = cable >> MII17_SSI_CABLE_SHIFT; if (cable == 7) { led_off(sc, MII16_SSI_LED_LL); /* no cable */ link_status = STATUS_DOWN; } else led_on(sc, MII16_SSI_LED_LL); /* The unit at the other end of the cable is ready if: */ /* DTE mode and DCD pin is asserted */ /* DCE mode and DSR pin is asserted */ if (((sc->config.dte_dce == CFG_DTE) && ((mii16 & MII16_SSI_DCD)==0)) || ((sc->config.dte_dce == CFG_DCE) && ((mii16 & MII16_SSI_DSR)==0))) { led_off(sc, MII16_SSI_LED_LR); link_status = STATUS_DOWN; } else led_on(sc, MII16_SSI_LED_LR); if (DRIVER_DEBUG && (cable != sc->status.cable_type)) printf("%s: SSI cable type changed to '%s'\n", NAME_UNIT, ssi_cables[cable]); sc->status.cable_type = cable; /* Print the modem control signals if they changed. */ if ((DRIVER_DEBUG) && (mii16 != sc->last_mii16)) { char *on = "ON ", *off = "OFF"; printf("%s: DTR=%s DSR=%s RTS=%s CTS=%s DCD=%s RI=%s LL=%s RL=%s TM=%s\n", NAME_UNIT, (mii16 & MII16_SSI_DTR) ? on : off, (mii16 & MII16_SSI_DSR) ? on : off, (mii16 & MII16_SSI_RTS) ? on : off, (mii16 & MII16_SSI_CTS) ? on : off, (mii16 & MII16_SSI_DCD) ? on : off, (mii16 & MII16_SSI_RI) ? on : off, (mii16 & MII16_SSI_LL) ? on : off, (mii16 & MII16_SSI_RL) ? on : off, (mii16 & MII16_SSI_TM) ? on : off); } /* SNMP one-second report */ sc->status.snmp.ssi.sigs = mii16 & MII16_SSI_MODEM; /* Remember this state until next time. */ sc->last_mii16 = mii16; /* If a loop back is in effect, link status is UP */ if (sc->config.loop_back != CFG_LOOP_NONE) link_status = STATUS_UP; return link_status; } /* IOCTL SYSCALL: can sleep (but doesn't). */ static int ssi_ioctl(softc_t *sc, struct ioctl *ioctl) { int error = 0; if (ioctl->cmd == IOCTL_SNMP_SIGS) { u_int16_t mii16 = read_mii(sc, 16); mii16 &= ~MII16_SSI_MODEM; mii16 |= (MII16_SSI_MODEM & ioctl->data); write_mii(sc, 16, mii16); } else if (ioctl->cmd == IOCTL_SET_STATUS) { if (ioctl->data != 0) set_mii16_bits(sc, (MII16_SSI_DTR | MII16_SSI_RTS | MII16_SSI_DCD)); else clr_mii16_bits(sc, (MII16_SSI_DTR | MII16_SSI_RTS | MII16_SSI_DCD)); } else error = EINVAL; return error; } /* begin T1E1 card code */ /* Must not sleep. */ static void t1_config(softc_t *sc) { int i; u_int8_t pulse, lbo, gain; if (sc->status.card_type == 0) { /* defaults */ sc->status.card_type = TLP_CSID_T1E1; sc->config.crc_len = CFG_CRC_16; sc->config.loop_back = CFG_LOOP_NONE; sc->config.tx_clk_src = CFG_CLKMUX_INT; sc->config.format = CFG_FORMAT_T1ESF; sc->config.cable_len = 10; sc->config.time_slots = 0x01FFFFFE; sc->config.tx_pulse = CFG_PULSE_AUTO; sc->config.rx_gain = CFG_GAIN_AUTO; sc->config.tx_lbo = CFG_LBO_AUTO; /* Bt8370 occasionally powers up in a loopback mode. */ /* Data sheet says zero LOOP reg and do a s/w reset. */ write_framer(sc, Bt8370_LOOP, 0x00); /* no loopback */ write_framer(sc, Bt8370_CR0, 0x80); /* s/w reset */ for (i=0; i<10; i++) /* max delay 10 ms */ if (read_framer(sc, Bt8370_CR0) & 0x80) DELAY(1000); } /* Set CRC length. */ if (sc->config.crc_len == CFG_CRC_32) set_mii16_bits(sc, MII16_T1_CRC32); else clr_mii16_bits(sc, MII16_T1_CRC32); /* Invert HDLC payload data in SF/AMI mode. */ /* HDLC stuff bits satisfy T1 pulse density. */ if (FORMAT_T1SF) set_mii16_bits(sc, MII16_T1_INVERT); else clr_mii16_bits(sc, MII16_T1_INVERT); /* Set the transmitter output impedance. 
*/ if (FORMAT_E1ANY) set_mii16_bits(sc, MII16_T1_Z); /* 001:CR0 -- Control Register 0 - T1/E1 and frame format */ write_framer(sc, Bt8370_CR0, sc->config.format); /* 002:JAT_CR -- Jitter Attenuator Control Register */ if (sc->config.tx_clk_src == CFG_CLKMUX_RT) /* loop timing */ write_framer(sc, Bt8370_JAT_CR, 0xA3); /* JAT in RX path */ else { /* 64-bit elastic store; free-running JCLK and CLADO */ write_framer(sc, Bt8370_JAT_CR, 0x4B); /* assert jcenter */ write_framer(sc, Bt8370_JAT_CR, 0x43); /* release jcenter */ } /* 00C-013:IERn -- Interrupt Enable Registers */ for (i=Bt8370_IER7; i<=Bt8370_IER0; i++) write_framer(sc, i, 0); /* no interrupts; polled */ /* 014:LOOP -- loopbacks */ if (sc->config.loop_back == CFG_LOOP_PAYLOAD) write_framer(sc, Bt8370_LOOP, LOOP_PAYLOAD); else if (sc->config.loop_back == CFG_LOOP_LINE) write_framer(sc, Bt8370_LOOP, LOOP_LINE); else if (sc->config.loop_back == CFG_LOOP_OTHER) write_framer(sc, Bt8370_LOOP, LOOP_ANALOG); else if (sc->config.loop_back == CFG_LOOP_INWARD) write_framer(sc, Bt8370_LOOP, LOOP_FRAMER); else if (sc->config.loop_back == CFG_LOOP_DUAL) write_framer(sc, Bt8370_LOOP, LOOP_DUAL); else write_framer(sc, Bt8370_LOOP, 0x00); /* no loopback */ /* 015:DL3_TS -- Data Link 3 */ write_framer(sc, Bt8370_DL3_TS, 0x00); /* disabled */ /* 018:PIO -- Programmable I/O */ write_framer(sc, Bt8370_PIO, 0xFF); /* all pins are outputs */ /* 019:POE -- Programmable Output Enable */ write_framer(sc, Bt8370_POE, 0x00); /* all outputs are enabled */ /* 01A:CMUX -- Clock Input Mux */ if (sc->config.tx_clk_src == CFG_CLKMUX_EXT) write_framer(sc, Bt8370_CMUX, 0x0C); /* external timing */ else write_framer(sc, Bt8370_CMUX, 0x0F); /* internal timing */ /* 020:LIU_CR -- Line Interface Unit Config Register */ write_framer(sc, Bt8370_LIU_CR, 0xC1); /* reset LIU, squelch */ /* 022:RLIU_CR -- RX Line Interface Unit Config Reg */ /* Errata sheet says don't use freeze-short, but we do anyway! */ write_framer(sc, Bt8370_RLIU_CR, 0xB1); /* AGC=2048, Long Eye */ /* Select Rx sensitivity based on cable length. 
*/ if ((gain = sc->config.rx_gain) == CFG_GAIN_AUTO) { if (sc->config.cable_len > 2000) gain = CFG_GAIN_EXTEND; else if (sc->config.cable_len > 1000) gain = CFG_GAIN_LONG; else if (sc->config.cable_len > 100) gain = CFG_GAIN_MEDIUM; else gain = CFG_GAIN_SHORT; } /* 024:VGA_MAX -- Variable Gain Amplifier Max gain */ write_framer(sc, Bt8370_VGA_MAX, gain); /* 028:PRE_EQ -- Pre Equalizer */ if (gain == CFG_GAIN_EXTEND) write_framer(sc, Bt8370_PRE_EQ, 0xE6); /* ON; thresh 6 */ else write_framer(sc, Bt8370_PRE_EQ, 0xA6); /* OFF; thresh 6 */ /* 038-03C:GAINn -- RX Equalizer gain thresholds */ write_framer(sc, Bt8370_GAIN0, 0x24); write_framer(sc, Bt8370_GAIN1, 0x28); write_framer(sc, Bt8370_GAIN2, 0x2C); write_framer(sc, Bt8370_GAIN3, 0x30); write_framer(sc, Bt8370_GAIN4, 0x34); /* 040:RCR0 -- Receiver Control Register 0 */ if (FORMAT_T1ESF) write_framer(sc, Bt8370_RCR0, 0x05); /* B8ZS, 2/5 FErrs */ else if (FORMAT_T1SF) write_framer(sc, Bt8370_RCR0, 0x84); /* AMI, 2/5 FErrs */ else if (FORMAT_E1NONE) write_framer(sc, Bt8370_RCR0, 0x41); /* HDB3, rabort */ else if (FORMAT_E1CRC) write_framer(sc, Bt8370_RCR0, 0x09); /* HDB3, 3 FErrs or 915 CErrs */ else /* E1 no CRC */ write_framer(sc, Bt8370_RCR0, 0x19); /* HDB3, 3 FErrs */ /* 041:RPATT -- Receive Test Pattern configuration */ write_framer(sc, Bt8370_RPATT, 0x3E); /* looking for framed QRSS */ /* 042:RLB -- Receive Loop Back code detector config */ write_framer(sc, Bt8370_RLB, 0x09); /* 6 bits down; 5 bits up */ /* 043:LBA -- Loop Back Activate code */ write_framer(sc, Bt8370_LBA, 0x08); /* 10000 10000 10000 ... */ /* 044:LBD -- Loop Back Deactivate code */ write_framer(sc, Bt8370_LBD, 0x24); /* 100100 100100 100100 ... */ /* 045:RALM -- Receive Alarm signal configuration */ write_framer(sc, Bt8370_RALM, 0x0C); /* yel_intg rlof_intg */ /* 046:LATCH -- Alarm/Error/Counter Latch register */ write_framer(sc, Bt8370_LATCH, 0x1F); /* stop_cnt latch_{cnt,err,alm} */ /* Select Pulse Shape based on cable length (T1 only). */ if ((pulse = sc->config.tx_pulse) == CFG_PULSE_AUTO) { if (FORMAT_T1ANY) { if (sc->config.cable_len > 200) pulse = CFG_PULSE_T1CSU; else if (sc->config.cable_len > 160) pulse = CFG_PULSE_T1DSX4; else if (sc->config.cable_len > 120) pulse = CFG_PULSE_T1DSX3; else if (sc->config.cable_len > 80) pulse = CFG_PULSE_T1DSX2; else if (sc->config.cable_len > 40) pulse = CFG_PULSE_T1DSX1; else pulse = CFG_PULSE_T1DSX0; } else pulse = CFG_PULSE_E1TWIST; } /* Select Line Build Out based on cable length (T1CSU only). 
*/ if ((lbo = sc->config.tx_lbo) == CFG_LBO_AUTO) { if (pulse == CFG_PULSE_T1CSU) { if (sc->config.cable_len > 1500) lbo = CFG_LBO_0DB; else if (sc->config.cable_len > 1000) lbo = CFG_LBO_7DB; else if (sc->config.cable_len > 500) lbo = CFG_LBO_15DB; else lbo = CFG_LBO_22DB; } else lbo = 0; } /* 068:TLIU_CR -- Transmit LIU Control Register */ write_framer(sc, Bt8370_TLIU_CR, (0x40 | (lbo & 0x30) | (pulse & 0x0E))); /* 070:TCR0 -- Transmit Framer Configuration */ write_framer(sc, Bt8370_TCR0, sc->config.format>>1); /* 071:TCR1 -- Transmitter Configuration */ if (FORMAT_T1SF) write_framer(sc, Bt8370_TCR1, 0x43); /* tabort, AMI PDV enforced */ else write_framer(sc, Bt8370_TCR1, 0x41); /* tabort, B8ZS or HDB3 */ /* 072:TFRM -- Transmit Frame format MYEL YEL MF FE CRC FBIT */ if (sc->config.format == CFG_FORMAT_T1ESF) write_framer(sc, Bt8370_TFRM, 0x0B); /* - YEL MF - CRC FBIT */ else if (sc->config.format == CFG_FORMAT_T1SF) write_framer(sc, Bt8370_TFRM, 0x19); /* - YEL MF - - FBIT */ else if (sc->config.format == CFG_FORMAT_E1FAS) write_framer(sc, Bt8370_TFRM, 0x11); /* - YEL - - - FBIT */ else if (sc->config.format == CFG_FORMAT_E1FASCRC) write_framer(sc, Bt8370_TFRM, 0x1F); /* - YEL MF FE CRC FBIT */ else if (sc->config.format == CFG_FORMAT_E1FASCAS) write_framer(sc, Bt8370_TFRM, 0x31); /* MYEL YEL - - - FBIT */ else if (sc->config.format == CFG_FORMAT_E1FASCRCCAS) write_framer(sc, Bt8370_TFRM, 0x3F); /* MYEL YEL MF FE CRC FBIT */ else if (sc->config.format == CFG_FORMAT_E1NONE) write_framer(sc, Bt8370_TFRM, 0x00); /* NO FRAMING BITS AT ALL! */ /* 073:TERROR -- Transmit Error Insert */ write_framer(sc, Bt8370_TERROR, 0x00); /* no errors, please! */ /* 074:TMAN -- Transmit Manual Sa-byte/FEBE configuration */ write_framer(sc, Bt8370_TMAN, 0x00); /* none */ /* 075:TALM -- Transmit Alarm Signal Configuration */ if (FORMAT_E1ANY) write_framer(sc, Bt8370_TALM, 0x38); /* auto_myel auto_yel auto_ais */ else if (FORMAT_T1ANY) write_framer(sc, Bt8370_TALM, 0x18); /* auto_yel auto_ais */ /* 076:TPATT -- Transmit Test Pattern Configuration */ write_framer(sc, Bt8370_TPATT, 0x00); /* disabled */ /* 077:TLB -- Transmit Inband Loopback Code Configuration */ write_framer(sc, Bt8370_TLB, 0x00); /* disabled */ /* 090:CLAD_CR -- Clock Rate Adapter Configuration */ if (FORMAT_T1ANY) write_framer(sc, Bt8370_CLAD_CR, 0x06); /* loop filter gain 1/2^6 */ else write_framer(sc, Bt8370_CLAD_CR, 0x08); /* loop filter gain 1/2^8 */ /* 091:CSEL -- CLAD frequency Select */ if (FORMAT_T1ANY) write_framer(sc, Bt8370_CSEL, 0x55); /* 1544 kHz */ else write_framer(sc, Bt8370_CSEL, 0x11); /* 2048 kHz */ /* 092:CPHASE -- CLAD Phase detector */ if (FORMAT_T1ANY) write_framer(sc, Bt8370_CPHASE, 0x22); /* phase compare @ 386 kHz */ else write_framer(sc, Bt8370_CPHASE, 0x00); /* phase compare @ 2048 kHz */ if (FORMAT_T1ESF) /* BOP & PRM are enabled in T1ESF mode only. 
*/ { /* 0A0:BOP -- Bit Oriented Protocol messages */ write_framer(sc, Bt8370_BOP, RBOP_25 | TBOP_OFF); /* 0A4:DL1_TS -- Data Link 1 Time Slot Enable */ write_framer(sc, Bt8370_DL1_TS, 0x40); /* FDL bits in odd frames */ /* 0A6:DL1_CTL -- Data Link 1 Control */ write_framer(sc, Bt8370_DL1_CTL, 0x03); /* FCS mode, TX on, RX on */ /* 0A7:RDL1_FFC -- Rx Data Link 1 Fifo Fill Control */ write_framer(sc, Bt8370_RDL1_FFC, 0x30); /* assert "near full" at 48 */ /* 0AA:PRM -- Performance Report Messages */ write_framer(sc, Bt8370_PRM, 0x80); } /* 0D0:SBI_CR -- System Bus Interface Configuration Register */ if (FORMAT_T1ANY) write_framer(sc, Bt8370_SBI_CR, 0x47); /* 1.544 with 24 TS +Fbits */ else write_framer(sc, Bt8370_SBI_CR, 0x46); /* 2.048 with 32 TS */ /* 0D1:RSB_CR -- Receive System Bus Configuration Register */ /* Change RINDO & RFSYNC on falling edge of RSBCLKI. */ write_framer(sc, Bt8370_RSB_CR, 0x70); /* 0D2,0D3:RSYNC_{TS,BIT} -- Receive frame Sync offset */ write_framer(sc, Bt8370_RSYNC_BIT, 0x00); write_framer(sc, Bt8370_RSYNC_TS, 0x00); /* 0D4:TSB_CR -- Transmit System Bus Configuration Register */ /* Change TINDO & TFSYNC on falling edge of TSBCLKI. */ write_framer(sc, Bt8370_TSB_CR, 0x30); /* 0D5,0D6:TSYNC_{TS,BIT} -- Transmit frame Sync offset */ write_framer(sc, Bt8370_TSYNC_BIT, 0x00); write_framer(sc, Bt8370_TSYNC_TS, 0x00); /* 0D7:RSIG_CR -- Receive SIGnalling Configuration Register */ write_framer(sc, Bt8370_RSIG_CR, 0x00); /* Assign and configure 64Kb TIME SLOTS. */ /* TS24..TS1 must be assigned for T1, TS31..TS0 for E1. */ /* Timeslots with no user data have RINDO and TINDO off. */ for (i=0; i<32; i++) { /* 0E0-0FF:SBCn -- System Bus Per-Channel Control */ if (FORMAT_T1ANY && (i==0 || i>24)) write_framer(sc, Bt8370_SBCn +i, 0x00); /* not assigned in T1 mode */ else if (FORMAT_E1ANY && (i==0) && !FORMAT_E1NONE) write_framer(sc, Bt8370_SBCn +i, 0x01); /* assigned, TS0 o/h bits */ else if (FORMAT_E1CAS && (i==16) && !FORMAT_E1NONE) write_framer(sc, Bt8370_SBCn +i, 0x01); /* assigned, TS16 o/h bits */ else if ((sc->config.time_slots & (1<config.time_slots & (1<>4, read_framer(sc, Bt8370_DID)&0x0F); } /* Called once a second; must not sleep. */ static int t1_watchdog(softc_t *sc) { u_int16_t LCV = 0, FERR = 0, CRC = 0, FEBE = 0; u_int8_t alm1, alm3, loop, isr0; int link_status = STATUS_UP; int i; /* Read the alarm registers */ alm1 = read_framer(sc, Bt8370_ALM1); alm3 = read_framer(sc, Bt8370_ALM3); loop = read_framer(sc, Bt8370_LOOP); isr0 = read_framer(sc, Bt8370_ISR0); /* Always ignore the SIGFRZ alarm bit. */ alm1 &= ~ALM1_SIGFRZ; if (FORMAT_T1ANY) /* ignore RYEL in T1 modes */ alm1 &= ~ALM1_RYEL; else if (FORMAT_E1NONE) /* ignore all alarms except LOS */ alm1 &= ALM1_RLOS; /* Software is alive. */ led_inv(sc, MII16_T1_LED_GRN); /* Receiving Alarm Indication Signal (AIS). */ if ((alm1 & ALM1_RAIS)!=0) /* receiving ais */ led_on(sc, MII16_T1_LED_BLU); else if ((alm1 & ALM1_RLOS)!=0) /* sending ais */ led_inv(sc, MII16_T1_LED_BLU); else led_off(sc, MII16_T1_LED_BLU); /* Receiving Remote Alarm Indication (RAI). */ if ((alm1 & (ALM1_RMYEL | ALM1_RYEL))!=0) /* receiving rai */ led_on(sc, MII16_T1_LED_YEL); else if ((alm1 & ALM1_RLOF)!=0) /* sending rai */ led_inv(sc, MII16_T1_LED_YEL); else led_off(sc, MII16_T1_LED_YEL); /* If any alarm bits are set then the link is 'down'. */ /* The bad bits are: rmyel ryel rais ralos rlos rlof. */ /* Some alarm bits have been masked by this point. */ if (alm1 != 0) link_status = STATUS_DOWN; /* Declare local Red Alarm if the link is down. 
*/ if (link_status == STATUS_DOWN) led_on(sc, MII16_T1_LED_RED); else if (sc->loop_timer != 0) /* loopback is active */ led_inv(sc, MII16_T1_LED_RED); else led_off(sc, MII16_T1_LED_RED); /* Print latched error bits if they changed. */ if ((DRIVER_DEBUG) && (alm1 != sc->last_alm1)) { char *on = "ON ", *off = "OFF"; printf("%s: RLOF=%s RLOS=%s RALOS=%s RAIS=%s RYEL=%s RMYEL=%s\n", NAME_UNIT, (alm1 & ALM1_RLOF) ? on : off, (alm1 & ALM1_RLOS) ? on : off, (alm1 & ALM1_RALOS) ? on : off, (alm1 & ALM1_RAIS) ? on : off, (alm1 & ALM1_RYEL) ? on : off, (alm1 & ALM1_RMYEL) ? on : off); } /* Check and print error counters if non-zero. */ LCV = read_framer(sc, Bt8370_LCV_LO) + (read_framer(sc, Bt8370_LCV_HI)<<8); if (!FORMAT_E1NONE) FERR = read_framer(sc, Bt8370_FERR_LO) + (read_framer(sc, Bt8370_FERR_HI)<<8); if (FORMAT_E1CRC || FORMAT_T1ESF) CRC = read_framer(sc, Bt8370_CRC_LO) + (read_framer(sc, Bt8370_CRC_HI)<<8); if (FORMAT_E1CRC) FEBE = read_framer(sc, Bt8370_FEBE_LO) + (read_framer(sc, Bt8370_FEBE_HI)<<8); /* Only LCV is valid if Out-Of-Frame */ if (FORMAT_E1NONE) FERR = CRC = FEBE = 0; if ((DRIVER_DEBUG) && (LCV || FERR || CRC || FEBE)) printf("%s: LCV=%u FERR=%u CRC=%u FEBE=%u\n", NAME_UNIT, LCV, FERR, CRC, FEBE); /* Driver keeps crude link-level error counters (SNMP is better). */ sc->status.cntrs.lcv_errs += LCV; sc->status.cntrs.frm_errs += FERR; sc->status.cntrs.crc_errs += CRC; sc->status.cntrs.febe_errs += FEBE; /* Check for BOP messages in the ESF Facility Data Link. */ if ((FORMAT_T1ESF) && (read_framer(sc, Bt8370_ISR1) & 0x80)) { u_int8_t bop_code = read_framer(sc, Bt8370_RBOP) & 0x3F; switch (bop_code) { case T1BOP_OOF: { if ((DRIVER_DEBUG) && ((sc->last_alm1 & ALM1_RMYEL)==0)) printf("%s: Receiving a 'yellow alarm' BOP msg\n", NAME_UNIT); break; } case T1BOP_LINE_UP: { if (DRIVER_DEBUG) printf("%s: Received a 'line loopback activate' BOP msg\n", NAME_UNIT); write_framer(sc, Bt8370_LOOP, LOOP_LINE); sc->loop_timer = 305; break; } case T1BOP_LINE_DOWN: { if (DRIVER_DEBUG) printf("%s: Received a 'line loopback deactivate' BOP msg\n", NAME_UNIT); write_framer(sc, Bt8370_LOOP, read_framer(sc, Bt8370_LOOP) & ~LOOP_LINE); sc->loop_timer = 0; break; } case T1BOP_PAY_UP: { if (DRIVER_DEBUG) printf("%s: Received a 'payload loopback activate' BOP msg\n", NAME_UNIT); write_framer(sc, Bt8370_LOOP, LOOP_PAYLOAD); sc->loop_timer = 305; break; } case T1BOP_PAY_DOWN: { if (DRIVER_DEBUG) printf("%s: Received a 'payload loopback deactivate' BOP msg\n", NAME_UNIT); write_framer(sc, Bt8370_LOOP, read_framer(sc, Bt8370_LOOP) & ~LOOP_PAYLOAD); sc->loop_timer = 0; break; } default: { if (DRIVER_DEBUG) printf("%s: Received a type 0x%02X BOP msg\n", NAME_UNIT, bop_code); break; } } } /* Check for HDLC pkts in the ESF Facility Data Link. */ if ((FORMAT_T1ESF) && (read_framer(sc, Bt8370_ISR2) & 0x70)) { /* while (not fifo-empty && not start-of-msg) flush fifo */ while ((read_framer(sc, Bt8370_RDL1_STAT) & 0x0C) == 0) read_framer(sc, Bt8370_RDL1); /* If (not fifo-empty), then begin processing fifo contents. */ if ((read_framer(sc, Bt8370_RDL1_STAT) & 0x0C) == 0x08) { u_int8_t msg[64]; u_int8_t stat = read_framer(sc, Bt8370_RDL1); sc->status.cntrs.fdl_pkts++; for (i=0; i<(stat & 0x3F); i++) msg[i] = read_framer(sc, Bt8370_RDL1); /* Is this FDL message a T1.403 performance report? */ if (((stat & 0x3F)==11) && ((msg[0]==0x38) || (msg[0]==0x3A)) && (msg[1]==1) && (msg[2]==3)) /* Copy 4 PRs from FDL pkt to SNMP struct. */ memcpy(sc->status.snmp.t1.prm, msg+3, 8); } } /* Check for inband loop up/down commands. 
*/ if (FORMAT_T1ANY) { u_int8_t isr6 = read_framer(sc, Bt8370_ISR6); u_int8_t alarm2 = read_framer(sc, Bt8370_ALM2); u_int8_t tlb = read_framer(sc, Bt8370_TLB); /* Inband Code == Loop Up && On Transition && Inband Tx Inactive */ if ((isr6 & 0x40) && (alarm2 & 0x40) && ((tlb & 1)==0)) { /* CSU loop up is 10000 10000 ... */ if (DRIVER_DEBUG) printf("%s: Received a 'CSU Loop Up' inband msg\n", NAME_UNIT); write_framer(sc, Bt8370_LOOP, LOOP_LINE); /* Loop up */ sc->loop_timer = 305; } /* Inband Code == Loop Down && On Transition && Inband Tx Inactive */ if ((isr6 & 0x80) && (alarm2 & 0x80) && ((tlb & 1)==0)) { /* CSU loop down is 100 100 100 ... */ if (DRIVER_DEBUG) printf("%s: Received a 'CSU Loop Down' inband msg\n", NAME_UNIT); write_framer(sc, Bt8370_LOOP, read_framer(sc, Bt8370_LOOP) & ~LOOP_LINE); /* loop down */ sc->loop_timer = 0; } } /* Manually send Yellow Alarm BOP msgs. */ if (FORMAT_T1ESF) { u_int8_t isr7 = read_framer(sc, Bt8370_ISR7); if ((isr7 & 0x02) && (alm1 & 0x02)) /* RLOF on-transition */ { /* Start sending continuous Yellow Alarm BOP messages. */ write_framer(sc, Bt8370_BOP, RBOP_25 | TBOP_CONT); write_framer(sc, Bt8370_TBOP, 0x00); /* send BOP; order matters */ } else if ((isr7 & 0x02) && ((alm1 & 0x02)==0)) /* RLOF off-transition */ { /* Stop sending continuous Yellow Alarm BOP messages. */ write_framer(sc, Bt8370_BOP, RBOP_25 | TBOP_OFF); } } /* Time out loopback requests. */ if (sc->loop_timer != 0) if (--sc->loop_timer == 0) if (loop != 0) { if (DRIVER_DEBUG) printf("%s: Timeout: Loop Down after 300 seconds\n", NAME_UNIT); write_framer(sc, Bt8370_LOOP, loop & ~(LOOP_PAYLOAD | LOOP_LINE)); } /* RX Test Pattern status */ if ((DRIVER_DEBUG) && (isr0 & 0x10)) printf("%s: RX Test Pattern Sync\n", NAME_UNIT); /* SNMP Error Counters */ sc->status.snmp.t1.lcv = LCV; sc->status.snmp.t1.fe = FERR; sc->status.snmp.t1.crc = CRC; sc->status.snmp.t1.febe = FEBE; /* SNMP Line Status */ sc->status.snmp.t1.line = 0; if (alm1 & ALM1_RMYEL) sc->status.snmp.t1.line |= TLINE_RX_RAI; if (alm1 & ALM1_RYEL) sc->status.snmp.t1.line |= TLINE_RX_RAI; if (alm1 & ALM1_RLOF) sc->status.snmp.t1.line |= TLINE_TX_RAI; if (alm1 & ALM1_RAIS) sc->status.snmp.t1.line |= TLINE_RX_AIS; if (alm1 & ALM1_RLOS) sc->status.snmp.t1.line |= TLINE_TX_AIS; if (alm1 & ALM1_RLOF) sc->status.snmp.t1.line |= TLINE_LOF; if (alm1 & ALM1_RLOS) sc->status.snmp.t1.line |= TLINE_LOS; if (alm3 & ALM3_RMAIS) sc->status.snmp.t1.line |= T1LINE_RX_TS16_AIS; if (alm3 & ALM3_SRED) sc->status.snmp.t1.line |= T1LINE_TX_TS16_LOMF; if (alm3 & ALM3_SEF) sc->status.snmp.t1.line |= T1LINE_SEF; if (isr0 & 0x10) sc->status.snmp.t1.line |= T1LINE_RX_TEST; if ((alm1 & ALM1_RMYEL) && (FORMAT_E1CAS)) sc->status.snmp.t1.line |= T1LINE_RX_TS16_LOMF; /* SNMP Loopback Status */ sc->status.snmp.t1.loop &= ~(TLOOP_FAR_LINE | TLOOP_FAR_PAYLOAD); if (sc->config.loop_back == CFG_LOOP_TULIP) sc->status.snmp.t1.loop |= TLOOP_NEAR_OTHER; if (loop & LOOP_PAYLOAD) sc->status.snmp.t1.loop |= TLOOP_NEAR_PAYLOAD; if (loop & LOOP_LINE) sc->status.snmp.t1.loop |= TLOOP_NEAR_LINE; if (loop & LOOP_ANALOG) sc->status.snmp.t1.loop |= TLOOP_NEAR_OTHER; if (loop & LOOP_FRAMER) sc->status.snmp.t1.loop |= TLOOP_NEAR_INWARD; /* Remember this state until next time. */ sc->last_alm1 = alm1; /* If an INWARD loopback is in effect, link status is UP */ if (sc->config.loop_back != CFG_LOOP_NONE) /* XXX INWARD ONLY */ link_status = STATUS_UP; return link_status; } /* IOCTL SYSCALL: can sleep. 
*/ static void t1_send_bop(softc_t *sc, int bop_code) { u_int8_t bop; int i; /* The BOP transmitter could be sending a continuous */ /* BOP msg when told to send this BOP_25 message. */ /* So save and restore the state of the BOP machine. */ bop = read_framer(sc, Bt8370_BOP); write_framer(sc, Bt8370_BOP, RBOP_OFF | TBOP_OFF); for (i=0; i<40; i++) /* max delay 400 ms. */ if (read_framer(sc, Bt8370_BOP_STAT) & 0x80) SLEEP(10000); /* send 25 repetitions of bop_code */ write_framer(sc, Bt8370_BOP, RBOP_OFF | TBOP_25); write_framer(sc, Bt8370_TBOP, bop_code); /* order matters */ /* wait for tx to stop */ for (i=0; i<40; i++) /* max delay 400 ms. */ if (read_framer(sc, Bt8370_BOP_STAT) & 0x80) SLEEP(10000); /* Restore previous state of the BOP machine. */ write_framer(sc, Bt8370_BOP, bop); } /* IOCTL SYSCALL: can sleep. */ static int t1_ioctl(softc_t *sc, struct ioctl *ioctl) { int error = 0; switch (ioctl->cmd) { case IOCTL_SNMP_SEND: /* set opstatus? */ { switch (ioctl->data) { case TSEND_NORMAL: { write_framer(sc, Bt8370_TPATT, 0x00); /* tx pattern generator off */ write_framer(sc, Bt8370_RPATT, 0x00); /* rx pattern detector off */ write_framer(sc, Bt8370_TLB, 0x00); /* tx inband generator off */ break; } case TSEND_LINE: { if (FORMAT_T1ESF) t1_send_bop(sc, T1BOP_LINE_UP); else if (FORMAT_T1SF) { write_framer(sc, Bt8370_LBP, 0x08); /* 10000 10000 ... */ write_framer(sc, Bt8370_TLB, 0x05); /* 5 bits, framed, start */ } sc->status.snmp.t1.loop |= TLOOP_FAR_LINE; break; } case TSEND_PAYLOAD: { t1_send_bop(sc, T1BOP_PAY_UP); sc->status.snmp.t1.loop |= TLOOP_FAR_PAYLOAD; break; } case TSEND_RESET: { if (sc->status.snmp.t1.loop == TLOOP_FAR_LINE) { if (FORMAT_T1ESF) t1_send_bop(sc, T1BOP_LINE_DOWN); else if (FORMAT_T1SF) { write_framer(sc, Bt8370_LBP, 0x24); /* 100100 100100 ... */ write_framer(sc, Bt8370_TLB, 0x09); /* 6 bits, framed, start */ } sc->status.snmp.t1.loop &= ~TLOOP_FAR_LINE; } if (sc->status.snmp.t1.loop == TLOOP_FAR_PAYLOAD) { t1_send_bop(sc, T1BOP_PAY_DOWN); sc->status.snmp.t1.loop &= ~TLOOP_FAR_PAYLOAD; } break; } case TSEND_QRS: { write_framer(sc, Bt8370_TPATT, 0x1E); /* framed QRSS */ break; } default: { error = EINVAL; break; } } break; } case IOCTL_SNMP_LOOP: /* set opstatus = test? */ { u_int8_t new_loop = 0; if (ioctl->data == CFG_LOOP_NONE) new_loop = 0; else if (ioctl->data == CFG_LOOP_PAYLOAD) new_loop = LOOP_PAYLOAD; else if (ioctl->data == CFG_LOOP_LINE) new_loop = LOOP_LINE; else if (ioctl->data == CFG_LOOP_OTHER) new_loop = LOOP_ANALOG; else if (ioctl->data == CFG_LOOP_INWARD) new_loop = LOOP_FRAMER; else if (ioctl->data == CFG_LOOP_DUAL) new_loop = LOOP_DUAL; else error = EINVAL; if (error == 0) { write_framer(sc, Bt8370_LOOP, new_loop); sc->config.loop_back = ioctl->data; } break; } default: error = EINVAL; break; } return error; } static struct card hssi_card = { .config = hssi_config, .ident = hssi_ident, .watchdog = hssi_watchdog, .ioctl = hssi_ioctl, }; static struct card t3_card = { .config = t3_config, .ident = t3_ident, .watchdog = t3_watchdog, .ioctl = t3_ioctl, }; static struct card ssi_card = { .config = ssi_config, .ident = ssi_ident, .watchdog = ssi_watchdog, .ioctl = ssi_ioctl, }; static struct card t1_card = { .config = t1_config, .ident = t1_ident, .watchdog = t1_watchdog, .ioctl = t1_ioctl, }; /* RAWIP is raw IP packets (v4 or v6) in HDLC frames with NO HEADERS. */ /* No HDLC Address/Control fields! No line control protocol at all! */ /* This code is BSD/ifnet-specific; Linux and Netgraph also do RAWIP. 
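*/

/* With no link-level header, the only way the receive path can demux a
 * RAWIP frame is to peek at the IP version nibble in the first payload
 * byte, which is exactly what raw_input() below does. A stand-alone
 * sketch of that test (illustrative only, not driver code):
 */
#if 0 /* illustrative sketch; not part of the driver */
#include <stdio.h>

/* Return 4 or 6 for IPv4/IPv6 payloads, or -1 for anything else. */
static int ip_version(const unsigned char *payload)
{
  switch (payload[0]>>4)
    {
    case 4:  return 4;   /* IPv4: hand to the IPv4 input queue */
    case 6:  return 6;   /* IPv6: hand to the IPv6 input queue */
    default: return -1;  /* not IP: count it and discard it    */
    }
}

int main(void)
{
  unsigned char v4_hdr[] = { 0x45, 0x00 }; /* version 4, IHL 5 */
  unsigned char v6_hdr[] = { 0x60, 0x00 }; /* version 6        */
  printf("%d %d\n", ip_version(v4_hdr), ip_version(v6_hdr)); /* "4 6" */
  return 0;
}
#endif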
*/ #if IFNET # if ((defined(__FreeBSD__) && (__FreeBSD_version < 500000)) ||\ defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__)) static void netisr_dispatch(int isr, struct mbuf *mbuf) { struct ifqueue *intrq = NULL; int qfull = 0; #if INET if (isr == NETISR_IP) intrq = &ipintrq; #endif #if INET6 if (isr == NETISR_IPV6) intrq = &ip6intrq; #endif if ((intrq != NULL) && ((qfull = IF_QFULL(intrq)) == 0)) { /* rxintr_cleanup() ENQUEUES in a hard interrupt. */ /* networking code DEQUEUES in a soft interrupt. */ /* Some BSD QUEUE routines are not interrupt-safe. */ DISABLE_INTR; /* noop in FreeBSD */ IF_ENQUEUE(intrq, mbuf); ENABLE_INTR; schednetisr(isr); /* schedule a soft interrupt */ } else { m_freem(mbuf); if ((intrq != NULL) && (qfull != 0)) IF_DROP(intrq); } } # endif /* ((__FreeBSD__ && (__FreeBSD_version < 500000)) || */ /* __NetBSD__ || __OpenBSD__ || __bsdi__) */ /* rxintr_cleanup calls this to give a newly arrived pkt to higher levels. */ static void raw_input(struct ifnet *ifp, struct mbuf *mbuf) { softc_t *sc = IFP2SC(ifp); # if INET if (mbuf->m_data[0]>>4 == 4) netisr_dispatch(NETISR_IP, mbuf); else # endif # if INET6 if (mbuf->m_data[0]>>4 == 6) netisr_dispatch(NETISR_IPV6, mbuf); else # endif { m_freem(mbuf); sc->status.cntrs.idiscards++; if (DRIVER_DEBUG) printf("%s: raw_input: rx pkt discarded: not IPv4 or IPv6\n", NAME_UNIT); } } #endif /* IFNET */ /* There are TWO VERSIONS of interrupt/DMA code: Linux & BSD. * Handling Linux and the BSDs with CPP directives would * make the code unreadable, so there are two versions. * Conceptually, the two versions do the same thing and * core_interrupt() doesn't know they are different. * * We are "standing on the head of a pin" in these routines. * Tulip CSRs can be accessed, but nothing else is interrupt-safe! * Do NOT access: MII, GPIO, SROM, BIOSROM, XILINX, SYNTH, or DAC. */ #if BSD /* BSD version of interrupt/DMA code */ /* Singly-linked tail-queues hold mbufs with active DMA. * For RX, single mbuf clusters; for TX, mbuf chains are queued. * NB: mbufs are linked through their m_nextpkt field. * Callers must hold sc->bottom_lock; not otherwise locked. */ /* Put an mbuf (chain) on the tail of the descriptor ring queue. */ static void /* BSD version */ mbuf_enqueue(struct desc_ring *ring, struct mbuf *m) { m->m_nextpkt = NULL; if (ring->tail == NULL) ring->head = m; else ring->tail->m_nextpkt = m; ring->tail = m; } /* Get an mbuf (chain) from the head of the descriptor ring queue. */ static struct mbuf* /* BSD version */ mbuf_dequeue(struct desc_ring *ring) { struct mbuf *m = ring->head; if (m != NULL) if ((ring->head = m->m_nextpkt) == NULL) ring->tail = NULL; return m; } # ifdef __FreeBSD__ static void /* *** FreeBSD ONLY *** Callout from bus_dmamap_load() */ fbsd_dmamap_load(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct desc_ring *ring = arg; ring->nsegs = error ? 0 : nsegs; ring->segs[0] = segs[0]; ring->segs[1] = segs[1]; } # endif /* Initialize a DMA descriptor ring. */ static int /* BSD version */ create_ring(softc_t *sc, struct desc_ring *ring, int num_descs) { struct dma_desc *descs; int size_descs = sizeof(struct dma_desc)*num_descs; int i, error = 0; /* The DMA descriptor array must not cross a page boundary. */ if (size_descs > PAGE_SIZE) { printf("%s: DMA descriptor array > PAGE_SIZE (%d)\n", NAME_UNIT, (u_int)PAGE_SIZE); return EINVAL; } #ifdef __FreeBSD__ /* Create a DMA tag for descriptors and buffers. 
*/
  if ((error = bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT,
   BUS_SPACE_MAXADDR, NULL, NULL, PAGE_SIZE, 2, PAGE_SIZE, BUS_DMA_ALLOCNOW,
# if (__FreeBSD_version >= 502000)
   NULL, NULL,
# endif
   &ring->tag)))
    {
    printf("%s: bus_dma_tag_create() failed: error %d\n", NAME_UNIT, error);
    return error;
    }

  /* Allocate wired physical memory for DMA descriptor array */
  /* and map physical address to kernel virtual address. */
  if ((error = bus_dmamem_alloc(ring->tag, (void**)&ring->first,
   BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &ring->map)))
    {
    printf("%s: bus_dmamem_alloc() failed; error %d\n", NAME_UNIT, error);
    return error;
    }
  descs = ring->first;

  /* Map kernel virtual address to PCI address for DMA descriptor array. */
  if ((error = bus_dmamap_load(ring->tag, ring->map, descs, size_descs,
   fbsd_dmamap_load, ring, 0)))
    {
    printf("%s: bus_dmamap_load() failed; error %d\n", NAME_UNIT, error);
    return error;
    }
  ring->dma_addr = ring->segs[0].ds_addr;

  /* Allocate dmamaps for each DMA descriptor. */
  for (i=0; i<num_descs; i++)
    if ((error = bus_dmamap_create(ring->tag, 0, &descs[i].map)))
      {
      printf("%s: bus_dmamap_create() failed; error %d\n", NAME_UNIT, error);
      return error;
      }

#elif (defined(__NetBSD__) || defined(__OpenBSD__))

  /* Use the DMA tag passed to attach() for descriptors and buffers. */
  ring->tag = sc->pa_dmat;

  /* Allocate wired physical memory for DMA descriptor array. */
  if ((error = bus_dmamem_alloc(ring->tag, size_descs, PAGE_SIZE, 0,
   ring->segs, 1, &ring->nsegs, BUS_DMA_NOWAIT)))
    {
    printf("%s: bus_dmamem_alloc() failed; error %d\n", NAME_UNIT, error);
    return error;
    }

  /* Map physical address to kernel virtual address. */
  if ((error = bus_dmamem_map(ring->tag, ring->segs, ring->nsegs,
   size_descs, (caddr_t *)&ring->first, BUS_DMA_NOWAIT | BUS_DMA_COHERENT)))
    {
    printf("%s: bus_dmamem_map() failed; error %d\n", NAME_UNIT, error);
    return error;
    }
  descs = ring->first; /* suppress compiler warning about aliasing */
  memset(descs, 0, size_descs);

  /* Allocate dmamap for PCI access to DMA descriptor array. */
  if ((error = bus_dmamap_create(ring->tag, size_descs, 1, size_descs, 0,
   BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &ring->map)))
    {
    printf("%s: bus_dmamap_create() failed; error %d\n", NAME_UNIT, error);
    return error;
    }

  /* Map kernel virtual address to PCI address for DMA descriptor array. */
  if ((error = bus_dmamap_load(ring->tag, ring->map, descs, size_descs,
   0, BUS_DMA_NOWAIT)))
    {
    printf("%s: bus_dmamap_load() failed; error %d\n", NAME_UNIT, error);
    return error;
    }
  ring->dma_addr = ring->map->dm_segs[0].ds_addr;

  /* Allocate dmamaps for each DMA descriptor. */
  for (i=0; i<num_descs; i++)
    if ((error = bus_dmamap_create(ring->tag, MAX_DESC_LEN, 2,
     MAX_CHUNK_LEN, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &descs[i].map)))
      {
      printf("%s: bus_dmamap_create() failed; error %d\n", NAME_UNIT, error);
      return error;
      }

#elif defined(__bsdi__)

  /* Allocate wired physical memory for DMA descriptor array. */
  if ((ring->first = malloc(size_descs, M_DEVBUF, M_NOWAIT)) == NULL)
    {
    printf("%s: malloc() failed for DMA descriptor array\n", NAME_UNIT);
    return ENOMEM;
    }
  descs = ring->first;
  memset(descs, 0, size_descs);

  /* Map kernel virtual address to PCI address for DMA descriptor array. */
  ring->dma_addr = vtophys(descs); /* Relax! BSD/OS only. */
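/* The "must not cross a page boundary" rule that create_ring() enforces
 * works because the descriptor array is allocated page-aligned: an
 * array no larger than one page that starts on a page boundary cannot
 * straddle two pages. A user-space sketch of the arithmetic (the sizes
 * are assumptions for illustration; a 21140 descriptor is 16 bytes):
 */
#if 0 /* illustrative sketch; not part of the driver */
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096
#define SKETCH_DESC_SIZE 16   /* four 32-bit words per Tulip descriptor */

int main(void)
{
  int num_descs  = 64;                            /* example ring size */
  int size_descs = SKETCH_DESC_SIZE * num_descs;  /* 1024 bytes        */
  printf("ring fits in one page: %s\n",
         (size_descs <= SKETCH_PAGE_SIZE) ? "yes" : "no");
  return 0;
}
#endif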
*/ #endif ring->read = descs; ring->write = descs; ring->first = descs; ring->last = descs + num_descs -1; ring->last->control = TLP_DCTL_END_RING; ring->num_descs = num_descs; ring->size_descs = size_descs; ring->head = NULL; ring->tail = NULL; return 0; } /* Destroy a DMA descriptor ring */ static void /* BSD version */ destroy_ring(softc_t *sc, struct desc_ring *ring) { struct dma_desc *desc; struct mbuf *m; /* Free queued mbufs. */ while ((m = mbuf_dequeue(ring)) != NULL) m_freem(m); /* TX may have one pkt that is not on any queue. */ if (sc->tx_mbuf != NULL) { m_freem(sc->tx_mbuf); sc->tx_mbuf = NULL; } /* Unmap active DMA descriptors. */ while (ring->read != ring->write) { bus_dmamap_unload(ring->tag, ring->read->map); if (ring->read++ == ring->last) ring->read = ring->first; } #ifdef __FreeBSD__ /* Free the dmamaps of all DMA descriptors. */ for (desc=ring->first; desc!=ring->last+1; desc++) if (desc->map != NULL) bus_dmamap_destroy(ring->tag, desc->map); /* Unmap PCI address for DMA descriptor array. */ if (ring->dma_addr != 0) bus_dmamap_unload(ring->tag, ring->map); /* Free kernel memory for DMA descriptor array. */ if (ring->first != NULL) bus_dmamem_free(ring->tag, ring->first, ring->map); /* Free the DMA tag created for this ring. */ if (ring->tag != NULL) bus_dma_tag_destroy(ring->tag); #elif (defined(__NetBSD__) || defined(__OpenBSD__)) /* Free the dmamaps of all DMA descriptors. */ for (desc=ring->first; desc!=ring->last+1; desc++) if (desc->map != NULL) bus_dmamap_destroy(ring->tag, desc->map); /* Unmap PCI address for DMA descriptor array. */ if (ring->dma_addr != 0) bus_dmamap_unload(ring->tag, ring->map); /* Free dmamap for DMA descriptor array. */ if (ring->map != NULL) bus_dmamap_destroy(ring->tag, ring->map); /* Unmap kernel address for DMA descriptor array. */ if (ring->first != NULL) bus_dmamem_unmap(ring->tag, (caddr_t)ring->first, ring->size_descs); /* Free kernel memory for DMA descriptor array. */ if (ring->segs[0].ds_addr != 0) bus_dmamem_free(ring->tag, ring->segs, ring->nsegs); #elif defined(__bsdi__) /* Free kernel memory for DMA descriptor array. */ if (ring->first != NULL) free(ring->first, M_DEVBUF); #endif } /* Clean up after a packet has been received. */ static int /* BSD version */ rxintr_cleanup(softc_t *sc) { struct desc_ring *ring = &sc->rxring; struct dma_desc *first_desc, *last_desc; struct mbuf *first_mbuf=NULL, *last_mbuf=NULL; struct mbuf *new_mbuf; int pkt_len, desc_len; #if (defined(__FreeBSD__) && defined(DEVICE_POLLING)) /* Input packet flow control (livelock prevention): */ /* Give pkts to higher levels only if quota is > 0. */ if (sc->quota <= 0) return 0; #endif /* This looks complicated, but remember: typically packets up */ /* to 2048 bytes long fit in one mbuf and use one descriptor. */ first_desc = last_desc = ring->read; /* ASSERTION: If there is a descriptor in the ring and the hardware has */ /* finished with it, then that descriptor will have RX_FIRST_DESC set. */ if ((ring->read != ring->write) && /* descriptor ring not empty */ ((ring->read->status & TLP_DSTS_OWNER) == 0) && /* hardware done */ ((ring->read->status & TLP_DSTS_RX_FIRST_DESC) == 0)) /* should be set */ panic("%s: rxintr_cleanup: rx-first-descriptor not set.\n", NAME_UNIT); /* First decide if a complete packet has arrived. */ /* Run down DMA descriptors looking for one marked "last". */ /* Bail out if an active descriptor is encountered. */ /* Accumulate most significant bits of packet length. 
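*/

/* For reference, a worked example of the length reconstruction done just
 * below: the running sum over fully-filled descriptors supplies the
 * high-order bits, and the last descriptor's status supplies the 12
 * trustworthy low bits (illustrative user-space C, not driver code):
 */
#if 0 /* illustrative sketch; not part of the driver */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
  uint32_t sum = 5120;             /* bytes in fully-filled descriptors  */
  uint32_t status = 5130u << 16;   /* h/w frame length in status<31:16>  */
  uint32_t pkt_len = sum;
  if (((status>>16) & 0xFFF) == 0) /* low 12 bits wrapped through zero   */
    pkt_len += 4096;               /* so propagate the carry             */
  pkt_len = (pkt_len & 0xF000) + ((status>>16) & 0x0FFF);
  printf("pkt_len = %u\n", pkt_len); /* prints 5130 (4096 + 1034)        */
  return 0;
}
#endif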
*/ pkt_len = 0; for (;;) { if (last_desc == ring->write) return 0; /* no more descs */ if (last_desc->status & TLP_DSTS_OWNER) return 0; /* still active */ if (last_desc->status & TLP_DSTS_RX_LAST_DESC) break; /* end of packet */ pkt_len += last_desc->length1 + last_desc->length2; /* entire desc filled */ if (last_desc++->control & TLP_DCTL_END_RING) last_desc = ring->first; /* ring wrap */ } /* A complete packet has arrived; how long is it? */ /* H/w ref man shows RX pkt length as a 14-bit field. */ /* An experiment found that only the 12 LSBs work. */ if (((last_desc->status>>16)&0xFFF) == 0) pkt_len += 4096; /* carry-bit */ pkt_len = (pkt_len & 0xF000) + ((last_desc->status>>16) & 0x0FFF); /* Subtract the CRC length unless doing so would underflow. */ if (pkt_len >= sc->config.crc_len) pkt_len -= sc->config.crc_len; /* Run down DMA descriptors again doing the following: * 1) put pkt info in pkthdr of first mbuf, * 2) link mbufs, * 3) set mbuf lengths. */ first_desc = ring->read; do { /* Read a DMA descriptor from the ring. */ last_desc = ring->read; /* Advance the ring read pointer. */ if (ring->read++ == ring->last) ring->read = ring->first; /* Dequeue the corresponding cluster mbuf. */ new_mbuf = mbuf_dequeue(ring); if (new_mbuf == NULL) panic("%s: rxintr_cleanup: expected an mbuf\n", NAME_UNIT); desc_len = last_desc->length1 + last_desc->length2; /* If bouncing, copy bounce buf to mbuf. */ DMA_SYNC(last_desc->map, desc_len, BUS_DMASYNC_POSTREAD); /* Unmap kernel virtual address to PCI address. */ bus_dmamap_unload(ring->tag, last_desc->map); /* 1) Put pkt info in pkthdr of first mbuf. */ if (last_desc == first_desc) { first_mbuf = new_mbuf; first_mbuf->m_pkthdr.len = pkt_len; /* total pkt length */ #if IFNET first_mbuf->m_pkthdr.rcvif = sc->ifp; /* how it got here */ #else first_mbuf->m_pkthdr.rcvif = NULL; #endif } else /* 2) link mbufs. */ { last_mbuf->m_next = new_mbuf; /* M_PKTHDR should be set in the first mbuf only. */ new_mbuf->m_flags &= ~M_PKTHDR; } last_mbuf = new_mbuf; /* 3) Set mbuf lengths. */ new_mbuf->m_len = (pkt_len >= desc_len) ? desc_len : pkt_len; pkt_len -= new_mbuf->m_len; } while ((last_desc->status & TLP_DSTS_RX_LAST_DESC) == 0); /* Decide whether to accept or to discard this packet. */ /* RxHDLC sets MIIERR for bad CRC, abort and partial byte at pkt end. */ if (((last_desc->status & TLP_DSTS_RX_BAD) == 0) && (sc->status.oper_status == STATUS_UP) && (first_mbuf->m_pkthdr.len > 0)) { /* Optimization: copy a small pkt into a small mbuf. */ if (first_mbuf->m_pkthdr.len <= COPY_BREAK) { MGETHDR(new_mbuf, M_DONTWAIT, MT_DATA); if (new_mbuf != NULL) { new_mbuf->m_pkthdr.rcvif = first_mbuf->m_pkthdr.rcvif; new_mbuf->m_pkthdr.len = first_mbuf->m_pkthdr.len; new_mbuf->m_len = first_mbuf->m_len; memcpy(new_mbuf->m_data, first_mbuf->m_data, first_mbuf->m_pkthdr.len); m_freem(first_mbuf); first_mbuf = new_mbuf; } } /* Include CRC and one flag byte in input byte count. */ sc->status.cntrs.ibytes += first_mbuf->m_pkthdr.len + sc->config.crc_len +1; sc->status.cntrs.ipackets++; #if IFNET sc->ifp->if_ipackets++; LMC_BPF_MTAP(first_mbuf); #endif #if (defined(__FreeBSD__) && defined(DEVICE_POLLING)) sc->quota--; #endif /* Give this good packet to the network stacks. */ #if NETGRAPH if (sc->ng_hook != NULL) /* is hook connected? 
*/
*/ { # if (__FreeBSD_version >= 500000) int error; /* ignore error */ NG_SEND_DATA_ONLY(error, sc->ng_hook, first_mbuf); # else /* FreeBSD-4 */ ng_queue_data(sc->ng_hook, first_mbuf, NULL); # endif return 1; /* did something */ } #endif /* NETGRAPH */ if (sc->config.line_pkg == PKG_RAWIP) raw_input(sc->ifp, first_mbuf); else { #if NSPPP sppp_input(sc->ifp, first_mbuf); #elif P2P new_mbuf = first_mbuf; while (new_mbuf != NULL) { sc->p2p->p2p_hdrinput(sc->p2p, new_mbuf->m_data, new_mbuf->m_len); new_mbuf = new_mbuf->m_next; } sc->p2p->p2p_input(sc->p2p, NULL); m_freem(first_mbuf); #else m_freem(first_mbuf); sc->status.cntrs.idiscards++; #endif } } else if (sc->status.oper_status != STATUS_UP) { /* If the link is down, this packet is probably noise. */ m_freem(first_mbuf); sc->status.cntrs.idiscards++; if (DRIVER_DEBUG) printf("%s: rxintr_cleanup: rx pkt discarded: link down\n", NAME_UNIT); } else /* Log and discard this bad packet. */ { if (DRIVER_DEBUG) printf("%s: RX bad pkt; len=%d %s%s%s%s\n", NAME_UNIT, first_mbuf->m_pkthdr.len, (last_desc->status & TLP_DSTS_RX_MII_ERR) ? " miierr" : "", (last_desc->status & TLP_DSTS_RX_DRIBBLE) ? " dribble" : "", (last_desc->status & TLP_DSTS_RX_DESC_ERR) ? " descerr" : "", (last_desc->status & TLP_DSTS_RX_OVERRUN) ? " overrun" : ""); if (last_desc->status & TLP_DSTS_RX_OVERRUN) sc->status.cntrs.fifo_over++; else sc->status.cntrs.ierrors++; m_freem(first_mbuf); } return 1; /* did something */ } /* Setup (prepare) to receive a packet. */ /* Try to keep the RX descriptor ring full of empty buffers. */ static int /* BSD version */ rxintr_setup(softc_t *sc) { struct desc_ring *ring = &sc->rxring; struct dma_desc *desc; struct mbuf *m; int desc_len; int error; /* Ring is full if (wrap(write+1)==read) */ if (((ring->write == ring->last) ? ring->first : ring->write+1) == ring->read) return 0; /* ring is full; nothing to do */ /* Allocate a small mbuf and attach an mbuf cluster. */ MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) { sc->status.cntrs.rxdma++; if (DRIVER_DEBUG) printf("%s: rxintr_setup: MGETHDR() failed\n", NAME_UNIT); return 0; } MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_freem(m); sc->status.cntrs.rxdma++; if (DRIVER_DEBUG) printf("%s: rxintr_setup: MCLGET() failed\n", NAME_UNIT); return 0; } /* Queue the mbuf for later processing by rxintr_cleanup. */ mbuf_enqueue(ring, m); /* Write a DMA descriptor into the ring. */ /* Hardware won't see it until the OWNER bit is set. */ desc = ring->write; /* Advance the ring write pointer. */ if (ring->write++ == ring->last) ring->write = ring->first; desc_len = (MCLBYTES < MAX_DESC_LEN) ? MCLBYTES : MAX_DESC_LEN; /* Map kernel virtual address to PCI address. */ if ((error = DMA_LOAD(desc->map, m->m_data, desc_len))) printf("%s: bus_dmamap_load(rx) failed; error %d\n", NAME_UNIT, error); /* Invalidate the cache for this mbuf. */ DMA_SYNC(desc->map, desc_len, BUS_DMASYNC_PREREAD); /* Set up the DMA descriptor. */ #ifdef __FreeBSD__ desc->address1 = ring->segs[0].ds_addr; #elif (defined(__NetBSD__) || defined(__OpenBSD__)) desc->address1 = desc->map->dm_segs[0].ds_addr; #elif defined(__bsdi__) desc->address1 = vtophys(m->m_data); /* Relax! BSD/OS only. */ #endif desc->length1 = desc_len>>1; desc->address2 = desc->address1 + desc->length1; desc->length2 = desc_len>>1; /* Before setting the OWNER bit, flush the cache (memory barrier). */ DMA_SYNC(ring->map, ring->size_descs, BUS_DMASYNC_PREWRITE); /* Commit the DMA descriptor to the hardware. 
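*/

/* Handing a descriptor to the device is a two-step publish: fill in
 * every other field, issue a write barrier (the DMA_SYNC above), and
 * only then set the OWNER bit -- the store just below. A compressed
 * sketch of that protocol, with a C11 release store standing in for
 * the bus-level barrier (illustrative only, not driver code):
 */
#if 0 /* illustrative sketch; not part of the driver */
#include <stdatomic.h>
#include <stdint.h>

struct sketch_desc { uint32_t addr, len; _Atomic uint32_t status; };
#define SKETCH_OWNER 0x80000000u

static void publish(struct sketch_desc *d, uint32_t addr, uint32_t len)
{
  d->addr = addr;   /* 1) fill in the payload fields */
  d->len  = len;
  atomic_store_explicit(&d->status, SKETCH_OWNER,
   memory_order_release); /* 2) barrier, then hand ownership to the device */
}
#endif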
*/ desc->status = TLP_DSTS_OWNER; /* Notify the receiver that there is another buffer available. */ WRITE_CSR(TLP_RX_POLL, 1); return 1; /* did something */ } /* Clean up after a packet has been transmitted. */ /* Free the mbuf chain and update the DMA descriptor ring. */ static int /* BSD version */ txintr_cleanup(softc_t *sc) { struct desc_ring *ring = &sc->txring; struct dma_desc *desc; while ((ring->read != ring->write) && /* while ring is not empty */ ((ring->read->status & TLP_DSTS_OWNER) == 0)) { /* Read a DMA descriptor from the ring. */ desc = ring->read; /* Advance the ring read pointer. */ if (ring->read++ == ring->last) ring->read = ring->first; /* This is a no-op on most architectures. */ DMA_SYNC(desc->map, desc->length1 + desc->length2, BUS_DMASYNC_POSTWRITE); /* Unmap kernel virtual address to PCI address. */ bus_dmamap_unload(ring->tag, desc->map); /* If this descriptor is the last segment of a packet, */ /* then dequeue and free the corresponding mbuf chain. */ if ((desc->control & TLP_DCTL_TX_LAST_SEG) != 0) { struct mbuf *m; if ((m = mbuf_dequeue(ring)) == NULL) panic("%s: txintr_cleanup: expected an mbuf\n", NAME_UNIT); /* Include CRC and one flag byte in output byte count. */ sc->status.cntrs.obytes += m->m_pkthdr.len + sc->config.crc_len +1; sc->status.cntrs.opackets++; #if IFNET sc->ifp->if_opackets++; LMC_BPF_MTAP(m); #endif /* The only bad TX status is fifo underrun. */ if ((desc->status & TLP_DSTS_TX_UNDERRUN) != 0) sc->status.cntrs.fifo_under++; m_freem(m); return 1; /* did something */ } } return 0; } /* Build DMA descriptors for a transmit packet mbuf chain. */ static int /* 0=success; 1=error */ /* BSD version */ txintr_setup_mbuf(softc_t *sc, struct mbuf *m) { struct desc_ring *ring = &sc->txring; struct dma_desc *desc; unsigned int desc_len; /* build DMA descriptors for a chain of mbufs. */ while (m != NULL) { char *data = m->m_data; int length = m->m_len; /* zero length mbufs happen! */ /* Build DMA descriptors for one mbuf. */ while (length > 0) { int error; /* Ring is full if (wrap(write+1)==read) */ if (((ring->temp==ring->last) ? ring->first : ring->temp+1) == ring->read) { /* Not enough DMA descriptors; try later. */ for (; ring->temp!=ring->write; ring->temp = (ring->temp==ring->first)? ring->last : ring->temp-1) bus_dmamap_unload(ring->tag, ring->temp->map); sc->status.cntrs.txdma++; return 1; } /* Provisionally, write a descriptor into the ring. */ /* But don't change the REAL ring write pointer. */ /* Hardware won't see it until the OWNER bit is set. */ desc = ring->temp; /* Advance the temporary ring write pointer. */ if (ring->temp++ == ring->last) ring->temp = ring->first; /* Clear all control bits except the END_RING bit. */ desc->control &= TLP_DCTL_END_RING; /* Don't pad short packets up to 64 bytes. */ desc->control |= TLP_DCTL_TX_NO_PAD; /* Use Tulip's CRC-32 generator, if appropriate. */ if (sc->config.crc_len != CFG_CRC_32) desc->control |= TLP_DCTL_TX_NO_CRC; /* Set the OWNER bit, except in the first descriptor. */ if (desc != ring->write) desc->status = TLP_DSTS_OWNER; desc_len = (length > MAX_CHUNK_LEN) ? MAX_CHUNK_LEN : length; /* Map kernel virtual address to PCI address. */ if ((error = DMA_LOAD(desc->map, data, desc_len))) printf("%s: bus_dmamap_load(tx) failed; error %d\n", NAME_UNIT, error); /* Flush the cache and if bouncing, copy mbuf to bounce buf. */ DMA_SYNC(desc->map, desc_len, BUS_DMASYNC_PREWRITE); /* Prevent wild fetches if mapping fails (nsegs==0). 
*/ desc->length1 = desc->length2 = 0; desc->address1 = desc->address2 = 0; #if (defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) { # ifdef __FreeBSD__ bus_dma_segment_t *segs = ring->segs; int nsegs = ring->nsegs; # elif (defined(__NetBSD__) || defined(__OpenBSD__)) bus_dma_segment_t *segs = desc->map->dm_segs; int nsegs = desc->map->dm_nsegs; # endif if (nsegs >= 1) { desc->address1 = segs[0].ds_addr; desc->length1 = segs[0].ds_len; } if (nsegs == 2) { desc->address2 = segs[1].ds_addr; desc->length2 = segs[1].ds_len; } } #elif defined(__bsdi__) desc->address1 = vtophys(data); /* Relax! BSD/OS only. */ desc->length1 = desc_len; #endif data += desc_len; length -= desc_len; } /* while (length > 0) */ m = m->m_next; } /* while (m != NULL) */ return 0; /* success */ } /* Setup (prepare) to transmit a packet. */ /* Select a packet, build DMA descriptors and give packet to hardware. */ /* If DMA descriptors run out, abandon the attempt and return 0. */ static int /* BSD version */ txintr_setup(softc_t *sc) { struct desc_ring *ring = &sc->txring; struct dma_desc *first_desc, *last_desc; /* Protect against half-up links: Don't transmit */ /* if the receiver can't hear the far end. */ if (sc->status.oper_status != STATUS_UP) return 0; /* Pick a packet to transmit. */ #if NETGRAPH if ((sc->ng_hook != NULL) && (sc->tx_mbuf == NULL)) { if (!IFQ_IS_EMPTY(&sc->ng_fastq)) IFQ_DEQUEUE(&sc->ng_fastq, sc->tx_mbuf); else IFQ_DEQUEUE(&sc->ng_sndq, sc->tx_mbuf); } else #endif if (sc->tx_mbuf == NULL) { if (sc->config.line_pkg == PKG_RAWIP) IFQ_DEQUEUE(&sc->ifp->if_snd, sc->tx_mbuf); else { #if NSPPP sc->tx_mbuf = sppp_dequeue(sc->ifp); #elif P2P if (!IFQ_IS_EMPTY(&sc->p2p->p2p_isnd)) IFQ_DEQUEUE(&sc->p2p->p2p_isnd, sc->tx_mbuf); else IFQ_DEQUEUE(&sc->ifp->if_snd, sc->tx_mbuf); #endif } } if (sc->tx_mbuf == NULL) return 0; /* no pkt to transmit */ /* Build DMA descriptors for an outgoing mbuf chain. */ ring->temp = ring->write; /* temporary ring write pointer */ if (txintr_setup_mbuf(sc, sc->tx_mbuf) != 0) return 0; /* Enqueue the mbuf; txintr_cleanup will free it. */ mbuf_enqueue(ring, sc->tx_mbuf); /* The transmitter has room for another packet. */ sc->tx_mbuf = NULL; /* Set first & last segment bits. */ /* last_desc is the desc BEFORE the one pointed to by ring->temp. */ first_desc = ring->write; first_desc->control |= TLP_DCTL_TX_FIRST_SEG; last_desc = (ring->temp==ring->first)? ring->last : ring->temp-1; last_desc->control |= TLP_DCTL_TX_LAST_SEG; /* Interrupt at end-of-transmission? Why bother the poor computer! */ /* last_desc->control |= TLP_DCTL_TX_INTERRUPT; */ /* Make sure the OWNER bit is not set in the next descriptor. */ /* The OWNER bit may have been set if a previous call aborted. */ ring->temp->status = 0; /* Commit the DMA descriptors to the software. */ ring->write = ring->temp; /* Before setting the OWNER bit, flush the cache (memory barrier). */ DMA_SYNC(ring->map, ring->size_descs, BUS_DMASYNC_PREWRITE); /* Commit the DMA descriptors to the hardware. */ first_desc->status = TLP_DSTS_OWNER; /* Notify the transmitter that there is another packet to send. */ WRITE_CSR(TLP_TX_POLL, 1); return 1; /* did something */ } #endif /* BSD */ #ifdef __linux__ /* NOTE: this is the LINUX version of the interrupt/DMA code, */ /* Singly-linked tail-queues hold sk_buffs with active DMA. * skbuffs are linked through their sk_buff.next field. * Callers must hold sc->bottom_lock; not otherwise locked. */ /* Put an skbuff on the tail of the descriptor ring queue. 
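*/

/* The mbuf and sk_buff queues in this driver are the same structure: a
 * singly-linked list with head and tail pointers, threaded through each
 * node's next field, giving O(1) enqueue and dequeue. A generic sketch
 * of the pattern that skbuff_enqueue()/skbuff_dequeue() below follow:
 */
#if 0 /* illustrative sketch; not part of the driver */
#include <stddef.h>

struct node  { struct node *next; };
struct tailq { struct node *head, *tail; };

static void enqueue(struct tailq *q, struct node *n)
{
  n->next = NULL;
  if (q->tail == NULL) q->head = n; /* queue was empty       */
  else q->tail->next = n;           /* append after old tail */
  q->tail = n;
}

static struct node *dequeue(struct tailq *q)
{
  struct node *n = q->head;
  if (n != NULL && (q->head = n->next) == NULL)
    q->tail = NULL;                 /* queue went empty */
  return n;
}
#endif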
*/ static void /* Linux version */ skbuff_enqueue(struct desc_ring *ring, struct sk_buff *skb) { skb->next = NULL; if (ring->tail == NULL) ring->head = skb; else ring->tail->next = skb; ring->tail = skb; } /* Get an skbuff from the head of the descriptor ring queue. */ static struct sk_buff* /* Linux version */ skbuff_dequeue(struct desc_ring *ring) { struct sk_buff *skb = ring->head; if (skb != NULL) if ((ring->head = skb->next) == NULL) ring->tail = NULL; return skb; } /* Initialize a DMA descriptor ring. */ static int /* Linux version */ create_ring(softc_t *sc, struct desc_ring *ring, int num_descs) { struct dma_desc *descs; int size_descs = sizeof(struct dma_desc)*num_descs; /* Allocate and map memory for DMA descriptor array. */ if ((descs = pci_alloc_consistent(sc->pci_dev, size_descs, &ring->dma_addr)) == NULL) { printk("%s: pci_alloc_consistent() failed\n", NAME_UNIT); return ENOMEM; } memset(descs, 0, size_descs); ring->read = descs; ring->write = descs; ring->first = descs; ring->last = descs + num_descs -1; ring->last->control = TLP_DCTL_END_RING; ring->num_descs = num_descs; ring->size_descs = size_descs; ring->head = NULL; ring->tail = NULL; return 0; } /* Destroy a DMA descriptor ring */ static void /* Linux version */ destroy_ring(softc_t *sc, struct desc_ring *ring) { struct sk_buff *skb; /* Free queued skbuffs. */ while ((skb = skbuff_dequeue(ring)) != NULL) dev_kfree_skb(skb); /* TX may have one pkt that is not on any queue. */ if (sc->tx_skb != NULL) { dev_kfree_skb(sc->tx_skb); sc->tx_skb = NULL; } if (ring->first != NULL) { /* Unmap active DMA descriptors. */ while (ring->read != ring->write) { pci_unmap_single(sc->pci_dev, ring->read->address1, ring->read->length1 + ring->read->length2, PCI_DMA_BIDIRECTIONAL); if (ring->read++ == ring->last) ring->read = ring->first; } /* Unmap and free memory for DMA descriptor array. */ pci_free_consistent(sc->pci_dev, ring->size_descs, ring->first, ring->dma_addr); } } static int /* Linux version */ rxintr_cleanup(softc_t *sc) { struct desc_ring *ring = &sc->rxring; struct dma_desc *first_desc, *last_desc; struct sk_buff *first_skb=NULL, *last_skb=NULL; struct sk_buff *new_skb; int pkt_len, desc_len; /* Input packet flow control (livelock prevention): */ /* Give pkts to higher levels only if quota is > 0. */ if (sc->quota <= 0) return 0; /* This looks complicated, but remember: packets up to 4032 */ /* bytes long fit in one skbuff and use one DMA descriptor. */ first_desc = last_desc = ring->read; /* ASSERTION: If there is a descriptor in the ring and the hardware has */ /* finished with it, then that descriptor will have RX_FIRST_DESC set. */ if ((ring->read != ring->write) && /* descriptor ring not empty */ ((ring->read->status & TLP_DSTS_OWNER) == 0) && /* hardware done */ ((ring->read->status & TLP_DSTS_RX_FIRST_DESC) == 0)) /* should be set */ panic("%s: rxintr_cleanup: rx-first-descriptor not set.\n", NAME_UNIT); /* First decide if a complete packet has arrived. */ /* Run down DMA descriptors looking for one marked "last". */ /* Bail out if an active descriptor is encountered. */ /* Accumulate most significant bits of packet length. 
*/
  pkt_len = 0;
  for (;;)
    {
    if (last_desc == ring->write) return 0;               /* no more descs */
    if (last_desc->status & TLP_DSTS_OWNER) return 0;     /* still active */
    if (last_desc->status & TLP_DSTS_RX_LAST_DESC) break; /* end of packet */
    pkt_len += last_desc->length1 + last_desc->length2; /* entire desc filled */
    if (last_desc++->control & TLP_DCTL_END_RING)
      last_desc = ring->first; /* ring wrap */
    }

  /* A complete packet has arrived; how long is it? */
  /* H/w ref man shows RX pkt length as a 14-bit field. */
  /* An experiment found that only the 12 LSBs work. */
  if (((last_desc->status>>16)&0xFFF) == 0) pkt_len += 4096; /* carry-bit */
  pkt_len = (pkt_len & 0xF000) + ((last_desc->status>>16) & 0x0FFF);

  /* Subtract the CRC length unless doing so would underflow. */
  if (pkt_len >= sc->config.crc_len) pkt_len -= sc->config.crc_len;

  /* Run down DMA descriptors again doing the following:
   * 1) put pkt info in hdr of first skbuff.
   * 2) put additional skbuffs on frag_list.
   * 3) set skbuff lengths.
   */
  first_desc = ring->read;
  do
    {
    /* Read a DMA descriptor from the ring. */
    last_desc = ring->read;
    /* Advance the ring read pointer. */
    if (ring->read++ == ring->last) ring->read = ring->first;

    /* Dequeue the corresponding skbuff. */
    new_skb = skbuff_dequeue(ring);
    if (new_skb == NULL)
      panic("%s: rxintr_cleanup: expected an skbuff\n", NAME_UNIT);

    desc_len = last_desc->length1 + last_desc->length2;
    /* Unmap kernel virtual address to PCI address. */
    pci_unmap_single(sc->pci_dev, last_desc->address1, desc_len,
     PCI_DMA_FROMDEVICE);

    /* Set skbuff length. */
    skb_put(new_skb, (pkt_len >= desc_len) ? desc_len : pkt_len);
    pkt_len -= new_skb->len;

    /* 1) Put pkt info in hdr of first skbuff. */
    if (last_desc == first_desc)
      {
      first_skb = new_skb;
      if (sc->config.line_pkg == PKG_RAWIP)
        {
        if      (first_skb->data[0]>>4 == 4)
          first_skb->protocol = htons(ETH_P_IP);
        else if (first_skb->data[0]>>4 == 6)
          first_skb->protocol = htons(ETH_P_IPV6);
        }
      else
#if GEN_HDLC
        first_skb->protocol = hdlc_type_trans(first_skb, sc->net_dev);
#else
        first_skb->protocol = htons(ETH_P_HDLC);
#endif
      first_skb->mac.raw = first_skb->data;
      first_skb->dev = sc->net_dev;
      do_gettimeofday(&first_skb->stamp);
      sc->net_dev->last_rx = jiffies;
      }
    else /* 2) link skbuffs. */
      {
      /* Put this skbuff on the frag_list of the first skbuff. */
      new_skb->next = NULL;
      if (skb_shinfo(first_skb)->frag_list == NULL)
        skb_shinfo(first_skb)->frag_list = new_skb;
      else
        last_skb->next = new_skb;
      /* 3) set skbuff lengths. */
      first_skb->len      += new_skb->len;
      first_skb->data_len += new_skb->len;
      }
    last_skb = new_skb;
    } while ((last_desc->status & TLP_DSTS_RX_LAST_DESC) == 0);

  /* Decide whether to accept or to discard this packet. */
  /* RxHDLC sets MIIERR for bad CRC, abort and partial byte at pkt end. */
  if (((last_desc->status & TLP_DSTS_RX_BAD) == 0) &&
      (sc->status.oper_status == STATUS_UP) &&
      (first_skb->len > 0))
    {
    /* Optimization: copy a small pkt into a small skbuff. */
    if (first_skb->len <= COPY_BREAK)
      if ((new_skb = skb_copy(first_skb, GFP_ATOMIC)) != NULL)
        {
        dev_kfree_skb_any(first_skb);
        first_skb = new_skb;
        }

    /* Include CRC and one flag byte in input byte count. */
    sc->status.cntrs.ibytes += first_skb->len + sc->config.crc_len +1;
    sc->status.cntrs.ipackets++;

    /* Give this good packet to the network stacks. */
    netif_receive_skb(first_skb); /* NAPI */
    sc->quota--;
    }
  else if (sc->status.oper_status != STATUS_UP)
    {
    /* If the link is down, this packet is probably noise. */
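/* The COPY_BREAK test above trades one memcpy for better memory use: a
 * tiny packet received into a full-size buffer is copied into a
 * right-sized one so the big buffer can be recycled immediately. A
 * user-space sketch of the threshold decision (the threshold value and
 * helper name are assumptions for illustration, not the driver's):
 */
#if 0 /* illustrative sketch; not part of the driver */
#include <stdlib.h>
#include <string.h>

#define SKETCH_COPY_BREAK 128  /* the driver uses its own COPY_BREAK */

/* Return a buffer holding 'len' bytes: a fresh small buffer when the
 * copy pays off, otherwise the original large buffer. */
static void *maybe_shrink(void *big, size_t len)
{
  if (len <= SKETCH_COPY_BREAK)
    {
    void *small = malloc(len);
    if (small != NULL)
      { memcpy(small, big, len); free(big); return small; }
    }
  return big; /* keep the original buffer */
}
#endif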
*/ sc->status.cntrs.idiscards++; dev_kfree_skb_any(first_skb); if (DRIVER_DEBUG) printk("%s: rxintr_cleanup: rx pkt discarded: link down\n", NAME_UNIT); } else /* Log and discard this bad packet. */ { if (DRIVER_DEBUG) printk("%s: RX bad pkt; len=%d %s%s%s%s\n", NAME_UNIT, first_skb->len, (last_desc->status & TLP_DSTS_RX_MII_ERR) ? " miierr" : "", (last_desc->status & TLP_DSTS_RX_DRIBBLE) ? " dribble" : "", (last_desc->status & TLP_DSTS_RX_DESC_ERR) ? " descerr" : "", (last_desc->status & TLP_DSTS_RX_OVERRUN) ? " overrun" : ""); if (last_desc->status & TLP_DSTS_RX_OVERRUN) sc->status.cntrs.fifo_over++; else sc->status.cntrs.ierrors++; dev_kfree_skb_any(first_skb); } return 1; /* did something */ } /* Setup (prepare) to receive a packet. */ /* Try to keep the RX descriptor ring full of empty buffers. */ static int /* Linux version */ rxintr_setup(softc_t *sc) { struct desc_ring *ring = &sc->rxring; struct dma_desc *desc; struct sk_buff *skb; u_int32_t dma_addr; /* Ring is full if (wrap(write+1)==read) */ if (((ring->write == ring->last) ? ring->first : ring->write+1) == ring->read) return 0; /* ring is full; nothing to do */ /* Allocate an skbuff. */ if ((skb = dev_alloc_skb(MAX_DESC_LEN)) == NULL) { sc->status.cntrs.rxdma++; if (DRIVER_DEBUG) printk("%s: rxintr_setup: dev_alloc_skb() failed\n", NAME_UNIT); return 0; } skb->dev = sc->net_dev; /* Queue the skbuff for later processing by rxintr_cleanup. */ skbuff_enqueue(ring, skb); /* Write a DMA descriptor into the ring. */ /* Hardware won't see it until the OWNER bit is set. */ desc = ring->write; /* Advance the ring write pointer. */ if (ring->write++ == ring->last) ring->write = ring->first; /* Map kernel virtual addresses to PCI addresses. */ dma_addr = pci_map_single(sc->pci_dev, skb->data, MAX_DESC_LEN, PCI_DMA_FROMDEVICE); /* Set up the DMA descriptor. */ desc->address1 = dma_addr; desc->length1 = MAX_CHUNK_LEN; desc->address2 = desc->address1 + desc->length1; desc->length2 = MAX_CHUNK_LEN; /* Before setting the OWNER bit, flush the cache (memory barrier). */ wmb(); /* write memory barrier */ /* Commit the DMA descriptor to the hardware. */ desc->status = TLP_DSTS_OWNER; /* Notify the receiver that there is another buffer available. */ WRITE_CSR(TLP_RX_POLL, 1); return 1; /* did something */ } /* Clean up after a packet has been transmitted. */ /* Free the sk_buff and update the DMA descriptor ring. */ static int /* Linux version */ txintr_cleanup(softc_t *sc) { struct desc_ring *ring = &sc->txring; struct dma_desc *desc; while ((ring->read != ring->write) && /* ring is not empty */ ((ring->read->status & TLP_DSTS_OWNER) == 0)) { /* Read a DMA descriptor from the ring. */ desc = ring->read; /* Advance the ring read pointer. */ if (ring->read++ == ring->last) ring->read = ring->first; /* Unmap kernel virtual address to PCI address. */ pci_unmap_single(sc->pci_dev, desc->address1, desc->length1 + desc->length2, PCI_DMA_TODEVICE); /* If this descriptor is the last segment of a packet, */ /* then dequeue and free the corresponding skbuff. */ if ((desc->control & TLP_DCTL_TX_LAST_SEG) != 0) { struct sk_buff *skb; if ((skb = skbuff_dequeue(ring)) == NULL) panic("%s: txintr_cleanup: expected an sk_buff\n", NAME_UNIT); /* Include CRC and one flag byte in output byte count. */ sc->status.cntrs.obytes += skb->len + sc->config.crc_len +1; sc->status.cntrs.opackets++; /* The only bad TX status is fifo underrun. */ if ((desc->status & TLP_DSTS_TX_UNDERRUN) != 0) { sc->status.cntrs.fifo_under++; /* also increment oerrors? 
*/
        if (DRIVER_DEBUG)
          printk("%s: txintr_cleanup: tx fifo underrun\n", NAME_UNIT);
        }
      dev_kfree_skb_any(skb);
      return 1; /* did something */
      }
    }
  return 0;
}

/* Build DMA descriptors for a transmit packet fragment. */
/* Assertion: fragment is contiguous in physical memory. */
static int /* 0=success; 1=error */ /* Linux version */
txintr_setup_frag(softc_t *sc, char *data, int length)
{
  struct desc_ring *ring = &sc->txring;
  struct dma_desc *desc;
  unsigned int desc_len;
  u_int32_t dma_addr;

  while (length > 0)
    {
    /* Ring is full if (wrap(write+1)==read) */
    if (((ring->temp==ring->last) ? ring->first : ring->temp+1) == ring->read)
      {
      /* Not enough DMA descriptors; try later. */
      for (; ring->temp!=ring->write;
       ring->temp = (ring->temp==ring->first)? ring->last : ring->temp-1)
        pci_unmap_single(sc->pci_dev, ring->temp->address1,
         ring->temp->length1 + ring->temp->length2, PCI_DMA_TODEVICE);
      sc->status.cntrs.txdma++;
      return 1;
      }

    /* Provisionally, write a DMA descriptor into the ring. */
    /* But don't change the REAL ring write pointer. */
    /* Hardware won't see it until the OWNER bit is set. */
    desc = ring->temp;
    /* Advance the temporary ring write pointer. */
    if (ring->temp++ == ring->last) ring->temp = ring->first;

    /* Clear all control bits except the END_RING bit. */
    desc->control &= TLP_DCTL_END_RING;
    /* Don't pad short packets up to 64 bytes */
    desc->control |= TLP_DCTL_TX_NO_PAD;
    /* Use Tulip's CRC-32 generator, if appropriate. */
    if (sc->config.crc_len != CFG_CRC_32) desc->control |= TLP_DCTL_TX_NO_CRC;
    /* Set the OWNER bit, except in the first descriptor. */
    if (desc != ring->write) desc->status = TLP_DSTS_OWNER;

    desc_len = (length >= MAX_DESC_LEN) ? MAX_DESC_LEN : length;
    /* Map kernel virtual address to PCI address. */
    dma_addr = pci_map_single(sc->pci_dev, data, desc_len, PCI_DMA_TODEVICE);
    /* If it will fit in one chunk, do so, otherwise split it. */
    if (desc_len <= MAX_CHUNK_LEN)
      {
      desc->address1 = dma_addr;
      desc->length1  = desc_len;
      desc->address2 = 0;
      desc->length2  = 0;
      }
    else
      {
      desc->address1 = dma_addr;
      desc->length1  = desc_len>>1;
      desc->address2 = desc->address1 + desc->length1;
      desc->length2  = desc_len>>1;
      if (desc_len & 1) desc->length2++;
      }

    data   += desc_len;
    length -= desc_len;
    } /* while (length > 0) */

  return 0; /* success */
}

/* NB: this procedure is recursive! */
static int /* 0=success; 1=error */
txintr_setup_skb(softc_t *sc, struct sk_buff *skb)
{
  struct sk_buff *list;
  int i;

  /* First, handle the data in the skbuff itself. */
  if (txintr_setup_frag(sc, skb->data, skb_headlen(skb)))
    return 1;

  /* Next, handle the VM pages in the Scatter/Gather list. */
  if (skb_shinfo(skb)->nr_frags != 0)
    for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
      {
      skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
      if (txintr_setup_frag(sc, page_address(frag->page) +
       frag->page_offset, frag->size))
        return 1;
      }

  /* Finally, handle the skbuffs in the frag_list. */
  if ((list = skb_shinfo(skb)->frag_list) != NULL)
    for (; list; list=list->next)
      if (txintr_setup_skb(sc, list)) /* recursive! */
        return 1;

  return 0;
}

/* Setup (prepare) to transmit a packet. */
/* Select a packet, build DMA descriptors and give packet to hardware. */
/* If DMA descriptors run out, abandon the attempt and return 0. */
static int /* Linux version */
txintr_setup(softc_t *sc)
{
  struct desc_ring *ring = &sc->txring;
  struct dma_desc *first_desc, *last_desc;

  /* Protect against half-up links: Don't transmit */
  /* if the receiver can't hear the far end. */
  if (sc->status.oper_status != STATUS_UP) return 0;

  /* Pick a packet to transmit.
*/ /* linux_start() puts packets in sc->tx_skb. */ if (sc->tx_skb == NULL) { if (netif_queue_stopped(sc->net_dev) != 0) netif_wake_queue(sc->net_dev); return 0; /* no pkt to transmit */ } /* Build DMA descriptors for an outgoing skbuff. */ ring->temp = ring->write; /* temporary ring write pointer */ if (txintr_setup_skb(sc, sc->tx_skb) != 0) return 0; /* Enqueue the skbuff; txintr_cleanup will free it. */ skbuff_enqueue(ring, sc->tx_skb); /* The transmitter has room for another packet. */ sc->tx_skb = NULL; /* Set first & last segment bits. */ /* last_desc is the desc BEFORE the one pointed to by ring->temp. */ first_desc = ring->write; first_desc->control |= TLP_DCTL_TX_FIRST_SEG; last_desc = (ring->temp==ring->first)? ring->last : ring->temp-1; last_desc->control |= TLP_DCTL_TX_LAST_SEG; /* Interrupt at end-of-transmission? Why bother the poor computer! */ /* last_desc->control |= TLP_DCTL_TX_INTERRUPT; */ /* Make sure the OWNER bit is not set in the next descriptor. */ /* The OWNER bit may have been set if a previous call aborted. */ ring->temp->status = 0; /* Commit the DMA descriptors to the software. */ ring->write = ring->temp; /* Before setting the OWNER bit, flush the cache (memory barrier). */ wmb(); /* write memory barrier */ /* Commit the DMA descriptors to the hardware. */ first_desc->status = TLP_DSTS_OWNER; /* Notify the transmitter that there is another packet to send. */ WRITE_CSR(TLP_TX_POLL, 1); sc->net_dev->trans_start = jiffies; return 1; /* did something */ } #endif /* __linux__ */ static void check_intr_status(softc_t *sc) { u_int32_t status, cfcs, op_mode; u_int32_t missed, overruns; /* Check for four unusual events: * 1) fatal PCI bus errors - some are recoverable * 2) transmitter FIFO underruns - increase fifo threshold * 3) receiver FIFO overruns - clear potential hangup * 4) no receive descs or bufs - count missed packets */ /* 1) A fatal bus error causes a Tulip to stop initiating bus cycles. */ /* Module unload/load or boot are the only fixes for Parity Errors. */ /* Master and Target Aborts can be cleared and life may continue. */ status = READ_CSR(TLP_STATUS); if ((status & TLP_STAT_FATAL_ERROR) != 0) { u_int32_t fatal = (status & TLP_STAT_FATAL_BITS)>>TLP_STAT_FATAL_SHIFT; printf("%s: FATAL PCI BUS ERROR: %s%s%s%s\n", NAME_UNIT, (fatal == 0) ? "PARITY ERROR" : "", (fatal == 1) ? "MASTER ABORT" : "", (fatal == 2) ? "TARGET ABORT" : "", (fatal >= 3) ? "RESERVED (?)" : ""); cfcs = READ_PCI_CFG(sc, TLP_CFCS); /* try to clear it */ cfcs &= ~(TLP_CFCS_MSTR_ABORT | TLP_CFCS_TARG_ABORT); WRITE_PCI_CFG(sc, TLP_CFCS, cfcs); } /* 2) If the transmitter fifo underruns, increase the transmit fifo */ /* threshold: the number of bytes required to be in the fifo */ /* before starting the transmitter (cost: increased tx delay). */ /* The TX_FSM must be stopped to change this parameter. */ if ((status & TLP_STAT_TX_UNDERRUN) != 0) { op_mode = READ_CSR(TLP_OP_MODE); /* enable store-and-forward mode if tx_threshold tops out? */ if ((op_mode & TLP_OP_TX_THRESH) < TLP_OP_TX_THRESH) { op_mode += 0x4000; /* increment TX_THRESH field; can't overflow */ WRITE_CSR(TLP_OP_MODE, op_mode & ~TLP_OP_TX_RUN); /* Wait for the TX FSM to stop; it might be processing a pkt. 
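*/

/* The underrun recovery above bumps a 2-bit threshold field: one step is
 * 0x4000 because the field sits at bits 15:14 of OP_MODE, and the byte
 * threshold it encodes is 128<<field (128, 256, 512, 1024), which is
 * exactly what the debug printf below reports. A worked example of the
 * field arithmetic (the shift value is an assumption matching the
 * driver's TLP_OP_TR_SHIFT usage):
 */
#if 0 /* illustrative sketch; not part of the driver */
#include <stdio.h>
#include <stdint.h>

#define SKETCH_TR_SHIFT 14   /* assumed value of TLP_OP_TR_SHIFT */

int main(void)
{
  uint32_t op_mode = 1u << SKETCH_TR_SHIFT;  /* field = 1 -> 256 bytes */
  op_mode += 0x4000;                         /* field = 2              */
  printf("threshold now %d bytes\n",
         128 << ((op_mode >> SKETCH_TR_SHIFT) & 3)); /* 512 bytes      */
  return 0;
}
#endif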
*/ while (READ_CSR(TLP_STATUS) & TLP_STAT_TX_FSM); /* XXX HANG */ WRITE_CSR(TLP_OP_MODE, op_mode); /* restart tx */ if (DRIVER_DEBUG) printf("%s: tx underrun; tx fifo threshold now %d bytes\n", NAME_UNIT, 128<<((op_mode>>TLP_OP_TR_SHIFT)&3)); } } /* 3) Errata memo from Digital Equipment Corp warns that 21140A */ /* receivers through rev 2.2 can hang if the fifo overruns. */ /* Recommended fix: stop and start the RX FSM after an overrun. */ missed = READ_CSR(TLP_MISSED); if ((overruns = ((missed & TLP_MISS_OVERRUN)>>TLP_OVERRUN_SHIFT)) != 0) { if (DRIVER_DEBUG) printf("%s: rx overrun cntr=%d\n", NAME_UNIT, overruns); sc->status.cntrs.overruns += overruns; if ((READ_PCI_CFG(sc, TLP_CFRV) & 0xFF) <= 0x22) { op_mode = READ_CSR(TLP_OP_MODE); WRITE_CSR(TLP_OP_MODE, op_mode & ~TLP_OP_RX_RUN); /* Wait for the RX FSM to stop; it might be processing a pkt. */ while (READ_CSR(TLP_STATUS) & TLP_STAT_RX_FSM); /* XXX HANG */ WRITE_CSR(TLP_OP_MODE, op_mode); /* restart rx */ } } /* 4) When the receiver is enabled and a packet arrives, but no DMA */ /* descriptor is available, the packet is counted as 'missed'. */ /* The receiver should never miss packets; warn if it happens. */ if ((missed = (missed & TLP_MISS_MISSED)) != 0) { if (DRIVER_DEBUG) printf("%s: rx missed %d pkts\n", NAME_UNIT, missed); sc->status.cntrs.missed += missed; } } static void /* This is where the work gets done. */ core_interrupt(void *arg, int check_status) { softc_t *sc = arg; int activity; /* If any CPU is inside this critical section, then */ /* other CPUs should go away without doing anything. */ if (BOTTOM_TRYLOCK == 0) { sc->status.cntrs.lck_intr++; return; } /* Clear pending card interrupts. */ WRITE_CSR(TLP_STATUS, READ_CSR(TLP_STATUS)); /* In Linux, pci_alloc_consistent() means DMA descriptors */ /* don't need explicit syncing. */ #if BSD { struct desc_ring *ring = &sc->txring; DMA_SYNC(sc->txring.map, sc->txring.size_descs, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); ring = &sc->rxring; DMA_SYNC(sc->rxring.map, sc->rxring.size_descs, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } #endif do /* This is the main loop for interrupt processing. */ { activity = txintr_cleanup(sc); activity += txintr_setup(sc); activity += rxintr_cleanup(sc); activity += rxintr_setup(sc); } while (activity); #if BSD { struct desc_ring *ring = &sc->txring; DMA_SYNC(sc->txring.map, sc->txring.size_descs, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ring = &sc->rxring; DMA_SYNC(sc->rxring.map, sc->rxring.size_descs, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } #endif /* As the interrupt is dismissed, check for four unusual events. */ if (check_status) check_intr_status(sc); BOTTOM_UNLOCK; } /* user_interrupt() may be called from a syscall or a softirq */ static void user_interrupt(softc_t *sc, int check_status) { DISABLE_INTR; /* noop on FreeBSD-5 and Linux */ core_interrupt(sc, check_status); ENABLE_INTR; /* noop on FreeBSD-5 and Linux */ } #if BSD # if (defined(__FreeBSD__) && defined(DEVICE_POLLING)) /* Service the card from the kernel idle loop without interrupts. */ static void fbsd_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { softc_t *sc = IFP2SC(ifp); #if (__FreeBSD_version < 700000) if ((ifp->if_capenable & IFCAP_POLLING) == 0) { ether_poll_deregister(ifp); cmd = POLL_DEREGISTER; } if (cmd == POLL_DEREGISTER) { /* Last call -- reenable card interrupts. 
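*/

/* core_interrupt() above guards its critical section with a try-lock: a
 * CPU that cannot take the lock counts the event and leaves rather than
 * spinning in interrupt context. A user-space analog of that guard,
 * with a C11 atomic flag standing in for BOTTOM_TRYLOCK (illustrative
 * only, not driver code):
 */
#if 0 /* illustrative sketch; not part of the driver */
#include <stdatomic.h>

static atomic_flag bottom_lock = ATOMIC_FLAG_INIT;
static unsigned long lck_intr;      /* counts contended entries */

static void interrupt_body(void)
{
  if (atomic_flag_test_and_set(&bottom_lock))
    { lck_intr++; return; }         /* someone else is in; go away */
  /* ... service the TX and RX descriptor rings here ... */
  atomic_flag_clear(&bottom_lock);  /* the BOTTOM_UNLOCK analog */
}
#endif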
*/ WRITE_CSR(TLP_INT_ENBL, TLP_INT_TXRX); return; } #endif sc->quota = count; core_interrupt(sc, (cmd==POLL_AND_CHECK_STATUS)); } # endif /* (__FreeBSD__ && DEVICE_POLLING) */ /* BSD kernels call this procedure when an interrupt happens. */ static intr_return_t bsd_interrupt(void *arg) { softc_t *sc = arg; /* Cut losses early if this is not our interrupt. */ if ((READ_CSR(TLP_STATUS) & TLP_INT_TXRX) == 0) return IRQ_NONE; # if (defined(__FreeBSD__) && defined(DEVICE_POLLING)) if (sc->ifp->if_capenable & IFCAP_POLLING) return IRQ_NONE; if ((sc->ifp->if_capabilities & IFCAP_POLLING) && (ether_poll_register(fbsd_poll, sc->ifp))) { WRITE_CSR(TLP_INT_ENBL, TLP_INT_DISABLE); return IRQ_NONE; } else sc->quota = sc->rxring.num_descs; /* input flow control */ # endif /* (__FreeBSD__ && DEVICE_POLLING) */ /* Disable card interrupts. */ WRITE_CSR(TLP_INT_ENBL, TLP_INT_DISABLE); core_interrupt(sc, 0); /* Enable card interrupts. */ WRITE_CSR(TLP_INT_ENBL, TLP_INT_TXRX); return IRQ_HANDLED; } #endif /* BSD */ /* Administrative status of the driver (UP or DOWN) has changed. */ /* A card-specific action may be required: T1 and T3 cards: no-op. */ /* HSSI and SSI cards change the state of modem ready signals. */ static void set_status(softc_t *sc, int status) { struct ioctl ioctl; ioctl.cmd = IOCTL_SET_STATUS; ioctl.data = status; sc->card->ioctl(sc, &ioctl); } #if P2P /* Callout from P2P: */ /* Get the state of DCD (Data Carrier Detect). */ static int p2p_getmdm(struct p2pcom *p2p, caddr_t result) { softc_t *sc = IFP2SC(&p2p->p2p_if); /* Non-zero isn't good enough; TIOCM_CAR is 0x40. */ *(int *)result = (sc->status.oper_status==STATUS_UP) ? TIOCM_CAR : 0; return 0; } /* Callout from P2P: */ /* Set the state of DTR (Data Terminal Ready). */ static int p2p_mdmctl(struct p2pcom *p2p, int flag) { softc_t *sc = IFP2SC(&p2p->p2p_if); set_status(sc, flag); return 0; } #endif /* P2P */ #if NSPPP # ifndef PP_FR # define PP_FR 0 # endif /* Callout from SPPP: */ static void sppp_tls(struct sppp *sppp) { # ifdef __FreeBSD__ if (!(sppp->pp_mode & IFF_LINK2) && !(sppp->pp_flags & PP_FR)) # elif defined(__NetBSD__) || defined(__OpenBSD__) if (!(sppp->pp_flags & PP_CISCO)) # endif sppp->pp_up(sppp); } /* Callout from SPPP: */ static void sppp_tlf(struct sppp *sppp) { # ifdef __FreeBSD__ if (!(sppp->pp_mode & IFF_LINK2) && !(sppp->pp_flags & PP_FR)) # elif defined(__NetBSD__) || defined(__OpenBSD__) if (!(sppp->pp_flags & PP_CISCO)) # endif sppp->pp_down(sppp); } #endif /* NSPPP */ /* Configure line protocol stuff. * Called by attach_card() during module init. * Called by core_ioctl() when lmcconfig writes sc->config. * Called by detach_card() during module shutdown. */ static void config_proto(softc_t *sc, struct config *config) { /* Use line protocol stack instead of RAWIP mode. */ if ((sc->config.line_pkg == PKG_RAWIP) && (config->line_pkg != PKG_RAWIP)) { #if NSPPP LMC_BPF_DETACH; sppp_attach(sc->ifp); LMC_BPF_ATTACH(DLT_PPP, 4); sc->sppp->pp_tls = sppp_tls; sc->sppp->pp_tlf = sppp_tlf; /* Force reconfiguration of SPPP params. */ sc->config.line_prot = 0; sc->config.keep_alive = config->keep_alive ? 
0:1; #elif P2P int error = 0; sc->p2p->p2p_proto = 0; /* force p2p_attach */ if ((error = p2p_attach(sc->p2p))) /* calls bpfattach() */ { printf("%s: p2p_attach() failed; error %d\n", NAME_UNIT, error); config->line_pkg = PKG_RAWIP; /* still in RAWIP mode */ } else { sc->p2p->p2p_mdmctl = p2p_mdmctl; /* set DTR */ sc->p2p->p2p_getmdm = p2p_getmdm; /* get DCD */ } #elif GEN_HDLC int error = 0; sc->net_dev->mtu = HDLC_MAX_MTU; if ((error = hdlc_open(sc->net_dev))) { printf("%s: hdlc_open() failed; error %d\n", NAME_UNIT, error); printf("%s: Try 'sethdlc %s ppp'\n", NAME_UNIT, NAME_UNIT); config->line_pkg = PKG_RAWIP; /* still in RAWIP mode */ } #else /* no line protocol stack was configured */ config->line_pkg = PKG_RAWIP; /* still in RAWIP mode */ #endif } /* Bypass line protocol stack and return to RAWIP mode. */ if ((sc->config.line_pkg != PKG_RAWIP) && (config->line_pkg == PKG_RAWIP)) { #if NSPPP LMC_BPF_DETACH; sppp_flush(sc->ifp); sppp_detach(sc->ifp); setup_ifnet(sc->ifp); LMC_BPF_ATTACH(DLT_RAW, 0); #elif P2P int error = 0; if_qflush(&sc->p2p->p2p_isnd); if ((error = p2p_detach(sc->p2p))) { printf("%s: p2p_detach() failed; error %d\n", NAME_UNIT, error); printf("%s: Try 'ifconfig %s down -remove'\n", NAME_UNIT, NAME_UNIT); config->line_pkg = PKG_P2P; /* not in RAWIP mode; still attached to P2P */ } else { setup_ifnet(sc->ifp); LMC_BPF_ATTACH(DLT_RAW, 0); } #elif GEN_HDLC hdlc_proto_detach(sc->hdlc_dev); hdlc_close(sc->net_dev); setup_netdev(sc->net_dev); #endif } #if NSPPP if (config->line_pkg != PKG_RAWIP) { /* Check for change to PPP protocol. */ if ((sc->config.line_prot != PROT_PPP) && (config->line_prot == PROT_PPP)) { LMC_BPF_DETACH; # if (defined(__NetBSD__) || defined(__OpenBSD__)) sc->sppp->pp_flags &= ~PP_CISCO; # elif defined(__FreeBSD__) sc->ifp->if_flags &= ~IFF_LINK2; sc->sppp->pp_flags &= ~PP_FR; # endif LMC_BPF_ATTACH(DLT_PPP, 4); sppp_ioctl(sc->ifp, SIOCSIFFLAGS, NULL); } # ifndef DLT_C_HDLC # define DLT_C_HDLC DLT_PPP # endif /* Check for change to C_HDLC protocol. */ if ((sc->config.line_prot != PROT_C_HDLC) && (config->line_prot == PROT_C_HDLC)) { LMC_BPF_DETACH; # if (defined(__NetBSD__) || defined(__OpenBSD__)) sc->sppp->pp_flags |= PP_CISCO; # elif defined(__FreeBSD__) sc->ifp->if_flags |= IFF_LINK2; sc->sppp->pp_flags &= ~PP_FR; # endif LMC_BPF_ATTACH(DLT_C_HDLC, 4); sppp_ioctl(sc->ifp, SIOCSIFFLAGS, NULL); } /* Check for change to Frame Relay protocol. */ if ((sc->config.line_prot != PROT_FRM_RLY) && (config->line_prot == PROT_FRM_RLY)) { LMC_BPF_DETACH; # if (defined(__NetBSD__) || defined(__OpenBSD__)) sc->sppp->pp_flags &= ~PP_CISCO; # elif defined(__FreeBSD__) sc->ifp->if_flags &= ~IFF_LINK2; sc->sppp->pp_flags |= PP_FR; # endif LMC_BPF_ATTACH(DLT_FRELAY, 4); sppp_ioctl(sc->ifp, SIOCSIFFLAGS, NULL); } /* Check for disabling keep-alives. */ if ((sc->config.keep_alive != 0) && (config->keep_alive == 0)) sc->sppp->pp_flags &= ~PP_KEEPALIVE; /* Check for enabling keep-alives. */ if ((sc->config.keep_alive == 0) && (config->keep_alive != 0)) sc->sppp->pp_flags |= PP_KEEPALIVE; } #endif /* NSPPP */ /* Loop back through the TULIP Ethernet chip; (no CRC). */ /* Data sheet says stop DMA before changing OPMODE register. */ /* But that's not as simple as it sounds; works anyway. */ /* Check for enabling loopback thru Tulip chip. 
*/ if ((sc->config.loop_back != CFG_LOOP_TULIP) && (config->loop_back == CFG_LOOP_TULIP)) { u_int32_t op_mode = READ_CSR(TLP_OP_MODE); op_mode |= TLP_OP_INT_LOOP; WRITE_CSR(TLP_OP_MODE, op_mode); config->crc_len = CFG_CRC_0; } /* Check for disabling loopback thru Tulip chip. */ if ((sc->config.loop_back == CFG_LOOP_TULIP) && (config->loop_back != CFG_LOOP_TULIP)) { u_int32_t op_mode = READ_CSR(TLP_OP_MODE); op_mode &= ~TLP_OP_LOOP_MODE; WRITE_CSR(TLP_OP_MODE, op_mode); config->crc_len = CFG_CRC_16; } } /* This is the core ioctl procedure. */ /* It handles IOCTLs from lmcconfig(8). */ /* It must not run when card watchdogs run. */ /* Called from a syscall (user context; no spinlocks). */ /* This procedure can SLEEP. */ static int core_ioctl(softc_t *sc, u_long cmd, caddr_t data) { struct iohdr *iohdr = (struct iohdr *) data; struct ioctl *ioctl = (struct ioctl *) data; struct status *status = (struct status *) data; struct config *config = (struct config *) data; int error = 0; /* All structs start with a string and a cookie. */ if (((struct iohdr *)data)->cookie != NGM_LMC_COOKIE) return EINVAL; while (TOP_TRYLOCK == 0) { sc->status.cntrs.lck_ioctl++; SLEEP(10000); /* yield? */ } switch (cmd) { case LMCIOCGSTAT: { *status = sc->status; iohdr->cookie = NGM_LMC_COOKIE; break; } case LMCIOCGCFG: { *config = sc->config; iohdr->cookie = NGM_LMC_COOKIE; break; } case LMCIOCSCFG: { if ((error = CHECK_CAP)) break; config_proto(sc, config); sc->config = *config; sc->card->config(sc); break; } case LMCIOCREAD: { if (ioctl->cmd == IOCTL_RW_PCI) { if (ioctl->address > 252) { error = EFAULT; break; } ioctl->data = READ_PCI_CFG(sc, ioctl->address); } else if (ioctl->cmd == IOCTL_RW_CSR) { if (ioctl->address > 15) { error = EFAULT; break; } ioctl->data = READ_CSR(ioctl->address*TLP_CSR_STRIDE); } else if (ioctl->cmd == IOCTL_RW_SROM) { if (ioctl->address > 63) { error = EFAULT; break; } ioctl->data = read_srom(sc, ioctl->address); } else if (ioctl->cmd == IOCTL_RW_BIOS) ioctl->data = read_bios(sc, ioctl->address); else if (ioctl->cmd == IOCTL_RW_MII) ioctl->data = read_mii(sc, ioctl->address); else if (ioctl->cmd == IOCTL_RW_FRAME) ioctl->data = read_framer(sc, ioctl->address); else error = EINVAL; break; } case LMCIOCWRITE: { if ((error = CHECK_CAP)) break; if (ioctl->cmd == IOCTL_RW_PCI) { if (ioctl->address > 252) { error = EFAULT; break; } WRITE_PCI_CFG(sc, ioctl->address, ioctl->data); } else if (ioctl->cmd == IOCTL_RW_CSR) { if (ioctl->address > 15) { error = EFAULT; break; } WRITE_CSR(ioctl->address*TLP_CSR_STRIDE, ioctl->data); } else if (ioctl->cmd == IOCTL_RW_SROM) { if (ioctl->address > 63) { error = EFAULT; break; } write_srom(sc, ioctl->address, ioctl->data); /* can sleep */ } else if (ioctl->cmd == IOCTL_RW_BIOS) { if (ioctl->address == 0) erase_bios(sc); write_bios(sc, ioctl->address, ioctl->data); /* can sleep */ } else if (ioctl->cmd == IOCTL_RW_MII) write_mii(sc, ioctl->address, ioctl->data); else if (ioctl->cmd == IOCTL_RW_FRAME) write_framer(sc, ioctl->address, ioctl->data); else if (ioctl->cmd == IOCTL_WO_SYNTH) write_synth(sc, (struct synth *)&ioctl->data); else if (ioctl->cmd == IOCTL_WO_DAC) { write_dac(sc, 0x9002); /* set Vref = 2.048 volts */ write_dac(sc, ioctl->data & 0xFFF); } else error = EINVAL; break; } case LMCIOCTL: { if ((error = CHECK_CAP)) break; if (ioctl->cmd == IOCTL_XILINX_RESET) { reset_xilinx(sc); sc->card->config(sc); } else if (ioctl->cmd == IOCTL_XILINX_ROM) { load_xilinx_from_rom(sc); /* can sleep */ sc->card->config(sc); } else if (ioctl->cmd == 
IOCTL_XILINX_FILE) { /* load_xilinx_from_file() can sleep. */ error = load_xilinx_from_file(sc, ioctl->ucode, ioctl->data); if (error != 0) load_xilinx_from_rom(sc); /* try the rom */ sc->card->config(sc); set_status(sc, (error==0)); /* XXX */ } else if (ioctl->cmd == IOCTL_RESET_CNTRS) { memset(&sc->status.cntrs, 0, sizeof(struct event_cntrs)); microtime(&sc->status.cntrs.reset_time); } else error = sc->card->ioctl(sc, ioctl); /* can sleep */ break; } default: error = EINVAL; break; } TOP_UNLOCK; return error; } /* This is the core watchdog procedure. */ /* It calculates link speed, and calls the card-specific watchdog code. */ /* Calls interrupt() in case one got lost; also kick-starts the device. */ /* ioctl syscalls and card watchdog routines must be interlocked. */ /* This procedure must not sleep. */ static void core_watchdog(softc_t *sc) { /* Read and restart the Tulip timer. */ u_int32_t tx_speed = READ_CSR(TLP_TIMER); WRITE_CSR(TLP_TIMER, 0xFFFF); /* Measure MII clock using a timer in the Tulip chip. * This timer counts transmitter bits divided by 4096. * Since this is called once a second the math is easy. * This is only correct when the link is NOT sending pkts. * On a fully-loaded link, answer will be HALF actual rate. * Clock rate during pkt is HALF clk rate between pkts. * Measuring clock rate really measures link utilization! */ sc->status.tx_speed = (0xFFFF - (tx_speed & 0xFFFF)) << 12; /* The first status reset time is when the calendar clock is set. */ if (sc->status.cntrs.reset_time.tv_sec < 1000) microtime(&sc->status.cntrs.reset_time); /* Update hardware (operational) status. */ /* Call the card-specific watchdog routines. */ if (TOP_TRYLOCK != 0) { sc->status.oper_status = sc->card->watchdog(sc); /* Increment a counter which tells user-land */ /* observers that SNMP state has been updated. */ sc->status.ticks++; TOP_UNLOCK; } else sc->status.cntrs.lck_watch++; /* In case an interrupt gets lost... */ user_interrupt(sc, 1); } #if IFNET /* Called from a syscall (user context; no spinlocks). */ static int raw_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (cmd) { # if (defined(__FreeBSD__) && defined(DEVICE_POLLING)) /* XXX necessary? */ case SIOCSIFCAP: # endif case SIOCSIFDSTADDR: case SIOCAIFADDR: case SIOCSIFFLAGS: #if 0 case SIOCADDMULTI: case SIOCDELMULTI: break; #endif case SIOCSIFADDR: ifp->if_flags |= IFF_UP; /* a Unix tradition */ break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; default: error = EINVAL; break; } return error; } /* Called from a syscall (user context; no spinlocks). */ static int ifnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { softc_t *sc = IFP2SC(ifp); # ifdef __OpenBSD__ struct ifreq *ifr = (struct ifreq *) data; # endif int error = 0; switch (cmd) { /* Catch the IOCTLs used by lmcconfig. */ case LMCIOCGSTAT: case LMCIOCGCFG: case LMCIOCSCFG: case LMCIOCREAD: case LMCIOCWRITE: case LMCIOCTL: error = core_ioctl(sc, cmd, data); break; # ifdef __OpenBSD__ /* Catch the IOCTLs used by ifconfig. 
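   These are OpenBSD-only: SIOC[GS]IFMEDIA manipulates the ifmedia
   state, and SIOC[GS]IFTIMESLOT reads or writes the T1E1 time slot
   map by funneling a config struct through core_ioctl(LMCIOCSCFG).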
*/ case SIOCSIFMEDIA: if ((error = CHECK_CAP)) break; case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifm, cmd); break; case SIOCSIFTIMESLOT: if ((error = CHECK_CAP)) break; if (sc->status.card_type == TLP_CSID_T1E1) { struct config config = sc->config; if ((error = copyin(ifr->ifr_data, &config.time_slots, sizeof config.time_slots))) break; config.iohdr.cookie = NGM_LMC_COOKIE; error = core_ioctl(sc, LMCIOCSCFG, (caddr_t)&config); } else error = EINVAL; break; case SIOCGIFTIMESLOT: if (sc->status.card_type == TLP_CSID_T1E1) error = copyout(&sc->config.time_slots, ifr->ifr_data, sizeof sc->config.time_slots); else error = EINVAL; break; # endif /* Pass the rest to the line protocol. */ default: if (sc->config.line_pkg == PKG_RAWIP) error = raw_ioctl(ifp, cmd, data); else # if NSPPP error = sppp_ioctl(ifp, cmd, data); # elif P2P error = p2p_ioctl(ifp, cmd, data); # else error = EINVAL; # endif break; } if (DRIVER_DEBUG && (error!=0)) printf("%s: ifnet_ioctl; cmd=0x%08lx error=%d\n", NAME_UNIT, cmd, error); return error; } /* Called from a syscall (user context; no spinlocks). */ static void ifnet_start(struct ifnet *ifp) { softc_t *sc = IFP2SC(ifp); /* Start the transmitter; incoming pkts are NOT processed. */ user_interrupt(sc, 0); } /* sppp and p2p replace this with their own proc. */ /* RAWIP mode is the only time this is used. */ /* Called from a syscall (user context; no spinlocks). */ static int raw_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt) { softc_t *sc = IFP2SC(ifp); int error = 0; /* Fail if the link is down. */ if (sc->status.oper_status != STATUS_UP) { m_freem(m); sc->status.cntrs.odiscards++; if (DRIVER_DEBUG) printf("%s: raw_output: tx pkt discarded: link down\n", NAME_UNIT); return ENETDOWN; } # if NETGRAPH /* Netgraph has priority over the ifnet kernel interface. */ if (sc->ng_hook != NULL) { m_freem(m); sc->status.cntrs.odiscards++; if (DRIVER_DEBUG) printf("%s: raw_output: tx pkt discarded: netgraph active\n", NAME_UNIT); return EBUSY; } # endif /* raw_output() ENQUEUEs in a syscall or softirq. */ /* txintr_setup() DEQUEUEs in a hard interrupt. */ /* Some BSD QUEUE routines are not interrupt-safe. */ { DISABLE_INTR; # if (__FreeBSD_version >= 503000) IFQ_ENQUEUE(&ifp->if_snd, m, error); # else IFQ_ENQUEUE(&ifp->if_snd, m, NULL, error); # endif ENABLE_INTR; } if (error==0) user_interrupt(sc, 0); /* start the transmitter */ else { m_freem(m); sc->status.cntrs.odiscards++; if (DRIVER_DEBUG) printf("%s: raw_output: IFQ_ENQUEUE() failed; error %d\n", NAME_UNIT, error); } return error; } /* Called from a softirq once a second. */ static void ifnet_watchdog(struct ifnet *ifp) { softc_t *sc = IFP2SC(ifp); u_int8_t old_oper_status = sc->status.oper_status; struct event_cntrs *cntrs = &sc->status.cntrs; core_watchdog(sc); /* updates oper_status */ #if NETGRAPH if (sc->ng_hook != NULL) { sc->status.line_pkg = PKG_NG; sc->status.line_prot = 0; } else #endif if (sc->config.line_pkg == PKG_RAWIP) { sc->status.line_pkg = PKG_RAWIP; sc->status.line_prot = PROT_IP_HDLC; } else { # if P2P /* Notice change in link status. */ if ((old_oper_status != sc->status.oper_status) && (sc->p2p->p2p_modem)) (*sc->p2p->p2p_modem)(sc->p2p, sc->status.oper_status==STATUS_UP); /* Notice change in line protocol. 
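   P2P keeps the protocol in ifp->if_type, so the switch below maps
   IFT_PPP, IFT_PTPSERIAL and IFT_FRELAY back onto the PROT_ codes
   that lmcconfig reports.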
*/ sc->status.line_pkg = PKG_P2P; switch (sc->ifp->if_type) { case IFT_PPP: sc->status.line_prot = PROT_PPP; break; case IFT_PTPSERIAL: sc->status.line_prot = PROT_C_HDLC; break; case IFT_FRELAY: sc->status.line_prot = PROT_FRM_RLY; break; default: sc->status.line_prot = 0; break; } # elif NSPPP /* Notice change in link status. */ if ((old_oper_status != STATUS_UP) && (sc->status.oper_status == STATUS_UP)) /* link came up */ sppp_tls(sc->sppp); if ((old_oper_status == STATUS_UP) && (sc->status.oper_status != STATUS_UP)) /* link went down */ sppp_tlf(sc->sppp); /* Notice change in line protocol. */ sc->status.line_pkg = PKG_SPPP; # ifdef __FreeBSD__ if (sc->sppp->pp_flags & PP_FR) sc->status.line_prot = PROT_FRM_RLY; else if (sc->ifp->if_flags & IFF_LINK2) # elif (defined(__NetBSD__) || defined(__OpenBSD__)) if (sc->sppp->pp_flags & PP_CISCO) # endif sc->status.line_prot = PROT_C_HDLC; else sc->status.line_prot = PROT_PPP; # else /* Suppress compiler warning. */ if (old_oper_status == STATUS_UP); # endif } /* Copy statistics from sc to ifp. */ ifp->if_baudrate = sc->status.tx_speed; ifp->if_ipackets = cntrs->ipackets; ifp->if_opackets = cntrs->opackets; ifp->if_ibytes = cntrs->ibytes; ifp->if_obytes = cntrs->obytes; ifp->if_ierrors = cntrs->ierrors; ifp->if_oerrors = cntrs->oerrors; ifp->if_iqdrops = cntrs->idiscards; # if ((__FreeBSD_version >= 500000) || defined(__OpenBSD__) || defined(__NetBSD__)) if (sc->status.oper_status == STATUS_UP) ifp->if_link_state = LINK_STATE_UP; else ifp->if_link_state = LINK_STATE_DOWN; # endif /* Call this procedure again after one second. */ ifp->if_timer = 1; } # ifdef __OpenBSD__ /* Callback from ifmedia. */ static int ifmedia_change(struct ifnet *ifp) { softc_t *sc = IFP2SC(ifp); struct config config = sc->config; int media = sc->ifm.ifm_media; int error; /* ifconfig lmc0 media t1 */ if (sc->status.card_type == TLP_CSID_T3) { if ((media & IFM_TMASK) == IFM_TDM_T3) config.format = CFG_FORMAT_T3CPAR; else if ((media & IFM_TMASK) == IFM_TDM_T3_M13) config.format = CFG_FORMAT_T3M13; } else if (sc->status.card_type == TLP_CSID_T1E1) { if ((media & IFM_TMASK) == IFM_TDM_T1) config.format = CFG_FORMAT_T1ESF; else if ((media & IFM_TMASK) == IFM_TDM_T1_AMI) config.format = CFG_FORMAT_T1SF; else if ((media & IFM_TMASK) == IFM_TDM_E1) config.format = CFG_FORMAT_E1NONE; else if ((media & IFM_TMASK) == IFM_TDM_E1_G704) config.format = CFG_FORMAT_E1FASCRC; } /* ifconfig lmc0 mediaopt loopback */ if (media & IFM_LOOP) config.loop_back = CFG_LOOP_TULIP; else config.loop_back = CFG_LOOP_NONE; /* ifconfig lmc0 mediaopt crc16 */ if (media & IFM_TDM_HDLC_CRC16) config.crc_len = CFG_CRC_16; else config.crc_len = CFG_CRC_32; /* Set ConFiGuration. */ config.iohdr.cookie = NGM_LMC_COOKIE; error = core_ioctl(sc, LMCIOCSCFG, (caddr_t)&config); return error; } /* Callback from ifmedia. */ static void ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr) { softc_t *sc = IFP2SC(ifp); /* ifconfig wants to know if the hardware link is up. */ ifmr->ifm_status = IFM_AVALID; if (sc->status.oper_status == STATUS_UP) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active = sc->ifm.ifm_cur->ifm_media; if (sc->config.loop_back != CFG_LOOP_NONE) ifmr->ifm_active |= IFM_LOOP; if (sc->config.crc_len == CFG_CRC_16) ifmr->ifm_active |= IFM_TDM_HDLC_CRC16; } # endif /* __OpenBSD__ */ static void setup_ifnet(struct ifnet *ifp) { softc_t *sc = ifp->if_softc; /* Initialize the generic network interface. */ /* Note similarity to linux's setup_netdev(). 
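   The values set here are the RAWIP defaults; as the inline comments
   note, sppp and p2p later overwrite if_start, if_output, if_mtu and
   if_type when a line protocol stack is attached.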
*/ ifp->if_flags = IFF_POINTOPOINT; ifp->if_flags |= IFF_RUNNING; ifp->if_ioctl = ifnet_ioctl; ifp->if_start = ifnet_start; /* sppp changes this */ ifp->if_output = raw_output; /* sppp & p2p change this */ ifp->if_input = raw_input; ifp->if_watchdog = ifnet_watchdog; ifp->if_timer = 1; ifp->if_mtu = MAX_DESC_LEN; /* sppp & p2p change this */ ifp->if_type = IFT_PTPSERIAL; /* p2p changes this */ # if (defined(__FreeBSD__) && defined(DEVICE_POLLING)) ifp->if_capabilities |= IFCAP_POLLING; # if (__FreeBSD_version < 500000) ifp->if_capenable |= IFCAP_POLLING; # endif # endif /* Every OS does it differently! */ # if (defined(__FreeBSD__) && (__FreeBSD_version < 502000)) (const char *)ifp->if_name = device_get_name(sc->dev); ifp->if_unit = device_get_unit(sc->dev); # elif (__FreeBSD_version >= 502000) if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); # elif defined(__NetBSD__) strcpy(ifp->if_xname, sc->dev.dv_xname); # elif __OpenBSD__ bcopy(sc->dev.dv_xname, ifp->if_xname, IFNAMSIZ); # elif defined(__bsdi__) ifp->if_name = sc->dev.dv_cfdata->cf_driver->cd_name; ifp->if_unit = sc->dev.dv_unit; # endif } static int ifnet_attach(softc_t *sc) { # if (__FreeBSD_version >= 600000) sc->ifp = if_alloc(NSPPP ? IFT_PPP : IFT_OTHER); if (sc->ifp == NULL) return ENOMEM; # endif # if NSPPP # if (__FreeBSD_version >= 600000) sc->sppp = sc->ifp->if_l2com; # else sc->ifp = &sc->spppcom.pp_if; sc->sppp = &sc->spppcom; # endif # elif P2P sc->ifp = &sc->p2pcom.p2p_if; sc->p2p = &sc->p2pcom; # elif (__FreeBSD_version < 600000) sc->ifp = &sc->ifnet; # endif /* Initialize the network interface struct. */ sc->ifp->if_softc = sc; setup_ifnet(sc->ifp); /* ALTQ output queue initialization. */ IFQ_SET_MAXLEN(&sc->ifp->if_snd, SNDQ_MAXLEN); IFQ_SET_READY(&sc->ifp->if_snd); /* Attach to the ifnet kernel interface. */ if_attach(sc->ifp); # if ((defined(__NetBSD__) && __NetBSD_Version__ >= 106000000) || \ (defined(__OpenBSD__) && OpenBSD >= 200211)) if_alloc_sadl(sc->ifp); # endif /* Attach Berkeley Packet Filter. */ LMC_BPF_ATTACH(DLT_RAW, 0); # ifdef __OpenBSD__ /* Initialize ifmedia mechanism. */ ifmedia_init(&sc->ifm, IFM_OMASK | IFM_GMASK | IFM_IMASK, ifmedia_change, ifmedia_status); if (sc->status.card_type == TLP_CSID_T3) { ifmedia_add(&sc->ifm, IFM_TDM | IFM_TDM_T3, 0, NULL); ifmedia_add(&sc->ifm, IFM_TDM | IFM_TDM_T3_M13, 0, NULL); ifmedia_set(&sc->ifm, IFM_TDM | IFM_TDM_T3); } else if (sc->status.card_type == TLP_CSID_T1E1) { ifmedia_add(&sc->ifm, IFM_TDM | IFM_TDM_T1, 0, NULL); ifmedia_add(&sc->ifm, IFM_TDM | IFM_TDM_T1_AMI, 0, NULL); ifmedia_add(&sc->ifm, IFM_TDM | IFM_TDM_E1, 0, NULL); ifmedia_add(&sc->ifm, IFM_TDM | IFM_TDM_E1_G704, 0, NULL); ifmedia_set(&sc->ifm, IFM_TDM | IFM_TDM_T1); } else if ((sc->status.card_type == TLP_CSID_HSSI) || (sc->status.card_type == TLP_CSID_SSI)) { ifmedia_add(&sc->ifm, IFM_TDM | IFM_NONE, 0, NULL); ifmedia_set(&sc->ifm, IFM_TDM | IFM_NONE); } # endif /* __OpenBSD__ */ return 0; } static void ifnet_detach(softc_t *sc) { # ifdef __OpenBSD__ ifmedia_delete_instance(&sc->ifm, IFM_INST_ANY); # endif # if (defined(__FreeBSD__) && defined(DEVICE_POLLING)) if (sc->ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(sc->ifp); # endif /* Detach Berkeley Packet Filter. */ LMC_BPF_DETACH; # if ((defined(__NetBSD__) && __NetBSD_Version__ >= 106000000) || \ (defined(__OpenBSD__) && OpenBSD >= 200211)) if_free_sadl(sc->ifp); # endif /* Detach from the ifnet kernel interface. */ if_detach(sc->ifp); # if (__FreeBSD_version >= 600000) if_free_type(sc->ifp, NSPPP ? 
IFT_PPP : IFT_OTHER); # endif } #endif /* IFNET */ #if NETGRAPH /* Netgraph changed significantly between FreeBSD-4 and -5. */ /* These are backward compatibility hacks for FreeBSD-4. */ # if (__FreeBSD_version >= 500000) /* These next two macros should be added to netgraph */ # define NG_TYPE_REF(type) atomic_add_int(&(type)->refs, 1) # define NG_TYPE_UNREF(type) \ do { \ if ((type)->refs == 1) \ ng_rmtype(type); \ else \ atomic_subtract_int(&(type)->refs, 1); \ } while (0) # else /* FreeBSD-4 */ # define NGI_GET_MSG(item, msg) /* nothing */ # define NG_HOOK_FORCE_QUEUE(hook) /* nothing */ # define NG_TYPE_REF(type) atomic_add_int(&(type)->refs, 1) # define NG_TYPE_UNREF(type) \ do { \ if ((type)->refs == 1) \ LIST_REMOVE(type, types); \ else \ atomic_subtract_int(&(type)->refs, 1); \ } while (0) # endif /* It is an error to construct new copies of this Netgraph node. */ /* All instances are constructed by ng_attach and are persistent. */ # if (__FreeBSD_version >= 500000) static int ng_constructor(node_p node) { return EINVAL; } # else /* FreeBSD-4 */ static int ng_constructor(node_p *node) { return EINVAL; } # endif /* Incoming Netgraph control message. */ # if (__FreeBSD_version >= 500000) static int ng_rcvmsg(node_p node, item_p item, hook_p lasthook) { struct ng_mesg *msg; # else /* FreeBSD-4 */ static int ng_rcvmsg(node_p node, struct ng_mesg *msg, const char *retaddr, struct ng_mesg **rptr) { # endif struct ng_mesg *resp = NULL; softc_t *sc = NG_NODE_PRIVATE(node); int error = 0; NGI_GET_MSG(item, msg); if (msg->header.typecookie == NGM_LMC_COOKIE) { switch (msg->header.cmd) { case LMCIOCGSTAT: case LMCIOCGCFG: case LMCIOCSCFG: case LMCIOCREAD: case LMCIOCWRITE: case LMCIOCTL: { /* Call the core ioctl procedure. */ error = core_ioctl(sc, msg->header.cmd, msg->data); if ((msg->header.cmd & IOC_OUT) != 0) { /* synchronous response */ NG_MKRESPONSE(resp, msg, sizeof(struct ng_mesg) + IOCPARM_LEN(msg->header.cmd), M_NOWAIT); if (resp == NULL) error = ENOMEM; else memcpy(resp->data, msg->data, IOCPARM_LEN(msg->header.cmd)); } break; } default: error = EINVAL; break; } } else if ((msg->header.typecookie == NGM_GENERIC_COOKIE) && (msg->header.cmd == NGM_TEXT_STATUS)) { /* synchronous response */ NG_MKRESPONSE(resp, msg, sizeof(struct ng_mesg) + NG_TEXTRESPONSE, M_NOWAIT); if (resp == NULL) error = ENOMEM; else { char *s = resp->data; sprintf(s, "Card type = <%s>\n" "This driver considers the link to be %s.\n" "Use lmcconfig to configure this interface.\n", sc->dev_desc, (sc->status.oper_status==STATUS_UP) ? "UP" : "DOWN"); resp->header.arglen = strlen(s) +1; } } else /* Netgraph should be able to read and write these * parameters with text-format control messages: * SSI HSSI T1E1 T3 * crc crc crc crc * loop loop loop loop * clksrc clksrc * dte dte format format * synth synth cablen cablen * cable timeslot scram * gain * pulse * lbo * Someday I'll implement this... */ error = EINVAL; /* Handle synchronous response. */ # if (__FreeBSD_version >= 500000) NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); # else /* FreeBSD-4 */ if (rptr != NULL) *rptr = resp; else if (resp != NULL) FREE(resp, M_NETGRAPH); FREE(msg, M_NETGRAPH); # endif return error; } /* This is a persistent netgraph node. 
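   Shutdown requests are normally refused: the handler below just
   clears the NG_INVALID flag so the node bounces back to life, and
   only an NG_REALLY_DIE request (FreeBSD-5) lets the node die.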
*/ static int ng_shutdown(node_p node) { # if (__FreeBSD_version >= 500000) /* unless told to really die, bounce back to life */ if ((node->nd_flags & NG_REALLY_DIE)==0) node->nd_flags &= ~NG_INVALID; /* bounce back to life */ # else /* FreeBSD-4 */ ng_cutlinks(node); node->flags &= ~NG_INVALID; /* bounce back to life */ # endif return 0; } /* ng_disconnect is the opposite of this procedure. */ static int ng_newhook(node_p node, hook_p hook, const char *name) { softc_t *sc = NG_NODE_PRIVATE(node); /* Hook name must be 'rawdata'. */ if (strncmp(name, "rawdata", 7) != 0) return EINVAL; /* Is our hook connected? */ if (sc->ng_hook != NULL) return EBUSY; /* Accept the hook. */ sc->ng_hook = hook; return 0; } /* Both ends have accepted their hooks and the links have been made. */ /* This is the last chance to reject the connection request. */ static int ng_connect(hook_p hook) { /* Probably not at splnet, force outward queueing. (huh?) */ NG_HOOK_FORCE_QUEUE(NG_HOOK_PEER(hook)); return 0; /* always accept */ } /* Receive data in mbufs from another Netgraph node. */ /* Transmit an mbuf-chain on the communication link. */ /* This procedure is very similar to raw_output(). */ /* Called from a syscall (user context; no spinlocks). */ # if (__FreeBSD_version >= 500000) static int ng_rcvdata(hook_p hook, item_p item) { softc_t *sc = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); int error = 0; struct mbuf *m; meta_p meta = NULL; NGI_GET_M(item, m); NGI_GET_META(item, meta); NG_FREE_ITEM(item); # else /* FreeBSD-4 */ static int ng_rcvdata(hook_p hook, struct mbuf *m, meta_p meta) { softc_t *sc = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); int error = 0; # endif /* This macro must not store into meta! */ NG_FREE_META(meta); /* Fail if the link is down. */ if (sc->status.oper_status != STATUS_UP) { m_freem(m); sc->status.cntrs.odiscards++; if (DRIVER_DEBUG) printf("%s: ng_rcvdata: tx pkt discarded: link down\n", NAME_UNIT); return ENETDOWN; } /* ng_rcvdata() ENQUEUEs in a syscall or softirq. */ /* txintr_setup() DEQUEUEs in a hard interrupt. */ /* Some BSD QUEUE routines are not interrupt-safe. */ { DISABLE_INTR; # if (__FreeBSD_version >= 503000) if (meta==NULL) IFQ_ENQUEUE(&sc->ng_sndq, m, error); else IFQ_ENQUEUE(&sc->ng_fastq, m, error); # else if (meta==NULL) IFQ_ENQUEUE(&sc->ng_sndq, m, NULL, error); else IFQ_ENQUEUE(&sc->ng_fastq, m, NULL, error); # endif ENABLE_INTR; } if (error==0) user_interrupt(sc, 0); /* start the transmitter */ else { m_freem(m); sc->status.cntrs.odiscards++; if (DRIVER_DEBUG) printf("%s: ng_rcvdata: IFQ_ENQUEUE() failed; error %d\n", NAME_UNIT, error); } return error; } /* ng_newhook is the opposite of this procedure, not */ /* ng_connect, as you might expect from the names. */ static int ng_disconnect(hook_p hook) { softc_t *sc = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); /* Disconnect the hook. */ sc->ng_hook = NULL; return 0; } static struct ng_type ng_type = { .version = NG_ABI_VERSION, .name = NG_LMC_NODE_TYPE, .mod_event = NULL, .constructor = ng_constructor, .rcvmsg = ng_rcvmsg, # if (__FreeBSD_version >=503000) .close = NULL, # endif .shutdown = ng_shutdown, .newhook = ng_newhook, .findhook = NULL, .connect = ng_connect, .rcvdata = ng_rcvdata, # if (defined(__FreeBSD__) && (__FreeBSD_version < 500000)) .rcvdataq = ng_rcvdata, # endif .disconnect = ng_disconnect, }; # if (IFNET == 0) /* Called from a softirq once a second. */ static void ng_watchdog(void *arg) { softc_t *sc = arg; /* Call the core watchdog procedure. */ core_watchdog(sc); /* Set line protocol and package status. 
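   With no ifnet attached there is no line protocol stack, so status
   is pinned to PKG_NG with no line protocol; core_watchdog() still
   bumps sc->status.ticks so user-land observers see fresh state.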
*/ sc->status.line_pkg = PKG_NG; sc->status.line_prot = 0; /* Call this procedure again after one second. */ callout_reset(&sc->ng_callout, hz, ng_watchdog, sc); } # endif /* Attach to the Netgraph kernel interface (/sys/netgraph). * It is called once for each physical card during device attach. * This is effectively ng_constructor. */ static int ng_attach(softc_t *sc) { int error; /* If this node type is not known to Netgraph then register it. */ if (ng_type.refs == 0) /* or: if (ng_findtype(&ng_type) == NULL) */ { if ((error = ng_newtype(&ng_type))) { printf("%s: ng_newtype() failed; error %d\n", NAME_UNIT, error); return error; } } else NG_TYPE_REF(&ng_type); /* Call the superclass node constructor. */ if ((error = ng_make_node_common(&ng_type, &sc->ng_node))) { NG_TYPE_UNREF(&ng_type); printf("%s: ng_make_node_common() failed; error %d\n", NAME_UNIT, error); return error; } /* Associate a name with this netgraph node. */ if ((error = ng_name_node(sc->ng_node, NAME_UNIT))) { NG_NODE_UNREF(sc->ng_node); NG_TYPE_UNREF(&ng_type); printf("%s: ng_name_node() failed; error %d\n", NAME_UNIT, error); return error; } # if (__FreeBSD_version >= 500000) /* Initialize the send queue mutexes. */ mtx_init(&sc->ng_sndq.ifq_mtx, NAME_UNIT, "sndq", MTX_DEF); mtx_init(&sc->ng_fastq.ifq_mtx, NAME_UNIT, "fastq", MTX_DEF); # endif /* Put a backpointer to the softc in the netgraph node. */ NG_NODE_SET_PRIVATE(sc->ng_node, sc); /* ALTQ output queue initialization. */ IFQ_SET_MAXLEN(&sc->ng_fastq, SNDQ_MAXLEN); IFQ_SET_READY(&sc->ng_fastq); IFQ_SET_MAXLEN(&sc->ng_sndq, SNDQ_MAXLEN); IFQ_SET_READY(&sc->ng_sndq); /* If ifnet is present, it will call watchdog. */ /* Otherwise, arrange to call watchdog here. */ # if (IFNET == 0) /* Arrange to call ng_watchdog() once a second. */ # if (__FreeBSD_version >= 500000) callout_init(&sc->ng_callout, 0); # else /* FreeBSD-4 */ callout_init(&sc->ng_callout); # endif callout_reset(&sc->ng_callout, hz, ng_watchdog, sc); # endif return 0; } static void ng_detach(softc_t *sc) { # if (IFNET == 0) callout_stop(&sc->ng_callout); # endif # if (__FreeBSD_version >= 500000) mtx_destroy(&sc->ng_sndq.ifq_mtx); mtx_destroy(&sc->ng_fastq.ifq_mtx); ng_rmnode_self(sc->ng_node); /* free hook */ NG_NODE_UNREF(sc->ng_node); /* free node */ NG_TYPE_UNREF(&ng_type); # else /* FreeBSD-4 */ ng_unname(sc->ng_node); /* free name */ ng_cutlinks(sc->ng_node); /* free hook */ NG_NODE_UNREF(sc->ng_node); /* free node */ NG_TYPE_UNREF(&ng_type); # endif } #endif /* NETGRAPH */ /* The next few procedures initialize the card. */ /* Returns 0 on success; error code on failure. */ static int startup_card(softc_t *sc) { int num_rx_descs, error = 0; u_int32_t tlp_bus_pbl, tlp_bus_cal, tlp_op_tr; u_int32_t tlp_cfdd, tlp_cfcs; u_int32_t tlp_cflt, tlp_csid, tlp_cfit; /* Make sure the COMMAND bits are reasonable. */ tlp_cfcs = READ_PCI_CFG(sc, TLP_CFCS); tlp_cfcs &= ~TLP_CFCS_MWI_ENABLE; tlp_cfcs |= TLP_CFCS_BUS_MASTER; tlp_cfcs |= TLP_CFCS_MEM_ENABLE; tlp_cfcs |= TLP_CFCS_IO_ENABLE; tlp_cfcs |= TLP_CFCS_PAR_ERROR; tlp_cfcs |= TLP_CFCS_SYS_ERROR; WRITE_PCI_CFG(sc, TLP_CFCS, tlp_cfcs); /* Set the LATENCY TIMER to the recommended value, */ /* and make sure the CACHE LINE SIZE is reasonable. */ tlp_cfit = READ_PCI_CFG(sc, TLP_CFIT); tlp_cflt = READ_PCI_CFG(sc, TLP_CFLT); tlp_cflt &= ~TLP_CFLT_LATENCY; tlp_cflt |= (tlp_cfit & TLP_CFIT_MAX_LAT)>>16; /* "prgmbl burst length" and "cache alignment" used below. 
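   The switch maps the cache line size from the CFLT register onto the
   Tulip's cache alignment code (8 -> 1, 16 -> 2, 32 -> 3) and leaves
   the programmable burst length at 32; an unrecognized size falls
   back to an 8-byte line with alignment code 1.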
*/ switch(tlp_cflt & TLP_CFLT_CACHE) { case 8: /* 8 bytes per cache line */ { tlp_bus_pbl = 32; tlp_bus_cal = 1; break; } case 16: { tlp_bus_pbl = 32; tlp_bus_cal = 2; break; } case 32: { tlp_bus_pbl = 32; tlp_bus_cal = 3; break; } default: { tlp_bus_pbl = 32; tlp_bus_cal = 1; tlp_cflt &= ~TLP_CFLT_CACHE; tlp_cflt |= 8; break; } } WRITE_PCI_CFG(sc, TLP_CFLT, tlp_cflt); /* Make sure SNOOZE and SLEEP modes are disabled. */ tlp_cfdd = READ_PCI_CFG(sc, TLP_CFDD); tlp_cfdd &= ~TLP_CFDD_SLEEP; tlp_cfdd &= ~TLP_CFDD_SNOOZE; WRITE_PCI_CFG(sc, TLP_CFDD, tlp_cfdd); DELAY(11*1000); /* Tulip wakes up in 10 ms max */ /* Software Reset the Tulip chip; stops DMA and Interrupts. */ /* This does not change the PCI config regs just set above. */ WRITE_CSR(TLP_BUS_MODE, TLP_BUS_RESET); /* self-clearing */ DELAY(5); /* Tulip is dead for 50 PCI cycles after reset. */ /* Reset the Xilinx Field Programmable Gate Array. */ reset_xilinx(sc); /* side effect: turns on all four LEDs */ /* Configure card-specific stuff (framers, line interfaces, etc.). */ sc->card->config(sc); /* Initializing cards can glitch clocks and upset fifos. */ /* Reset the FIFOs between the Tulip and Xilinx chips. */ set_mii16_bits(sc, MII16_FIFO); clr_mii16_bits(sc, MII16_FIFO); /* Initialize the PCI busmode register. */ /* The PCI bus cycle type "Memory Write and Invalidate" does NOT */ /* work cleanly in any version of the 21140A, so don't enable it! */ WRITE_CSR(TLP_BUS_MODE, (tlp_bus_cal ? TLP_BUS_READ_LINE : 0) | (tlp_bus_cal ? TLP_BUS_READ_MULT : 0) | (tlp_bus_pbl<txring, NUM_TX_DESCS))) return error; WRITE_CSR(TLP_TX_LIST, sc->txring.dma_addr); if ((error = create_ring(sc, &sc->rxring, num_rx_descs))) return error; WRITE_CSR(TLP_RX_LIST, sc->rxring.dma_addr); /* Initialize the operating mode register. */ WRITE_CSR(TLP_OP_MODE, TLP_OP_INIT | (tlp_op_tr<txring); destroy_ring(sc, &sc->rxring); } /* Start the card and attach a kernel interface and line protocol. */ static int attach_card(softc_t *sc, const char *intrstr) { struct config config; u_int32_t tlp_cfrv; u_int16_t mii3; u_int8_t *ieee; int i, error = 0; /* Start the card. */ if ((error = startup_card(sc))) return error; /* Attach a kernel interface. */ #if NETGRAPH if ((error = ng_attach(sc))) return error; sc->flags |= FLAG_NETGRAPH; #endif #if IFNET if ((error = ifnet_attach(sc))) return error; sc->flags |= FLAG_IFNET; #endif /* Attach a line protocol stack. */ sc->config.line_pkg = PKG_RAWIP; config = sc->config; /* get current config */ config.line_pkg = 0; /* select external stack */ config.line_prot = PROT_C_HDLC; config.keep_alive = 1; config_proto(sc, &config); /* reconfigure */ sc->config = config; /* save new configuration */ /* Print interesting hardware-related things. */ mii3 = read_mii(sc, 3); tlp_cfrv = READ_PCI_CFG(sc, TLP_CFRV); printf("%s: PCI rev %d.%d, MII rev %d.%d", NAME_UNIT, (tlp_cfrv>>4) & 0xF, tlp_cfrv & 0xF, (mii3>>4) & 0xF, mii3 & 0xF); ieee = (u_int8_t *)sc->status.ieee; for (i=0; i<3; i++) sc->status.ieee[i] = read_srom(sc, 10+i); printf(", IEEE addr %02x:%02x:%02x:%02x:%02x:%02x", ieee[0], ieee[1], ieee[2], ieee[3], ieee[4], ieee[5]); sc->card->ident(sc); printf(" %s\n", intrstr); /* Print interesting software-related things. */ printf("%s: Driver rev %d.%d.%d", NAME_UNIT, DRIVER_MAJOR_VERSION, DRIVER_MINOR_VERSION, DRIVER_SUB_VERSION); printf(", Options %s%s%s%s%s%s%s%s%s\n", NETGRAPH ? "NETGRAPH " : "", GEN_HDLC ? "GEN_HDLC " : "", NSPPP ? "SPPP " : "", P2P ? "P2P " : "", ALTQ_PRESENT ? "ALTQ " : "", NBPFILTER ? "BPF " : "", DEV_POLL ? 
"POLL " : "", IOREF_CSR ? "IO_CSR " : "MEM_CSR ", (BYTE_ORDER == BIG_ENDIAN) ? "BIG_END " : "LITTLE_END "); /* Make the local hardware ready. */ set_status(sc, 1); return 0; } /* Detach from the kernel in all ways. */ static void detach_card(softc_t *sc) { struct config config; /* Make the local hardware NOT ready. */ set_status(sc, 0); /* Detach external line protocol stack. */ if (sc->config.line_pkg != PKG_RAWIP) { config = sc->config; config.line_pkg = PKG_RAWIP; config_proto(sc, &config); sc->config = config; } /* Detach kernel interfaces. */ #if NETGRAPH if (sc->flags & FLAG_NETGRAPH) { IFQ_PURGE(&sc->ng_fastq); IFQ_PURGE(&sc->ng_sndq); ng_detach(sc); sc->flags &= ~FLAG_NETGRAPH; } #endif #if IFNET if (sc->flags & FLAG_IFNET) { IFQ_PURGE(&sc->ifp->if_snd); ifnet_detach(sc); sc->flags &= ~FLAG_IFNET; } #endif /* Reset the Tulip chip; stops DMA and Interrupts. */ shutdown_card(sc); } /* This is the I/O configuration interface for FreeBSD */ #ifdef __FreeBSD__ static int fbsd_probe(device_t dev) { u_int32_t cfid = pci_read_config(dev, TLP_CFID, 4); u_int32_t csid = pci_read_config(dev, TLP_CSID, 4); /* Looking for a DEC 21140A chip on any Lan Media Corp card. */ if (cfid != TLP_CFID_TULIP) return ENXIO; switch (csid) { case TLP_CSID_HSSI: case TLP_CSID_HSSIc: device_set_desc(dev, HSSI_DESC); break; case TLP_CSID_T3: device_set_desc(dev, T3_DESC); break; case TLP_CSID_SSI: device_set_desc(dev, SSI_DESC); break; case TLP_CSID_T1E1: device_set_desc(dev, T1E1_DESC); break; default: return ENXIO; } return 0; } static int fbsd_detach(device_t dev) { softc_t *sc = device_get_softc(dev); /* Stop the card and detach from the kernel. */ detach_card(sc); /* Release resources. */ if (sc->irq_cookie != NULL) { bus_teardown_intr(dev, sc->irq_res, sc->irq_cookie); sc->irq_cookie = NULL; } if (sc->irq_res != NULL) { bus_release_resource(dev, SYS_RES_IRQ, sc->irq_res_id, sc->irq_res); sc->irq_res = NULL; } if (sc->csr_res != NULL) { bus_release_resource(dev, sc->csr_res_type, sc->csr_res_id, sc->csr_res); sc->csr_res = NULL; } # if (__FreeBSD_version >= 500000) mtx_destroy(&sc->top_mtx); mtx_destroy(&sc->bottom_mtx); # endif return 0; /* no error */ } static void fbsd_shutdown(device_t dev) { shutdown_card(device_get_softc(dev)); } static int fbsd_attach(device_t dev) { softc_t *sc = device_get_softc(dev); int error; /* READ/WRITE_PCI_CFG need this. */ sc->dev = dev; /* What kind of card are we driving? */ switch (READ_PCI_CFG(sc, TLP_CSID)) { case TLP_CSID_HSSI: case TLP_CSID_HSSIc: sc->card = &hssi_card; break; case TLP_CSID_T3: sc->card = &t3_card; break; case TLP_CSID_SSI: sc->card = &ssi_card; break; case TLP_CSID_T1E1: sc->card = &t1_card; break; default: return ENXIO; } sc->dev_desc = device_get_desc(dev); /* Allocate PCI memory or IO resources to access the Tulip chip CSRs. */ # if IOREF_CSR sc->csr_res_id = TLP_CBIO; sc->csr_res_type = SYS_RES_IOPORT; # else sc->csr_res_id = TLP_CBMA; sc->csr_res_type = SYS_RES_MEMORY; # endif sc->csr_res = bus_alloc_resource(dev, sc->csr_res_type, &sc->csr_res_id, 0, ~0, 1, RF_ACTIVE); if (sc->csr_res == NULL) { printf("%s: bus_alloc_resource(csr) failed.\n", NAME_UNIT); return ENXIO; } sc->csr_tag = rman_get_bustag(sc->csr_res); sc->csr_handle = rman_get_bushandle(sc->csr_res); /* Allocate PCI interrupt resources for the card. 
*/ sc->irq_res_id = 0; sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->irq_res_id, 0, ~0, 1, RF_ACTIVE | RF_SHAREABLE); if (sc->irq_res == NULL) { printf("%s: bus_alloc_resource(irq) failed.\n", NAME_UNIT); fbsd_detach(dev); return ENXIO; } if ((error = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE, NULL, bsd_interrupt, sc, &sc->irq_cookie))) { printf("%s: bus_setup_intr() failed; error %d\n", NAME_UNIT, error); fbsd_detach(dev); return error; } # if (__FreeBSD_version >= 500000) /* Initialize the top-half and bottom-half locks. */ mtx_init(&sc->top_mtx, NAME_UNIT, "top half lock", MTX_DEF); mtx_init(&sc->bottom_mtx, NAME_UNIT, "bottom half lock", MTX_DEF); # endif /* Start the card and attach a kernel interface and line protocol. */ if ((error = attach_card(sc, ""))) detach_card(sc); return error; } static device_method_t methods[] = { DEVMETHOD(device_probe, fbsd_probe), DEVMETHOD(device_attach, fbsd_attach), DEVMETHOD(device_detach, fbsd_detach), DEVMETHOD(device_shutdown, fbsd_shutdown), /* This driver does not suspend and resume. */ { 0, 0 } }; static driver_t driver = { .name = DEVICE_NAME, .methods = methods, # if (__FreeBSD_version >= 500000) .size = sizeof(softc_t), # else /* FreeBSD-4 */ .softc = sizeof(softc_t), # endif }; static devclass_t devclass; DRIVER_MODULE(if_lmc, pci, driver, devclass, 0, 0); MODULE_VERSION(if_lmc, 2); MODULE_DEPEND(if_lmc, pci, 1, 1, 1); # if NETGRAPH MODULE_DEPEND(if_lmc, netgraph, NG_ABI_VERSION, NG_ABI_VERSION, NG_ABI_VERSION); # endif # if NSPPP MODULE_DEPEND(if_lmc, sppp, 1, 1, 1); # endif #endif /* __FreeBSD__ */ /* This is the I/O configuration interface for NetBSD. */ #ifdef __NetBSD__ static int nbsd_match(struct device *parent, struct cfdata *match, void *aux) { struct pci_attach_args *pa = aux; u_int32_t cfid = pci_conf_read(pa->pa_pc, pa->pa_tag, TLP_CFID); u_int32_t csid = pci_conf_read(pa->pa_pc, pa->pa_tag, TLP_CSID); /* Looking for a DEC 21140A chip on any Lan Media Corp card. */ if (cfid != TLP_CFID_TULIP) return 0; switch (csid) { case TLP_CSID_HSSI: case TLP_CSID_HSSIc: case TLP_CSID_T3: case TLP_CSID_SSI: case TLP_CSID_T1E1: return 100; default: return 0; } } static int nbsd_detach(struct device *self, int flags) { softc_t *sc = (softc_t *)self; /* device is first in softc */ /* Stop the card and detach from the kernel. */ detach_card(sc); /* Release resources. */ if (sc->sdh_cookie != NULL) { shutdownhook_disestablish(sc->sdh_cookie); sc->sdh_cookie = NULL; } if (sc->irq_cookie != NULL) { pci_intr_disestablish(sc->pa_pc, sc->irq_cookie); sc->irq_cookie = NULL; } if (sc->csr_handle) { bus_space_unmap(sc->csr_tag, sc->csr_handle, TLP_CSR_SIZE); sc->csr_handle = 0; } return 0; /* no error */ } static void nbsd_attach(struct device *parent, struct device *self, void *aux) { softc_t *sc = (softc_t *)self; /* device is first in softc */ struct pci_attach_args *pa = aux; const char *intrstr; bus_addr_t csr_addr; int error; /* READ/WRITE_PCI_CFG need these. */ sc->pa_pc = pa->pa_pc; sc->pa_tag = pa->pa_tag; /* bus_dma needs this. */ sc->pa_dmat = pa->pa_dmat; /* What kind of card are we driving? 
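   The PCI subsystem ID distinguishes the four card models and picks
   both the description string and the card-specific methods table
   (hssi_card, t3_card, ssi_card or t1_card).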
*/ switch (READ_PCI_CFG(sc, TLP_CSID)) { case TLP_CSID_HSSI: case TLP_CSID_HSSIc: sc->dev_desc = HSSI_DESC; sc->card = &hssi_card; break; case TLP_CSID_T3: sc->dev_desc = T3_DESC; sc->card = &t3_card; break; case TLP_CSID_SSI: sc->dev_desc = SSI_DESC; sc->card = &ssi_card; break; case TLP_CSID_T1E1: sc->dev_desc = T1E1_DESC; sc->card = &t1_card; break; default: return; } printf(": %s\n", sc->dev_desc); /* Allocate PCI resources to access the Tulip chip CSRs. */ # if IOREF_CSR csr_addr = (bus_addr_t)READ_PCI_CFG(sc, TLP_CBIO) & -2; sc->csr_tag = pa->pa_iot; /* bus_space tag for IO refs */ # else csr_addr = (bus_addr_t)READ_PCI_CFG(sc, TLP_CBMA); sc->csr_tag = pa->pa_memt; /* bus_space tag for MEM refs */ # endif if ((error = bus_space_map(sc->csr_tag, csr_addr, TLP_CSR_SIZE, 0, &sc->csr_handle))) { printf("%s: bus_space_map() failed; error %d\n", NAME_UNIT, error); return; } /* Allocate PCI interrupt resources. */ if ((error = pci_intr_map(pa, &sc->intr_handle))) { printf("%s: pci_intr_map() failed; error %d\n", NAME_UNIT, error); nbsd_detach(self, 0); return; } sc->irq_cookie = pci_intr_establish(pa->pa_pc, sc->intr_handle, IPL_NET, bsd_interrupt, sc); if (sc->irq_cookie == NULL) { printf("%s: pci_intr_establish() failed\n", NAME_UNIT); nbsd_detach(self, 0); return; } intrstr = pci_intr_string(pa->pa_pc, sc->intr_handle); /* Install a shutdown hook. */ sc->sdh_cookie = shutdownhook_establish(shutdown_card, sc); if (sc->sdh_cookie == NULL) { printf("%s: shutdown_hook_establish() failed\n", NAME_UNIT); nbsd_detach(self, 0); return; } /* Initialize the top-half and bottom-half locks. */ simple_lock_init(&sc->top_lock); simple_lock_init(&sc->bottom_lock); /* Start the card and attach a kernel interface and line protocol. */ if ((error = attach_card(sc, intrstr))) detach_card(sc); } # if (__NetBSD_Version__ >= 106080000) /* 1.6H */ CFATTACH_DECL(lmc, sizeof(softc_t), nbsd_match, nbsd_attach, nbsd_detach, NULL); # else struct cfattach lmc_ca = { /*.ca_name = DEVICE_NAME, */ .ca_devsize = sizeof(softc_t), .ca_match = nbsd_match, .ca_attach = nbsd_attach, .ca_detach = nbsd_detach, .ca_activate = NULL, }; # endif # if (__NetBSD_Version__ >= 106080000) CFDRIVER_DECL(lmc, DV_IFNET, NULL); # else static struct cfdriver lmc_cd = { .cd_name = DEVICE_NAME, .cd_class = DV_IFNET, .cd_ndevs = 0, .cd_devs = NULL, }; # endif /* cfdata is declared static, unseen outside this module. */ /* It is used for LKM; config builds its own in ioconf.c. */ static struct cfdata lmc_cf = { # if (__NetBSD_Version__ >= 106080000) .cf_name = DEVICE_NAME, .cf_atname = DEVICE_NAME, # else .cf_driver = &lmc_cd, .cf_attach = &lmc_ca, # endif .cf_unit = 0, .cf_fstate = FSTATE_STAR, }; # if (__NetBSD_Version__ >= 106080000) MOD_MISC(DEVICE_NAME) # else static struct lkm_misc _module = { .lkm_name = DEVICE_NAME, .lkm_type = LM_MISC, .lkm_offset = 0, .lkm_ver = LKM_VERSION, }; # endif /* From /sys/dev/pci/pci.c (no public prototype). */ int pciprint(void *, const char *); static int lkm_nbsd_match(struct pci_attach_args *pa) { return nbsd_match(0, 0, pa); } /* LKM loader finds this by appending "_lkmentry" to filename "if_lmc". 
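   On LKM_E_LOAD the code below walks every attached pci bus instance,
   probes each device number on it, and config_attach()es any that
   match; LKM_E_UNLOAD detaches this driver's devices in reverse order.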
*/
int
if_lmc_lkmentry(struct lkm_table *lkmtp, int cmd, int ver)
  {
  int i, error = 0;

  if (ver != LKM_VERSION) return EINVAL;

  switch (cmd)
    {
    case LKM_E_LOAD:
      {
      struct cfdriver* pcicd;

      lkmtp->private.lkm_misc = &_module;
      if ((pcicd = config_cfdriver_lookup("pci")) == NULL)
        {
        printf("%s: config_cfdriver_lookup(pci) failed\n", lmc_cd.cd_name);
        return ENOENT;
        }
# if (__NetBSD_Version__ >= 106080000)
      if ((error = config_cfdriver_attach(&lmc_cd)))
        {
        printf("%s: config_cfdriver_attach() failed; error %d\n",
         lmc_cd.cd_name, error);
        return error;
        }
      if ((error = config_cfattach_attach(lmc_cd.cd_name, &lmc_ca)))
        {
        printf("%s: config_cfattach_attach() failed; error %d\n",
         lmc_cd.cd_name, error);
        config_cfdriver_detach(&lmc_cd);
        return error;
        }
# endif
      for (i=0; i<pcicd->cd_ndevs; i++)
        {
        int dev;
        /* A pointer to a device is a pointer to its softc. */
        struct pci_softc *sc = pcicd->cd_devs[i];
        if (sc == NULL) continue;
        for (dev=0; dev<sc->sc_maxndevs; dev++)
          {
          struct pci_attach_args pa;
          pcitag_t tag = pci_make_tag(sc->sc_pc, sc->sc_bus, dev, 0);
          if (pci_probe_device(sc, tag, lkm_nbsd_match, &pa) != 0)
            config_attach(pcicd->cd_devs[i], &lmc_cf, &pa, pciprint);
          /* config_attach doesn't return on failure; it calls panic. */
          }
        }
      break;
      }
    case LKM_E_UNLOAD:
      {
      for (i=lmc_cd.cd_ndevs-1; i>=0; i--)
        {
        struct device *dev = lmc_cd.cd_devs[i];
        if (dev == NULL) continue;
        if ((error = config_detach(dev, 0)))
          {
          printf("%s: config_detach() failed; error %d\n", dev->dv_xname, error);
          return error;
          }
        }
# if (__NetBSD_Version__ >= 106080000)
      if ((error = config_cfattach_detach(lmc_cd.cd_name, &lmc_ca)))
        {
        printf("%s: config_cfattach_detach() failed; error %d\n",
         lmc_cd.cd_name, error);
        return error;
        }
      if ((error = config_cfdriver_detach(&lmc_cd)))
        {
        printf("%s: config_cfdriver_detach() failed; error %d\n",
         lmc_cd.cd_name, error);
        return error;
        }
# endif
      break;
      }
    case LKM_E_STAT: break;
    }

  return error;
  }
#endif /* __NetBSD__ */

/* This is the I/O configuration interface for OpenBSD. */

#ifdef __OpenBSD__

static int
obsd_match(struct device *parent, void *match, void *aux)
  {
  struct pci_attach_args *pa = aux;
  u_int32_t cfid = pci_conf_read(pa->pa_pc, pa->pa_tag, TLP_CFID);
  u_int32_t csid = pci_conf_read(pa->pa_pc, pa->pa_tag, TLP_CSID);

  /* Looking for a DEC 21140A chip on any Lan Media Corp card. */
  if (cfid != TLP_CFID_TULIP) return 0;
  switch (csid)
    {
    case TLP_CSID_HSSI:
    case TLP_CSID_HSSIc:
    case TLP_CSID_T3:
    case TLP_CSID_SSI:
    case TLP_CSID_T1E1:
      return 100; /* match better than other 21140 drivers */
    default:
      return 0;
    }
  }

static int
obsd_detach(struct device *self, int flags)
  {
  softc_t *sc = (softc_t *)self; /* device is first in softc */

  /* Stop the card and detach from the kernel. */
  detach_card(sc);

  /* Release resources. */
  if (sc->sdh_cookie != NULL)
    {
    shutdownhook_disestablish(sc->sdh_cookie);
    sc->sdh_cookie = NULL;
    }
  if (sc->irq_cookie != NULL)
    {
    pci_intr_disestablish(sc->pa_pc, sc->irq_cookie);
    sc->irq_cookie = NULL;
    }
  if (sc->csr_handle)
    {
    bus_space_unmap(sc->csr_tag, sc->csr_handle, TLP_CSR_SIZE);
    sc->csr_handle = 0;
    }

  return 0; /* no error */
  }

static void
obsd_attach(struct device *parent, struct device *self, void *aux)
  {
  softc_t *sc = (softc_t *)self; /* device is first in softc */
  struct pci_attach_args *pa = aux;
  const char *intrstr;
  bus_addr_t csr_addr;
  int error;

  /* READ/WRITE_PCI_CFG need these. */
  sc->pa_pc  = pa->pa_pc;
  sc->pa_tag = pa->pa_tag;
  /* bus_dma needs this. */
  sc->pa_dmat = pa->pa_dmat;

  /* What kind of card are we driving?
*/
  switch (READ_PCI_CFG(sc, TLP_CSID))
    {
    case TLP_CSID_HSSI:
    case TLP_CSID_HSSIc:
      sc->dev_desc = HSSI_DESC;
      sc->card     = &hssi_card;
      break;
    case TLP_CSID_T3:
      sc->dev_desc = T3_DESC;
      sc->card     = &t3_card;
      break;
    case TLP_CSID_SSI:
      sc->dev_desc = SSI_DESC;
      sc->card     = &ssi_card;
      break;
    case TLP_CSID_T1E1:
      sc->dev_desc = T1E1_DESC;
      sc->card     = &t1_card;
      break;
    default:
      return;
    }
  printf(": %s\n", sc->dev_desc);

  /* Allocate PCI resources to access the Tulip chip CSRs. */
# if IOREF_CSR
  csr_addr = (bus_addr_t)READ_PCI_CFG(sc, TLP_CBIO) & -2;
  sc->csr_tag = pa->pa_iot;  /* bus_space tag for IO refs */
# else
  csr_addr = (bus_addr_t)READ_PCI_CFG(sc, TLP_CBMA);
  sc->csr_tag = pa->pa_memt; /* bus_space tag for MEM refs */
# endif
  if ((error = bus_space_map(sc->csr_tag, csr_addr, TLP_CSR_SIZE, 0, &sc->csr_handle)))
    {
    printf("%s: bus_space_map() failed; error %d\n", NAME_UNIT, error);
    return;
    }

  /* Allocate PCI interrupt resources. */
  if ((error = pci_intr_map(pa, &sc->intr_handle)))
    {
    printf("%s: pci_intr_map() failed; error %d\n", NAME_UNIT, error);
    obsd_detach(self, 0);
    return;
    }
  sc->irq_cookie = pci_intr_establish(pa->pa_pc, sc->intr_handle,
   IPL_NET, bsd_interrupt, sc, self->dv_xname);
  if (sc->irq_cookie == NULL)
    {
    printf("%s: pci_intr_establish() failed\n", NAME_UNIT);
    obsd_detach(self, 0);
    return;
    }
  intrstr = pci_intr_string(pa->pa_pc, sc->intr_handle);

  /* Install a shutdown hook. */
  sc->sdh_cookie = shutdownhook_establish(shutdown_card, sc);
  if (sc->sdh_cookie == NULL)
    {
    printf("%s: shutdown_hook_establish() failed\n", NAME_UNIT);
    obsd_detach(self, 0);
    return;
    }

  /* Initialize the top-half and bottom-half locks. */
  simple_lock_init(&sc->top_lock);
  simple_lock_init(&sc->bottom_lock);

  /* Start the card and attach a kernel interface and line protocol. */
  if ((error = attach_card(sc, intrstr))) detach_card(sc);
  }

struct cfattach lmc_ca = {
  .ca_devsize  = sizeof(softc_t),
  .ca_match    = obsd_match,
  .ca_attach   = obsd_attach,
  .ca_detach   = obsd_detach,
  .ca_activate = NULL,
  };

struct cfdriver lmc_cd = {
  .cd_name     = DEVICE_NAME,
  .cd_devs     = NULL,
  .cd_class    = DV_IFNET,
  .cd_indirect = 0,
  .cd_ndevs    = 0,
  };

/* cfdata is declared static, unseen outside this module. */
/* It is used for LKM; config builds its own in ioconf.c. */
static struct cfdata lmc_cfdata = {
  .cf_attach = &lmc_ca,
  .cf_driver = &lmc_cd,
  .cf_unit   = 0,
  .cf_fstate = FSTATE_STAR,
  };

static struct lkm_any _module = {
  .lkm_name   = DEVICE_NAME,
  .lkm_type   = LM_MISC,
  .lkm_offset = 0,
  .lkm_ver    = LKM_VERSION,
  };

/* From /sys/dev/pci/pci.c (no public prototype). */
int pciprint(void *, const char *);
extern struct cfdriver pci_cd;

/* LKM loader finds this by appending "_lkmentry" to filename "if_lmc". */
int
if_lmc_lkmentry(struct lkm_table *lkmtp, int cmd, int ver)
  {
  int i, error = 0;

  if (ver != LKM_VERSION) return EINVAL;

  switch (cmd)
    {
    case LKM_E_LOAD:
      {
      /* XXX This works for ONE card on pci0 of a i386 machine! XXX */
      lkmtp->private.lkm_any = &_module;
      for (i=0; i<pci_cd.cd_ndevs; i++)
        {
        struct device *parent;
        struct pci_attach_args pa;
        if ((parent = pci_cd.cd_devs[i]) == NULL) continue;
        if ((parent->dv_unit)!=0) continue; /* only bus zero */
        /* XXX For machine independence, need: pcibus_attach_args. XXX */
        /* XXX See NetBSD's sys/dev/pci/pci.c/pci_probe_device. XXX */
        /* XXX Why isn't there an LKM network interface module?
         XXX */
        pa.pa_pc   = NULL;               /* XXX */
        pa.pa_bus  = 0;                  /* XXX */
        pa.pa_iot  = I386_BUS_SPACE_IO;  /* XXX */
        pa.pa_memt = I386_BUS_SPACE_MEM; /* XXX */
        pa.pa_dmat = &pci_bus_dma_tag;   /* XXX */
        for (pa.pa_device=0; pa.pa_device<32; pa.pa_device++) /* XXX */
          {
          int intr;
          pa.pa_function = 0; /* DEC-21140A has function 0 only XXX */
          pa.pa_tag = pci_make_tag(pa.pa_pc, pa.pa_bus, pa.pa_device, 0);
          pa.pa_id = pci_conf_read(pa.pa_pc, pa.pa_tag, PCI_ID_REG);
          if ((pa.pa_id & 0xFFFF) == 0xFFFF) continue;
          if ((pa.pa_id & 0xFFFF) == 0) continue;
          /* XXX this only works for pci0 -- no swizzling XXX */
          pa.pa_intrswiz = 0;
          pa.pa_intrtag = pa.pa_tag;
          intr = pci_conf_read(pa.pa_pc, pa.pa_tag, PCI_INTERRUPT_REG);
          pa.pa_intrline = PCI_INTERRUPT_LINE(intr);
          pa.pa_intrpin  = ((PCI_INTERRUPT_PIN(intr) -1) % 4) +1;
          if (obsd_match(parent, &lmc_cfdata, &pa))
            config_attach(parent, &lmc_cfdata, &pa, pciprint);
          /* config_attach doesn't return on failure; it calls panic. */
          }
        }
      break;
      }
    case LKM_E_UNLOAD:
      {
      for (i=lmc_cd.cd_ndevs-1; i>=0; i--)
        {
        struct device *dev = lmc_cd.cd_devs[i];
        if (dev == NULL) continue;
        if ((error = config_detach(dev, 0)))
          printf("%s: config_detach() failed; error %d\n", dev->dv_xname, error);
        }
      break;
      }
    case LKM_E_STAT: break;
    }

  return error;
  }
#endif /* __OpenBSD__ */

/* This is the I/O configuration interface for BSD/OS. */

#ifdef __bsdi__

static int
bsdi_match(pci_devaddr_t *pa)
  {
  u_int32_t cfid = pci_inl(pa, TLP_CFID);
  u_int32_t csid = pci_inl(pa, TLP_CSID);

  /* Looking for a DEC 21140A chip on any Lan Media Corp card. */
  if (cfid != TLP_CFID_TULIP) return 0;
  switch (csid)
    {
    case TLP_CSID_HSSI:
    case TLP_CSID_HSSIc:
    case TLP_CSID_T3:
    case TLP_CSID_SSI:
    case TLP_CSID_T1E1:
      return 1;
    default:
      return 0;
    }
  }

static int
bsdi_probe(struct device *parent, struct cfdata *cf, void *aux)
  {
  struct isa_attach_args *ia = aux;
  pci_devaddr_t *pa = NULL;
  pci_devres_t res;

  /* This must be a PCI bus. */
  if (ia->ia_bustype != BUS_PCI) return 0;

  /* Scan PCI bus for our boards. */
  if ((pa = pci_scan(bsdi_match)) == 0) return 0;

  /* Scan config space for IO and MEM base registers and IRQ info. */
  pci_getres(pa, &res, 1, ia);

  /* Crucial: pass pci_devaddr to bsdi_attach in ia_aux. */
  ia->ia_aux = (void *)pa;

  return 1;
  }

static void
bsdi_attach(struct device *parent, struct device *self, void *aux)
  {
  softc_t *sc = (softc_t *)self; /* device is first in softc */
  struct isa_attach_args *ia = aux;
  pci_devaddr_t *pa = ia->ia_aux; /* this is crucial! */
  int error;

  /* READ/WRITE_PCI_CFG need this. */
  sc->cfgbase = *pa;

  /* What kind of card are we driving? */
  switch (READ_PCI_CFG(sc, TLP_CSID))
    {
    case TLP_CSID_HSSI:
    case TLP_CSID_HSSIc:
      sc->dev_desc = HSSI_DESC;
      sc->card     = &hssi_card;
      break;
    case TLP_CSID_T3:
      sc->dev_desc = T3_DESC;
      sc->card     = &t3_card;
      break;
    case TLP_CSID_SSI:
      sc->dev_desc = SSI_DESC;
      sc->card     = &ssi_card;
      break;
    case TLP_CSID_T1E1:
      sc->dev_desc = T1E1_DESC;
      sc->card     = &t1_card;
      break;
    default:
      return;
    }
  printf(": %s\n", sc->dev_desc);

  /* Allocate PCI memory or IO resources to access the Tulip chip CSRs. */
  sc->csr_iobase  = ia->ia_iobase;
  sc->csr_membase = (u_int32_t *)mapphys((vm_offset_t)ia->ia_maddr, TLP_CSR_SIZE);

  /* Attach to the PCI bus. */
  isa_establish(&sc->id, &sc->dev);

  /* Allocate PCI interrupt resources for the card. */
  sc->ih.ih_fun = bsd_interrupt;
  sc->ih.ih_arg = sc;
  intr_establish(ia->ia_irq, &sc->ih, DV_NET);

  /* Install a shutdown hook. */
  sc->ats.func = shutdown_card;
  sc->ats.arg  = sc;
  atshutdown(&sc->ats, ATSH_ADD);

  /* Initialize the top-half and bottom-half locks.
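   The top lock is the one core_ioctl() and core_watchdog() contend
   for via TOP_TRYLOCK; the bottom lock presumably guards the
   descriptor rings against the interrupt-level code.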
*/ simple_lock_init(&sc->top_lock); simple_lock_init(&sc->bottom_lock); /* Start the card and attach a kernel interface and line protocol. */ if ((error = attach_card(sc, ""))) detach_card(sc); } struct cfdriver lmccd = { .cd_devs = NULL, .cd_name = DEVICE_NAME, .cd_match = bsdi_probe, .cd_attach = bsdi_attach, .cd_class = DV_IFNET, .cd_devsize = sizeof(softc_t), }; #endif /* __bsdi__ */ #ifdef __linux__ /* The kernel calls this procedure when an interrupt happens. */ static irqreturn_t linux_interrupt(int irq, void *dev, struct pt_regs *regs) { struct net_device *net_dev = dev; softc_t *sc = dev_to_hdlc(net_dev)->priv; /* Cut losses early if this is not our interrupt. */ if ((READ_CSR(TLP_STATUS) & TLP_INT_TXRX) == 0) return IRQ_NONE; /* Disable card interrupts. */ WRITE_CSR(TLP_INT_ENBL, TLP_INT_DISABLE); /* Handle the card interrupt with the dev->poll method. */ if (netif_rx_schedule_prep(net_dev)) __netif_rx_schedule(net_dev); /* NAPI - add to poll list */ else printk("%s: interrupt while on poll list\n", NAME_UNIT); return IRQ_HANDLED; } /* This net_device method services interrupts in a softirq. */ /* With rxintr_cleanup(), it implements input flow control. */ static int linux_poll(struct net_device *net_dev, int *budget) { softc_t *sc = dev_to_hdlc(net_dev)->priv; int received; /* Yes, we do NAPI. */ /* Allow processing up to net_dev->quota incoming packets. */ /* This is the ONLY time core_interrupt() may process rx pkts. */ /* Otherwise (sc->quota == 0) and rxintr_cleanup() is a NOOP. */ sc->quota = net_dev->quota; /* Handle the card interrupt with kernel ints enabled. */ /* Process rx pkts (and tx pkts, too). */ /* Card interrupts are disabled. */ core_interrupt(sc, 0); /* Report number of rx packets processed. */ received = net_dev->quota - sc->quota; net_dev->quota -= received; *budget -= received; /* if quota prevented processing all rx pkts, leave rx ints disabled */ if (sc->quota == 0) /* this is off by one...but harmless */ { WRITE_CSR(TLP_INT_ENBL, TLP_INT_TX); return 1; /* more pkts to handle -- reschedule */ } sc->quota = 0; /* disable rx pkt processing by rxintr_cleanup() */ netif_rx_complete(net_dev); /* NAPI - remove from poll list */ /* Enable card interrupts. */ WRITE_CSR(TLP_INT_ENBL, TLP_INT_TXRX); return 0; } /* These next routines are similar to BSD's ifnet kernel/driver interface. */ /* This net_device method hands outgoing packets to the transmitter. */ /* With txintr_setup(), it implements output flow control. */ /* Called from a syscall (user context; no spinlocks). */ static int linux_start(struct sk_buff *skb, struct net_device *net_dev) { softc_t *sc = dev_to_hdlc(net_dev)->priv; if (sc->tx_skb == NULL) { /* Put this skb where the transmitter will see it. */ sc->tx_skb = skb; /* Start the transmitter; incoming pkts are NOT processed. */ user_interrupt(sc, 0); /* If the tx didn't take the skb then stop the queue. */ /* This can happen if another CPU is in core_interrupt(). */ if (sc->tx_skb != NULL) netif_stop_queue(net_dev); return 0; } /* This shouldn't happen; skb is NOT consumed. */ if (netif_queue_stopped(net_dev)) printk("%s: dev->start() called with queue stopped\n", NAME_UNIT); else netif_stop_queue(net_dev); return 1; } /* This net_device method restarts the transmitter if it hangs. */ /* Called from a softirq. */ static void linux_timeout(struct net_device *net_dev) { softc_t *sc = dev_to_hdlc(net_dev)->priv; /* Start the transmitter; incoming packets are NOT processed. */ user_interrupt(sc, 1); } /* This net_device method handles IOCTL syscalls. 
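   Commands in the SIOCDEVPRIVATE window emulate the BSD ioctls used
   by lmcconfig: copy the user buffer in, run core_ioctl(), negate the
   BSD errno for Linux's convention, and copy the result back out.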
*/ /* Called from a syscall (user context; no spinlocks; can sleep). */ static int linux_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) { softc_t *sc = dev_to_hdlc(net_dev)->priv; int error = 0; if ((cmd >= SIOCDEVPRIVATE) && (cmd <= SIOCDEVPRIVATE+15)) { struct iohdr *iohdr = (struct iohdr *)ifr; u_int16_t direction = iohdr->direction; u_int16_t length = iohdr->length; char *user_addr = (char *)iohdr->iohdr; char *kern_addr; if (iohdr->cookie != NGM_LMC_COOKIE) return -EINVAL; /* Emulate a BSD-style IOCTL syscall. */ kern_addr = kmalloc(length, GFP_KERNEL); if (kern_addr == NULL) error = -ENOMEM; if ((error == 0) && ((direction & DIR_IOW) != 0)) error = copy_from_user(kern_addr, user_addr, length); if (error == 0) error = -core_ioctl(sc, (unsigned long)cmd, kern_addr); if ((error == 0) && ((direction & DIR_IOR) != 0)) error = copy_to_user(user_addr, kern_addr, length); kfree(kern_addr); } # if GEN_HDLC else if (cmd == SIOCWANDEV) { const size_t size = sizeof(sync_serial_settings); switch (ifr->ifr_settings.type) { case IF_GET_IFACE: /* get interface config */ { ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL; if (ifr->ifr_settings.size < size) { ifr->ifr_settings.size = size; error = -ENOBUFS; } else { if (sc->config.tx_clk_src == CFG_CLKMUX_ST) sc->hdlc_settings.clock_type = CLOCK_EXT; if (sc->config.tx_clk_src == CFG_CLKMUX_INT) sc->hdlc_settings.clock_type = CLOCK_TXINT; if (sc->config.tx_clk_src == CFG_CLKMUX_RT) sc->hdlc_settings.clock_type = CLOCK_TXFROMRX; sc->hdlc_settings.loopback = (sc->config.loop_back != CFG_LOOP_NONE) ? 1:0; sc->hdlc_settings.clock_rate = sc->status.tx_speed; error = copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &sc->hdlc_settings, size); } break; } case IF_IFACE_SYNC_SERIAL: /* set interface config */ { if (!capable(CAP_NET_ADMIN)) error = -EPERM; if (error == 0) error = copy_from_user(&sc->hdlc_settings, ifr->ifr_settings.ifs_ifsu.sync, size); /* hdlc_settings are currently ignored. */ break; } default: /* Pass the rest to the line protocol code. */ { error = hdlc_ioctl(net_dev, ifr, cmd); break; } } } # endif /* GEN_HDLC */ else /* unknown IOCTL command */ error = -EINVAL; if (DRIVER_DEBUG) printk("%s: linux_ioctl; cmd=0x%08x error=%d\n", NAME_UNIT, cmd, error); return error; } /* This net_device method returns a pointer to device statistics. */ static struct net_device_stats * linux_stats(struct net_device *net_dev) { # if GEN_HDLC return &dev_to_hdlc(net_dev)->stats; # else softc_t *sc = net_dev->priv; return &sc->net_stats; # endif } /* Called from a softirq once a second. */ static void linux_watchdog(unsigned long softc) { softc_t *sc = (softc_t *)softc; u_int8_t old_oper_status = sc->status.oper_status; struct event_cntrs *cntrs = &sc->status.cntrs; struct net_device_stats *stats = linux_stats(sc->net_dev); core_watchdog(sc); /* updates oper_status */ /* Notice change in link status. */ if ((old_oper_status != STATUS_UP) && (sc->status.oper_status == STATUS_UP)) /* link came up */ { hdlc_set_carrier(1, sc->net_dev); netif_wake_queue(sc->net_dev); } if ((old_oper_status == STATUS_UP) && (sc->status.oper_status != STATUS_UP)) /* link went down */ { hdlc_set_carrier(0, sc->net_dev); netif_stop_queue(sc->net_dev); } /* Notice change in line protocol. 
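   Under generic HDLC the protocol lives in hdlc_dev->proto.id, so the
   switch below translates the kernel's IF_PROTO_ codes into the
   driver's PROT_ codes for lmcconfig to display.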
*/ if (sc->config.line_pkg == PKG_RAWIP) { sc->status.line_pkg = PKG_RAWIP; sc->status.line_prot = PROT_IP_HDLC; } # if GEN_HDLC else { sc->status.line_pkg = PKG_GEN_HDLC; switch (sc->hdlc_dev->proto.id) { case IF_PROTO_PPP: sc->status.line_prot = PROT_PPP; break; case IF_PROTO_CISCO: sc->status.line_prot = PROT_C_HDLC; break; case IF_PROTO_FR: sc->status.line_prot = PROT_FRM_RLY; break; case IF_PROTO_HDLC: sc->status.line_prot = PROT_IP_HDLC; break; case IF_PROTO_X25: sc->status.line_prot = PROT_X25; break; case IF_PROTO_HDLC_ETH: sc->status.line_prot = PROT_ETH_HDLC; break; default: sc->status.line_prot = 0; break; } } # endif /* GEN_HDLC */ /* Copy statistics from sc to net_dev for get_stats(). */ stats->rx_packets = cntrs->ipackets; stats->tx_packets = cntrs->opackets; stats->rx_bytes = cntrs->ibytes; stats->tx_bytes = cntrs->obytes; stats->rx_errors = cntrs->ierrors; stats->tx_errors = cntrs->oerrors; stats->rx_dropped = cntrs->idiscards; stats->tx_dropped = cntrs->odiscards; stats->rx_fifo_errors = cntrs->fifo_over; stats->tx_fifo_errors = cntrs->fifo_under; stats->rx_missed_errors = cntrs->missed; stats->rx_over_errors = cntrs->overruns; /* Call this procedure again after one second. */ sc->wd_timer.expires = jiffies + HZ; /* now plus one second */ add_timer(&sc->wd_timer); } /* This is the I/O configuration interface for Linux. */ /* This net_device method is called when IFF_UP goes false. */ static int linux_stop(struct net_device *net_dev) { softc_t *sc = dev_to_hdlc(net_dev)->priv; /* Stop the card and detach from the kernel. */ detach_card(sc); /* doesn't fail */ free_irq(net_dev->irq, net_dev); /* doesn't fail */ del_timer(&sc->wd_timer); /* return value ignored */ return 0; } /* This net_device method is called when IFF_UP goes true. */ static int linux_open(struct net_device *net_dev) { softc_t *sc = dev_to_hdlc(net_dev)->priv; int error; /* Allocate PCI interrupt resources for the card. */ if ((error = request_irq(net_dev->irq, &linux_interrupt, SA_SHIRQ, NAME_UNIT, net_dev))) { printk("%s: request_irq() failed; error %d\n", NAME_UNIT, error); return error; } /* Arrange to call linux_watchdog() once a second. */ init_timer(&sc->wd_timer); sc->wd_timer.expires = jiffies + HZ; /* now plus one second */ sc->wd_timer.function = &linux_watchdog; sc->wd_timer.data = (unsigned long) sc; add_timer(&sc->wd_timer); /* Start the card and attach a kernel interface and line protocol. */ if ((error = -attach_card(sc, ""))) linux_stop(net_dev); else { net_dev->weight = sc->rxring.num_descs; /* input flow control */ netif_start_queue(net_dev); /* output flow control */ } return error; } # if GEN_HDLC static int hdlc_attach(struct net_device *net_dev, unsigned short encoding, unsigned short parity) { return 0; } # endif /* This pci_driver method is called during shutdown or module-unload. */ /* This is called from user context; can sleep; no spinlocks! */ static void __exit linux_remove(struct pci_dev *pci_dev) { struct net_device *net_dev = (struct net_device *)pci_get_drvdata(pci_dev); softc_t *sc = dev_to_hdlc(net_dev)->priv; if (net_dev == NULL) return; /* Assume that linux_stop() has already been called. 
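   The teardown below undoes linux_probe() in reverse order:
   unregister the net device, unmap the CSR window, disable the PCI
   device, release its regions, then free the softc and the
   net_device itself.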
*/ if (sc->flags & FLAG_NETDEV) # if GEN_HDLC unregister_hdlc_device(net_dev); # else unregister_netdev(net_dev); # endif # if (IOREF_CSR == 0) if (sc->csr_membase != NULL) iounmap(sc->csr_membase); # endif pci_disable_device(pci_dev); if (sc->csr_iobase != 0) pci_release_regions(pci_dev); pci_set_drvdata(pci_dev, NULL); kfree(sc); free_netdev(net_dev); } static void setup_netdev(struct net_device *net_dev) { /* Initialize the generic network device. */ /* Note similarity to BSD's ifnet_attach(). */ net_dev->flags = IFF_POINTOPOINT; net_dev->flags |= IFF_RUNNING; net_dev->open = linux_open; net_dev->stop = linux_stop; net_dev->hard_start_xmit = linux_start; net_dev->do_ioctl = linux_ioctl; net_dev->get_stats = linux_stats; net_dev->tx_timeout = linux_timeout; net_dev->poll = linux_poll; net_dev->watchdog_timeo = 1 * HZ; net_dev->tx_queue_len = SNDQ_MAXLEN; net_dev->mtu = MAX_DESC_LEN; net_dev->type = ARPHRD_RAWHDLC; /* The receiver generates frag-lists for packets >4032 bytes. */ /* The transmitter accepts scatter/gather lists and frag-lists. */ /* However Linux linearizes outgoing packets since our hardware */ /* doesn't compute soft checksums. All that work for nothing! */ /*net_dev->features |= NETIF_F_SG; */ /*net_dev->features |= NETIF_F_FRAGLIST; */ } /* This pci_driver method is called during boot or module-load. */ /* This is called from user context; can sleep; no spinlocks! */ static int __init linux_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { u_int32_t cfid, csid; struct net_device *net_dev; softc_t *sc; int error; /* Looking for a DEC 21140A chip on any Lan Media Corp card. */ pci_read_config_dword(pci_dev, TLP_CFID, &cfid); if (cfid != TLP_CFID_TULIP) return -ENXIO; pci_read_config_dword(pci_dev, TLP_CSID, &csid); switch (csid) { case TLP_CSID_HSSI: case TLP_CSID_HSSIc: case TLP_CSID_T3: case TLP_CSID_SSI: case TLP_CSID_T1E1: break; default: return -ENXIO; } /* Declare that these cards use 32-bit single-address PCI cycles. */ if ((error = pci_set_dma_mask(pci_dev, DMA_32BIT_MASK))) { printk("%s: pci_set_dma_mask() failed; error %d\n", DEVICE_NAME, error); return error; } pci_set_consistent_dma_mask(pci_dev, DMA_32BIT_MASK); /* can't fail */ # if GEN_HDLC /* generic-hdlc line protocols */ /* device driver instance data, aka Soft Context or sc */ if ((sc = kmalloc(sizeof(softc_t), GFP_KERNEL)) == NULL) { printk("%s: kmalloc() failed\n", DEVICE_NAME); return -ENOMEM; } memset(sc, 0, sizeof(softc_t)); /* Allocate space for the HDLC network device struct. */ if ((net_dev = alloc_hdlcdev(sc)) == NULL) { printk("%s: alloc_hdlcdev() failed\n", DEVICE_NAME); kfree(sc); return -ENOMEM; } /* Initialize the network device struct. */ setup_netdev(net_dev); /* Initialize the HDLC extension to the network device. */ sc->hdlc_dev = dev_to_hdlc(net_dev); sc->hdlc_dev->attach = hdlc_attach; /* noop for this driver */ sc->hdlc_dev->xmit = linux_start; /* the REAL hard_start_xmit() */ # else /* GEN_HDLC */ /* no line protocol. */ /* Allocate space for the bare network device struct. */ net_dev = alloc_netdev(sizeof(softc_t), DEVICE_NAME"%d", setup_netdev); if (net_dev == NULL) { printk("%s: alloc_netdev() failed\n", DEVICE_NAME); return -ENOMEM; } /* device driver instance data, aka Soft Context or sc */ sc = net_dev->priv; # endif /* GEN_HDLC */ sc->net_dev = net_dev; /* NAME_UNIT macro needs this */ sc->pci_dev = pci_dev; /* READ/WRITE_PCI_CFG macros need this */ /* Cross-link pci_dev and net_dev. 
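   pci_set_drvdata() lets linux_remove() recover the net_device from
   the pci_dev, and SET_NETDEV_DEV() points the net_device back at
   the PCI device, so each structure can reach the other.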
*/ pci_set_drvdata(pci_dev, net_dev); /* pci_dev->driver_data = net_dev */ SET_NETDEV_DEV(net_dev, &pci_dev->dev); /* net_dev->class_dev.dev = &pci_dev->dev */ SET_MODULE_OWNER(net_dev); /* ??? NOOP in linux-2.6.3. ??? */ /* Sets cfcs.io and cfcs.mem; sets pci_dev->irq based on cfit.int */ if ((error = pci_enable_device(pci_dev))) { printk("%s: pci_enable_device() failed; error %d\n", DEVICE_NAME, error); linux_remove(pci_dev); return error; } net_dev->irq = pci_dev->irq; /* linux_open/stop need this */ /* Allocate PCI memory and IO resources to access the Tulip chip CSRs. */ if ((error = pci_request_regions(pci_dev, DEVICE_NAME))) { printk("%s: pci_request_regions() failed; error %d\n", DEVICE_NAME, error); linux_remove(pci_dev); return error; } net_dev->base_addr = pci_resource_start(pci_dev, 0); net_dev->mem_start = pci_resource_start(pci_dev, 1); net_dev->mem_end = pci_resource_end(pci_dev, 1); sc->csr_iobase = net_dev->base_addr; # if (IOREF_CSR == 0) sc->csr_membase = ioremap_nocache(net_dev->mem_start, TLP_CSR_SIZE); if (sc->csr_membase == NULL) { printk("%s: ioremap_nocache() failed\n", DEVICE_NAME); linux_remove(pci_dev); return -EFAULT; } # endif /* Sets cfcs.master, enabling PCI DMA; checks latency timer value. */ pci_set_master(pci_dev); /* Later, attach_card() does this too. */ /* Initialize the top-half and bottom-half locks. */ /* Top_lock must be initialized before net_dev is registered. */ init_MUTEX(&sc->top_lock); spin_lock_init(&sc->bottom_lock); # if GEN_HDLC if ((error = register_hdlc_device(net_dev))) { printk("%s: register_hdlc_device() failed; error %d\n", DEVICE_NAME, error); linux_remove(pci_dev); return error; } # else if ((error = register_netdev(net_dev))) { printk("%s: register_netdev() failed; error %d\n", DEVICE_NAME, error); linux_remove(pci_dev); return error; } # endif /* The NAME_UNIT macro now works. Use DEVICE_NAME before this. */ sc->flags |= FLAG_NETDEV; /* What kind of card are we driving? */ switch (READ_PCI_CFG(sc, TLP_CSID)) { case TLP_CSID_HSSI: case TLP_CSID_HSSIc: sc->dev_desc = HSSI_DESC; sc->card = &hssi_card; break; case TLP_CSID_T3: sc->dev_desc = T3_DESC; sc->card = &t3_card; break; case TLP_CSID_SSI: sc->dev_desc = SSI_DESC; sc->card = &ssi_card; break; case TLP_CSID_T1E1: sc->dev_desc = T1E1_DESC; sc->card = &t1_card; break; default: /* shouldn't happen! */ linux_remove(pci_dev); return -ENXIO; } /* Announce the hardware on the console. */ printk("%s: <%s> io 0x%04lx/9 mem 0x%08lx/25 rom 0x%08lx/14 irq %d pci %s\n", NAME_UNIT, sc->dev_desc, pci_resource_start(pci_dev, 0), pci_resource_start(pci_dev, 1), pci_resource_start(pci_dev, 6), pci_dev->irq, pci_name(pci_dev)); return 0; } /* This pci driver knows how to drive these devices: */ static __initdata struct pci_device_id pci_device_id_tbl[] = { /* Looking for a DEC 21140A chip on any Lan Media Corp card. */ { 0x1011, 0x0009, 0x1376, PCI_ANY_ID, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0 } }; MODULE_DEVICE_TABLE(pci, pci_device_id_tbl); static struct pci_driver pci_driver = { .name = DEVICE_NAME, .id_table = pci_device_id_tbl, .probe = linux_probe, .remove = __devexit_p(linux_remove), /* This driver does not suspend and resume. */ }; /* This ultimately calls our pci_driver.probe() method. */ static int __init linux_modload(void) { return pci_module_init(&pci_driver); } module_init(linux_modload); /* This ultimately calls our pci_driver.remove() method. 
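   pci_unregister_driver() unbinds every device this driver claimed,
   so module unload funnels through the same linux_remove() path as
   device removal.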
*/ static void __exit linux_modunload(void) { pci_unregister_driver(&pci_driver); } module_exit(linux_modunload); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Device driver for SBE/LMC Wide-Area Network cards"); MODULE_AUTHOR("David Boggs "); #endif /* __linux__ */ Index: head/sys/dev/sk/if_sk.c =================================================================== --- head/sys/dev/sk/if_sk.c (revision 170034) +++ head/sys/dev/sk/if_sk.c (revision 170035) @@ -1,3974 +1,3974 @@ /* $OpenBSD: if_sk.c,v 2.33 2003/08/12 05:23:06 nate Exp $ */ /*- * Copyright (c) 1997, 1998, 1999, 2000 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2003 Nathan L. Binkert * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); /* * SysKonnect SK-NET gigabit ethernet driver for FreeBSD. Supports * the SK-984x series adapters, both single port and dual port. * References: * The XaQti XMAC II datasheet, * http://www.freebsd.org/~wpaul/SysKonnect/xmacii_datasheet_rev_c_9-29.pdf * The SysKonnect GEnesis manual, http://www.syskonnect.com * - * Note: XaQti has been aquired by Vitesse, and Vitesse does not have the + * Note: XaQti has been acquired by Vitesse, and Vitesse does not have the * XMAC II datasheet online. 
I have put my copy at people.freebsd.org as a * convenience to others until Vitesse corrects this problem: * * http://people.freebsd.org/~wpaul/SysKonnect/xmacii_datasheet_rev_c_9-29.pdf * * Written by Bill Paul * Department of Electrical Engineering * Columbia University, New York City */ /* * The SysKonnect gigabit ethernet adapters consist of two main * components: the SysKonnect GEnesis controller chip and the XaQti Corp. * XMAC II gigabit ethernet MAC. The XMAC provides all of the MAC * components and a PHY while the GEnesis controller provides a PCI * interface with DMA support. Each card may have between 512K and * 2MB of SRAM on board depending on the configuration. * * The SysKonnect GEnesis controller can have either one or two XMAC * chips connected to it, allowing single or dual port NIC configurations. * SysKonnect has the distinction of being the only vendor on the market * with a dual port gigabit ethernet NIC. The GEnesis provides dual FIFOs, * dual DMA queues, packet/MAC/transmit arbiters and direct access to the * XMAC registers. This driver takes advantage of these features to allow * both XMACs to operate as independent interfaces. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if 0 #define SK_USEIOSPACE #endif #include #include #include MODULE_DEPEND(sk, pci, 1, 1, 1); MODULE_DEPEND(sk, ether, 1, 1, 1); MODULE_DEPEND(sk, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif static struct sk_type sk_devs[] = { { VENDORID_SK, DEVICEID_SK_V1, "SysKonnect Gigabit Ethernet (V1.0)" }, { VENDORID_SK, DEVICEID_SK_V2, "SysKonnect Gigabit Ethernet (V2.0)" }, { VENDORID_MARVELL, DEVICEID_SK_V2, "Marvell Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_BELKIN_5005, "Belkin F5D5005 Gigabit Ethernet" }, { VENDORID_3COM, DEVICEID_3COM_3C940, "3Com 3C940 Gigabit Ethernet" }, { VENDORID_LINKSYS, DEVICEID_LINKSYS_EG1032, "Linksys EG1032 Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE530T_A1, "D-Link DGE-530T Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE530T_B1, "D-Link DGE-530T Gigabit Ethernet" }, { 0, 0, NULL } }; static int skc_probe(device_t); static int skc_attach(device_t); static int skc_detach(device_t); static void skc_shutdown(device_t); static int skc_suspend(device_t); static int skc_resume(device_t); static int sk_detach(device_t); static int sk_probe(device_t); static int sk_attach(device_t); static void sk_tick(void *); static void sk_yukon_tick(void *); static void sk_intr(void *); static void sk_intr_xmac(struct sk_if_softc *); static void sk_intr_bcom(struct sk_if_softc *); static void sk_intr_yukon(struct sk_if_softc *); static __inline void sk_rxcksum(struct ifnet *, struct mbuf *, u_int32_t); static __inline int sk_rxvalid(struct sk_softc *, u_int32_t, u_int32_t); static void sk_rxeof(struct sk_if_softc *); static void sk_jumbo_rxeof(struct sk_if_softc *); static void sk_txeof(struct sk_if_softc *); static void sk_txcksum(struct ifnet *, struct mbuf *, struct sk_tx_desc *); static int sk_encap(struct sk_if_softc *, struct mbuf **); static void sk_start(struct ifnet *); static void sk_start_locked(struct ifnet *); static int sk_ioctl(struct ifnet *, u_long, caddr_t); static void 
sk_init(void *); static void sk_init_locked(struct sk_if_softc *); static void sk_init_xmac(struct sk_if_softc *); static void sk_init_yukon(struct sk_if_softc *); static void sk_stop(struct sk_if_softc *); static void sk_watchdog(void *); static int sk_ifmedia_upd(struct ifnet *); static void sk_ifmedia_sts(struct ifnet *, struct ifmediareq *); static void sk_reset(struct sk_softc *); static __inline void sk_discard_rxbuf(struct sk_if_softc *, int); static __inline void sk_discard_jumbo_rxbuf(struct sk_if_softc *, int); static int sk_newbuf(struct sk_if_softc *, int); static int sk_jumbo_newbuf(struct sk_if_softc *, int); static void sk_dmamap_cb(void *, bus_dma_segment_t *, int, int); static int sk_dma_alloc(struct sk_if_softc *); static void sk_dma_free(struct sk_if_softc *); static void *sk_jalloc(struct sk_if_softc *); static void sk_jfree(void *, void *); static int sk_init_rx_ring(struct sk_if_softc *); static int sk_init_jumbo_rx_ring(struct sk_if_softc *); static void sk_init_tx_ring(struct sk_if_softc *); static u_int32_t sk_win_read_4(struct sk_softc *, int); static u_int16_t sk_win_read_2(struct sk_softc *, int); static u_int8_t sk_win_read_1(struct sk_softc *, int); static void sk_win_write_4(struct sk_softc *, int, u_int32_t); static void sk_win_write_2(struct sk_softc *, int, u_int32_t); static void sk_win_write_1(struct sk_softc *, int, u_int32_t); static int sk_miibus_readreg(device_t, int, int); static int sk_miibus_writereg(device_t, int, int, int); static void sk_miibus_statchg(device_t); static int sk_xmac_miibus_readreg(struct sk_if_softc *, int, int); static int sk_xmac_miibus_writereg(struct sk_if_softc *, int, int, int); static void sk_xmac_miibus_statchg(struct sk_if_softc *); static int sk_marv_miibus_readreg(struct sk_if_softc *, int, int); static int sk_marv_miibus_writereg(struct sk_if_softc *, int, int, int); static void sk_marv_miibus_statchg(struct sk_if_softc *); static uint32_t sk_xmchash(const uint8_t *); static uint32_t sk_gmchash(const uint8_t *); static void sk_setfilt(struct sk_if_softc *, u_int16_t *, int); static void sk_setmulti(struct sk_if_softc *); static void sk_setpromisc(struct sk_if_softc *); static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high); static int sysctl_hw_sk_int_mod(SYSCTL_HANDLER_ARGS); /* * It seems that SK-NET GENESIS supports very simple checksum offload * capability for Tx and I believe it can generate a 0 checksum value for * UDP packets in Tx as the hardware can't differentiate UDP packets from * TCP packets. A 0 checksum value for a UDP packet is invalid, as it * means the sender didn't perform the checksum computation. For safety I * disabled UDP checksum offload capability at the moment. Alternatively * we can introduce a LINK0/LINK1 flag as hme(4) did in its Tx checksum * offload routine. */ #define SK_CSUM_FEATURES (CSUM_TCP) /* * Note that we have newbus methods for both the GEnesis controller * itself and the XMAC(s). The XMACs are children of the GEnesis, and * the miibus code is a child of the XMACs. We need to do it this way * so that the miibus drivers can access the PHY registers on the * right PHY. It's not quite what I had in mind, but it's the only * design that achieves the desired effect.
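 * The resulting device tree, wired up by the DRIVER_MODULE() lines
 * below, is:
 *
 *   pci -> skc (GEnesis) -> sk (one per XMAC) -> miibus -> PHY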
*/ static device_method_t skc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, skc_probe), DEVMETHOD(device_attach, skc_attach), DEVMETHOD(device_detach, skc_detach), DEVMETHOD(device_suspend, skc_suspend), DEVMETHOD(device_resume, skc_resume), DEVMETHOD(device_shutdown, skc_shutdown), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; static driver_t skc_driver = { "skc", skc_methods, sizeof(struct sk_softc) }; static devclass_t skc_devclass; static device_method_t sk_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sk_probe), DEVMETHOD(device_attach, sk_attach), DEVMETHOD(device_detach, sk_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), /* MII interface */ DEVMETHOD(miibus_readreg, sk_miibus_readreg), DEVMETHOD(miibus_writereg, sk_miibus_writereg), DEVMETHOD(miibus_statchg, sk_miibus_statchg), { 0, 0 } }; static driver_t sk_driver = { "sk", sk_methods, sizeof(struct sk_if_softc) }; static devclass_t sk_devclass; DRIVER_MODULE(skc, pci, skc_driver, skc_devclass, 0, 0); DRIVER_MODULE(sk, skc, sk_driver, sk_devclass, 0, 0); DRIVER_MODULE(miibus, sk, miibus_driver, miibus_devclass, 0, 0); static struct resource_spec sk_res_spec_io[] = { { SYS_RES_IOPORT, PCIR_BAR(1), RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0, 0 } }; static struct resource_spec sk_res_spec_mem[] = { { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0, 0 } }; #define SK_SETBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, CSR_READ_4(sc, reg) | x) #define SK_CLRBIT(sc, reg, x) \ CSR_WRITE_4(sc, reg, CSR_READ_4(sc, reg) & ~x) #define SK_WIN_SETBIT_4(sc, reg, x) \ sk_win_write_4(sc, reg, sk_win_read_4(sc, reg) | x) #define SK_WIN_CLRBIT_4(sc, reg, x) \ sk_win_write_4(sc, reg, sk_win_read_4(sc, reg) & ~x) #define SK_WIN_SETBIT_2(sc, reg, x) \ sk_win_write_2(sc, reg, sk_win_read_2(sc, reg) | x) #define SK_WIN_CLRBIT_2(sc, reg, x) \ sk_win_write_2(sc, reg, sk_win_read_2(sc, reg) & ~x) static u_int32_t sk_win_read_4(sc, reg) struct sk_softc *sc; int reg; { #ifdef SK_USEIOSPACE CSR_WRITE_4(sc, SK_RAP, SK_WIN(reg)); return(CSR_READ_4(sc, SK_WIN_BASE + SK_REG(reg))); #else return(CSR_READ_4(sc, reg)); #endif } static u_int16_t sk_win_read_2(sc, reg) struct sk_softc *sc; int reg; { #ifdef SK_USEIOSPACE CSR_WRITE_4(sc, SK_RAP, SK_WIN(reg)); return(CSR_READ_2(sc, SK_WIN_BASE + SK_REG(reg))); #else return(CSR_READ_2(sc, reg)); #endif } static u_int8_t sk_win_read_1(sc, reg) struct sk_softc *sc; int reg; { #ifdef SK_USEIOSPACE CSR_WRITE_4(sc, SK_RAP, SK_WIN(reg)); return(CSR_READ_1(sc, SK_WIN_BASE + SK_REG(reg))); #else return(CSR_READ_1(sc, reg)); #endif } static void sk_win_write_4(sc, reg, val) struct sk_softc *sc; int reg; u_int32_t val; { #ifdef SK_USEIOSPACE CSR_WRITE_4(sc, SK_RAP, SK_WIN(reg)); CSR_WRITE_4(sc, SK_WIN_BASE + SK_REG(reg), val); #else CSR_WRITE_4(sc, reg, val); #endif return; } static void sk_win_write_2(sc, reg, val) struct sk_softc *sc; int reg; u_int32_t val; { #ifdef SK_USEIOSPACE CSR_WRITE_4(sc, SK_RAP, SK_WIN(reg)); CSR_WRITE_2(sc, SK_WIN_BASE + SK_REG(reg), val); #else CSR_WRITE_2(sc, reg, val); #endif return; } static void sk_win_write_1(sc, reg, val) struct sk_softc *sc; int reg; u_int32_t val; { #ifdef SK_USEIOSPACE CSR_WRITE_4(sc, SK_RAP, SK_WIN(reg)); CSR_WRITE_1(sc, SK_WIN_BASE + SK_REG(reg), val); 
#else CSR_WRITE_1(sc, reg, val); #endif return; } static int sk_miibus_readreg(dev, phy, reg) device_t dev; int phy, reg; { struct sk_if_softc *sc_if; int v; sc_if = device_get_softc(dev); SK_IF_MII_LOCK(sc_if); switch(sc_if->sk_softc->sk_type) { case SK_GENESIS: v = sk_xmac_miibus_readreg(sc_if, phy, reg); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: v = sk_marv_miibus_readreg(sc_if, phy, reg); break; default: v = 0; break; } SK_IF_MII_UNLOCK(sc_if); return (v); } static int sk_miibus_writereg(dev, phy, reg, val) device_t dev; int phy, reg, val; { struct sk_if_softc *sc_if; int v; sc_if = device_get_softc(dev); SK_IF_MII_LOCK(sc_if); switch(sc_if->sk_softc->sk_type) { case SK_GENESIS: v = sk_xmac_miibus_writereg(sc_if, phy, reg, val); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: v = sk_marv_miibus_writereg(sc_if, phy, reg, val); break; default: v = 0; break; } SK_IF_MII_UNLOCK(sc_if); return (v); } static void sk_miibus_statchg(dev) device_t dev; { struct sk_if_softc *sc_if; sc_if = device_get_softc(dev); SK_IF_MII_LOCK(sc_if); switch(sc_if->sk_softc->sk_type) { case SK_GENESIS: sk_xmac_miibus_statchg(sc_if); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: sk_marv_miibus_statchg(sc_if); break; } SK_IF_MII_UNLOCK(sc_if); return; } static int sk_xmac_miibus_readreg(sc_if, phy, reg) struct sk_if_softc *sc_if; int phy, reg; { int i; if (sc_if->sk_phytype == SK_PHYTYPE_XMAC && phy != 0) return(0); SK_XM_WRITE_2(sc_if, XM_PHY_ADDR, reg|(phy << 8)); SK_XM_READ_2(sc_if, XM_PHY_DATA); if (sc_if->sk_phytype != SK_PHYTYPE_XMAC) { for (i = 0; i < SK_TIMEOUT; i++) { DELAY(1); if (SK_XM_READ_2(sc_if, XM_MMUCMD) & XM_MMUCMD_PHYDATARDY) break; } if (i == SK_TIMEOUT) { if_printf(sc_if->sk_ifp, "phy failed to come ready\n"); return(0); } } DELAY(1); i = SK_XM_READ_2(sc_if, XM_PHY_DATA); return(i); } static int sk_xmac_miibus_writereg(sc_if, phy, reg, val) struct sk_if_softc *sc_if; int phy, reg, val; { int i; SK_XM_WRITE_2(sc_if, XM_PHY_ADDR, reg|(phy << 8)); for (i = 0; i < SK_TIMEOUT; i++) { if (!(SK_XM_READ_2(sc_if, XM_MMUCMD) & XM_MMUCMD_PHYBUSY)) break; } if (i == SK_TIMEOUT) { if_printf(sc_if->sk_ifp, "phy failed to come ready\n"); return (ETIMEDOUT); } SK_XM_WRITE_2(sc_if, XM_PHY_DATA, val); for (i = 0; i < SK_TIMEOUT; i++) { DELAY(1); if (!(SK_XM_READ_2(sc_if, XM_MMUCMD) & XM_MMUCMD_PHYBUSY)) break; } if (i == SK_TIMEOUT) if_printf(sc_if->sk_ifp, "phy write timed out\n"); return(0); } static void sk_xmac_miibus_statchg(sc_if) struct sk_if_softc *sc_if; { struct mii_data *mii; mii = device_get_softc(sc_if->sk_miibus); /* * If this is a GMII PHY, manually set the XMAC's * duplex mode accordingly. 
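 * The duplex bit lives in the XMAC's MMU command register: set
 * XM_MMUCMD_GMIIFDX when miibus reports IFM_FDX and clear it
 * otherwise.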
*/ if (sc_if->sk_phytype != SK_PHYTYPE_XMAC) { if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) { SK_XM_SETBIT_2(sc_if, XM_MMUCMD, XM_MMUCMD_GMIIFDX); } else { SK_XM_CLRBIT_2(sc_if, XM_MMUCMD, XM_MMUCMD_GMIIFDX); } } } static int sk_marv_miibus_readreg(sc_if, phy, reg) struct sk_if_softc *sc_if; int phy, reg; { u_int16_t val; int i; if (phy != 0 || (sc_if->sk_phytype != SK_PHYTYPE_MARV_COPPER && sc_if->sk_phytype != SK_PHYTYPE_MARV_FIBER)) { return(0); } SK_YU_WRITE_2(sc_if, YUKON_SMICR, YU_SMICR_PHYAD(phy) | YU_SMICR_REGAD(reg) | YU_SMICR_OP_READ); for (i = 0; i < SK_TIMEOUT; i++) { DELAY(1); val = SK_YU_READ_2(sc_if, YUKON_SMICR); if (val & YU_SMICR_READ_VALID) break; } if (i == SK_TIMEOUT) { if_printf(sc_if->sk_ifp, "phy failed to come ready\n"); return(0); } val = SK_YU_READ_2(sc_if, YUKON_SMIDR); return(val); } static int sk_marv_miibus_writereg(sc_if, phy, reg, val) struct sk_if_softc *sc_if; int phy, reg, val; { int i; SK_YU_WRITE_2(sc_if, YUKON_SMIDR, val); SK_YU_WRITE_2(sc_if, YUKON_SMICR, YU_SMICR_PHYAD(phy) | YU_SMICR_REGAD(reg) | YU_SMICR_OP_WRITE); for (i = 0; i < SK_TIMEOUT; i++) { DELAY(1); if ((SK_YU_READ_2(sc_if, YUKON_SMICR) & YU_SMICR_BUSY) == 0) break; } if (i == SK_TIMEOUT) if_printf(sc_if->sk_ifp, "phy write timeout\n"); return(0); } static void sk_marv_miibus_statchg(sc_if) struct sk_if_softc *sc_if; { return; } #define HASH_BITS 6 static u_int32_t sk_xmchash(addr) const uint8_t *addr; { uint32_t crc; /* Compute CRC for the address value. */ crc = ether_crc32_le(addr, ETHER_ADDR_LEN); return (~crc & ((1 << HASH_BITS) - 1)); } /* gmchash is just a big endian crc */ static u_int32_t sk_gmchash(addr) const uint8_t *addr; { uint32_t crc; /* Compute CRC for the address value. */ crc = ether_crc32_be(addr, ETHER_ADDR_LEN); return (crc & ((1 << HASH_BITS) - 1)); } static void sk_setfilt(sc_if, addr, slot) struct sk_if_softc *sc_if; u_int16_t *addr; int slot; { int base; base = XM_RXFILT_ENTRY(slot); SK_XM_WRITE_2(sc_if, base, addr[0]); SK_XM_WRITE_2(sc_if, base + 2, addr[1]); SK_XM_WRITE_2(sc_if, base + 4, addr[2]); return; } static void sk_setmulti(sc_if) struct sk_if_softc *sc_if; { struct sk_softc *sc = sc_if->sk_softc; struct ifnet *ifp = sc_if->sk_ifp; u_int32_t hashes[2] = { 0, 0 }; int h = 0, i; struct ifmultiaddr *ifma; u_int16_t dummy[] = { 0, 0, 0 }; u_int16_t maddr[(ETHER_ADDR_LEN+1)/2]; SK_IF_LOCK_ASSERT(sc_if); /* First, zot all the existing filters. */ switch(sc->sk_type) { case SK_GENESIS: for (i = 1; i < XM_RXFILT_MAX; i++) sk_setfilt(sc_if, dummy, i); SK_XM_WRITE_4(sc_if, XM_MAR0, 0); SK_XM_WRITE_4(sc_if, XM_MAR2, 0); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: SK_YU_WRITE_2(sc_if, YUKON_MCAH1, 0); SK_YU_WRITE_2(sc_if, YUKON_MCAH2, 0); SK_YU_WRITE_2(sc_if, YUKON_MCAH3, 0); SK_YU_WRITE_2(sc_if, YUKON_MCAH4, 0); break; } /* Now program new ones. */ if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { hashes[0] = 0xFFFFFFFF; hashes[1] = 0xFFFFFFFF; } else { i = 1; IF_ADDR_LOCK(ifp); TAILQ_FOREACH_REVERSE(ifma, &ifp->if_multiaddrs, ifmultihead, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; /* * Program the first XM_RXFILT_MAX multicast groups * into the perfect filter. For all others, * use the hash table. 
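 * The hash is HASH_BITS (6) wide, selecting one of 64 bits kept in
 * two 32-bit words: hash value h sets bit h of hashes[0] when h < 32
 * and bit (h - 32) of hashes[1] otherwise; h = 37, for example, sets
 * bit 5 of hashes[1].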
*/ if (sc->sk_type == SK_GENESIS && i < XM_RXFILT_MAX) { bcopy(LLADDR( (struct sockaddr_dl *)ifma->ifma_addr), maddr, ETHER_ADDR_LEN); sk_setfilt(sc_if, maddr, i); i++; continue; } switch(sc->sk_type) { case SK_GENESIS: bcopy(LLADDR( (struct sockaddr_dl *)ifma->ifma_addr), maddr, ETHER_ADDR_LEN); h = sk_xmchash((const uint8_t *)maddr); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: bcopy(LLADDR( (struct sockaddr_dl *)ifma->ifma_addr), maddr, ETHER_ADDR_LEN); h = sk_gmchash((const uint8_t *)maddr); break; } if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } IF_ADDR_UNLOCK(ifp); } switch(sc->sk_type) { case SK_GENESIS: SK_XM_SETBIT_4(sc_if, XM_MODE, XM_MODE_RX_USE_HASH| XM_MODE_RX_USE_PERFECT); SK_XM_WRITE_4(sc_if, XM_MAR0, hashes[0]); SK_XM_WRITE_4(sc_if, XM_MAR2, hashes[1]); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: SK_YU_WRITE_2(sc_if, YUKON_MCAH1, hashes[0] & 0xffff); SK_YU_WRITE_2(sc_if, YUKON_MCAH2, (hashes[0] >> 16) & 0xffff); SK_YU_WRITE_2(sc_if, YUKON_MCAH3, hashes[1] & 0xffff); SK_YU_WRITE_2(sc_if, YUKON_MCAH4, (hashes[1] >> 16) & 0xffff); break; } return; } static void sk_setpromisc(sc_if) struct sk_if_softc *sc_if; { struct sk_softc *sc = sc_if->sk_softc; struct ifnet *ifp = sc_if->sk_ifp; SK_IF_LOCK_ASSERT(sc_if); switch(sc->sk_type) { case SK_GENESIS: if (ifp->if_flags & IFF_PROMISC) { SK_XM_SETBIT_4(sc_if, XM_MODE, XM_MODE_RX_PROMISC); } else { SK_XM_CLRBIT_4(sc_if, XM_MODE, XM_MODE_RX_PROMISC); } break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: if (ifp->if_flags & IFF_PROMISC) { SK_YU_CLRBIT_2(sc_if, YUKON_RCR, YU_RCR_UFLEN | YU_RCR_MUFLEN); } else { SK_YU_SETBIT_2(sc_if, YUKON_RCR, YU_RCR_UFLEN | YU_RCR_MUFLEN); } break; } return; } static int sk_init_rx_ring(sc_if) struct sk_if_softc *sc_if; { struct sk_ring_data *rd; bus_addr_t addr; u_int32_t csum_start; int i; sc_if->sk_cdata.sk_rx_cons = 0; csum_start = (ETHER_HDR_LEN + sizeof(struct ip)) << 16 | ETHER_HDR_LEN; rd = &sc_if->sk_rdata; bzero(rd->sk_rx_ring, sizeof(struct sk_rx_desc) * SK_RX_RING_CNT); for (i = 0; i < SK_RX_RING_CNT; i++) { if (sk_newbuf(sc_if, i) != 0) return (ENOBUFS); if (i == (SK_RX_RING_CNT - 1)) addr = SK_RX_RING_ADDR(sc_if, 0); else addr = SK_RX_RING_ADDR(sc_if, i + 1); rd->sk_rx_ring[i].sk_next = htole32(SK_ADDR_LO(addr)); rd->sk_rx_ring[i].sk_csum_start = htole32(csum_start); } bus_dmamap_sync(sc_if->sk_cdata.sk_rx_ring_tag, sc_if->sk_cdata.sk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return(0); } static int sk_init_jumbo_rx_ring(sc_if) struct sk_if_softc *sc_if; { struct sk_ring_data *rd; bus_addr_t addr; u_int32_t csum_start; int i; sc_if->sk_cdata.sk_jumbo_rx_cons = 0; csum_start = ((ETHER_HDR_LEN + sizeof(struct ip)) << 16) | ETHER_HDR_LEN; rd = &sc_if->sk_rdata; bzero(rd->sk_jumbo_rx_ring, sizeof(struct sk_rx_desc) * SK_JUMBO_RX_RING_CNT); for (i = 0; i < SK_JUMBO_RX_RING_CNT; i++) { if (sk_jumbo_newbuf(sc_if, i) != 0) return (ENOBUFS); if (i == (SK_JUMBO_RX_RING_CNT - 1)) addr = SK_JUMBO_RX_RING_ADDR(sc_if, 0); else addr = SK_JUMBO_RX_RING_ADDR(sc_if, i + 1); rd->sk_jumbo_rx_ring[i].sk_next = htole32(SK_ADDR_LO(addr)); rd->sk_jumbo_rx_ring[i].sk_csum_start = htole32(csum_start); } bus_dmamap_sync(sc_if->sk_cdata.sk_jumbo_rx_ring_tag, sc_if->sk_cdata.sk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } static void sk_init_tx_ring(sc_if) struct sk_if_softc *sc_if; { struct sk_ring_data *rd; struct sk_txdesc *txd; bus_addr_t addr; int i; STAILQ_INIT(&sc_if->sk_cdata.sk_txfreeq); 
STAILQ_INIT(&sc_if->sk_cdata.sk_txbusyq); sc_if->sk_cdata.sk_tx_prod = 0; sc_if->sk_cdata.sk_tx_cons = 0; sc_if->sk_cdata.sk_tx_cnt = 0; rd = &sc_if->sk_rdata; bzero(rd->sk_tx_ring, sizeof(struct sk_tx_desc) * SK_TX_RING_CNT); for (i = 0; i < SK_TX_RING_CNT; i++) { if (i == (SK_TX_RING_CNT - 1)) addr = SK_TX_RING_ADDR(sc_if, 0); else addr = SK_TX_RING_ADDR(sc_if, i + 1); rd->sk_tx_ring[i].sk_next = htole32(SK_ADDR_LO(addr)); txd = &sc_if->sk_cdata.sk_txdesc[i]; STAILQ_INSERT_TAIL(&sc_if->sk_cdata.sk_txfreeq, txd, tx_q); } bus_dmamap_sync(sc_if->sk_cdata.sk_tx_ring_tag, sc_if->sk_cdata.sk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static __inline void sk_discard_rxbuf(sc_if, idx) struct sk_if_softc *sc_if; int idx; { struct sk_rx_desc *r; struct sk_rxdesc *rxd; struct mbuf *m; r = &sc_if->sk_rdata.sk_rx_ring[idx]; rxd = &sc_if->sk_cdata.sk_rxdesc[idx]; m = rxd->rx_m; r->sk_ctl = htole32(m->m_len | SK_RXSTAT | SK_OPCODE_CSUM); } static __inline void sk_discard_jumbo_rxbuf(sc_if, idx) struct sk_if_softc *sc_if; int idx; { struct sk_rx_desc *r; struct sk_rxdesc *rxd; struct mbuf *m; r = &sc_if->sk_rdata.sk_jumbo_rx_ring[idx]; rxd = &sc_if->sk_cdata.sk_jumbo_rxdesc[idx]; m = rxd->rx_m; r->sk_ctl = htole32(m->m_len | SK_RXSTAT | SK_OPCODE_CSUM); } static int sk_newbuf(sc_if, idx) struct sk_if_softc *sc_if; int idx; { struct sk_rx_desc *r; struct sk_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; m_adj(m, ETHER_ALIGN); if (bus_dmamap_load_mbuf_sg(sc_if->sk_cdata.sk_rx_tag, sc_if->sk_cdata.sk_rx_sparemap, m, segs, &nsegs, 0) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->sk_cdata.sk_rxdesc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->sk_cdata.sk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->sk_cdata.sk_rx_tag, rxd->rx_dmamap); } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->sk_cdata.sk_rx_sparemap; sc_if->sk_cdata.sk_rx_sparemap = map; bus_dmamap_sync(sc_if->sk_cdata.sk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; r = &sc_if->sk_rdata.sk_rx_ring[idx]; r->sk_data_lo = htole32(SK_ADDR_LO(segs[0].ds_addr)); r->sk_data_hi = htole32(SK_ADDR_HI(segs[0].ds_addr)); r->sk_ctl = htole32(segs[0].ds_len | SK_RXSTAT | SK_OPCODE_CSUM); return (0); } static int sk_jumbo_newbuf(sc_if, idx) struct sk_if_softc *sc_if; int idx; { struct sk_rx_desc *r; struct sk_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; void *buf; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); buf = sk_jalloc(sc_if); if (buf == NULL) { m_freem(m); return (ENOBUFS); } /* Attach the buffer to the mbuf */ MEXTADD(m, buf, SK_JLEN, sk_jfree, (struct sk_if_softc *)sc_if, 0, EXT_NET_DRV); if ((m->m_flags & M_EXT) == 0) { m_freem(m); return (ENOBUFS); } m->m_pkthdr.len = m->m_len = SK_JLEN; /* * Adjust alignment so packet payload begins on a * longword boundary. Mandatory for Alpha, useful on * x86 too. 
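 * ETHER_ALIGN is 2, so after m_adj() the 14-byte Ethernet header
 * ends on a 4-byte boundary (2 + 14 = 16) and the IP header that
 * follows it is longword-aligned.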
*/ m_adj(m, ETHER_ALIGN); if (bus_dmamap_load_mbuf_sg(sc_if->sk_cdata.sk_jumbo_rx_tag, sc_if->sk_cdata.sk_jumbo_rx_sparemap, m, segs, &nsegs, 0) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->sk_cdata.sk_jumbo_rxdesc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->sk_cdata.sk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->sk_cdata.sk_jumbo_rx_tag, rxd->rx_dmamap); } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->sk_cdata.sk_jumbo_rx_sparemap; sc_if->sk_cdata.sk_jumbo_rx_sparemap = map; bus_dmamap_sync(sc_if->sk_cdata.sk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; r = &sc_if->sk_rdata.sk_jumbo_rx_ring[idx]; r->sk_data_lo = htole32(SK_ADDR_LO(segs[0].ds_addr)); r->sk_data_hi = htole32(SK_ADDR_HI(segs[0].ds_addr)); r->sk_ctl = htole32(segs[0].ds_len | SK_RXSTAT | SK_OPCODE_CSUM); return (0); } /* * Set media options. */ static int sk_ifmedia_upd(ifp) struct ifnet *ifp; { struct sk_if_softc *sc_if = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc_if->sk_miibus); sk_init(sc_if); mii_mediachg(mii); return(0); } /* * Report current media status. */ static void sk_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct sk_if_softc *sc_if; struct mii_data *mii; sc_if = ifp->if_softc; mii = device_get_softc(sc_if->sk_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; return; } static int sk_ioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct sk_if_softc *sc_if = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int error, mask; struct mii_data *mii; error = 0; switch(command) { case SIOCSIFMTU: SK_IF_LOCK(sc_if); if (ifr->ifr_mtu > SK_JUMBO_MTU) error = EINVAL; else { ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sk_init_locked(sc_if); } SK_IF_UNLOCK(sc_if); break; case SIOCSIFFLAGS: SK_IF_LOCK(sc_if); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ sc_if->sk_if_flags) & IFF_PROMISC) { sk_setpromisc(sc_if); sk_setmulti(sc_if); } } else sk_init_locked(sc_if); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) sk_stop(sc_if); } sc_if->sk_if_flags = ifp->if_flags; SK_IF_UNLOCK(sc_if); break; case SIOCADDMULTI: case SIOCDELMULTI: SK_IF_LOCK(sc_if); if (ifp->if_drv_flags & IFF_DRV_RUNNING) sk_setmulti(sc_if); SK_IF_UNLOCK(sc_if); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc_if->sk_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: SK_IF_LOCK(sc_if); if (sc_if->sk_softc->sk_type == SK_GENESIS) { SK_IF_UNLOCK(sc_if); break; } mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; if (IFCAP_HWCSUM & ifp->if_capenable && IFCAP_HWCSUM & ifp->if_capabilities) ifp->if_hwassist = SK_CSUM_FEATURES; else ifp->if_hwassist = 0; } SK_IF_UNLOCK(sc_if); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* * Probe for a SysKonnect GEnesis chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int skc_probe(dev) device_t dev; { struct sk_type *t = sk_devs; while(t->sk_name != NULL) { if ((pci_get_vendor(dev) == t->sk_vid) && (pci_get_device(dev) == t->sk_did)) { /* * Only attach to rev. 2 of the Linksys EG1032 adapter. * Rev. 3 is supported by re(4). 
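 * Both revisions answer to the same vendor and device IDs, so the
 * probe tells them apart by PCI subdevice ID: anything other than
 * SUBDEVICEID_LINKSYS_EG1032_REV2 is skipped here so that re(4) can
 * claim it.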
*/ if ((t->sk_vid == VENDORID_LINKSYS) && (t->sk_did == DEVICEID_LINKSYS_EG1032) && (pci_get_subdevice(dev) != SUBDEVICEID_LINKSYS_EG1032_REV2)) { t++; continue; } device_set_desc(dev, t->sk_name); return (BUS_PROBE_DEFAULT); } t++; } return(ENXIO); } /* * Force the GEnesis into reset, then bring it out of reset. */ static void sk_reset(sc) struct sk_softc *sc; { CSR_WRITE_2(sc, SK_CSR, SK_CSR_SW_RESET); CSR_WRITE_2(sc, SK_CSR, SK_CSR_MASTER_RESET); if (SK_YUKON_FAMILY(sc->sk_type)) CSR_WRITE_2(sc, SK_LINK_CTRL, SK_LINK_RESET_SET); DELAY(1000); CSR_WRITE_2(sc, SK_CSR, SK_CSR_SW_UNRESET); DELAY(2); CSR_WRITE_2(sc, SK_CSR, SK_CSR_MASTER_UNRESET); if (SK_YUKON_FAMILY(sc->sk_type)) CSR_WRITE_2(sc, SK_LINK_CTRL, SK_LINK_RESET_CLEAR); if (sc->sk_type == SK_GENESIS) { /* Configure packet arbiter */ sk_win_write_2(sc, SK_PKTARB_CTL, SK_PKTARBCTL_UNRESET); sk_win_write_2(sc, SK_RXPA1_TINIT, SK_PKTARB_TIMEOUT); sk_win_write_2(sc, SK_TXPA1_TINIT, SK_PKTARB_TIMEOUT); sk_win_write_2(sc, SK_RXPA2_TINIT, SK_PKTARB_TIMEOUT); sk_win_write_2(sc, SK_TXPA2_TINIT, SK_PKTARB_TIMEOUT); } /* Enable RAM interface */ sk_win_write_4(sc, SK_RAMCTL, SK_RAMCTL_UNRESET); /* * Configure interrupt moderation. The moderation timer * defers interrupts specified in the interrupt moderation * timer mask based on the timeout specified in the interrupt * moderation timer init register. Each bit in the timer * register represents one tick, so to specify a timeout in * microseconds, we have to multiply by the correct number of * ticks-per-microsecond. */ switch (sc->sk_type) { case SK_GENESIS: sc->sk_int_ticks = SK_IMTIMER_TICKS_GENESIS; break; default: sc->sk_int_ticks = SK_IMTIMER_TICKS_YUKON; break; } if (bootverbose) device_printf(sc->sk_dev, "interrupt moderation is %d us\n", sc->sk_int_mod); sk_win_write_4(sc, SK_IMTIMERINIT, SK_IM_USECS(sc->sk_int_mod, sc->sk_int_ticks)); sk_win_write_4(sc, SK_IMMR, SK_ISR_TX1_S_EOF|SK_ISR_TX2_S_EOF| SK_ISR_RX1_EOF|SK_ISR_RX2_EOF); sk_win_write_1(sc, SK_IMTIMERCTL, SK_IMCTL_START); return; } static int sk_probe(dev) device_t dev; { struct sk_softc *sc; sc = device_get_softc(device_get_parent(dev)); /* * Not much to do here. We always know there will be * at least one XMAC present, and if there are two, * skc_attach() will create a second device instance * for us. */ switch (sc->sk_type) { case SK_GENESIS: device_set_desc(dev, "XaQti Corp. XMAC II"); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: device_set_desc(dev, "Marvell Semiconductor, Inc. Yukon"); break; } return (BUS_PROBE_DEFAULT); } /* * Each XMAC chip is attached as a separate logical IP interface. * Single port cards will have only one logical interface of course. 
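 * skc_attach() adds one "sk" child per port and passes the port
 * number through the ivars, so sk_attach() below learns which XMAC
 * it owns from *(int *)device_get_ivars(dev).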
*/ static int sk_attach(dev) device_t dev; { struct sk_softc *sc; struct sk_if_softc *sc_if; struct ifnet *ifp; int i, port, error; u_char eaddr[6]; if (dev == NULL) return(EINVAL); error = 0; sc_if = device_get_softc(dev); sc = device_get_softc(device_get_parent(dev)); port = *(int *)device_get_ivars(dev); sc_if->sk_if_dev = dev; sc_if->sk_port = port; sc_if->sk_softc = sc; sc->sk_if[port] = sc_if; if (port == SK_PORT_A) sc_if->sk_tx_bmu = SK_BMU_TXS_CSR0; if (port == SK_PORT_B) sc_if->sk_tx_bmu = SK_BMU_TXS_CSR1; callout_init_mtx(&sc_if->sk_tick_ch, &sc_if->sk_softc->sk_mtx, 0); callout_init_mtx(&sc_if->sk_watchdog_ch, &sc_if->sk_softc->sk_mtx, 0); if (sk_dma_alloc(sc_if) != 0) { error = ENOMEM; goto fail; } ifp = sc_if->sk_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc_if->sk_if_dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc_if; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; /* * SK_GENESIS has a bug in checksum offload - From linux. */ if (sc_if->sk_softc->sk_type != SK_GENESIS) { ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_hwassist = SK_CSUM_FEATURES; } else { ifp->if_capabilities = 0; ifp->if_hwassist = 0; } ifp->if_capenable = ifp->if_capabilities; ifp->if_ioctl = sk_ioctl; ifp->if_start = sk_start; ifp->if_timer = 0; ifp->if_watchdog = NULL; ifp->if_init = sk_init; IFQ_SET_MAXLEN(&ifp->if_snd, SK_TX_RING_CNT - 1); ifp->if_snd.ifq_drv_maxlen = SK_TX_RING_CNT - 1; IFQ_SET_READY(&ifp->if_snd); /* * Get station address for this interface. Note that * dual port cards actually come with three station * addresses: one for each port, plus an extra. The * extra one is used by the SysKonnect driver software * as a 'virtual' station address for when both ports * are operating in failover mode. Currently we don't * use this extra address. */ SK_IF_LOCK(sc_if); for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = sk_win_read_1(sc, SK_MAC0_0 + (port * 8) + i); /* * Set up RAM buffer addresses. The NIC will have a certain * amount of SRAM on it, somewhere between 512K and 2MB. We * need to divide this up a) between the transmitter and * receiver and b) between the two XMACs, if this is a * dual port NIC. Our algorithm is to divide up the memory * evenly so that everyone gets a fair share. * * Just to be contrary, Yukon2 appears to have separate memory * for each MAC.
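 * For example, with 1MB of SRAM on a dual port board the code below
 * computes chunk = 256K: port A's rx and tx buffers occupy the first
 * 512K and port B's the second 512K, with all boundaries expressed
 * in 8-byte (u_int64_t) units.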
*/ if (sk_win_read_1(sc, SK_CONFIG) & SK_CONFIG_SINGLEMAC) { u_int32_t chunk, val; chunk = sc->sk_ramsize / 2; val = sc->sk_rboff / sizeof(u_int64_t); sc_if->sk_rx_ramstart = val; val += (chunk / sizeof(u_int64_t)); sc_if->sk_rx_ramend = val - 1; sc_if->sk_tx_ramstart = val; val += (chunk / sizeof(u_int64_t)); sc_if->sk_tx_ramend = val - 1; } else { u_int32_t chunk, val; chunk = sc->sk_ramsize / 4; val = (sc->sk_rboff + (chunk * 2 * sc_if->sk_port)) / sizeof(u_int64_t); sc_if->sk_rx_ramstart = val; val += (chunk / sizeof(u_int64_t)); sc_if->sk_rx_ramend = val - 1; sc_if->sk_tx_ramstart = val; val += (chunk / sizeof(u_int64_t)); sc_if->sk_tx_ramend = val - 1; } /* Read and save PHY type and set PHY address */ sc_if->sk_phytype = sk_win_read_1(sc, SK_EPROM1) & 0xF; if (!SK_YUKON_FAMILY(sc->sk_type)) { switch(sc_if->sk_phytype) { case SK_PHYTYPE_XMAC: sc_if->sk_phyaddr = SK_PHYADDR_XMAC; break; case SK_PHYTYPE_BCOM: sc_if->sk_phyaddr = SK_PHYADDR_BCOM; break; default: device_printf(sc->sk_dev, "unsupported PHY type: %d\n", sc_if->sk_phytype); error = ENODEV; SK_IF_UNLOCK(sc_if); goto fail; } } else { if (sc_if->sk_phytype < SK_PHYTYPE_MARV_COPPER && sc->sk_pmd != 'S') { /* not initialized, punt */ sc_if->sk_phytype = SK_PHYTYPE_MARV_COPPER; sc->sk_coppertype = 1; } sc_if->sk_phyaddr = SK_PHYADDR_MARV; if (!(sc->sk_coppertype)) sc_if->sk_phytype = SK_PHYTYPE_MARV_FIBER; } /* * Call MI attach routine. Can't hold locks when calling into ether_*. */ SK_IF_UNLOCK(sc_if); ether_ifattach(ifp, eaddr); SK_IF_LOCK(sc_if); /* * The hardware should be ready for VLAN_MTU by default: * XMAC II has 0x8100 in VLAN Tag Level 1 register initially; * YU_SMR_MFL_VLAN is set by this driver in Yukon. * */ ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_MTU; /* * Tell the upper layer(s) we support long frames. * Must appear after the call to ether_ifattach() because * ether_ifattach() sets ifi_hdrlen to the default value. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); /* * Do miibus setup. */ switch (sc->sk_type) { case SK_GENESIS: sk_init_xmac(sc_if); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: sk_init_yukon(sc_if); break; } SK_IF_UNLOCK(sc_if); if (mii_phy_probe(dev, &sc_if->sk_miibus, sk_ifmedia_upd, sk_ifmedia_sts)) { device_printf(sc_if->sk_if_dev, "no PHY found!\n"); ether_ifdetach(ifp); error = ENXIO; goto fail; } fail: if (error) { /* Access should be ok even though lock has been dropped */ sc->sk_if[port] = NULL; sk_detach(dev); } return(error); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int skc_attach(dev) device_t dev; { struct sk_softc *sc; int error = 0, *port; uint8_t skrs; const char *pname = NULL; char *revstr; sc = device_get_softc(dev); sc->sk_dev = dev; mtx_init(&sc->sk_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&sc->sk_mii_mtx, "sk_mii_mutex", NULL, MTX_DEF); /* * Map control/status registers. */ pci_enable_busmaster(dev); /* Allocate resources */ #ifdef SK_USEIOSPACE sc->sk_res_spec = sk_res_spec_io; #else sc->sk_res_spec = sk_res_spec_mem; #endif error = bus_alloc_resources(dev, sc->sk_res_spec, sc->sk_res); if (error) { if (sc->sk_res_spec == sk_res_spec_mem) sc->sk_res_spec = sk_res_spec_io; else sc->sk_res_spec = sk_res_spec_mem; error = bus_alloc_resources(dev, sc->sk_res_spec, sc->sk_res); if (error) { device_printf(dev, "couldn't allocate %s resources\n", sc->sk_res_spec == sk_res_spec_mem ? 
"memory" : "I/O"); goto fail; } } sc->sk_type = sk_win_read_1(sc, SK_CHIPVER); sc->sk_rev = (sk_win_read_1(sc, SK_CONFIG) >> 4) & 0xf; /* Bail out if chip is not recognized. */ if (sc->sk_type != SK_GENESIS && !SK_YUKON_FAMILY(sc->sk_type)) { device_printf(dev, "unknown device: chipver=%02x, rev=%x\n", sc->sk_type, sc->sk_rev); error = ENXIO; goto fail; } SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "int_mod", CTLTYPE_INT|CTLFLAG_RW, &sc->sk_int_mod, 0, sysctl_hw_sk_int_mod, "I", "SK interrupt moderation"); /* Pull in device tunables. */ sc->sk_int_mod = SK_IM_DEFAULT; error = resource_int_value(device_get_name(dev), device_get_unit(dev), "int_mod", &sc->sk_int_mod); if (error == 0) { if (sc->sk_int_mod < SK_IM_MIN || sc->sk_int_mod > SK_IM_MAX) { device_printf(dev, "int_mod value out of range; " "using default: %d\n", SK_IM_DEFAULT); sc->sk_int_mod = SK_IM_DEFAULT; } } /* Reset the adapter. */ sk_reset(sc); skrs = sk_win_read_1(sc, SK_EPROM0); if (sc->sk_type == SK_GENESIS) { /* Read and save RAM size and RAMbuffer offset */ switch(skrs) { case SK_RAMSIZE_512K_64: sc->sk_ramsize = 0x80000; sc->sk_rboff = SK_RBOFF_0; break; case SK_RAMSIZE_1024K_64: sc->sk_ramsize = 0x100000; sc->sk_rboff = SK_RBOFF_80000; break; case SK_RAMSIZE_1024K_128: sc->sk_ramsize = 0x100000; sc->sk_rboff = SK_RBOFF_0; break; case SK_RAMSIZE_2048K_128: sc->sk_ramsize = 0x200000; sc->sk_rboff = SK_RBOFF_0; break; default: device_printf(dev, "unknown ram size: %d\n", skrs); error = ENXIO; goto fail; } } else { /* SK_YUKON_FAMILY */ if (skrs == 0x00) sc->sk_ramsize = 0x20000; else sc->sk_ramsize = skrs * (1<<12); sc->sk_rboff = SK_RBOFF_0; } /* Read and save physical media type */ sc->sk_pmd = sk_win_read_1(sc, SK_PMDTYPE); if (sc->sk_pmd == 'T' || sc->sk_pmd == '1') sc->sk_coppertype = 1; else sc->sk_coppertype = 0; /* Determine whether to name it with VPD PN or just make it up. * Marvell Yukon VPD PN seems to freqently be bogus. */ switch (pci_get_device(dev)) { case DEVICEID_SK_V1: case DEVICEID_BELKIN_5005: case DEVICEID_3COM_3C940: case DEVICEID_LINKSYS_EG1032: case DEVICEID_DLINK_DGE530T_A1: case DEVICEID_DLINK_DGE530T_B1: /* Stay with VPD PN. */ (void) pci_get_vpd_ident(dev, &pname); break; case DEVICEID_SK_V2: /* YUKON VPD PN might bear no resemblance to reality. */ switch (sc->sk_type) { case SK_GENESIS: /* Stay with VPD PN. */ (void) pci_get_vpd_ident(dev, &pname); break; case SK_YUKON: pname = "Marvell Yukon Gigabit Ethernet"; break; case SK_YUKON_LITE: pname = "Marvell Yukon Lite Gigabit Ethernet"; break; case SK_YUKON_LP: pname = "Marvell Yukon LP Gigabit Ethernet"; break; default: pname = "Marvell Yukon (Unknown) Gigabit Ethernet"; break; } /* Yukon Lite Rev. A0 needs special test. */ if (sc->sk_type == SK_YUKON || sc->sk_type == SK_YUKON_LP) { u_int32_t far; u_int8_t testbyte; /* Save flash address register before testing. */ far = sk_win_read_4(sc, SK_EP_ADDR); sk_win_write_1(sc, SK_EP_ADDR+0x03, 0xff); testbyte = sk_win_read_1(sc, SK_EP_ADDR+0x03); if (testbyte != 0x00) { /* Yukon Lite Rev. A0 detected. */ sc->sk_type = SK_YUKON_LITE; sc->sk_rev = SK_YUKON_LITE_REV_A0; /* Restore flash address register. 
*/ sk_win_write_4(sc, SK_EP_ADDR, far); } } break; default: device_printf(dev, "unknown device: vendor=%04x, device=%04x, " "chipver=%02x, rev=%x\n", pci_get_vendor(dev), pci_get_device(dev), sc->sk_type, sc->sk_rev); error = ENXIO; goto fail; } if (sc->sk_type == SK_YUKON_LITE) { switch (sc->sk_rev) { case SK_YUKON_LITE_REV_A0: revstr = "A0"; break; case SK_YUKON_LITE_REV_A1: revstr = "A1"; break; case SK_YUKON_LITE_REV_A3: revstr = "A3"; break; default: revstr = ""; break; } } else { revstr = ""; } /* Announce the product name and more VPD data if present. */ if (pname != NULL) device_printf(dev, "%s rev. %s(0x%x)\n", pname, revstr, sc->sk_rev); if (bootverbose) { device_printf(dev, "chip ver = 0x%02x\n", sc->sk_type); device_printf(dev, "chip rev = 0x%02x\n", sc->sk_rev); device_printf(dev, "SK_EPROM0 = 0x%02x\n", skrs); device_printf(dev, "SRAM size = 0x%06x\n", sc->sk_ramsize); } sc->sk_devs[SK_PORT_A] = device_add_child(dev, "sk", -1); if (sc->sk_devs[SK_PORT_A] == NULL) { device_printf(dev, "failed to add child for PORT_A\n"); error = ENXIO; goto fail; } port = malloc(sizeof(int), M_DEVBUF, M_NOWAIT); if (port == NULL) { device_printf(dev, "failed to allocate memory for " "ivars of PORT_A\n"); error = ENXIO; goto fail; } *port = SK_PORT_A; device_set_ivars(sc->sk_devs[SK_PORT_A], port); if (!(sk_win_read_1(sc, SK_CONFIG) & SK_CONFIG_SINGLEMAC)) { sc->sk_devs[SK_PORT_B] = device_add_child(dev, "sk", -1); if (sc->sk_devs[SK_PORT_B] == NULL) { device_printf(dev, "failed to add child for PORT_B\n"); error = ENXIO; goto fail; } port = malloc(sizeof(int), M_DEVBUF, M_NOWAIT); if (port == NULL) { device_printf(dev, "failed to allocate memory for " "ivars of PORT_B\n"); error = ENXIO; goto fail; } *port = SK_PORT_B; device_set_ivars(sc->sk_devs[SK_PORT_B], port); } /* Turn on the 'driver is loaded' LED. */ CSR_WRITE_2(sc, SK_LED, SK_LED_GREEN_ON); error = bus_generic_attach(dev); if (error) { device_printf(dev, "failed to attach port(s)\n"); goto fail; } /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(dev, sc->sk_res[1], INTR_TYPE_NET|INTR_MPSAFE, NULL, sk_intr, sc, &sc->sk_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); goto fail; } fail: if (error) skc_detach(dev); return(error); } /* * Shut down hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int sk_detach(dev) device_t dev; { struct sk_if_softc *sc_if; struct ifnet *ifp; sc_if = device_get_softc(dev); KASSERT(mtx_initialized(&sc_if->sk_softc->sk_mtx), ("sk mutex not initialized in sk_detach")); SK_IF_LOCK(sc_if); ifp = sc_if->sk_ifp; /* These should only be active if attach_xmac succeeded */ if (device_is_attached(dev)) { sk_stop(sc_if); /* Can't hold locks while calling detach */ SK_IF_UNLOCK(sc_if); callout_drain(&sc_if->sk_tick_ch); callout_drain(&sc_if->sk_watchdog_ch); ether_ifdetach(ifp); SK_IF_LOCK(sc_if); } if (ifp) if_free(ifp); /* * We're generally called from skc_detach() which is using * device_delete_child() to get to here. It's already trashed * miibus for us, so don't do it here or we'll panic.
*/ /* if (sc_if->sk_miibus != NULL) device_delete_child(dev, sc_if->sk_miibus); */ bus_generic_detach(dev); sk_dma_free(sc_if); SK_IF_UNLOCK(sc_if); return(0); } static int skc_detach(dev) device_t dev; { struct sk_softc *sc; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->sk_mtx), ("sk mutex not initialized")); if (device_is_alive(dev)) { if (sc->sk_devs[SK_PORT_A] != NULL) { free(device_get_ivars(sc->sk_devs[SK_PORT_A]), M_DEVBUF); device_delete_child(dev, sc->sk_devs[SK_PORT_A]); } if (sc->sk_devs[SK_PORT_B] != NULL) { free(device_get_ivars(sc->sk_devs[SK_PORT_B]), M_DEVBUF); device_delete_child(dev, sc->sk_devs[SK_PORT_B]); } bus_generic_detach(dev); } if (sc->sk_intrhand) bus_teardown_intr(dev, sc->sk_res[1], sc->sk_intrhand); bus_release_resources(dev, sc->sk_res_spec, sc->sk_res); mtx_destroy(&sc->sk_mii_mtx); mtx_destroy(&sc->sk_mtx); return(0); } struct sk_dmamap_arg { bus_addr_t sk_busaddr; }; static void sk_dmamap_cb(arg, segs, nseg, error) void *arg; bus_dma_segment_t *segs; int nseg; int error; { struct sk_dmamap_arg *ctx; if (error != 0) return; ctx = arg; ctx->sk_busaddr = segs[0].ds_addr; } /* * Allocate jumbo buffer storage. The SysKonnect adapters support * "jumbograms" (9K frames), although SysKonnect doesn't currently * use them in their drivers. In order for us to use them, we need * large 9K receive buffers, however standard mbuf clusters are only * 2048 bytes in size. Consequently, we need to allocate and manage * our own jumbo buffer pool. Fortunately, this does not require an * excessive amount of additional code. */ static int sk_dma_alloc(sc_if) struct sk_if_softc *sc_if; { struct sk_dmamap_arg ctx; struct sk_txdesc *txd; struct sk_rxdesc *rxd; struct sk_rxdesc *jrxd; u_int8_t *ptr; struct sk_jpool_entry *entry; int error, i; mtx_init(&sc_if->sk_jlist_mtx, "sk_jlist_mtx", NULL, MTX_DEF); SLIST_INIT(&sc_if->sk_jfree_listhead); SLIST_INIT(&sc_if->sk_jinuse_listhead); /* create parent tag */ /* * XXX * This driver should use BUS_SPACE_MAXADDR for lowaddr argument * in bus_dma_tag_create(9) as the NIC would support DAC mode. * However bz@ reported that it does not work on amd64 with > 4GB * RAM. Until we have more clues of the breakage, disable DAC mode * by limiting DMA address to be in 32bit address space. 
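 * Concretely, the parent tag created below passes
 * BUS_SPACE_MAXADDR_32BIT as its lowaddr; child tags inherit that
 * 4GB ceiling even where they pass BUS_SPACE_MAXADDR themselves, so
 * bus_dma bounces any buffer that lands above it.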
*/ error = bus_dma_tag_create( bus_get_dma_tag(sc_if->sk_if_dev),/* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->sk_cdata.sk_parent_tag); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to create parent DMA tag\n"); goto fail; } /* create tag for Tx ring */ error = bus_dma_tag_create(sc_if->sk_cdata.sk_parent_tag,/* parent */ SK_RING_ALIGN, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ SK_TX_RING_SZ, /* maxsize */ 1, /* nsegments */ SK_TX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->sk_cdata.sk_tx_ring_tag); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate Tx ring DMA tag\n"); goto fail; } /* create tag for Rx ring */ error = bus_dma_tag_create(sc_if->sk_cdata.sk_parent_tag,/* parent */ SK_RING_ALIGN, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ SK_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ SK_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->sk_cdata.sk_rx_ring_tag); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate Rx ring DMA tag\n"); goto fail; } /* create tag for jumbo Rx ring */ error = bus_dma_tag_create(sc_if->sk_cdata.sk_parent_tag,/* parent */ SK_RING_ALIGN, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ SK_JUMBO_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ SK_JUMBO_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->sk_cdata.sk_jumbo_rx_ring_tag); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate jumbo Rx ring DMA tag\n"); goto fail; } /* create tag for jumbo buffer blocks */ error = bus_dma_tag_create(sc_if->sk_cdata.sk_parent_tag,/* parent */ PAGE_SIZE, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ SK_JMEM, /* maxsize */ 1, /* nsegments */ SK_JMEM, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->sk_cdata.sk_jumbo_tag); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate jumbo Rx buffer block DMA tag\n"); goto fail; } /* create tag for Tx buffers */ error = bus_dma_tag_create(sc_if->sk_cdata.sk_parent_tag,/* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES * SK_MAXTXSEGS, /* maxsize */ SK_MAXTXSEGS, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->sk_cdata.sk_tx_tag); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate Tx DMA tag\n"); goto fail; } /* create tag for Rx buffers */ error = bus_dma_tag_create(sc_if->sk_cdata.sk_parent_tag,/* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->sk_cdata.sk_rx_tag); if (error != 0) { device_printf(sc_if->sk_if_dev, 
"failed to allocate Rx DMA tag\n"); goto fail; } /* create tag for jumbo Rx buffers */ error = bus_dma_tag_create(sc_if->sk_cdata.sk_parent_tag,/* parent */ PAGE_SIZE, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES * SK_MAXRXSEGS, /* maxsize */ SK_MAXRXSEGS, /* nsegments */ SK_JLEN, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->sk_cdata.sk_jumbo_rx_tag); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate jumbo Rx DMA tag\n"); goto fail; } /* allocate DMA'able memory and load the DMA map for Tx ring */ error = bus_dmamem_alloc(sc_if->sk_cdata.sk_tx_ring_tag, (void **)&sc_if->sk_rdata.sk_tx_ring, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc_if->sk_cdata.sk_tx_ring_map); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate DMA'able memory for Tx ring\n"); goto fail; } ctx.sk_busaddr = 0; error = bus_dmamap_load(sc_if->sk_cdata.sk_tx_ring_tag, sc_if->sk_cdata.sk_tx_ring_map, sc_if->sk_rdata.sk_tx_ring, SK_TX_RING_SZ, sk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to load DMA'able memory for Tx ring\n"); goto fail; } sc_if->sk_rdata.sk_tx_ring_paddr = ctx.sk_busaddr; /* allocate DMA'able memory and load the DMA map for Rx ring */ error = bus_dmamem_alloc(sc_if->sk_cdata.sk_rx_ring_tag, (void **)&sc_if->sk_rdata.sk_rx_ring, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc_if->sk_cdata.sk_rx_ring_map); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate DMA'able memory for Rx ring\n"); goto fail; } ctx.sk_busaddr = 0; error = bus_dmamap_load(sc_if->sk_cdata.sk_rx_ring_tag, sc_if->sk_cdata.sk_rx_ring_map, sc_if->sk_rdata.sk_rx_ring, SK_RX_RING_SZ, sk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to load DMA'able memory for Rx ring\n"); goto fail; } sc_if->sk_rdata.sk_rx_ring_paddr = ctx.sk_busaddr; /* allocate DMA'able memory and load the DMA map for jumbo Rx ring */ error = bus_dmamem_alloc(sc_if->sk_cdata.sk_jumbo_rx_ring_tag, (void **)&sc_if->sk_rdata.sk_jumbo_rx_ring, BUS_DMA_NOWAIT|BUS_DMA_ZERO, &sc_if->sk_cdata.sk_jumbo_rx_ring_map); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate DMA'able memory for jumbo Rx ring\n"); goto fail; } ctx.sk_busaddr = 0; error = bus_dmamap_load(sc_if->sk_cdata.sk_jumbo_rx_ring_tag, sc_if->sk_cdata.sk_jumbo_rx_ring_map, sc_if->sk_rdata.sk_jumbo_rx_ring, SK_JUMBO_RX_RING_SZ, sk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to load DMA'able memory for jumbo Rx ring\n"); goto fail; } sc_if->sk_rdata.sk_jumbo_rx_ring_paddr = ctx.sk_busaddr; /* create DMA maps for Tx buffers */ for (i = 0; i < SK_TX_RING_CNT; i++) { txd = &sc_if->sk_cdata.sk_txdesc[i]; txd->tx_m = NULL; txd->tx_dmamap = 0; error = bus_dmamap_create(sc_if->sk_cdata.sk_tx_tag, 0, &txd->tx_dmamap); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to create Tx dmamap\n"); goto fail; } } /* create DMA maps for Rx buffers */ if ((error = bus_dmamap_create(sc_if->sk_cdata.sk_rx_tag, 0, &sc_if->sk_cdata.sk_rx_sparemap)) != 0) { device_printf(sc_if->sk_if_dev, "failed to create spare Rx dmamap\n"); goto fail; } for (i = 0; i < SK_RX_RING_CNT; i++) { rxd = &sc_if->sk_cdata.sk_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_dmamap = 0; error = bus_dmamap_create(sc_if->sk_cdata.sk_rx_tag, 0, &rxd->rx_dmamap); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to create Rx dmamap\n"); goto fail; } } 
/* create DMA maps for jumbo Rx buffers */ if ((error = bus_dmamap_create(sc_if->sk_cdata.sk_jumbo_rx_tag, 0, &sc_if->sk_cdata.sk_jumbo_rx_sparemap)) != 0) { device_printf(sc_if->sk_if_dev, "failed to create spare jumbo Rx dmamap\n"); goto fail; } for (i = 0; i < SK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->sk_cdata.sk_jumbo_rxdesc[i]; jrxd->rx_m = NULL; jrxd->rx_dmamap = 0; error = bus_dmamap_create(sc_if->sk_cdata.sk_jumbo_rx_tag, 0, &jrxd->rx_dmamap); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to create jumbo Rx dmamap\n"); goto fail; } } /* allocate DMA'able memory and load the DMA map for jumbo buf */ error = bus_dmamem_alloc(sc_if->sk_cdata.sk_jumbo_tag, (void **)&sc_if->sk_rdata.sk_jumbo_buf, BUS_DMA_NOWAIT|BUS_DMA_ZERO, &sc_if->sk_cdata.sk_jumbo_map); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to allocate DMA'able memory for jumbo buf\n"); goto fail; } ctx.sk_busaddr = 0; error = bus_dmamap_load(sc_if->sk_cdata.sk_jumbo_tag, sc_if->sk_cdata.sk_jumbo_map, sc_if->sk_rdata.sk_jumbo_buf, SK_JMEM, sk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->sk_if_dev, "failed to load DMA'able memory for jumbobuf\n"); goto fail; } sc_if->sk_rdata.sk_jumbo_buf_paddr = ctx.sk_busaddr; /* * Now divide it up into 9K pieces and save the addresses * in an array. */ ptr = sc_if->sk_rdata.sk_jumbo_buf; for (i = 0; i < SK_JSLOTS; i++) { sc_if->sk_cdata.sk_jslots[i] = ptr; ptr += SK_JLEN; entry = malloc(sizeof(struct sk_jpool_entry), M_DEVBUF, M_NOWAIT); if (entry == NULL) { device_printf(sc_if->sk_if_dev, "no memory for jumbo buffers!\n"); error = ENOMEM; goto fail; } entry->slot = i; SLIST_INSERT_HEAD(&sc_if->sk_jfree_listhead, entry, jpool_entries); } fail: return (error); } static void sk_dma_free(sc_if) struct sk_if_softc *sc_if; { struct sk_txdesc *txd; struct sk_rxdesc *rxd; struct sk_rxdesc *jrxd; struct sk_jpool_entry *entry; int i; SK_JLIST_LOCK(sc_if); while ((entry = SLIST_FIRST(&sc_if->sk_jinuse_listhead))) { device_printf(sc_if->sk_if_dev, "asked to free buffer that is in use!\n"); SLIST_REMOVE_HEAD(&sc_if->sk_jinuse_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc_if->sk_jfree_listhead, entry, jpool_entries); } while (!SLIST_EMPTY(&sc_if->sk_jfree_listhead)) { entry = SLIST_FIRST(&sc_if->sk_jfree_listhead); SLIST_REMOVE_HEAD(&sc_if->sk_jfree_listhead, jpool_entries); free(entry, M_DEVBUF); } SK_JLIST_UNLOCK(sc_if); /* destroy jumbo buffer block */ if (sc_if->sk_cdata.sk_jumbo_map) bus_dmamap_unload(sc_if->sk_cdata.sk_jumbo_tag, sc_if->sk_cdata.sk_jumbo_map); if (sc_if->sk_rdata.sk_jumbo_buf) { bus_dmamem_free(sc_if->sk_cdata.sk_jumbo_tag, sc_if->sk_rdata.sk_jumbo_buf, sc_if->sk_cdata.sk_jumbo_map); sc_if->sk_rdata.sk_jumbo_buf = NULL; sc_if->sk_cdata.sk_jumbo_map = 0; } /* Tx ring */ if (sc_if->sk_cdata.sk_tx_ring_tag) { if (sc_if->sk_cdata.sk_tx_ring_map) bus_dmamap_unload(sc_if->sk_cdata.sk_tx_ring_tag, sc_if->sk_cdata.sk_tx_ring_map); if (sc_if->sk_cdata.sk_tx_ring_map && sc_if->sk_rdata.sk_tx_ring) bus_dmamem_free(sc_if->sk_cdata.sk_tx_ring_tag, sc_if->sk_rdata.sk_tx_ring, sc_if->sk_cdata.sk_tx_ring_map); sc_if->sk_rdata.sk_tx_ring = NULL; sc_if->sk_cdata.sk_tx_ring_map = 0; bus_dma_tag_destroy(sc_if->sk_cdata.sk_tx_ring_tag); sc_if->sk_cdata.sk_tx_ring_tag = NULL; } /* Rx ring */ if (sc_if->sk_cdata.sk_rx_ring_tag) { if (sc_if->sk_cdata.sk_rx_ring_map) bus_dmamap_unload(sc_if->sk_cdata.sk_rx_ring_tag, sc_if->sk_cdata.sk_rx_ring_map); if (sc_if->sk_cdata.sk_rx_ring_map && sc_if->sk_rdata.sk_rx_ring) 
bus_dmamem_free(sc_if->sk_cdata.sk_rx_ring_tag, sc_if->sk_rdata.sk_rx_ring, sc_if->sk_cdata.sk_rx_ring_map); sc_if->sk_rdata.sk_rx_ring = NULL; sc_if->sk_cdata.sk_rx_ring_map = 0; bus_dma_tag_destroy(sc_if->sk_cdata.sk_rx_ring_tag); sc_if->sk_cdata.sk_rx_ring_tag = NULL; } /* jumbo Rx ring */ if (sc_if->sk_cdata.sk_jumbo_rx_ring_tag) { if (sc_if->sk_cdata.sk_jumbo_rx_ring_map) bus_dmamap_unload(sc_if->sk_cdata.sk_jumbo_rx_ring_tag, sc_if->sk_cdata.sk_jumbo_rx_ring_map); if (sc_if->sk_cdata.sk_jumbo_rx_ring_map && sc_if->sk_rdata.sk_jumbo_rx_ring) bus_dmamem_free(sc_if->sk_cdata.sk_jumbo_rx_ring_tag, sc_if->sk_rdata.sk_jumbo_rx_ring, sc_if->sk_cdata.sk_jumbo_rx_ring_map); sc_if->sk_rdata.sk_jumbo_rx_ring = NULL; sc_if->sk_cdata.sk_jumbo_rx_ring_map = 0; bus_dma_tag_destroy(sc_if->sk_cdata.sk_jumbo_rx_ring_tag); sc_if->sk_cdata.sk_jumbo_rx_ring_tag = NULL; } /* Tx buffers */ if (sc_if->sk_cdata.sk_tx_tag) { for (i = 0; i < SK_TX_RING_CNT; i++) { txd = &sc_if->sk_cdata.sk_txdesc[i]; if (txd->tx_dmamap) { bus_dmamap_destroy(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap); txd->tx_dmamap = 0; } } bus_dma_tag_destroy(sc_if->sk_cdata.sk_tx_tag); sc_if->sk_cdata.sk_tx_tag = NULL; } /* Rx buffers */ if (sc_if->sk_cdata.sk_rx_tag) { for (i = 0; i < SK_RX_RING_CNT; i++) { rxd = &sc_if->sk_cdata.sk_rxdesc[i]; if (rxd->rx_dmamap) { bus_dmamap_destroy(sc_if->sk_cdata.sk_rx_tag, rxd->rx_dmamap); rxd->rx_dmamap = 0; } } if (sc_if->sk_cdata.sk_rx_sparemap) { bus_dmamap_destroy(sc_if->sk_cdata.sk_rx_tag, sc_if->sk_cdata.sk_rx_sparemap); sc_if->sk_cdata.sk_rx_sparemap = 0; } bus_dma_tag_destroy(sc_if->sk_cdata.sk_rx_tag); sc_if->sk_cdata.sk_rx_tag = NULL; } /* jumbo Rx buffers */ if (sc_if->sk_cdata.sk_jumbo_rx_tag) { for (i = 0; i < SK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->sk_cdata.sk_jumbo_rxdesc[i]; if (jrxd->rx_dmamap) { bus_dmamap_destroy( sc_if->sk_cdata.sk_jumbo_rx_tag, jrxd->rx_dmamap); jrxd->rx_dmamap = 0; } } if (sc_if->sk_cdata.sk_jumbo_rx_sparemap) { bus_dmamap_destroy(sc_if->sk_cdata.sk_jumbo_rx_tag, sc_if->sk_cdata.sk_jumbo_rx_sparemap); sc_if->sk_cdata.sk_jumbo_rx_sparemap = 0; } bus_dma_tag_destroy(sc_if->sk_cdata.sk_jumbo_rx_tag); sc_if->sk_cdata.sk_jumbo_rx_tag = NULL; } if (sc_if->sk_cdata.sk_parent_tag) { bus_dma_tag_destroy(sc_if->sk_cdata.sk_parent_tag); sc_if->sk_cdata.sk_parent_tag = NULL; } mtx_destroy(&sc_if->sk_jlist_mtx); } /* * Allocate a jumbo buffer. */ static void * sk_jalloc(sc_if) struct sk_if_softc *sc_if; { struct sk_jpool_entry *entry; SK_JLIST_LOCK(sc_if); entry = SLIST_FIRST(&sc_if->sk_jfree_listhead); if (entry == NULL) { SK_JLIST_UNLOCK(sc_if); return (NULL); } SLIST_REMOVE_HEAD(&sc_if->sk_jfree_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc_if->sk_jinuse_listhead, entry, jpool_entries); SK_JLIST_UNLOCK(sc_if); return (sc_if->sk_cdata.sk_jslots[entry->slot]); } /* * Release a jumbo buffer. */ static void sk_jfree(buf, args) void *buf; void *args; { struct sk_if_softc *sc_if; struct sk_jpool_entry *entry; int i; /* Extract the softc struct pointer. 
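sk_jalloc() above and sk_jfree() below manage the 9K slots by moving slot records between a free list and an in-use list. A self-contained userspace model of that bookkeeping (illustrative only, using the same sys/queue.h macros):

        #include <sys/queue.h>
        #include <stdlib.h>

        #define NSLOTS  16
        #define SLOTSZ  9216                    /* stand-in for SK_JLEN */

        struct slot {
                int                     idx;
                SLIST_ENTRY(slot)       link;
        };
        static SLIST_HEAD(, slot) free_head = SLIST_HEAD_INITIALIZER(free_head);
        static SLIST_HEAD(, slot) inuse_head = SLIST_HEAD_INITIALIZER(inuse_head);
        static char *pool;                      /* one contiguous buffer block */

        static int
        jpool_init(void)
        {
                static struct slot slots[NSLOTS];
                int i;

                if ((pool = malloc((size_t)NSLOTS * SLOTSZ)) == NULL)
                        return (-1);
                for (i = 0; i < NSLOTS; i++) {
                        slots[i].idx = i;
                        SLIST_INSERT_HEAD(&free_head, &slots[i], link);
                }
                return (0);
        }

        static void *
        jalloc(void)
        {
                struct slot *s;

                if ((s = SLIST_FIRST(&free_head)) == NULL)
                        return (NULL);
                SLIST_REMOVE_HEAD(&free_head, link);
                SLIST_INSERT_HEAD(&inuse_head, s, link);
                return (pool + s->idx * SLOTSZ);
        }

        static void
        jfree(void *buf)
        {
                struct slot *s;

                /* Like the driver, reuse any in-use record and rewrite its slot. */
                s = SLIST_FIRST(&inuse_head);
                s->idx = (int)(((char *)buf - pool) / SLOTSZ);
                SLIST_REMOVE_HEAD(&inuse_head, link);
                SLIST_INSERT_HEAD(&free_head, s, link);
        }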
*/ sc_if = (struct sk_if_softc *)args; KASSERT(sc_if != NULL, ("%s: can't find softc pointer!", __func__)); SK_JLIST_LOCK(sc_if); /* calculate the slot this buffer belongs to */ i = ((vm_offset_t)buf - (vm_offset_t)sc_if->sk_rdata.sk_jumbo_buf) / SK_JLEN; KASSERT(i >= 0 && i < SK_JSLOTS, ("%s: asked to free buffer that we don't manage!", __func__)); entry = SLIST_FIRST(&sc_if->sk_jinuse_listhead); KASSERT(entry != NULL, ("%s: buffer not in use!", __func__)); entry->slot = i; SLIST_REMOVE_HEAD(&sc_if->sk_jinuse_listhead, jpool_entries); SLIST_INSERT_HEAD(&sc_if->sk_jfree_listhead, entry, jpool_entries); if (SLIST_EMPTY(&sc_if->sk_jinuse_listhead)) wakeup(sc_if); SK_JLIST_UNLOCK(sc_if); } static void sk_txcksum(ifp, m, f) struct ifnet *ifp; struct mbuf *m; struct sk_tx_desc *f; { struct ip *ip; u_int16_t offset; u_int8_t *p; offset = sizeof(struct ip) + ETHER_HDR_LEN; for(; m && m->m_len == 0; m = m->m_next) ; if (m == NULL || m->m_len < ETHER_HDR_LEN) { if_printf(ifp, "%s: m_len < ETHER_HDR_LEN\n", __func__); /* checksum may be corrupted */ goto sendit; } if (m->m_len < ETHER_HDR_LEN + sizeof(u_int32_t)) { if (m->m_len != ETHER_HDR_LEN) { if_printf(ifp, "%s: m_len != ETHER_HDR_LEN\n", __func__); /* checksum may be corrupted */ goto sendit; } for(m = m->m_next; m && m->m_len == 0; m = m->m_next) ; if (m == NULL) { offset = sizeof(struct ip) + ETHER_HDR_LEN; /* checksum may be corrupted */ goto sendit; } ip = mtod(m, struct ip *); } else { p = mtod(m, u_int8_t *); p += ETHER_HDR_LEN; ip = (struct ip *)p; } offset = (ip->ip_hl << 2) + ETHER_HDR_LEN; sendit: f->sk_csum_startval = 0; f->sk_csum_start = htole32(((offset + m->m_pkthdr.csum_data) & 0xffff) | (offset << 16)); } static int sk_encap(sc_if, m_head) struct sk_if_softc *sc_if; struct mbuf **m_head; { struct sk_txdesc *txd; struct sk_tx_desc *f = NULL; struct mbuf *m; bus_dma_segment_t txsegs[SK_MAXTXSEGS]; u_int32_t cflags, frag, si, sk_ctl; int error, i, nseg; SK_IF_LOCK_ASSERT(sc_if); if ((txd = STAILQ_FIRST(&sc_if->sk_cdata.sk_txfreeq)) == NULL) return (ENOBUFS); error = bus_dmamap_load_mbuf_sg(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap, *m_head, txsegs, &nseg, 0); if (error == EFBIG) { m = m_defrag(*m_head, M_DONTWAIT); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOMEM); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap, *m_head, txsegs, &nseg, 0); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } if (sc_if->sk_cdata.sk_tx_cnt + nseg >= SK_TX_RING_CNT) { bus_dmamap_unload(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap); return (ENOBUFS); } m = *m_head; if ((m->m_pkthdr.csum_flags & sc_if->sk_ifp->if_hwassist) != 0) cflags = SK_OPCODE_CSUM; else cflags = SK_OPCODE_DEFAULT; si = frag = sc_if->sk_cdata.sk_tx_prod; for (i = 0; i < nseg; i++) { f = &sc_if->sk_rdata.sk_tx_ring[frag]; f->sk_data_lo = htole32(SK_ADDR_LO(txsegs[i].ds_addr)); f->sk_data_hi = htole32(SK_ADDR_HI(txsegs[i].ds_addr)); sk_ctl = txsegs[i].ds_len | cflags; if (i == 0) { if (cflags == SK_OPCODE_CSUM) sk_txcksum(sc_if->sk_ifp, m, f); sk_ctl |= SK_TXCTL_FIRSTFRAG; } else sk_ctl |= SK_TXCTL_OWN; f->sk_ctl = htole32(sk_ctl); sc_if->sk_cdata.sk_tx_cnt++; SK_INC(frag, SK_TX_RING_CNT); } sc_if->sk_cdata.sk_tx_prod = frag; /* set EOF on the last desciptor */ frag = (frag + SK_TX_RING_CNT - 1) % SK_TX_RING_CNT; f = &sc_if->sk_rdata.sk_tx_ring[frag]; f->sk_ctl |= htole32(SK_TXCTL_LASTFRAG | 
SK_TXCTL_EOF_INTR); /* turn the first descriptor ownership to NIC */ f = &sc_if->sk_rdata.sk_tx_ring[si]; f->sk_ctl |= htole32(SK_TXCTL_OWN); STAILQ_REMOVE_HEAD(&sc_if->sk_cdata.sk_txfreeq, tx_q); STAILQ_INSERT_TAIL(&sc_if->sk_cdata.sk_txbusyq, txd, tx_q); txd->tx_m = m; /* sync descriptors */ bus_dmamap_sync(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc_if->sk_cdata.sk_tx_ring_tag, sc_if->sk_cdata.sk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } static void sk_start(ifp) struct ifnet *ifp; { struct sk_if_softc *sc_if; sc_if = ifp->if_softc; SK_IF_LOCK(sc_if); sk_start_locked(ifp); SK_IF_UNLOCK(sc_if); return; } static void sk_start_locked(ifp) struct ifnet *ifp; { struct sk_softc *sc; struct sk_if_softc *sc_if; struct mbuf *m_head; int enq; sc_if = ifp->if_softc; sc = sc_if->sk_softc; SK_IF_LOCK_ASSERT(sc_if); for (enq = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc_if->sk_cdata.sk_tx_cnt < SK_TX_RING_CNT - 1; ) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. */ if (sk_encap(sc_if, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ BPF_MTAP(ifp, m_head); } if (enq > 0) { /* Transmit */ CSR_WRITE_4(sc, sc_if->sk_tx_bmu, SK_TXBMU_TX_START); /* Set a timeout in case the chip goes out to lunch. */ sc_if->sk_watchdog_timer = 5; } } static void sk_watchdog(arg) void *arg; { struct sk_if_softc *sc_if; struct ifnet *ifp; ifp = arg; sc_if = ifp->if_softc; SK_IF_LOCK_ASSERT(sc_if); if (sc_if->sk_watchdog_timer == 0 || --sc_if->sk_watchdog_timer) goto done; /* * Reclaim first as there is a possibility of losing Tx completion * interrupts. */ sk_txeof(sc_if); if (sc_if->sk_cdata.sk_tx_cnt != 0) { if_printf(sc_if->sk_ifp, "watchdog timeout\n"); ifp->if_oerrors++; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sk_init_locked(sc_if); } done: callout_reset(&sc_if->sk_watchdog_ch, hz, sk_watchdog, ifp); return; } static void skc_shutdown(dev) device_t dev; { struct sk_softc *sc; sc = device_get_softc(dev); SK_LOCK(sc); /* Turn off the 'driver is loaded' LED. */ CSR_WRITE_2(sc, SK_LED, SK_LED_GREEN_OFF); /* * Reset the GEnesis controller. Doing this should also * assert the resets on the attached XMAC(s). 
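A detail worth noting in sk_encap() above: every descriptor after the first is given SK_TXCTL_OWN as it is written, but the first descriptor's OWN bit is flipped only after the whole chain and the dmamap syncs are in place, so the NIC can never fetch a half-built frame. A generic, self-contained sketch of that ordering (names are not the driver's):

        #include <stdint.h>

        #define RING_CNT        512
        #define OWN             0x80000000u
        #define LASTFRAG        0x40000000u

        struct desc {
                uint32_t        addr;
                uint32_t        len_ctl;
        };

        /* Arm a chain of nseg fragments starting at slot 'first'. */
        static void
        encap_sketch(struct desc *ring, int first, const uint32_t *addr,
            const uint32_t *len, int nseg)
        {
                int i;

                for (i = 0; i < nseg; i++) {
                        struct desc *d = &ring[(first + i) % RING_CNT];

                        d->addr = addr[i];
                        /* Every descriptor but the first is handed over now. */
                        d->len_ctl = len[i] | (i != 0 ? OWN : 0);
                }
                ring[(first + nseg - 1) % RING_CNT].len_ctl |= LASTFRAG;
                /* The real code issues bus_dmamap_sync() before this point. */
                ring[first].len_ctl |= OWN;     /* NIC may now fetch the chain */
        }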
*/ sk_reset(sc); SK_UNLOCK(sc); return; } static int skc_suspend(dev) device_t dev; { struct sk_softc *sc; struct sk_if_softc *sc_if0, *sc_if1; struct ifnet *ifp0 = NULL, *ifp1 = NULL; sc = device_get_softc(dev); SK_LOCK(sc); sc_if0 = sc->sk_if[SK_PORT_A]; sc_if1 = sc->sk_if[SK_PORT_B]; if (sc_if0 != NULL) ifp0 = sc_if0->sk_ifp; if (sc_if1 != NULL) ifp1 = sc_if1->sk_ifp; if (ifp0 != NULL) sk_stop(sc_if0); if (ifp1 != NULL) sk_stop(sc_if1); sc->sk_suspended = 1; SK_UNLOCK(sc); return (0); } static int skc_resume(dev) device_t dev; { struct sk_softc *sc; struct sk_if_softc *sc_if0, *sc_if1; struct ifnet *ifp0 = NULL, *ifp1 = NULL; sc = device_get_softc(dev); SK_LOCK(sc); sc_if0 = sc->sk_if[SK_PORT_A]; sc_if1 = sc->sk_if[SK_PORT_B]; if (sc_if0 != NULL) ifp0 = sc_if0->sk_ifp; if (sc_if1 != NULL) ifp1 = sc_if1->sk_ifp; if (ifp0 != NULL && ifp0->if_flags & IFF_UP) sk_init_locked(sc_if0); if (ifp1 != NULL && ifp1->if_flags & IFF_UP) sk_init_locked(sc_if1); sc->sk_suspended = 0; SK_UNLOCK(sc); return (0); } /* * According to the data sheet from SK-NET GENESIS the hardware can compute * two Rx checksums at the same time (each checksum start position is * programmed in Rx descriptors). However, it seems that TCP/UDP checksum * does not work, at least on my Yukon hardware. I tried every possible way * to get a correct checksum value but couldn't get one. So TCP/UDP * checksum offload is disabled at the moment and only IP checksum offload * is enabled. * As the normal IP header size is 20 bytes, I can't expect it to give an * increase in throughput. However, it seems it doesn't hurt performance in * my testing. If there is more detailed information on the checksum secret * of the hardware in question, please contact yongari@FreeBSD.org to add * TCP/UDP checksum offload support. */ static __inline void sk_rxcksum(ifp, m, csum) struct ifnet *ifp; struct mbuf *m; u_int32_t csum; { struct ether_header *eh; struct ip *ip; int32_t hlen, len, pktlen; u_int16_t csum1, csum2, ipcsum; pktlen = m->m_pkthdr.len; if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) return; eh = mtod(m, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(eh + 1); if (ip->ip_v != IPVERSION) return; hlen = ip->ip_hl << 2; pktlen -= sizeof(struct ether_header); if (hlen < sizeof(struct ip)) return; if (ntohs(ip->ip_len) < hlen) return; if (ntohs(ip->ip_len) != pktlen) return; csum1 = htons(csum & 0xffff); csum2 = htons((csum >> 16) & 0xffff); ipcsum = in_addword(csum1, ~csum2 & 0xffff); /* checksum fixup for IP options */ len = hlen - sizeof(struct ip); if (len > 0) { /* * If the second checksum value is correct we can compute the IP * checksum with simple math. Unfortunately the second checksum * value is wrong, so we can't verify the checksum from that * value (it seems some magic is needed to get the correct * value). If the second checksum value were correct it would also * mean we could get the TCP/UDP checksum here. However, it still * needs a pseudo header checksum calculation due to hardware * limitations.
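The ipcsum == 0xffff test that follows relies on the standard ones-complement property: an IPv4 header whose checksum field is correct sums to 0xffff. A self-contained software equivalent of that check:

        #include <stdint.h>
        #include <stddef.h>

        /* Ones-complement sum over 16-bit words, with end-around carry fold. */
        static uint16_t
        ocsum(const uint16_t *p, size_t nwords)
        {
                uint32_t sum = 0;

                while (nwords-- > 0)
                        sum += *p++;
                while (sum > 0xffff)
                        sum = (sum & 0xffff) + (sum >> 16);
                return ((uint16_t)sum);
        }

        /* Nonzero when the IPv4 header (checksum field included) verifies. */
        static int
        ip_hdr_ok(const void *hdr, size_t hlen)
        {
                return (ocsum(hdr, hlen / 2) == 0xffff);
        }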
*/ return; } m->m_pkthdr.csum_flags = CSUM_IP_CHECKED; if (ipcsum == 0xffff) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } static __inline int sk_rxvalid(sc, stat, len) struct sk_softc *sc; u_int32_t stat, len; { if (sc->sk_type == SK_GENESIS) { if ((stat & XM_RXSTAT_ERRFRAME) == XM_RXSTAT_ERRFRAME || XM_RXSTAT_BYTES(stat) != len) return (0); } else { if ((stat & (YU_RXSTAT_CRCERR | YU_RXSTAT_LONGERR | YU_RXSTAT_MIIERR | YU_RXSTAT_BADFC | YU_RXSTAT_GOODFC | YU_RXSTAT_JABBER)) != 0 || (stat & YU_RXSTAT_RXOK) != YU_RXSTAT_RXOK || YU_RXSTAT_BYTES(stat) != len) return (0); } return (1); } static void sk_rxeof(sc_if) struct sk_if_softc *sc_if; { struct sk_softc *sc; struct mbuf *m; struct ifnet *ifp; struct sk_rx_desc *cur_rx; struct sk_rxdesc *rxd; int cons, prog; u_int32_t csum, rxstat, sk_ctl; sc = sc_if->sk_softc; ifp = sc_if->sk_ifp; SK_IF_LOCK_ASSERT(sc_if); bus_dmamap_sync(sc_if->sk_cdata.sk_rx_ring_tag, sc_if->sk_cdata.sk_rx_ring_map, BUS_DMASYNC_POSTREAD); prog = 0; for (cons = sc_if->sk_cdata.sk_rx_cons; prog < SK_RX_RING_CNT; prog++, SK_INC(cons, SK_RX_RING_CNT)) { cur_rx = &sc_if->sk_rdata.sk_rx_ring[cons]; sk_ctl = le32toh(cur_rx->sk_ctl); if ((sk_ctl & SK_RXCTL_OWN) != 0) break; rxd = &sc_if->sk_cdata.sk_rxdesc[cons]; rxstat = le32toh(cur_rx->sk_xmac_rxstat); if ((sk_ctl & (SK_RXCTL_STATUS_VALID | SK_RXCTL_FIRSTFRAG | SK_RXCTL_LASTFRAG)) != (SK_RXCTL_STATUS_VALID | SK_RXCTL_FIRSTFRAG | SK_RXCTL_LASTFRAG) || SK_RXBYTES(sk_ctl) < SK_MIN_FRAMELEN || SK_RXBYTES(sk_ctl) > SK_MAX_FRAMELEN || sk_rxvalid(sc, rxstat, SK_RXBYTES(sk_ctl)) == 0) { ifp->if_ierrors++; sk_discard_rxbuf(sc_if, cons); continue; } m = rxd->rx_m; csum = le32toh(cur_rx->sk_csum); if (sk_newbuf(sc_if, cons) != 0) { ifp->if_iqdrops++; /* reuse old buffer */ sk_discard_rxbuf(sc_if, cons); continue; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = SK_RXBYTES(sk_ctl); ifp->if_ipackets++; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) sk_rxcksum(ifp, m, csum); SK_IF_UNLOCK(sc_if); (*ifp->if_input)(ifp, m); SK_IF_LOCK(sc_if); } if (prog > 0) { sc_if->sk_cdata.sk_rx_cons = cons; bus_dmamap_sync(sc_if->sk_cdata.sk_rx_ring_tag, sc_if->sk_cdata.sk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } } static void sk_jumbo_rxeof(sc_if) struct sk_if_softc *sc_if; { struct sk_softc *sc; struct mbuf *m; struct ifnet *ifp; struct sk_rx_desc *cur_rx; struct sk_rxdesc *jrxd; int cons, prog; u_int32_t csum, rxstat, sk_ctl; sc = sc_if->sk_softc; ifp = sc_if->sk_ifp; SK_IF_LOCK_ASSERT(sc_if); bus_dmamap_sync(sc_if->sk_cdata.sk_jumbo_rx_ring_tag, sc_if->sk_cdata.sk_jumbo_rx_ring_map, BUS_DMASYNC_POSTREAD); prog = 0; for (cons = sc_if->sk_cdata.sk_jumbo_rx_cons; prog < SK_JUMBO_RX_RING_CNT; prog++, SK_INC(cons, SK_JUMBO_RX_RING_CNT)) { cur_rx = &sc_if->sk_rdata.sk_jumbo_rx_ring[cons]; sk_ctl = le32toh(cur_rx->sk_ctl); if ((sk_ctl & SK_RXCTL_OWN) != 0) break; jrxd = &sc_if->sk_cdata.sk_jumbo_rxdesc[cons]; rxstat = le32toh(cur_rx->sk_xmac_rxstat); if ((sk_ctl & (SK_RXCTL_STATUS_VALID | SK_RXCTL_FIRSTFRAG | SK_RXCTL_LASTFRAG)) != (SK_RXCTL_STATUS_VALID | SK_RXCTL_FIRSTFRAG | SK_RXCTL_LASTFRAG) || SK_RXBYTES(sk_ctl) < SK_MIN_FRAMELEN || SK_RXBYTES(sk_ctl) > SK_JUMBO_FRAMELEN || sk_rxvalid(sc, rxstat, SK_RXBYTES(sk_ctl)) == 0) { ifp->if_ierrors++; sk_discard_jumbo_rxbuf(sc_if, cons); continue; } m = jrxd->rx_m; csum = le32toh(cur_rx->sk_csum); if (sk_jumbo_newbuf(sc_if, cons) != 0) { ifp->if_iqdrops++; /* reuse old buffer */ sk_discard_jumbo_rxbuf(sc_if, cons); continue; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = 
SK_RXBYTES(sk_ctl); ifp->if_ipackets++; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) sk_rxcksum(ifp, m, csum); SK_IF_UNLOCK(sc_if); (*ifp->if_input)(ifp, m); SK_IF_LOCK(sc_if); } if (prog > 0) { sc_if->sk_cdata.sk_jumbo_rx_cons = cons; bus_dmamap_sync(sc_if->sk_cdata.sk_jumbo_rx_ring_tag, sc_if->sk_cdata.sk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } } static void sk_txeof(sc_if) struct sk_if_softc *sc_if; { struct sk_softc *sc; struct sk_txdesc *txd; struct sk_tx_desc *cur_tx; struct ifnet *ifp; u_int32_t idx, sk_ctl; sc = sc_if->sk_softc; ifp = sc_if->sk_ifp; txd = STAILQ_FIRST(&sc_if->sk_cdata.sk_txbusyq); if (txd == NULL) return; bus_dmamap_sync(sc_if->sk_cdata.sk_tx_ring_tag, sc_if->sk_cdata.sk_tx_ring_map, BUS_DMASYNC_POSTREAD); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ for (idx = sc_if->sk_cdata.sk_tx_cons;; SK_INC(idx, SK_TX_RING_CNT)) { if (sc_if->sk_cdata.sk_tx_cnt <= 0) break; cur_tx = &sc_if->sk_rdata.sk_tx_ring[idx]; sk_ctl = le32toh(cur_tx->sk_ctl); if (sk_ctl & SK_TXCTL_OWN) break; sc_if->sk_cdata.sk_tx_cnt--; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if ((sk_ctl & SK_TXCTL_LASTFRAG) == 0) continue; bus_dmamap_sync(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap); ifp->if_opackets++; m_freem(txd->tx_m); txd->tx_m = NULL; STAILQ_REMOVE_HEAD(&sc_if->sk_cdata.sk_txbusyq, tx_q); STAILQ_INSERT_TAIL(&sc_if->sk_cdata.sk_txfreeq, txd, tx_q); txd = STAILQ_FIRST(&sc_if->sk_cdata.sk_txbusyq); } sc_if->sk_cdata.sk_tx_cons = idx; sc_if->sk_watchdog_timer = sc_if->sk_cdata.sk_tx_cnt > 0 ? 5 : 0; bus_dmamap_sync(sc_if->sk_cdata.sk_tx_ring_tag, sc_if->sk_cdata.sk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static void sk_tick(xsc_if) void *xsc_if; { struct sk_if_softc *sc_if; struct mii_data *mii; struct ifnet *ifp; int i; sc_if = xsc_if; ifp = sc_if->sk_ifp; mii = device_get_softc(sc_if->sk_miibus); if (!(ifp->if_flags & IFF_UP)) return; if (sc_if->sk_phytype == SK_PHYTYPE_BCOM) { sk_intr_bcom(sc_if); return; } /* * According to SysKonnect, the correct way to verify that * the link has come back up is to poll bit 0 of the GPIO * register three times. This pin has the signal from the * link_sync pin connected to it; if we read the same link * state 3 times in a row, we know the link is up. */ for (i = 0; i < 3; i++) { if (SK_XM_READ_2(sc_if, XM_GPIO) & XM_GPIO_GP0_SET) break; } if (i != 3) { callout_reset(&sc_if->sk_tick_ch, hz, sk_tick, sc_if); return; } /* Turn the GP0 interrupt back on. */ SK_XM_CLRBIT_2(sc_if, XM_IMR, XM_IMR_GP0_SET); SK_XM_READ_2(sc_if, XM_ISR); mii_tick(mii); callout_stop(&sc_if->sk_tick_ch); } static void sk_yukon_tick(xsc_if) void *xsc_if; { struct sk_if_softc *sc_if; struct mii_data *mii; sc_if = xsc_if; mii = device_get_softc(sc_if->sk_miibus); mii_tick(mii); callout_reset(&sc_if->sk_tick_ch, hz, sk_yukon_tick, sc_if); } static void sk_intr_bcom(sc_if) struct sk_if_softc *sc_if; { struct mii_data *mii; struct ifnet *ifp; int status; mii = device_get_softc(sc_if->sk_miibus); ifp = sc_if->sk_ifp; SK_XM_CLRBIT_2(sc_if, XM_MMUCMD, XM_MMUCMD_TX_ENB|XM_MMUCMD_RX_ENB); /* * Read the PHY interrupt register to make sure * we clear any pending interrupts. 
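Both receive loops above follow the same refill-first policy: a replacement buffer is allocated before the current mbuf is handed up, and on any failure the old buffer is recycled into the ring so no descriptor is ever lost. The decision ladder, distilled into a sketch (not driver code):

        #include <stdint.h>

        struct rxstats { uint64_t ipackets, iqdrops, ierrors; };

        enum rxaction { RX_DELIVER, RX_RECYCLE };

        /* Validate, refill, then deliver; the caller acts on the result. */
        static enum rxaction
        rx_decide(int frame_valid, int refill_ok, struct rxstats *st)
        {
                if (!frame_valid) {
                        st->ierrors++;          /* bad status: reuse the buffer */
                        return (RX_RECYCLE);
                }
                if (!refill_ok) {
                        st->iqdrops++;          /* no replacement mbuf: drop */
                        return (RX_RECYCLE);
                }
                st->ipackets++;
                return (RX_DELIVER);            /* hand the old mbuf up the stack */
        }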
*/ status = sk_xmac_miibus_readreg(sc_if, SK_PHYADDR_BCOM, BRGPHY_MII_ISR); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { sk_init_xmac(sc_if); return; } if (status & (BRGPHY_ISR_LNK_CHG|BRGPHY_ISR_AN_PR)) { int lstat; lstat = sk_xmac_miibus_readreg(sc_if, SK_PHYADDR_BCOM, BRGPHY_MII_AUXSTS); if (!(lstat & BRGPHY_AUXSTS_LINK) && sc_if->sk_link) { mii_mediachg(mii); /* Turn off the link LED. */ SK_IF_WRITE_1(sc_if, 0, SK_LINKLED1_CTL, SK_LINKLED_OFF); sc_if->sk_link = 0; } else if (status & BRGPHY_ISR_LNK_CHG) { sk_xmac_miibus_writereg(sc_if, SK_PHYADDR_BCOM, BRGPHY_MII_IMR, 0xFF00); mii_tick(mii); sc_if->sk_link = 1; /* Turn on the link LED. */ SK_IF_WRITE_1(sc_if, 0, SK_LINKLED1_CTL, SK_LINKLED_ON|SK_LINKLED_LINKSYNC_OFF| SK_LINKLED_BLINK_OFF); } else { mii_tick(mii); callout_reset(&sc_if->sk_tick_ch, hz, sk_tick, sc_if); } } SK_XM_SETBIT_2(sc_if, XM_MMUCMD, XM_MMUCMD_TX_ENB|XM_MMUCMD_RX_ENB); return; } static void sk_intr_xmac(sc_if) struct sk_if_softc *sc_if; { struct sk_softc *sc; u_int16_t status; sc = sc_if->sk_softc; status = SK_XM_READ_2(sc_if, XM_ISR); /* * Link has gone down. Start MII tick timeout to * watch for link resync. */ if (sc_if->sk_phytype == SK_PHYTYPE_XMAC) { if (status & XM_ISR_GP0_SET) { SK_XM_SETBIT_2(sc_if, XM_IMR, XM_IMR_GP0_SET); callout_reset(&sc_if->sk_tick_ch, hz, sk_tick, sc_if); } if (status & XM_ISR_AUTONEG_DONE) { callout_reset(&sc_if->sk_tick_ch, hz, sk_tick, sc_if); } } if (status & XM_IMR_TX_UNDERRUN) SK_XM_SETBIT_4(sc_if, XM_MODE, XM_MODE_FLUSH_TXFIFO); if (status & XM_IMR_RX_OVERRUN) SK_XM_SETBIT_4(sc_if, XM_MODE, XM_MODE_FLUSH_RXFIFO); status = SK_XM_READ_2(sc_if, XM_ISR); return; } static void sk_intr_yukon(sc_if) struct sk_if_softc *sc_if; { u_int8_t status; status = SK_IF_READ_1(sc_if, 0, SK_GMAC_ISR); /* RX overrun */ if ((status & SK_GMAC_INT_RX_OVER) != 0) { SK_IF_WRITE_1(sc_if, 0, SK_RXMF1_CTRL_TEST, SK_RFCTL_RX_FIFO_OVER); } /* TX underrun */ if ((status & SK_GMAC_INT_TX_UNDER) != 0) { SK_IF_WRITE_1(sc_if, 0, SK_RXMF1_CTRL_TEST, SK_TFCTL_TX_FIFO_UNDER); } } static void sk_intr(xsc) void *xsc; { struct sk_softc *sc = xsc; struct sk_if_softc *sc_if0, *sc_if1; struct ifnet *ifp0 = NULL, *ifp1 = NULL; u_int32_t status; SK_LOCK(sc); status = CSR_READ_4(sc, SK_ISSR); if (status == 0 || status == 0xffffffff || sc->sk_suspended) goto done_locked; sc_if0 = sc->sk_if[SK_PORT_A]; sc_if1 = sc->sk_if[SK_PORT_B]; if (sc_if0 != NULL) ifp0 = sc_if0->sk_ifp; if (sc_if1 != NULL) ifp1 = sc_if1->sk_ifp; for (; (status &= sc->sk_intrmask) != 0;) { /* Handle receive interrupts first. */ if (status & SK_ISR_RX1_EOF) { if (ifp0->if_mtu > SK_MAX_FRAMELEN) sk_jumbo_rxeof(sc_if0); else sk_rxeof(sc_if0); CSR_WRITE_4(sc, SK_BMU_RX_CSR0, SK_RXBMU_CLR_IRQ_EOF|SK_RXBMU_RX_START); } if (status & SK_ISR_RX2_EOF) { if (ifp1->if_mtu > SK_MAX_FRAMELEN) sk_jumbo_rxeof(sc_if1); else sk_rxeof(sc_if1); CSR_WRITE_4(sc, SK_BMU_RX_CSR1, SK_RXBMU_CLR_IRQ_EOF|SK_RXBMU_RX_START); } /* Then transmit interrupts. */ if (status & SK_ISR_TX1_S_EOF) { sk_txeof(sc_if0); CSR_WRITE_4(sc, SK_BMU_TXS_CSR0, SK_TXBMU_CLR_IRQ_EOF); } if (status & SK_ISR_TX2_S_EOF) { sk_txeof(sc_if1); CSR_WRITE_4(sc, SK_BMU_TXS_CSR1, SK_TXBMU_CLR_IRQ_EOF); } /* Then MAC interrupts. 
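sk_intr() above keeps re-reading the masked status word until no interesting bits remain, and bails out early on 0 or all-ones (interrupt not ours, or hardware gone). The generic shape of that loop, with the register access stubbed out for a self-contained sketch:

        #include <stdint.h>

        /* Stubs standing in for the hardware accessor and the dispatcher. */
        static uint32_t read_issr(void) { return (0); }
        static void dispatch(uint32_t events) { (void)events; }

        static void
        intr_sketch(uint32_t intrmask)
        {
                uint32_t status;

                status = read_issr();
                if (status == 0 || status == 0xffffffff)
                        return;                 /* shared IRQ or dead device */
                while ((status &= intrmask) != 0) {
                        dispatch(status);       /* RX EOF, TX EOF, MAC events */
                        status = read_issr();   /* pick up newly raised bits */
                }
        }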
*/ if (status & SK_ISR_MAC1 && ifp0->if_drv_flags & IFF_DRV_RUNNING) { if (sc->sk_type == SK_GENESIS) sk_intr_xmac(sc_if0); else sk_intr_yukon(sc_if0); } if (status & SK_ISR_MAC2 && ifp1->if_drv_flags & IFF_DRV_RUNNING) { if (sc->sk_type == SK_GENESIS) sk_intr_xmac(sc_if1); else sk_intr_yukon(sc_if1); } if (status & SK_ISR_EXTERNAL_REG) { if (ifp0 != NULL && sc_if0->sk_phytype == SK_PHYTYPE_BCOM) sk_intr_bcom(sc_if0); if (ifp1 != NULL && sc_if1->sk_phytype == SK_PHYTYPE_BCOM) sk_intr_bcom(sc_if1); } status = CSR_READ_4(sc, SK_ISSR); } CSR_WRITE_4(sc, SK_IMR, sc->sk_intrmask); if (ifp0 != NULL && !IFQ_DRV_IS_EMPTY(&ifp0->if_snd)) sk_start_locked(ifp0); if (ifp1 != NULL && !IFQ_DRV_IS_EMPTY(&ifp1->if_snd)) sk_start_locked(ifp1); done_locked: SK_UNLOCK(sc); } static void sk_init_xmac(sc_if) struct sk_if_softc *sc_if; { struct sk_softc *sc; struct ifnet *ifp; u_int16_t eaddr[(ETHER_ADDR_LEN+1)/2]; struct sk_bcom_hack bhack[] = { { 0x18, 0x0c20 }, { 0x17, 0x0012 }, { 0x15, 0x1104 }, { 0x17, 0x0013 }, { 0x15, 0x0404 }, { 0x17, 0x8006 }, { 0x15, 0x0132 }, { 0x17, 0x8006 }, { 0x15, 0x0232 }, { 0x17, 0x800D }, { 0x15, 0x000F }, { 0x18, 0x0420 }, { 0, 0 } }; SK_IF_LOCK_ASSERT(sc_if); sc = sc_if->sk_softc; ifp = sc_if->sk_ifp; /* Unreset the XMAC. */ SK_IF_WRITE_2(sc_if, 0, SK_TXF1_MACCTL, SK_TXMACCTL_XMAC_UNRESET); DELAY(1000); /* Reset the XMAC's internal state. */ SK_XM_SETBIT_2(sc_if, XM_GPIO, XM_GPIO_RESETMAC); /* Save the XMAC II revision */ sc_if->sk_xmac_rev = XM_XMAC_REV(SK_XM_READ_4(sc_if, XM_DEVID)); /* * Perform additional initialization for external PHYs, * namely for the 1000baseTX cards that use the XMAC's * GMII mode. */ if (sc_if->sk_phytype == SK_PHYTYPE_BCOM) { int i = 0; u_int32_t val; /* Take PHY out of reset. */ val = sk_win_read_4(sc, SK_GPIO); if (sc_if->sk_port == SK_PORT_A) val |= SK_GPIO_DIR0|SK_GPIO_DAT0; else val |= SK_GPIO_DIR2|SK_GPIO_DAT2; sk_win_write_4(sc, SK_GPIO, val); /* Enable GMII mode on the XMAC. */ SK_XM_SETBIT_2(sc_if, XM_HWCFG, XM_HWCFG_GMIIMODE); sk_xmac_miibus_writereg(sc_if, SK_PHYADDR_BCOM, BRGPHY_MII_BMCR, BRGPHY_BMCR_RESET); DELAY(10000); sk_xmac_miibus_writereg(sc_if, SK_PHYADDR_BCOM, BRGPHY_MII_IMR, 0xFFF0); /* * Early versions of the BCM5400 apparently have * a bug that requires them to have their reserved * registers initialized to some magic values. I don't * know what the numbers do, I'm just the messenger. */ if (sk_xmac_miibus_readreg(sc_if, SK_PHYADDR_BCOM, 0x03) == 0x6041) { while(bhack[i].reg) { sk_xmac_miibus_writereg(sc_if, SK_PHYADDR_BCOM, bhack[i].reg, bhack[i].val); i++; } } } /* Set station address */ bcopy(IF_LLADDR(sc_if->sk_ifp), eaddr, ETHER_ADDR_LEN); SK_XM_WRITE_2(sc_if, XM_PAR0, eaddr[0]); SK_XM_WRITE_2(sc_if, XM_PAR1, eaddr[1]); SK_XM_WRITE_2(sc_if, XM_PAR2, eaddr[2]); SK_XM_SETBIT_4(sc_if, XM_MODE, XM_MODE_RX_USE_STATION); if (ifp->if_flags & IFF_BROADCAST) { SK_XM_CLRBIT_4(sc_if, XM_MODE, XM_MODE_RX_NOBROAD); } else { SK_XM_SETBIT_4(sc_if, XM_MODE, XM_MODE_RX_NOBROAD); } /* We don't need the FCS appended to the packet. */ SK_XM_SETBIT_2(sc_if, XM_RXCMD, XM_RXCMD_STRIPFCS); /* We want short frames padded to 60 bytes. */ SK_XM_SETBIT_2(sc_if, XM_TXCMD, XM_TXCMD_AUTOPAD); /* * Enable the reception of all error frames. This is is * a necessary evil due to the design of the XMAC. The * XMAC's receive FIFO is only 8K in size, however jumbo * frames can be up to 9000 bytes in length. When bad * frame filtering is enabled, the XMAC's RX FIFO operates * in 'store and forward' mode. 
For this to work, the * entire frame has to fit into the FIFO, but that means * that jumbo frames larger than 8192 bytes will be * truncated. Disabling all bad frame filtering causes * the RX FIFO to operate in streaming mode, in which * case the XMAC will start transfering frames out of the * RX FIFO as soon as the FIFO threshold is reached. */ if (ifp->if_mtu > SK_MAX_FRAMELEN) { SK_XM_SETBIT_4(sc_if, XM_MODE, XM_MODE_RX_BADFRAMES| XM_MODE_RX_GIANTS|XM_MODE_RX_RUNTS|XM_MODE_RX_CRCERRS| XM_MODE_RX_INRANGELEN); SK_XM_SETBIT_2(sc_if, XM_RXCMD, XM_RXCMD_BIGPKTOK); } else SK_XM_CLRBIT_2(sc_if, XM_RXCMD, XM_RXCMD_BIGPKTOK); /* * Bump up the transmit threshold. This helps hold off transmit * underruns when we're blasting traffic from both ports at once. */ SK_XM_WRITE_2(sc_if, XM_TX_REQTHRESH, SK_XM_TX_FIFOTHRESH); /* Set promiscuous mode */ sk_setpromisc(sc_if); /* Set multicast filter */ sk_setmulti(sc_if); /* Clear and enable interrupts */ SK_XM_READ_2(sc_if, XM_ISR); if (sc_if->sk_phytype == SK_PHYTYPE_XMAC) SK_XM_WRITE_2(sc_if, XM_IMR, XM_INTRS); else SK_XM_WRITE_2(sc_if, XM_IMR, 0xFFFF); /* Configure MAC arbiter */ switch(sc_if->sk_xmac_rev) { case XM_XMAC_REV_B2: sk_win_write_1(sc, SK_RCINIT_RX1, SK_RCINIT_XMAC_B2); sk_win_write_1(sc, SK_RCINIT_TX1, SK_RCINIT_XMAC_B2); sk_win_write_1(sc, SK_RCINIT_RX2, SK_RCINIT_XMAC_B2); sk_win_write_1(sc, SK_RCINIT_TX2, SK_RCINIT_XMAC_B2); sk_win_write_1(sc, SK_MINIT_RX1, SK_MINIT_XMAC_B2); sk_win_write_1(sc, SK_MINIT_TX1, SK_MINIT_XMAC_B2); sk_win_write_1(sc, SK_MINIT_RX2, SK_MINIT_XMAC_B2); sk_win_write_1(sc, SK_MINIT_TX2, SK_MINIT_XMAC_B2); sk_win_write_1(sc, SK_RECOVERY_CTL, SK_RECOVERY_XMAC_B2); break; case XM_XMAC_REV_C1: sk_win_write_1(sc, SK_RCINIT_RX1, SK_RCINIT_XMAC_C1); sk_win_write_1(sc, SK_RCINIT_TX1, SK_RCINIT_XMAC_C1); sk_win_write_1(sc, SK_RCINIT_RX2, SK_RCINIT_XMAC_C1); sk_win_write_1(sc, SK_RCINIT_TX2, SK_RCINIT_XMAC_C1); sk_win_write_1(sc, SK_MINIT_RX1, SK_MINIT_XMAC_C1); sk_win_write_1(sc, SK_MINIT_TX1, SK_MINIT_XMAC_C1); sk_win_write_1(sc, SK_MINIT_RX2, SK_MINIT_XMAC_C1); sk_win_write_1(sc, SK_MINIT_TX2, SK_MINIT_XMAC_C1); sk_win_write_1(sc, SK_RECOVERY_CTL, SK_RECOVERY_XMAC_B2); break; default: break; } sk_win_write_2(sc, SK_MACARB_CTL, SK_MACARBCTL_UNRESET|SK_MACARBCTL_FASTOE_OFF); sc_if->sk_link = 1; return; } static void sk_init_yukon(sc_if) struct sk_if_softc *sc_if; { u_int32_t phy, v; u_int16_t reg; struct sk_softc *sc; struct ifnet *ifp; int i; SK_IF_LOCK_ASSERT(sc_if); sc = sc_if->sk_softc; ifp = sc_if->sk_ifp; if (sc->sk_type == SK_YUKON_LITE && sc->sk_rev >= SK_YUKON_LITE_REV_A3) { /* * Workaround code for COMA mode, set PHY reset. 
* Otherwise it will not correctly take chip out of * powerdown (coma) */ v = sk_win_read_4(sc, SK_GPIO); v |= SK_GPIO_DIR9 | SK_GPIO_DAT9; sk_win_write_4(sc, SK_GPIO, v); } /* GMAC and GPHY Reset */ SK_IF_WRITE_4(sc_if, 0, SK_GPHY_CTRL, SK_GPHY_RESET_SET); SK_IF_WRITE_4(sc_if, 0, SK_GMAC_CTRL, SK_GMAC_RESET_SET); DELAY(1000); if (sc->sk_type == SK_YUKON_LITE && sc->sk_rev >= SK_YUKON_LITE_REV_A3) { /* * Workaround code for COMA mode, clear PHY reset */ v = sk_win_read_4(sc, SK_GPIO); v |= SK_GPIO_DIR9; v &= ~SK_GPIO_DAT9; sk_win_write_4(sc, SK_GPIO, v); } phy = SK_GPHY_INT_POL_HI | SK_GPHY_DIS_FC | SK_GPHY_DIS_SLEEP | SK_GPHY_ENA_XC | SK_GPHY_ANEG_ALL | SK_GPHY_ENA_PAUSE; if (sc->sk_coppertype) phy |= SK_GPHY_COPPER; else phy |= SK_GPHY_FIBER; SK_IF_WRITE_4(sc_if, 0, SK_GPHY_CTRL, phy | SK_GPHY_RESET_SET); DELAY(1000); SK_IF_WRITE_4(sc_if, 0, SK_GPHY_CTRL, phy | SK_GPHY_RESET_CLEAR); SK_IF_WRITE_4(sc_if, 0, SK_GMAC_CTRL, SK_GMAC_LOOP_OFF | SK_GMAC_PAUSE_ON | SK_GMAC_RESET_CLEAR); /* unused read of the interrupt source register */ SK_IF_READ_2(sc_if, 0, SK_GMAC_ISR); reg = SK_YU_READ_2(sc_if, YUKON_PAR); /* MIB Counter Clear Mode set */ reg |= YU_PAR_MIB_CLR; SK_YU_WRITE_2(sc_if, YUKON_PAR, reg); /* MIB Counter Clear Mode clear */ reg &= ~YU_PAR_MIB_CLR; SK_YU_WRITE_2(sc_if, YUKON_PAR, reg); /* receive control reg */ SK_YU_WRITE_2(sc_if, YUKON_RCR, YU_RCR_CRCR); /* transmit parameter register */ SK_YU_WRITE_2(sc_if, YUKON_TPR, YU_TPR_JAM_LEN(0x3) | YU_TPR_JAM_IPG(0xb) | YU_TPR_JAM2DATA_IPG(0x1a) ); /* serial mode register */ reg = YU_SMR_DATA_BLIND(0x1c) | YU_SMR_MFL_VLAN | YU_SMR_IPG_DATA(0x1e); if (ifp->if_mtu > SK_MAX_FRAMELEN) reg |= YU_SMR_MFL_JUMBO; SK_YU_WRITE_2(sc_if, YUKON_SMR, reg); /* Setup Yukon's address */ for (i = 0; i < 3; i++) { /* Write Source Address 1 (unicast filter) */ SK_YU_WRITE_2(sc_if, YUKON_SAL1 + i * 4, IF_LLADDR(sc_if->sk_ifp)[i * 2] | IF_LLADDR(sc_if->sk_ifp)[i * 2 + 1] << 8); } for (i = 0; i < 3; i++) { reg = sk_win_read_2(sc_if->sk_softc, SK_MAC1_0 + i * 2 + sc_if->sk_port * 8); SK_YU_WRITE_2(sc_if, YUKON_SAL2 + i * 4, reg); } /* Set promiscuous mode */ sk_setpromisc(sc_if); /* Set multicast filter */ sk_setmulti(sc_if); /* enable interrupt mask for counter overflows */ SK_YU_WRITE_2(sc_if, YUKON_TIMR, 0); SK_YU_WRITE_2(sc_if, YUKON_RIMR, 0); SK_YU_WRITE_2(sc_if, YUKON_TRIMR, 0); /* Configure RX MAC FIFO Flush Mask */ v = YU_RXSTAT_FOFL | YU_RXSTAT_CRCERR | YU_RXSTAT_MIIERR | YU_RXSTAT_BADFC | YU_RXSTAT_GOODFC | YU_RXSTAT_RUNT | YU_RXSTAT_JABBER; SK_IF_WRITE_2(sc_if, 0, SK_RXMF1_FLUSH_MASK, v); /* Disable RX MAC FIFO Flush for YUKON-Lite Rev. A0 only */ if (sc->sk_type == SK_YUKON_LITE && sc->sk_rev == SK_YUKON_LITE_REV_A0) v = SK_TFCTL_OPERATION_ON; else v = SK_TFCTL_OPERATION_ON | SK_RFCTL_FIFO_FLUSH_ON; /* Configure RX MAC FIFO */ SK_IF_WRITE_1(sc_if, 0, SK_RXMF1_CTRL_TEST, SK_RFCTL_RESET_CLEAR); SK_IF_WRITE_2(sc_if, 0, SK_RXMF1_CTRL_TEST, v); /* Increase flush threshould to 64 bytes */ SK_IF_WRITE_2(sc_if, 0, SK_RXMF1_FLUSH_THRESHOLD, SK_RFCTL_FIFO_THRESHOLD + 1); /* Configure TX MAC FIFO */ SK_IF_WRITE_1(sc_if, 0, SK_TXMF1_CTRL_TEST, SK_TFCTL_RESET_CLEAR); SK_IF_WRITE_2(sc_if, 0, SK_TXMF1_CTRL_TEST, SK_TFCTL_OPERATION_ON); } /* * Note that to properly initialize any part of the GEnesis chip, * you first have to take it out of reset mode. 
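The YUKON_SAL1 loop in sk_init_yukon() below writes the 6-byte station address as three little-endian 16-bit words at a 4-byte register stride. The packing, isolated into a self-contained helper:

        #include <stdint.h>

        /* Mirrors lladdr[i*2] | lladdr[i*2+1] << 8 as used for YUKON_SAL1. */
        static void
        pack_station_addr(const uint8_t lladdr[6], uint16_t word[3])
        {
                int i;

                for (i = 0; i < 3; i++)
                        word[i] = (uint16_t)(lladdr[i * 2] |
                            lladdr[i * 2 + 1] << 8);
        }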
*/ static void sk_init(xsc) void *xsc; { struct sk_if_softc *sc_if = xsc; SK_IF_LOCK(sc_if); sk_init_locked(sc_if); SK_IF_UNLOCK(sc_if); return; } static void sk_init_locked(sc_if) struct sk_if_softc *sc_if; { struct sk_softc *sc; struct ifnet *ifp; struct mii_data *mii; u_int16_t reg; u_int32_t imr; int error; SK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->sk_ifp; sc = sc_if->sk_softc; mii = device_get_softc(sc_if->sk_miibus); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* Cancel pending I/O and free all RX/TX buffers. */ sk_stop(sc_if); if (sc->sk_type == SK_GENESIS) { /* Configure LINK_SYNC LED */ SK_IF_WRITE_1(sc_if, 0, SK_LINKLED1_CTL, SK_LINKLED_ON); SK_IF_WRITE_1(sc_if, 0, SK_LINKLED1_CTL, SK_LINKLED_LINKSYNC_ON); /* Configure RX LED */ SK_IF_WRITE_1(sc_if, 0, SK_RXLED1_CTL, SK_RXLEDCTL_COUNTER_START); /* Configure TX LED */ SK_IF_WRITE_1(sc_if, 0, SK_TXLED1_CTL, SK_TXLEDCTL_COUNTER_START); } /* * Configure the descriptor poll timer. * * The SK-NET GENESIS data sheet says there is a possibility of losing * the Start transmit command due to CPU/cache-related interim storage * problems under certain conditions. The document recommends a polling * mechanism to send a Start transmit command to initiate transfer * of ready descriptors regularly. To cope with this issue, sk(4) now * enables the descriptor poll timer to initiate descriptor processing * periodically, as defined by SK_DPT_TIMER_MAX. However, sk(4) still * issues SK_TXBMU_TX_START to the Tx BMU to get fast execution of the Tx * command instead of waiting for the next descriptor polling time. * The same rule may apply to the Rx side too, but it seems that is not * needed at the moment. * Since sk(4) uses descriptor polling only as a last resort, there is no * need to set a smaller polling time than the maximum allowable one. */ SK_IF_WRITE_4(sc_if, 0, SK_DPT_INIT, SK_DPT_TIMER_MAX); /* Configure I2C registers */ /* Configure XMAC(s) */ switch (sc->sk_type) { case SK_GENESIS: sk_init_xmac(sc_if); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: sk_init_yukon(sc_if); break; } mii_mediachg(mii); if (sc->sk_type == SK_GENESIS) { /* Configure MAC FIFOs */ SK_IF_WRITE_4(sc_if, 0, SK_RXF1_CTL, SK_FIFO_UNRESET); SK_IF_WRITE_4(sc_if, 0, SK_RXF1_END, SK_FIFO_END); SK_IF_WRITE_4(sc_if, 0, SK_RXF1_CTL, SK_FIFO_ON); SK_IF_WRITE_4(sc_if, 0, SK_TXF1_CTL, SK_FIFO_UNRESET); SK_IF_WRITE_4(sc_if, 0, SK_TXF1_END, SK_FIFO_END); SK_IF_WRITE_4(sc_if, 0, SK_TXF1_CTL, SK_FIFO_ON); } /* Configure transmit arbiter(s) */ SK_IF_WRITE_1(sc_if, 0, SK_TXAR1_COUNTERCTL, SK_TXARCTL_ON|SK_TXARCTL_FSYNC_ON); /* Configure RAMbuffers */ SK_IF_WRITE_4(sc_if, 0, SK_RXRB1_CTLTST, SK_RBCTL_UNRESET); SK_IF_WRITE_4(sc_if, 0, SK_RXRB1_START, sc_if->sk_rx_ramstart); SK_IF_WRITE_4(sc_if, 0, SK_RXRB1_WR_PTR, sc_if->sk_rx_ramstart); SK_IF_WRITE_4(sc_if, 0, SK_RXRB1_RD_PTR, sc_if->sk_rx_ramstart); SK_IF_WRITE_4(sc_if, 0, SK_RXRB1_END, sc_if->sk_rx_ramend); SK_IF_WRITE_4(sc_if, 0, SK_RXRB1_CTLTST, SK_RBCTL_ON); SK_IF_WRITE_4(sc_if, 1, SK_TXRBS1_CTLTST, SK_RBCTL_UNRESET); SK_IF_WRITE_4(sc_if, 1, SK_TXRBS1_CTLTST, SK_RBCTL_STORENFWD_ON); SK_IF_WRITE_4(sc_if, 1, SK_TXRBS1_START, sc_if->sk_tx_ramstart); SK_IF_WRITE_4(sc_if, 1, SK_TXRBS1_WR_PTR, sc_if->sk_tx_ramstart); SK_IF_WRITE_4(sc_if, 1, SK_TXRBS1_RD_PTR, sc_if->sk_tx_ramstart); SK_IF_WRITE_4(sc_if, 1, SK_TXRBS1_END, sc_if->sk_tx_ramend); SK_IF_WRITE_4(sc_if, 1, SK_TXRBS1_CTLTST, SK_RBCTL_ON); /* Configure BMUs */ SK_IF_WRITE_4(sc_if, 0, SK_RXQ1_BMU_CSR, SK_RXBMU_ONLINE); if (ifp->if_mtu > SK_MAX_FRAMELEN) { SK_IF_WRITE_4(sc_if, 0, SK_RXQ1_CURADDR_LO,
SK_ADDR_LO(SK_JUMBO_RX_RING_ADDR(sc_if, 0))); SK_IF_WRITE_4(sc_if, 0, SK_RXQ1_CURADDR_HI, SK_ADDR_HI(SK_JUMBO_RX_RING_ADDR(sc_if, 0))); } else { SK_IF_WRITE_4(sc_if, 0, SK_RXQ1_CURADDR_LO, SK_ADDR_LO(SK_RX_RING_ADDR(sc_if, 0))); SK_IF_WRITE_4(sc_if, 0, SK_RXQ1_CURADDR_HI, SK_ADDR_HI(SK_RX_RING_ADDR(sc_if, 0))); } SK_IF_WRITE_4(sc_if, 1, SK_TXQS1_BMU_CSR, SK_TXBMU_ONLINE); SK_IF_WRITE_4(sc_if, 1, SK_TXQS1_CURADDR_LO, SK_ADDR_LO(SK_TX_RING_ADDR(sc_if, 0))); SK_IF_WRITE_4(sc_if, 1, SK_TXQS1_CURADDR_HI, SK_ADDR_HI(SK_TX_RING_ADDR(sc_if, 0))); /* Init descriptors */ if (ifp->if_mtu > SK_MAX_FRAMELEN) error = sk_init_jumbo_rx_ring(sc_if); else error = sk_init_rx_ring(sc_if); if (error != 0) { device_printf(sc_if->sk_if_dev, "initialization failed: no memory for rx buffers\n"); sk_stop(sc_if); return; } sk_init_tx_ring(sc_if); /* Set interrupt moderation if changed via sysctl. */ imr = sk_win_read_4(sc, SK_IMTIMERINIT); if (imr != SK_IM_USECS(sc->sk_int_mod, sc->sk_int_ticks)) { sk_win_write_4(sc, SK_IMTIMERINIT, SK_IM_USECS(sc->sk_int_mod, sc->sk_int_ticks)); if (bootverbose) device_printf(sc_if->sk_if_dev, "interrupt moderation is %d us.\n", sc->sk_int_mod); } /* Configure interrupt handling */ CSR_READ_4(sc, SK_ISSR); if (sc_if->sk_port == SK_PORT_A) sc->sk_intrmask |= SK_INTRS1; else sc->sk_intrmask |= SK_INTRS2; sc->sk_intrmask |= SK_ISR_EXTERNAL_REG; CSR_WRITE_4(sc, SK_IMR, sc->sk_intrmask); /* Start BMUs. */ SK_IF_WRITE_4(sc_if, 0, SK_RXQ1_BMU_CSR, SK_RXBMU_RX_START); switch(sc->sk_type) { case SK_GENESIS: /* Enable XMACs TX and RX state machines */ SK_XM_CLRBIT_2(sc_if, XM_MMUCMD, XM_MMUCMD_IGNPAUSE); SK_XM_SETBIT_2(sc_if, XM_MMUCMD, XM_MMUCMD_TX_ENB|XM_MMUCMD_RX_ENB); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: reg = SK_YU_READ_2(sc_if, YUKON_GPCR); reg |= YU_GPCR_TXEN | YU_GPCR_RXEN; #if 0 /* XXX disable 100Mbps and full duplex mode? 
*/ reg &= ~(YU_GPCR_SPEED | YU_GPCR_DPLX_DIS); #endif SK_YU_WRITE_2(sc_if, YUKON_GPCR, reg); } /* Activate descriptor polling timer */ SK_IF_WRITE_4(sc_if, 0, SK_DPT_TIMER_CTRL, SK_DPT_TCTL_START); /* start transfer of Tx descriptors */ CSR_WRITE_4(sc, sc_if->sk_tx_bmu, SK_TXBMU_TX_START); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; switch (sc->sk_type) { case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: callout_reset(&sc_if->sk_tick_ch, hz, sk_yukon_tick, sc_if); break; } callout_reset(&sc_if->sk_watchdog_ch, hz, sk_watchdog, ifp); return; } static void sk_stop(sc_if) struct sk_if_softc *sc_if; { int i; struct sk_softc *sc; struct sk_txdesc *txd; struct sk_rxdesc *rxd; struct sk_rxdesc *jrxd; struct ifnet *ifp; u_int32_t val; SK_IF_LOCK_ASSERT(sc_if); sc = sc_if->sk_softc; ifp = sc_if->sk_ifp; callout_stop(&sc_if->sk_tick_ch); callout_stop(&sc_if->sk_watchdog_ch); /* stop Tx descriptor polling timer */ SK_IF_WRITE_4(sc_if, 0, SK_DPT_TIMER_CTRL, SK_DPT_TCTL_STOP); /* stop transfer of Tx descriptors */ CSR_WRITE_4(sc, sc_if->sk_tx_bmu, SK_TXBMU_TX_STOP); for (i = 0; i < SK_TIMEOUT; i++) { val = CSR_READ_4(sc, sc_if->sk_tx_bmu); if ((val & SK_TXBMU_TX_STOP) == 0) break; DELAY(1); } if (i == SK_TIMEOUT) device_printf(sc_if->sk_if_dev, "can not stop transfer of Tx descriptor\n"); /* stop transfer of Rx descriptors */ SK_IF_WRITE_4(sc_if, 0, SK_RXQ1_BMU_CSR, SK_RXBMU_RX_STOP); for (i = 0; i < SK_TIMEOUT; i++) { val = SK_IF_READ_4(sc_if, 0, SK_RXQ1_BMU_CSR); if ((val & SK_RXBMU_RX_STOP) == 0) break; DELAY(1); } if (i == SK_TIMEOUT) device_printf(sc_if->sk_if_dev, "can not stop transfer of Rx descriptor\n"); if (sc_if->sk_phytype == SK_PHYTYPE_BCOM) { /* Put PHY back into reset. */ val = sk_win_read_4(sc, SK_GPIO); if (sc_if->sk_port == SK_PORT_A) { val |= SK_GPIO_DIR0; val &= ~SK_GPIO_DAT0; } else { val |= SK_GPIO_DIR2; val &= ~SK_GPIO_DAT2; } sk_win_write_4(sc, SK_GPIO, val); } /* Turn off various components of this interface. */ SK_XM_SETBIT_2(sc_if, XM_GPIO, XM_GPIO_RESETMAC); switch (sc->sk_type) { case SK_GENESIS: SK_IF_WRITE_2(sc_if, 0, SK_TXF1_MACCTL, SK_TXMACCTL_XMAC_RESET); SK_IF_WRITE_4(sc_if, 0, SK_RXF1_CTL, SK_FIFO_RESET); break; case SK_YUKON: case SK_YUKON_LITE: case SK_YUKON_LP: SK_IF_WRITE_1(sc_if,0, SK_RXMF1_CTRL_TEST, SK_RFCTL_RESET_SET); SK_IF_WRITE_1(sc_if,0, SK_TXMF1_CTRL_TEST, SK_TFCTL_RESET_SET); break; } SK_IF_WRITE_4(sc_if, 0, SK_RXQ1_BMU_CSR, SK_RXBMU_OFFLINE); SK_IF_WRITE_4(sc_if, 0, SK_RXRB1_CTLTST, SK_RBCTL_RESET|SK_RBCTL_OFF); SK_IF_WRITE_4(sc_if, 1, SK_TXQS1_BMU_CSR, SK_TXBMU_OFFLINE); SK_IF_WRITE_4(sc_if, 1, SK_TXRBS1_CTLTST, SK_RBCTL_RESET|SK_RBCTL_OFF); SK_IF_WRITE_1(sc_if, 0, SK_TXAR1_COUNTERCTL, SK_TXARCTL_OFF); SK_IF_WRITE_1(sc_if, 0, SK_RXLED1_CTL, SK_RXLEDCTL_COUNTER_STOP); SK_IF_WRITE_1(sc_if, 0, SK_TXLED1_CTL, SK_RXLEDCTL_COUNTER_STOP); SK_IF_WRITE_1(sc_if, 0, SK_LINKLED1_CTL, SK_LINKLED_OFF); SK_IF_WRITE_1(sc_if, 0, SK_LINKLED1_CTL, SK_LINKLED_LINKSYNC_OFF); /* Disable interrupts */ if (sc_if->sk_port == SK_PORT_A) sc->sk_intrmask &= ~SK_INTRS1; else sc->sk_intrmask &= ~SK_INTRS2; CSR_WRITE_4(sc, SK_IMR, sc->sk_intrmask); SK_XM_READ_2(sc_if, XM_ISR); SK_XM_WRITE_2(sc_if, XM_IMR, 0xFFFF); /* Free RX and TX mbufs still in the queues. 
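The stop paths above poll for the hardware to acknowledge SK_TXBMU_TX_STOP and SK_RXBMU_RX_STOP with a bounded DELAY(1) spin. That pattern factors into a small helper; a sketch using the driver's own accessors (this helper does not exist in the driver, and the Rx case would go through SK_IF_READ_4() instead):

        /* Spin until 'bits' clear in 'reg' or the SK_TIMEOUT budget expires. */
        static int
        wait_bits_clear(struct sk_softc *sc, u_int32_t reg, u_int32_t bits)
        {
                int i;

                for (i = 0; i < SK_TIMEOUT; i++) {
                        if ((CSR_READ_4(sc, reg) & bits) == 0)
                                return (0);     /* hardware acknowledged */
                        DELAY(1);
                }
                return (ETIMEDOUT);             /* caller prints a diagnostic */
        }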
*/ for (i = 0; i < SK_RX_RING_CNT; i++) { rxd = &sc_if->sk_cdata.sk_rxdesc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->sk_cdata.sk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->sk_cdata.sk_rx_tag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } for (i = 0; i < SK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->sk_cdata.sk_jumbo_rxdesc[i]; if (jrxd->rx_m != NULL) { bus_dmamap_sync(sc_if->sk_cdata.sk_jumbo_rx_tag, jrxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->sk_cdata.sk_jumbo_rx_tag, jrxd->rx_dmamap); m_freem(jrxd->rx_m); jrxd->rx_m = NULL; } } for (i = 0; i < SK_TX_RING_CNT; i++) { txd = &sc_if->sk_cdata.sk_txdesc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc_if->sk_cdata.sk_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } ifp->if_drv_flags &= ~(IFF_DRV_RUNNING|IFF_DRV_OACTIVE); return; } static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (!arg1) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || !req->newptr) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_sk_int_mod(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, SK_IM_MIN, SK_IM_MAX)); } Index: head/sys/kern/kern_mtxpool.c =================================================================== --- head/sys/kern/kern_mtxpool.c (revision 170034) +++ head/sys/kern/kern_mtxpool.c (revision 170035) @@ -1,218 +1,218 @@ /*- * Copyright (c) 2001 Matthew Dillon. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Mutex pool routines. These routines are designed to be used as short - * term leaf mutexes (e.g. the last mutex you might aquire other then + * term leaf mutexes (e.g. the last mutex you might acquire other then * calling msleep()). They operate using a shared pool. A mutex is chosen * from the pool based on the supplied pointer (which may or may not be * valid). * * Advantages: * - no structural overhead. Mutexes can be associated with structures * without adding bloat to the structures. * - mutexes can be obtained for invalid pointers, useful when using * mutexes to interlock destructor ops.
* - no initialization/destructor overhead. * - can be used with msleep. * * Disadvantages: * - should generally only be used as leaf mutexes. * - pool/pool dependancy ordering cannot be depended on. * - possible L1 cache mastersip contention between cpus. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_MTXPOOL, "mtx_pool", "mutex pool"); /* Pool sizes must be a power of two */ #ifndef MTX_POOL_LOCKBUILDER_SIZE #define MTX_POOL_LOCKBUILDER_SIZE 128 #endif #ifndef MTX_POOL_SLEEP_SIZE #define MTX_POOL_SLEEP_SIZE 128 #endif struct mtxpool_header { int mtxpool_size; int mtxpool_mask; int mtxpool_shift; int mtxpool_next; }; struct mtx_pool { struct mtxpool_header mtx_pool_header; struct mtx mtx_pool_ary[1]; }; static struct mtx_pool_lockbuilder { struct mtxpool_header mtx_pool_header; struct mtx mtx_pool_ary[MTX_POOL_LOCKBUILDER_SIZE]; } lockbuilder_pool; #define mtx_pool_size mtx_pool_header.mtxpool_size #define mtx_pool_mask mtx_pool_header.mtxpool_mask #define mtx_pool_shift mtx_pool_header.mtxpool_shift #define mtx_pool_next mtx_pool_header.mtxpool_next struct mtx_pool *mtxpool_sleep; struct mtx_pool *mtxpool_lockbuilder; #if UINTPTR_MAX == UINT64_MAX /* 64 bits */ # define POINTER_BITS 64 # define HASH_MULTIPLIER 11400714819323198485u /* (2^64)*(sqrt(5)-1)/2 */ #else /* assume 32 bits */ # define POINTER_BITS 32 # define HASH_MULTIPLIER 2654435769u /* (2^32)*(sqrt(5)-1)/2 */ #endif /* * Return the (shared) pool mutex associated with the specified address. * The returned mutex is a leaf level mutex, meaning that if you obtain it * you cannot obtain any other mutexes until you release it. You can * legally msleep() on the mutex. */ struct mtx * mtx_pool_find(struct mtx_pool *pool, void *ptr) { int p; KASSERT(pool != NULL, ("_mtx_pool_find(): null pool")); /* * Fibonacci hash, see Knuth's * _Art of Computer Programming, Volume 3 / Sorting and Searching_ */ p = ((HASH_MULTIPLIER * (uintptr_t)ptr) >> pool->mtx_pool_shift) & pool->mtx_pool_mask; return (&pool->mtx_pool_ary[p]); } static void mtx_pool_initialize(struct mtx_pool *pool, const char *mtx_name, int pool_size, int opts) { int i, maskbits; pool->mtx_pool_size = pool_size; pool->mtx_pool_mask = pool_size - 1; for (i = 1, maskbits = 0; (i & pool_size) == 0; i = i << 1) maskbits++; pool->mtx_pool_shift = POINTER_BITS - maskbits; pool->mtx_pool_next = 0; for (i = 0; i < pool_size; ++i) mtx_init(&pool->mtx_pool_ary[i], mtx_name, NULL, opts); } struct mtx_pool * mtx_pool_create(const char *mtx_name, int pool_size, int opts) { struct mtx_pool *pool; if (pool_size <= 0 || !powerof2(pool_size)) { printf("WARNING: %s pool size is not a power of 2.\n", mtx_name); pool_size = 128; } MALLOC(pool, struct mtx_pool *, sizeof (struct mtx_pool) + ((pool_size - 1) * sizeof (struct mtx)), M_MTXPOOL, M_WAITOK | M_ZERO); mtx_pool_initialize(pool, mtx_name, pool_size, opts); return pool; } void mtx_pool_destroy(struct mtx_pool **poolp) { int i; struct mtx_pool *pool = *poolp; for (i = pool->mtx_pool_size - 1; i >= 0; --i) mtx_destroy(&pool->mtx_pool_ary[i]); FREE(pool, M_MTXPOOL); *poolp = NULL; } static void mtx_pool_setup_static(void *dummy __unused) { mtx_pool_initialize((struct mtx_pool *)&lockbuilder_pool, "lockbuilder mtxpool", MTX_POOL_LOCKBUILDER_SIZE, MTX_DEF | MTX_NOWITNESS | MTX_QUIET); mtxpool_lockbuilder = (struct mtx_pool *)&lockbuilder_pool; } static void mtx_pool_setup_dynamic(void *dummy __unused) { mtxpool_sleep = mtx_pool_create("sleep mtxpool", 
MTX_POOL_SLEEP_SIZE, MTX_DEF); } /* * Obtain a (shared) mutex from the pool. The returned mutex is a leaf * level mutex, meaning that if you obtain it you cannot obtain any other * mutexes until you release it. You can legally msleep() on the mutex. */ struct mtx * mtx_pool_alloc(struct mtx_pool *pool) { int i; KASSERT(pool != NULL, ("mtx_pool_alloc(): null pool")); /* * mtx_pool_next is unprotected against multiple accesses, * but simultaneous access by two CPUs should not be very * harmful. */ i = pool->mtx_pool_next; pool->mtx_pool_next = (i + 1) & pool->mtx_pool_mask; return (&pool->mtx_pool_ary[i]); } /* * The lockbuilder pool must be initialized early because the lockmgr * and sx locks depend on it. The sx locks are used in the kernel * memory allocator. The lockmgr subsystem is initialized by * SYSINIT(..., SI_SUB_LOCKMGR, ...). * * We can't call MALLOC() to dynamically allocate the sleep pool * until after kmeminit() has been called, which is done by * SYSINIT(..., SI_SUB_KMEM, ...). */ SYSINIT(mtxpooli1, SI_SUB_MTX_POOL_STATIC, SI_ORDER_FIRST, mtx_pool_setup_static, NULL); SYSINIT(mtxpooli2, SI_SUB_MTX_POOL_DYNAMIC, SI_ORDER_FIRST, mtx_pool_setup_dynamic, NULL); Index: head/sys/kern/kern_resource.c =================================================================== --- head/sys/kern/kern_resource.c (revision 170034) +++ head/sys/kern/kern_resource.c (revision 170035) @@ -1,1255 +1,1255 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static struct mtx uihashtbl_mtx; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); /* * Resource controls and accounting. */ #ifndef _SYS_SYSPROTO_H_ struct getpriority_args { int which; int who; }; #endif int getpriority(td, uap) struct thread *td; register struct getpriority_args *uap; { struct proc *p; struct pgrp *pg; int error, low; error = 0; low = PRIO_MAX + 1; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) low = td->td_proc->p_nice; else { p = pfind(uap->who); if (p == NULL) break; if (p_cansee(td, p) == 0) low = p->p_nice; PROC_UNLOCK(p); } break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (!p_cansee(td, p)) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { /* Do not bother to check PRS_NEW processes */ if (p->p_state == PRS_NEW) continue; PROC_LOCK(p); if (!p_cansee(td, p) && p->p_ucred->cr_uid == uap->who) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (low == PRIO_MAX + 1 && error == 0) error = ESRCH; td->td_retval[0] = low; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setpriority_args { int which; int who; int prio; }; #endif int setpriority(td, uap) struct thread *td; struct setpriority_args *uap; { struct proc *curp, *p; struct pgrp *pg; int found = 0, error = 0; curp = td->td_proc; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) { PROC_LOCK(curp); error = donice(td, curp, uap->prio); PROC_UNLOCK(curp); } else { p = pfind(uap->who); if (p == 0) break; if (p_cansee(td, p) == 0) error = donice(td, p, uap->prio); PROC_UNLOCK(p); } found++; break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = curp->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (!p_cansee(td, p)) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_ucred->cr_uid == uap->who && !p_cansee(td, p)) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (found == 0 && error == 0) error = ESRCH; return (error); } /* * Set "nice" for a (whole) process. 
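*
* For example (illustrative, from userland): with p_nice at 0 and no
* special privilege,
*
*	setpriority(PRIO_PROCESS, 0, 5);	raising nice succeeds
*	setpriority(PRIO_PROCESS, 0, -5);	fails with EACCES unless the
*						caller holds PRIV_SCHED_SETPRIORITY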
*/ static int donice(struct thread *td, struct proc *p, int n) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = p_cansched(td, p))) return (error); if (n > PRIO_MAX) n = PRIO_MAX; if (n < PRIO_MIN) n = PRIO_MIN; if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0) return (EACCES); mtx_lock_spin(&sched_lock); sched_nice(p, n); mtx_unlock_spin(&sched_lock); return (0); } /* * Set realtime priority for LWP. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_thread_args { int function; lwpid_t lwpid; struct rtprio *rtp; }; #endif int rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) { struct proc *curp; struct proc *p; struct rtprio rtp; struct thread *td1; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; curp = td->td_proc; /* * Though lwpid is unique, only current process is supported * since there is no efficient way to look up a LWP yet. */ p = curp; PROC_LOCK(p); switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; mtx_lock_spin(&sched_lock); if (uap->lwpid == 0 || uap->lwpid == td->td_tid) td1 = td; else td1 = thread_find(p, uap->lwpid); if (td1 != NULL) pri_to_rtp(td1, &rtp); else error = ESRCH; mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* Disallow setting rtprio in most cases if not superuser. */ if (priv_check(td, PRIV_SCHED_RTPRIO) != 0) { /* can't set realtime priority */ /* * Realtime priority has to be restricted for reasons which should be * obvious. However, for idle priority, there is a potential for * system deadlock if an idleprio process gains a lock on a resource * that other processes need (and the idleprio process can't run * due to a CPU-bound normal process). Fix me! XXX */ #if 0 if (RTP_PRIO_IS_REALTIME(rtp.type)) { #else if (rtp.type != RTP_PRIO_NORMAL) { #endif error = EPERM; break; } } mtx_lock_spin(&sched_lock); if (uap->lwpid == 0 || uap->lwpid == td->td_tid) td1 = td; else td1 = thread_find(p, uap->lwpid); if (td1 != NULL) error = rtp_to_pri(&rtp, td1); else error = ESRCH; mtx_unlock_spin(&sched_lock); break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } /* * Set realtime priority. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_args { int function; pid_t pid; struct rtprio *rtp; }; #endif int rtprio(td, uap) struct thread *td; /* curthread */ register struct rtprio_args *uap; { struct proc *curp; struct proc *p; struct thread *tdp; struct rtprio rtp; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; curp = td->td_proc; if (uap->pid == 0) { p = curp; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; mtx_lock_spin(&sched_lock); /* * Return OUR priority if no pid specified, * or if one is, report the highest priority * in the process. There isn't much more you can do as * there is only room to return a single priority. * XXXKSE: maybe need a new interface to report * priorities of multiple system scope threads. * Note: specifying our own pid is not the same * as leaving it zero. 
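*
* A hypothetical userland sketch of the difference:
*
*	struct rtprio rtp;
*	rtprio(RTP_LOOKUP, 0, &rtp);		this thread's priority only
*	rtprio(RTP_LOOKUP, getpid(), &rtp);	highest priority among all
*						threads in the process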
*/ if (uap->pid == 0) { pri_to_rtp(td, &rtp); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; FOREACH_THREAD_IN_PROC(p, tdp) { pri_to_rtp(tdp, &rtp2); if (rtp2.type < rtp.type || (rtp2.type == rtp.type && rtp2.prio < rtp.prio)) { rtp.type = rtp2.type; rtp.prio = rtp2.prio; } } } mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* Disallow setting rtprio in most cases if not superuser. */ if (priv_check(td, PRIV_SCHED_RTPRIO) != 0) { /* can't set someone else's */ if (uap->pid) { error = EPERM; break; } /* can't set realtime priority */ /* * Realtime priority has to be restricted for reasons which should be * obvious. However, for idle priority, there is a potential for * system deadlock if an idleprio process gains a lock on a resource * that other processes need (and the idleprio process can't run * due to a CPU-bound normal process). Fix me! XXX */ #if 0 if (RTP_PRIO_IS_REALTIME(rtp.type)) { #else if (rtp.type != RTP_PRIO_NORMAL) { #endif error = EPERM; break; } } /* * If we are setting our own priority, set just our * thread but if we are doing another process, * do all the threads on that process. If we * specify our own pid we do the latter. */ mtx_lock_spin(&sched_lock); if (uap->pid == 0) { error = rtp_to_pri(&rtp, td); } else { FOREACH_THREAD_IN_PROC(p, td) { if ((error = rtp_to_pri(&rtp, td)) != 0) break; } } mtx_unlock_spin(&sched_lock); break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } int rtp_to_pri(struct rtprio *rtp, struct thread *td) { u_char newpri; mtx_assert(&sched_lock, MA_OWNED); if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: newpri = PRI_MIN_REALTIME + rtp->prio; break; case RTP_PRIO_NORMAL: newpri = PRI_MIN_TIMESHARE + rtp->prio; break; case RTP_PRIO_IDLE: newpri = PRI_MIN_IDLE + rtp->prio; break; default: return (EINVAL); } sched_class(td, rtp->type); /* XXX fix */ sched_user_prio(td, newpri); if (curthread == td) sched_prio(curthread, td->td_user_pri); /* XXX dubious */ return (0); } void pri_to_rtp(struct thread *td, struct rtprio *rtp) { mtx_assert(&sched_lock, MA_OWNED); switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME; break; case PRI_TIMESHARE: rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE; break; case PRI_IDLE: rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE; break; default: break; } rtp->type = td->td_pri_class; } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int osetrlimit(td, uap) struct thread *td; register struct osetrlimit_args *uap; { struct orlimit olim; struct rlimit lim; int error; if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit)))) return (error); lim.rlim_cur = olim.rlim_cur; lim.rlim_max = olim.rlim_max; error = kern_setrlimit(td, uap->which, &lim); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ogetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int ogetrlimit(td, uap) struct thread *td; register struct ogetrlimit_args *uap; { struct orlimit olim; struct rlimit rl; struct proc *p; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); p = td->td_proc; PROC_LOCK(p); lim_rlimit(p, uap->which, &rl); PROC_UNLOCK(p); /* * XXX would be more correct to convert only RLIM_INFINITY to the * old RLIM_INFINITY and fail with EOVERFLOW for other larger * 
values. Most 64->32 and 32->16 conversions, including not * unimportant ones of uids are even more broken than what we * do here (they blindly truncate). We don't do this correctly * here since we have little experience with EOVERFLOW yet. * Elsewhere, getuid() can't fail... */ olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur; olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max; error = copyout(&olim, uap->rlp, sizeof(olim)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct __setrlimit_args { u_int which; struct rlimit *rlp; }; #endif int setrlimit(td, uap) struct thread *td; register struct __setrlimit_args *uap; { struct rlimit alim; int error; if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit)))) return (error); error = kern_setrlimit(td, uap->which, &alim); return (error); } int kern_setrlimit(td, which, limp) struct thread *td; u_int which; struct rlimit *limp; { struct plimit *newlim, *oldlim; struct proc *p; register struct rlimit *alimp; rlim_t oldssiz; int error; if (which >= RLIM_NLIMITS) return (EINVAL); /* * Preserve historical bugs by treating negative limits as unsigned. */ if (limp->rlim_cur < 0) limp->rlim_cur = RLIM_INFINITY; if (limp->rlim_max < 0) limp->rlim_max = RLIM_INFINITY; oldssiz = 0; p = td->td_proc; newlim = lim_alloc(); PROC_LOCK(p); oldlim = p->p_limit; alimp = &oldlim->pl_rlimit[which]; if (limp->rlim_cur > alimp->rlim_max || limp->rlim_max > alimp->rlim_max) if ((error = priv_check_cred(td->td_ucred, PRIV_PROC_SETRLIMIT, SUSER_ALLOWJAIL))) { PROC_UNLOCK(p); lim_free(newlim); return (error); } if (limp->rlim_cur > limp->rlim_max) limp->rlim_cur = limp->rlim_max; lim_copy(newlim, oldlim); alimp = &newlim->pl_rlimit[which]; switch (which) { case RLIMIT_CPU: mtx_lock_spin(&sched_lock); p->p_cpulimit = limp->rlim_cur; mtx_unlock_spin(&sched_lock); break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) limp->rlim_cur = maxdsiz; if (limp->rlim_max > maxdsiz) limp->rlim_max = maxdsiz; break; case RLIMIT_STACK: if (limp->rlim_cur > maxssiz) limp->rlim_cur = maxssiz; if (limp->rlim_max > maxssiz) limp->rlim_max = maxssiz; oldssiz = alimp->rlim_cur; break; case RLIMIT_NOFILE: if (limp->rlim_cur > maxfilesperproc) limp->rlim_cur = maxfilesperproc; if (limp->rlim_max > maxfilesperproc) limp->rlim_max = maxfilesperproc; break; case RLIMIT_NPROC: if (limp->rlim_cur > maxprocperuid) limp->rlim_cur = maxprocperuid; if (limp->rlim_max > maxprocperuid) limp->rlim_max = maxprocperuid; if (limp->rlim_cur < 1) limp->rlim_cur = 1; if (limp->rlim_max < 1) limp->rlim_max = 1; break; } if (td->td_proc->p_sysent->sv_fixlimit != NULL) td->td_proc->p_sysent->sv_fixlimit(limp, which); *alimp = *limp; p->p_limit = newlim; PROC_UNLOCK(p); lim_free(oldlim); if (which == RLIMIT_STACK) { /* * Stack is allocated to the max at exec time with only * "rlim_cur" bytes accessible. If stack limit is going * up make more accessible, if going down make inaccessible. 
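*
* Illustrative numbers: with sv_usrstack at 0xc0000000, shrinking the
* soft limit from 8MB to 4MB re-protects the range
* [0xc0000000 - 8MB, 0xc0000000 - 4MB) to VM_PROT_NONE; growing it
* back re-enables sv_stackprot on the same range.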
*/ if (limp->rlim_cur != oldssiz) { vm_offset_t addr; vm_size_t size; vm_prot_t prot; if (limp->rlim_cur > oldssiz) { prot = p->p_sysent->sv_stackprot; size = limp->rlim_cur - oldssiz; addr = p->p_sysent->sv_usrstack - limp->rlim_cur; } else { prot = VM_PROT_NONE; size = oldssiz - limp->rlim_cur; addr = p->p_sysent->sv_usrstack - oldssiz; } addr = trunc_page(addr); size = round_page(size); (void)vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, FALSE); } } return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* ARGSUSED */ int getrlimit(td, uap) struct thread *td; register struct __getrlimit_args *uap; { struct rlimit rlim; struct proc *p; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); p = td->td_proc; PROC_LOCK(p); lim_rlimit(p, uap->which, &rlim); PROC_UNLOCK(p); error = copyout(&rlim, uap->rlp, sizeof(struct rlimit)); return (error); } /* * Transform the running time and tick information for children of proc p * into user and system time usage. */ void calccru(p, up, sp) struct proc *p; struct timeval *up; struct timeval *sp; { PROC_LOCK_ASSERT(p, MA_OWNED); calcru1(p, &p->p_crux, up, sp); } /* * Transform the running time and tick information in proc p into user * and system time usage. If appropriate, include the current time slice * on this CPU. */ void calcru(struct proc *p, struct timeval *up, struct timeval *sp) { struct rusage_ext rux; struct thread *td; uint64_t u; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&sched_lock, MA_NOTOWNED); mtx_lock_spin(&sched_lock); /* * If we are getting stats for the current process, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ if (curthread->td_proc == p) { td = curthread; u = cpu_ticks(); p->p_rux.rux_runtime += u - PCPU_GET(switchtime); PCPU_SET(switchtime, u); p->p_rux.rux_uticks += td->td_uticks; td->td_uticks = 0; p->p_rux.rux_iticks += td->td_iticks; td->td_iticks = 0; p->p_rux.rux_sticks += td->td_sticks; td->td_sticks = 0; } /* Work on a copy of p_rux so we can let go of sched_lock */ rux = p->p_rux; mtx_unlock_spin(&sched_lock); calcru1(p, &rux, up, sp); /* Update the result from the p_rux copy */ p->p_rux.rux_uu = rux.rux_uu; p->p_rux.rux_su = rux.rux_su; p->p_rux.rux_tu = rux.rux_tu; } static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp) { /* {user, system, interrupt, total} {ticks, usec}: */ u_int64_t ut, uu, st, su, it, tt, tu; ut = ruxp->rux_uticks; st = ruxp->rux_sticks; it = ruxp->rux_iticks; tt = ut + st + it; if (tt == 0) { /* Avoid divide by zero */ st = 1; tt = 1; } tu = cputick2usec(ruxp->rux_runtime); if ((int64_t)tu < 0) { /* XXX: this should be an assert /phk */ printf("calcru: negative runtime of %jd usec for pid %d (%s)\n", (intmax_t)tu, p->p_pid, p->p_comm); tu = ruxp->rux_tu; } if (tu >= ruxp->rux_tu) { /* * The normal case, time increased. * Enforce monotonicity of bucketed numbers. */ uu = (tu * ut) / tt; if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; su = (tu * st) / tt; if (su < ruxp->rux_su) su = ruxp->rux_su; } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) { /* * When we calibrate the cputicker, it is not uncommon to * see the presumably fixed frequency increase slightly over * time as a result of thermal stabilization and NTP * discipline (of the reference clock). 
We therefore ignore * a bit of backwards slop because we expect to catch up * shortly. We use a 3 microsecond limit to catch low * counts and a 1% limit for high counts. */ uu = ruxp->rux_uu; su = ruxp->rux_su; tu = ruxp->rux_tu; } else { /* tu < ruxp->rux_tu */ /* * What happened here was likely that a laptop, which ran at * a reduced clock frequency at boot, kicked into high gear. * The wisdom of spamming this message in that case is * dubious, but it might also be indicative of something * serious, so let's keep it and hope laptops can be made * more truthful about their CPU speed via ACPI. */ printf("calcru: runtime went backwards from %ju usec " "to %ju usec for pid %d (%s)\n", (uintmax_t)ruxp->rux_tu, (uintmax_t)tu, p->p_pid, p->p_comm); uu = (tu * ut) / tt; su = (tu * st) / tt; } ruxp->rux_uu = uu; ruxp->rux_su = su; ruxp->rux_tu = tu; up->tv_sec = uu / 1000000; up->tv_usec = uu % 1000000; sp->tv_sec = su / 1000000; sp->tv_usec = su % 1000000; } #ifndef _SYS_SYSPROTO_H_ struct getrusage_args { int who; struct rusage *rusage; }; #endif int getrusage(td, uap) register struct thread *td; register struct getrusage_args *uap; { struct rusage ru; int error; error = kern_getrusage(td, uap->who, &ru); if (error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int kern_getrusage(td, who, rup) struct thread *td; int who; struct rusage *rup; { struct proc *p; p = td->td_proc; PROC_LOCK(p); switch (who) { case RUSAGE_SELF: *rup = p->p_stats->p_ru; calcru(p, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_CHILDREN: *rup = p->p_stats->p_cru; calccru(p, &rup->ru_utime, &rup->ru_stime); break; default: PROC_UNLOCK(p); return (EINVAL); } PROC_UNLOCK(p); return (0); } void ruadd(ru, rux, ru2, rux2) struct rusage *ru; struct rusage_ext *rux; struct rusage *ru2; struct rusage_ext *rux2; { register long *ip, *ip2; register int i; rux->rux_runtime += rux2->rux_runtime; rux->rux_uticks += rux2->rux_uticks; rux->rux_sticks += rux2->rux_sticks; rux->rux_iticks += rux2->rux_iticks; rux->rux_uu += rux2->rux_uu; rux->rux_su += rux2->rux_su; rux->rux_tu += rux2->rux_tu; if (ru->ru_maxrss < ru2->ru_maxrss) ru->ru_maxrss = ru2->ru_maxrss; ip = &ru->ru_first; ip2 = &ru2->ru_first; for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) *ip++ += *ip2++; } /* * Allocate a new resource limits structure and initialize its * reference count and mutex pointer. */ struct plimit * lim_alloc() { struct plimit *limp; limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK); refcount_init(&limp->pl_refcnt, 1); return (limp); } struct plimit * lim_hold(limp) struct plimit *limp; { refcount_acquire(&limp->pl_refcnt); return (limp); } void lim_free(limp) struct plimit *limp; { KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow")); if (refcount_release(&limp->pl_refcnt)) free((void *)limp, M_PLIMIT); } /* * Make a copy of the plimit structure. * We share these structures copy-on-write after fork. */ void lim_copy(dst, src) struct plimit *dst, *src; { KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit")); bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit)); } /* * Return the hard limit for a particular system resource. The * which parameter specifies the index into the rlimit array. */ rlim_t lim_max(struct proc *p, int which) { struct rlimit rl; lim_rlimit(p, which, &rl); return (rl.rlim_max); } /* * Return the current (soft) limit for a particular system resource.
* The which parameter specifies the index into the rlimit array. */ rlim_t lim_cur(struct proc *p, int which) { struct rlimit rl; lim_rlimit(p, which, &rl); return (rl.rlim_cur); } /* * Return a copy of the entire rlimit structure for the system limit * specified by 'which' in the rlimit structure pointed to by 'rlp'. */ void lim_rlimit(struct proc *p, int which, struct rlimit *rlp) { PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = p->p_limit->pl_rlimit[which]; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(rlp, which); } /* * Find the uidinfo structure for a uid. This structure is used to * track the total resource consumption (process count, socket buffer * size, etc.) for the uid and impose limits. */ void uihashinit() { uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash); mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF); } /* * Look up a uidinfo struct for the parameter uid. * uihashtbl_mtx must be locked. */ static struct uidinfo * uilookup(uid) uid_t uid; { struct uihashhead *uipp; struct uidinfo *uip; mtx_assert(&uihashtbl_mtx, MA_OWNED); uipp = UIHASH(uid); LIST_FOREACH(uip, uipp, ui_hash) if (uip->ui_uid == uid) break; return (uip); } /* * Find or allocate a struct uidinfo for a particular uid. * Increase refcount on uidinfo struct returned. * uifree() should be called on a struct uidinfo when released. */ struct uidinfo * uifind(uid) uid_t uid; { struct uidinfo *old_uip, *uip; mtx_lock(&uihashtbl_mtx); uip = uilookup(uid); if (uip == NULL) { mtx_unlock(&uihashtbl_mtx); uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO); mtx_lock(&uihashtbl_mtx); /* * There's a chance someone created our uidinfo while we * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate uidinfo. */ if ((old_uip = uilookup(uid)) != NULL) { /* Someone else beat us to it. */ free(uip, M_UIDINFO); uip = old_uip; } else { uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep); uip->ui_uid = uid; LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash); } } uihold(uip); mtx_unlock(&uihashtbl_mtx); return (uip); } /* * Place another refcount on a uidinfo struct. */ void uihold(uip) struct uidinfo *uip; { UIDINFO_LOCK(uip); uip->ui_ref++; UIDINFO_UNLOCK(uip); } /*- * Since uidinfo structs have a long lifetime, we use an * opportunistic refcounting scheme to avoid locking the lookup hash * for each release. * * If the refcount hits 0, we need to free the structure, * which means we need to lock the hash. * Optimal case: * After locking the struct and lowering the refcount, if we find * that we don't need to free, simply unlock and return. * Suboptimal case: * If refcount lowering results in need to free, bump the count - * back up, lose the lock and aquire the locks in the proper + * back up, lose the lock and acquire the locks in the proper * order to try again. */ void uifree(uip) struct uidinfo *uip; { /* Prepare for optimal case. */ UIDINFO_LOCK(uip); if (--uip->ui_ref != 0) { UIDINFO_UNLOCK(uip); return; } /* Prepare for suboptimal case. */ uip->ui_ref++; UIDINFO_UNLOCK(uip); mtx_lock(&uihashtbl_mtx); UIDINFO_LOCK(uip); /* * We must subtract one from the count again because we backed out * our initial subtraction before dropping the lock. * Since another thread may have added a reference after we dropped the * initial lock we have to test for zero again.
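*
* Illustrative interleaving: thread A's first decrement hits zero, so
* A restores the count to one and drops the uidinfo lock; thread B
* then calls uihold() (count two) before A retakes both locks; A's
* second decrement leaves one, so A unlocks and returns without
* freeing.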
*/ if (--uip->ui_ref == 0) { LIST_REMOVE(uip, ui_hash); mtx_unlock(&uihashtbl_mtx); if (uip->ui_sbsize != 0) printf("freeing uidinfo: uid = %d, sbsize = %jd\n", uip->ui_uid, (intmax_t)uip->ui_sbsize); if (uip->ui_proccnt != 0) printf("freeing uidinfo: uid = %d, proccnt = %ld\n", uip->ui_uid, uip->ui_proccnt); UIDINFO_UNLOCK(uip); FREE(uip, M_UIDINFO); return; } mtx_unlock(&uihashtbl_mtx); UIDINFO_UNLOCK(uip); } /* * Change the count associated with number of processes * a given user is using. When 'max' is 0, don't enforce a limit */ int chgproccnt(uip, diff, max) struct uidinfo *uip; int diff; int max; { UIDINFO_LOCK(uip); /* Don't allow them to exceed max, but allow subtraction. */ if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) { UIDINFO_UNLOCK(uip); return (0); } uip->ui_proccnt += diff; if (uip->ui_proccnt < 0) printf("negative proccnt for uid = %d\n", uip->ui_uid); UIDINFO_UNLOCK(uip); return (1); } /* * Change the total socket buffer size a user has used. */ int chgsbsize(uip, hiwat, to, max) struct uidinfo *uip; u_int *hiwat; u_int to; rlim_t max; { rlim_t new; UIDINFO_LOCK(uip); new = uip->ui_sbsize + to - *hiwat; /* Don't allow them to exceed max, but allow subtraction. */ if (to > *hiwat && new > max) { UIDINFO_UNLOCK(uip); return (0); } uip->ui_sbsize = new; UIDINFO_UNLOCK(uip); *hiwat = to; if (new < 0) printf("negative sbsize for uid = %d\n", uip->ui_uid); return (1); } Index: head/sys/kern/vfs_lookup.c =================================================================== --- head/sys/kern/vfs_lookup.c (revision 170034) +++ head/sys/kern/vfs_lookup.c (revision 170035) @@ -1,1098 +1,1098 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_mac.h" #include "opt_vfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #define NAMEI_DIAGNOSTIC 1 #undef NAMEI_DIAGNOSTIC /* * Allocation zone for namei */ uma_zone_t namei_zone; /* * Placeholder vnode for mp traversal */ static struct vnode *vp_crossmp; static void nameiinit(void *dummy __unused) { int error; namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); error = getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); if (error != 0) panic("nameiinit: getnewvnode"); vp_crossmp->v_vnlock->lk_flags &= ~LK_NOSHARE; } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL) #ifdef LOOKUP_SHARED static int lookup_shared = 1; #else static int lookup_shared = 0; #endif SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, "Enables/Disables shared locks for path name translation"); /* * Convert a pathname into a pointer to a locked vnode. * * The FOLLOW flag is set when symbolic links are to be followed * when they occur at the end of the name translation process. * Symbolic links are always followed for all other pathname * components other than the last. * * The segflg defines whether the name is to be copied from user * space or kernel space. * * Overall outline of namei: * * copy in name * get starting directory * while (!done && !error) { * call lookup to search path. * if symbolic link, massage name in buffer and continue * } */ int namei(struct nameidata *ndp) { struct filedesc *fdp; /* pointer to file descriptor state */ char *cp; /* pointer into pathname argument */ struct vnode *dp; /* the directory we are searching */ struct iovec aiov; /* uio for reading symbolic links */ struct uio auio; int error, linklen; struct componentname *cnp = &ndp->ni_cnd; struct thread *td = cnp->cn_thread; struct proc *p = td->td_proc; int vfslocked; KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0, ("NOT MPSAFE and Giant not held")); ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, ("namei: nameiop contaminated with flags")); KASSERT((cnp->cn_flags & OPMASK) == 0, ("namei: flags contaminated with nameiops")); if (!lookup_shared) cnp->cn_flags &= ~LOCKSHARED; fdp = p->p_fd; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. */ if ((cnp->cn_flags & HASBUF) == 0) cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (ndp->ni_segflg == UIO_SYSSPACE) error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, (size_t *)&ndp->ni_pathlen); else error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, (size_t *)&ndp->ni_pathlen); /* If we are auditing the kernel pathname, save the user pathname. */ if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH1); if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH2); /* * Don't allow empty pathnames. 
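* An empty string is required by POSIX to fail with ENOENT rather
* than being treated as the current directory.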
*/ if (!error && *cnp->cn_pnbuf == '\0') error = ENOENT; if (error) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif ndp->ni_vp = NULL; return (error); } ndp->ni_loopcnt = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_NAMEI)) { KASSERT(cnp->cn_thread == curthread, ("namei not using curthread")); ktrnamei(cnp->cn_pnbuf); } #endif /* * Get starting point for the translation. */ FILEDESC_SLOCK(fdp); ndp->ni_rootdir = fdp->fd_rdir; ndp->ni_topdir = fdp->fd_jdir; dp = fdp->fd_cdir; vfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); FILEDESC_SUNLOCK(fdp); for (;;) { /* * Check if root directory should replace current directory. * Done at start of translation and after symbolic link. */ cnp->cn_nameptr = cnp->cn_pnbuf; if (*(cnp->cn_nameptr) == '/') { vrele(dp); VFS_UNLOCK_GIANT(vfslocked); while (*(cnp->cn_nameptr) == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } dp = ndp->ni_rootdir; vfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); } if (vfslocked) ndp->ni_cnd.cn_flags |= GIANTHELD; ndp->ni_startdir = dp; error = lookup(ndp); if (error) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif return (error); } vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; ndp->ni_cnd.cn_flags &= ~GIANTHELD; /* * Check for symbolic link */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif } else cnp->cn_flags |= HASBUF; if ((cnp->cn_flags & MPSAFE) == 0) { VFS_UNLOCK_GIANT(vfslocked); } else if (vfslocked) ndp->ni_cnd.cn_flags |= GIANTHELD; return (0); } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; } #ifdef MAC if ((cnp->cn_flags & NOMACCHECK) == 0) { error = mac_check_vnode_readlink(td->td_ucred, ndp->ni_vp); if (error) break; } #endif if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = (struct thread *)0; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); error = ENOENT; break; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); error = ENAMETOOLONG; break; } if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; vput(ndp->ni_vp); dp = ndp->ni_dvp; } uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif vput(ndp->ni_vp); ndp->ni_vp = NULL; vrele(ndp->ni_dvp); VFS_UNLOCK_GIANT(vfslocked); return (error); } static int compute_cn_lkflags(struct mount *mp, int lkflags) { if (mp == NULL || ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) { lkflags &= ~LK_SHARED; lkflags |= LK_EXCLUSIVE; } return lkflags; } /* * Search a pathname. * This is a very central and rather complicated routine. * * The pathname is pointed to by ni_ptr and is of length ni_pathlen. * The starting directory is taken from ni_startdir. 
The pathname is * descended until done, or a symbolic link is encountered. The variable * ni_more is clear if the path is completed; it is set to one if a * symbolic link needing interpretation is encountered. * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it, the parent directory is returned * locked. If flag has WANTPARENT or'ed into it, the parent directory is * returned unlocked. Otherwise the parent directory is not returned. If * the target of the pathname exists and LOCKLEAF is or'ed into the flag * the target is returned locked, otherwise it is returned unlocked. * When creating or renaming and LOCKPARENT is specified, the target may not * be ".". When deleting and LOCKPARENT is specified, the target may be ".". * * Overall outline of lookup: * * dirloop: * identify next component of name at ndp->ni_ptr * handle degenerate case where name is null string * if .. and crossing mount points and on mounted filesys, find parent * call VOP_LOOKUP routine for next component name * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set * component vnode returned in ni_vp (if it exists), locked. * if result vnode is mounted on and crossing mount points, * find mounted on vnode * if more components of name, do next level at dirloop * return the answer in ni_vp, locked if LOCKLEAF set * if LOCKPARENT set, return locked parent in ni_dvp * if WANTPARENT set, return unlocked parent in ni_dvp */ int lookup(struct nameidata *ndp) { char *cp; /* pointer into pathname argument */ struct vnode *dp = 0; /* the directory we are searching */ struct vnode *tdp; /* saved dp */ struct mount *mp; /* mount table entry */ int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int trailing_slash; int error = 0; int dpunlocked = 0; /* dp has already been unlocked */ struct componentname *cnp = &ndp->ni_cnd; struct thread *td = cnp->cn_thread; int vfslocked; /* VFS Giant state for child */ int dvfslocked; /* VFS Giant state for parent */ int tvfslocked; int lkflags_save; /* * Setup: break out flag bits into variables. */ dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; vfslocked = 0; ndp->ni_cnd.cn_flags &= ~GIANTHELD; wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; if (cnp->cn_nameiop == DELETE || (wantparent && cnp->cn_nameiop != CREATE && cnp->cn_nameiop != LOOKUP)) docache = 0; rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; ndp->ni_dvp = NULL; /* * We use shared locks until we hit the parent of the last cn then * we adjust based on the requesting flags. */ if (lookup_shared) cnp->cn_lkflags = LK_SHARED; else cnp->cn_lkflags = LK_EXCLUSIVE; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); dirloop: /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. 
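*
* For example, translating "usr//local/" produces the components
* "usr" and "local": the duplicate slash is skipped, and the trailing
* slash is replaced by a nul with trailing_slash set so that symlinks
* and non-directories in the last position can be handled specially
* later.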
*/ cnp->cn_consume = 0; for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) continue; cnp->cn_namelen = cp - cnp->cn_nameptr; if (cnp->cn_namelen > NAME_MAX) { error = ENAMETOOLONG; goto bad; } #ifdef NAMEI_DIAGNOSTIC { char c = *cp; *cp = '\0'; printf("{%s}: ", cnp->cn_nameptr); *cp = c; } #endif ndp->ni_pathlen -= cnp->cn_namelen; ndp->ni_next = cp; /* * Replace multiple slashes by a single slash and trailing slashes * by a null. This must be done before VOP_LOOKUP() because some * fs's don't know about trailing slashes. Remember if there were * trailing slashes to handle symlinks, existing non-directories * and non-existing files that won't be directories specially later. */ trailing_slash = 0; while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { cp++; ndp->ni_pathlen--; if (*cp == '\0') { trailing_slash = 1; *ndp->ni_next = '\0'; /* XXX for direnter() ... */ } } ndp->ni_next = cp; cnp->cn_flags |= MAKEENTRY; if (*cp == '\0' && docache == 0) cnp->cn_flags &= ~MAKEENTRY; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') cnp->cn_flags |= ISDOTDOT; else cnp->cn_flags &= ~ISDOTDOT; if (*ndp->ni_next == 0) cnp->cn_flags |= ISLASTCN; else cnp->cn_flags &= ~ISLASTCN; /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, * e.g. like "/." or ".". */ if (cnp->cn_nameptr[0] == '\0') { if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } if (cnp->cn_nameiop != LOOKUP) { error = EISDIR; goto bad; } if (wantparent) { ndp->ni_dvp = dp; VREF(dp); } ndp->ni_vp = dp; if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG(vnode, dp, ARG_VNODE1); else if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG(vnode, dp, ARG_VNODE2); if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) VOP_UNLOCK(dp, 0, td); /* XXX This should probably move to the top of function. */ if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); goto success; } /* * Handle "..": four special cases. * 1. Return an error if this is the last component of * the name and the operation is DELETE or RENAME. * 2. If at root directory (e.g. after chroot) * or at absolute root directory * then ignore it so can't get out. * 3. If this vnode is the root of a mounted * filesystem, then replace it with the * vnode which was mounted on so we take the * .. in the other filesystem. * 4. If the vnode is the top directory of * the jail or chroot, don't let them out. */ if (cnp->cn_flags & ISDOTDOT) { if ((cnp->cn_flags & ISLASTCN) != 0 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EINVAL; goto bad; } for (;;) { if (dp == ndp->ni_rootdir || dp == ndp->ni_topdir || dp == rootvnode || ((dp->v_vflag & VV_ROOT) != 0 && (cnp->cn_flags & NOCROSSMOUNT) != 0)) { ndp->ni_dvp = dp; ndp->ni_vp = dp; vfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); goto nextname; } if ((dp->v_vflag & VV_ROOT) == 0) break; if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ error = EBADF; goto bad; } tdp = dp; dp = dp->v_mount->mnt_vnodecovered; tvfslocked = dvfslocked; dvfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); vput(tdp); VFS_UNLOCK_GIANT(tvfslocked); vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); } } /* * We now have a segment name to search for, and a directory to search. 
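* If the component is not found at the root of an MNT_UNION mount,
* the search falls through to the covered vnode underneath and
* retries (the unionlookup loop below).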
*/ unionlookup: #ifdef MAC if ((cnp->cn_flags & NOMACCHECK) == 0) { error = mac_check_vnode_lookup(td->td_ucred, dp, cnp); if (error) goto bad; } #endif ndp->ni_dvp = dp; ndp->ni_vp = NULL; ASSERT_VOP_LOCKED(dp, "lookup"); VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked)); /* * If we have a shared lock we may need to upgrade the lock for the * last operation. */ if (dp != vp_crossmp && VOP_ISLOCKED(dp, td) == LK_SHARED && (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT)) vn_lock(dp, LK_UPGRADE|LK_RETRY, td); /* * If we're looking up the last component and we need an exclusive * lock, adjust our lkflags. */ if ((cnp->cn_flags & (ISLASTCN|LOCKSHARED|LOCKLEAF)) == (ISLASTCN|LOCKLEAF)) cnp->cn_lkflags = LK_EXCLUSIVE; #ifdef NAMEI_DIAGNOSTIC vprint("lookup in", dp); #endif lkflags_save = cnp->cn_lkflags; cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags); if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { cnp->cn_lkflags = lkflags_save; KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); #ifdef NAMEI_DIAGNOSTIC printf("not found\n"); #endif if ((error == ENOENT) && (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { tdp = dp; dp = dp->v_mount->mnt_vnodecovered; tvfslocked = dvfslocked; dvfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); vput(tdp); VFS_UNLOCK_GIANT(tvfslocked); vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td); goto unionlookup; } if (error != EJUSTRETURN) goto bad; /* * If creating and at end of pathname, then can consider * allowing file to be created. */ if (rdonly) { error = EROFS; goto bad; } if (*cp == '\0' && trailing_slash && !(cnp->cn_flags & WILLBEDIR)) { error = ENOENT; goto bad; } if ((cnp->cn_flags & LOCKPARENT) == 0) VOP_UNLOCK(dp, 0, td); /* * This is a temporary assert to make sure I know what the * behavior here was. */ KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, ("lookup: Unhandled case.")); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory vnode in ndp->ni_dvp. */ if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } goto success; } else cnp->cn_lkflags = lkflags_save; #ifdef NAMEI_DIAGNOSTIC printf("found\n"); #endif /* * Take into account any additional components consumed by * the underlying filesystem. */ if (cnp->cn_consume > 0) { cnp->cn_nameptr += cnp->cn_consume; ndp->ni_next += cnp->cn_consume; ndp->ni_pathlen -= cnp->cn_consume; cnp->cn_consume = 0; } dp = ndp->ni_vp; vfslocked = VFS_LOCK_GIANT(dp->v_mount); /* * Check to see if the vnode has been mounted on; * if so find the root of the mounted filesystem. 
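* E.g. when "/usr" is a mountpoint, a lookup of "/usr/bin" busies the
* mounted filesystem and continues from the root vnode returned by
* VFS_ROOT(); vp_crossmp stands in for ni_dvp so that no real
* directory vnode remains locked across the transition.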
*/ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && (cnp->cn_flags & NOCROSSMOUNT) == 0) { if (vfs_busy(mp, 0, 0, td)) continue; vput(dp); VFS_UNLOCK_GIANT(vfslocked); vfslocked = VFS_LOCK_GIANT(mp); if (dp != ndp->ni_dvp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); VFS_UNLOCK_GIANT(dvfslocked); dvfslocked = 0; vref(vp_crossmp); ndp->ni_dvp = vp_crossmp; error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), &tdp, td); vfs_unbusy(mp, td); if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT, td)) panic("vp_crossmp exclusively locked or reclaimed"); if (error) { dpunlocked = 1; goto bad2; } ndp->ni_vp = dp = tdp; } /* * Check for symbolic link */ if ((dp->v_type == VLNK) && ((cnp->cn_flags & FOLLOW) || trailing_slash || *ndp->ni_next == '/')) { cnp->cn_flags |= ISSYMLINK; if (dp->v_iflag & VI_DOOMED) { /* We can't know whether the directory was mounted with * NOSYMFOLLOW, so we can't follow safely. */ error = EBADF; goto bad2; } if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { error = EACCES; goto bad2; } /* * Symlink code always expects an unlocked dvp. */ if (ndp->ni_dvp != ndp->ni_vp) VOP_UNLOCK(ndp->ni_dvp, 0, td); goto success; } /* * Check for bogus trailing slashes. */ if (trailing_slash && dp->v_type != VDIR) { error = ENOTDIR; goto bad2; } nextname: /* * Not a symbolic link. If more pathname, * continue at next component, else return. */ KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', ("lookup: invalid path state.")); if (*ndp->ni_next == '/') { cnp->cn_nameptr = ndp->ni_next; while (*cnp->cn_nameptr == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); VFS_UNLOCK_GIANT(dvfslocked); dvfslocked = vfslocked; /* dp becomes dvp in dirloop */ vfslocked = 0; goto dirloop; } /* * Disallow directory write attempts on read-only filesystems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EROFS; goto bad2; } if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } if (!wantparent) { if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); VFS_UNLOCK_GIANT(dvfslocked); dvfslocked = 0; } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) VOP_UNLOCK(ndp->ni_dvp, 0, td); if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG(vnode, dp, ARG_VNODE1); else if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG(vnode, dp, ARG_VNODE2); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp, 0, td); success: if (vfslocked && dvfslocked) VFS_UNLOCK_GIANT(dvfslocked); /* Only need one */ if (vfslocked || dvfslocked) ndp->ni_cnd.cn_flags |= GIANTHELD; return (0); bad2: if (dp != ndp->ni_dvp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); bad: if (!dpunlocked) vput(dp); VFS_UNLOCK_GIANT(vfslocked); VFS_UNLOCK_GIANT(dvfslocked); ndp->ni_cnd.cn_flags &= ~GIANTHELD; ndp->ni_vp = NULL; return (error); } /* * relookup - lookup a path name component - * Used by lookup to re-aquire things. + * Used by lookup to re-acquire things. */ int relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { struct thread *td = cnp->cn_thread; struct vnode *dp = 0; /* the directory we are searching */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int error = 0; KASSERT(cnp->cn_flags & ISLASTCN, ("relookup: Not given last component.")); /* * Setup: break out flag bits into variables. 
*/ wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); KASSERT(wantparent, ("relookup: parent not wanted.")); rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; dp = dvp; cnp->cn_lkflags = LK_EXCLUSIVE; vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, td); /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. */ #ifdef NAMEI_DIAGNOSTIC printf("{%s}: ", cnp->cn_nameptr); #endif /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, * e.g. like "/." or ".". */ if (cnp->cn_nameptr[0] == '\0') { if (cnp->cn_nameiop != LOOKUP || wantparent) { error = EISDIR; goto bad; } if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } if (!(cnp->cn_flags & LOCKLEAF)) VOP_UNLOCK(dp, 0, td); *vpp = dp; /* XXX This should probably move to the top of function. */ if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); return (0); } if (cnp->cn_flags & ISDOTDOT) panic ("relookup: lookup on dot-dot"); /* * We now have a segment name to search for, and a directory to search. */ #ifdef NAMEI_DIAGNOSTIC vprint("search in:", dp); #endif if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { KASSERT(*vpp == NULL, ("leaf should be empty")); if (error != EJUSTRETURN) goto bad; /* * If creating and at end of pathname, then can consider * allowing file to be created. */ if (rdonly) { error = EROFS; goto bad; } /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); if ((cnp->cn_flags & LOCKPARENT) == 0) VOP_UNLOCK(dp, 0, td); /* * This is a temporary assert to make sure I know what the * behavior here was. */ KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, ("relookup: Unhandled case.")); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory vnode in ndp->ni_dvp. */ return (0); } dp = *vpp; /* * Disallow directory write attempts on read-only filesystems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { if (dvp == dp) vrele(dvp); else vput(dvp); error = EROFS; goto bad; } /* * Set the parent lock/ref state to the requested state. */ if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { if (wantparent) VOP_UNLOCK(dvp, 0, td); else vput(dvp); } else if (!wantparent) vrele(dvp); /* * Check for symbolic link */ KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), ("relookup: symlink found.\n")); /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp, 0, td); return (0); bad: vput(dp); *vpp = NULL; return (error); } /* * Free data allocated by namei(); see namei(9) for details. 
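*
* A typical caller pattern (sketch, mirroring kern_alternate_path()
* below); NDF_ONLY_PNBUF frees just the pathname buffer:
*
*	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, path, td);
*	if ((error = namei(&nd)) != 0)
*		return (error);
*	NDFREE(&nd, NDF_ONLY_PNBUF);
*	...use nd.ni_vp...
*	vrele(nd.ni_vp);
*	VFS_UNLOCK_GIANT(NDHASGIANT(&nd));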
*/ void NDFREE(struct nameidata *ndp, const u_int flags) { int unlock_dvp; int unlock_vp; unlock_dvp = 0; unlock_vp = 0; if (!(flags & NDF_NO_FREE_PNBUF) && (ndp->ni_cnd.cn_flags & HASBUF)) { uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } if (!(flags & NDF_NO_VP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) unlock_vp = 1; if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { if (unlock_vp) { vput(ndp->ni_vp); unlock_vp = 0; } else vrele(ndp->ni_vp); ndp->ni_vp = NULL; } if (unlock_vp) VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_thread); if (!(flags & NDF_NO_DVP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKPARENT) && ndp->ni_dvp != ndp->ni_vp) unlock_dvp = 1; if (!(flags & NDF_NO_DVP_RELE) && (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { if (unlock_dvp) { vput(ndp->ni_dvp); unlock_dvp = 0; } else vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; } if (unlock_dvp) VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_thread); if (!(flags & NDF_NO_STARTDIR_RELE) && (ndp->ni_cnd.cn_flags & SAVESTART)) { vrele(ndp->ni_startdir); ndp->ni_startdir = NULL; } } /* * Determine if there is a suitable alternate filename under the specified * prefix for the specified path. If the create flag is set, then the * alternate prefix will be used so long as the parent directory exists. * This is used by the various compatibility ABIs so that Linux binaries prefer * files under /compat/linux for example. The chosen path (whether under * the prefix or under /) is returned in a kernel malloc'd buffer pointed * to by pathbuf. The caller is responsible for free'ing the buffer from * the M_TEMP bucket if one is returned. */ int kern_alternate_path(struct thread *td, const char *prefix, char *path, enum uio_seg pathseg, char **pathbuf, int create) { struct nameidata nd, ndroot; char *ptr, *buf, *cp; size_t len, sz; int error; buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); *pathbuf = buf; /* Copy the prefix into the new pathname as a starting point. */ len = strlcpy(buf, prefix, MAXPATHLEN); if (len >= MAXPATHLEN) { *pathbuf = NULL; free(buf, M_TEMP); return (EINVAL); } sz = MAXPATHLEN - len; ptr = buf + len; /* Append the filename to the prefix. */ if (pathseg == UIO_SYSSPACE) error = copystr(path, ptr, sz, &len); else error = copyinstr(path, ptr, sz, &len); if (error) { *pathbuf = NULL; free(buf, M_TEMP); return (error); } /* Only use a prefix with absolute pathnames. */ if (*ptr != '/') { error = EINVAL; goto keeporig; } /* * We know that there is a / somewhere in this pathname. * Search backwards for it, to find the file's parent dir * to see if it exists in the alternate tree. If it does * and we want to create a file (cflag is set), we don't * need to worry about the root comparison in this case. */ if (create) { for (cp = &ptr[len] - 1; *cp != '/'; cp--); *cp = '\0'; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); error = namei(&nd); *cp = '/'; if (error != 0) goto keeporig; } else { NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); error = namei(&nd); if (error != 0) goto keeporig; /* * We now compare the vnode of the prefix to the vnode * that was asked for. If they resolve to be the same, then we * ignore the match so that the real root gets used. * This avoids the problem of traversing "../.." to find the * root directory and never finding it, because "/" resolves * to the emulation root directory. This is expensive :-( */ NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix, td); /* We shouldn't ever get an error from this namei().
*/ error = namei(&ndroot); if (error == 0) { if (nd.ni_vp == ndroot.ni_vp) error = ENOENT; NDFREE(&ndroot, NDF_ONLY_PNBUF); vrele(ndroot.ni_vp); VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot)); } } NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(NDHASGIANT(&nd)); keeporig: /* If there was an error, use the original path name. */ if (error) bcopy(ptr, buf, len); return (error); } Index: head/sys/kern/vfs_subr.c =================================================================== --- head/sys/kern/vfs_subr.c (revision 170034) +++ head/sys/kern/vfs_subr.c (revision 170035) @@ -1,3967 +1,3967 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static MALLOC_DEFINE(M_NETADDR, "subr_export_host", "Export host address structure"); static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void vbusy(struct vnode *vp); static void vinactive(struct vnode *, struct thread *); static void v_incr_usecount(struct vnode *); static void v_decr_usecount(struct vnode *); static void v_decr_useonly(struct vnode *); static void v_upgrade_usecount(struct vnode *); static void vfree(struct vnode *); static void vnlru_free(int); static void vdestroy(struct vnode *); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static int vfs_knllocked(void *arg); /* * Enable Giant pushdown based on whether or not the vm is mpsafe in this * build. Without mpsafevm the buffer cache can not run Giant free. */ int mpsafe_vfs = 1; TUNABLE_INT("debug.mpsafevfs", &mpsafe_vfs); SYSCTL_INT(_debug, OID_AUTO, mpsafevfs, CTLFLAG_RD, &mpsafe_vfs, 0, "MPSAFE VFS"); /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased on vdestroy() called on VI_DOOMed * vnode. */ static unsigned long numvnodes; SYSCTL_LONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); /* * Conversion tables for conversion from vnode types to inode formats * and back. */ enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[10] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT }; /* * List of vnodes that are ready for recycling. */ static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* * Free vnode target. Free vnodes may simply be files which have been stat'd * but not read. This is somewhat common, and a small cache of such files * should be kept to avoid recreation costs. */ static u_long wantfreevnodes; SYSCTL_LONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); /* Number of vnodes in the free list. */ static u_long freevnodes; SYSCTL_LONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); /* * Various variables used for debugging the new implementation of * reassignbuf(). * XXX these are probably of (very) limited utility now. */ static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); /* * Cache for the mount type id assigned to NFS. This is used for * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c. 
*/ int nfs_mount_type = -1; /* To keep more than one thread at a time from running vfs_getnewfsid */ static struct mtx mntid_mtx; /* * Lock for any access to the following: * vnode_free_list * numvnodes * freevnodes */ static struct mtx vnode_free_list_mtx; /* Publicly exported FS */ struct nfs_public nfs_pub; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; static uma_zone_t vnodepoll_zone; /* Set to 1 to print out reclaim of active vnodes */ int prtactive; /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted block devices * are delayed only about half the time that file data is delayed. * Similarly, directory updates are more critical, so they are delayed * only about a third of the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* * Number of vnodes we want to exist at any one time. This is mostly used * to size hash tables in vnode-related code. It is normally not used in * getnewvnode(), as wantfreevnodes is normally nonzero. * * XXX desiredvnodes is historical cruft and should not exist.
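 *
 * desiredvnodes is exported read-write as the kern.maxvnodes sysctl
 * declared below, so it can be retuned at run time; e.g. (the value is
 * illustrative only):
 *
 *	sysctl kern.maxvnodes=100000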
*/ int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "Maximum number of vnodes"); SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Minimum number of vnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); /* * Macros to control when a vnode is freed and recycled. All require * the vnode interlock. */ #define VCANRECYCLE(vp) (((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) #define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) #define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt) /* * Initialize the vnode management data structures. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX 100000 #endif static void vntblinit(void *dummy __unused) { /* * Desiredvnodes is a function of the physical memory size and * the kernel's heap size. Specifically, desiredvnodes scales * in proportion to the physical memory size until two fifths * of the kernel's heap size is consumed by vnodes and vm * objects. */ desiredvnodes = min(maxproc + VMCNT_GET(page_count) / 4, 2 * vm_kmem_size / (5 * (sizeof(struct vm_object) + sizeof(struct vnode)))); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %d -> %d\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_free_list); mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL) /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. */ int vfs_busy(struct mount *mp, int flags, struct mtx *interlkp, struct thread *td) { int lkflags; MNT_ILOCK(mp); MNT_REF(mp); if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) { MNT_REL(mp); MNT_IUNLOCK(mp); return (ENOENT); } if (interlkp) mtx_unlock(interlkp); mp->mnt_kern_flag |= MNTK_MWAIT; /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ msleep(mp, MNT_MTX(mp), PVFS, "vfs_busy", 0); MNT_REL(mp); MNT_IUNLOCK(mp); if (interlkp) mtx_lock(interlkp); return (ENOENT); } if (interlkp) mtx_unlock(interlkp); lkflags = LK_SHARED | LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, MNT_MTX(mp), td)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(struct mount *mp, struct thread *td) { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td); vfs_rel(mp); } /* * Lookup a mount point by filesystem identifier. 
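 *
 * The mount point, if found, is returned with a reference taken via
 * vfs_ref(), and the caller is expected to vfs_rel() it when done. A
 * typical (hypothetical) caller maps a file handle's fsid and treats
 * a miss as a stale handle:
 *
 *	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
 *		return (ESTALE);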
*/ struct mount * vfs_getvfs(fsid_t *fsid) { struct mount *mp; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { vfs_ref(mp); mtx_unlock(&mountlist_mtx); return (mp); } } mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options. */ int vfs_suser(struct mount *mp, struct thread *td) { int error; /* * If the thread is jailed, but this is not a jail-friendly file * system, deny immediately. */ if (jailed(td->td_ucred) && !(mp->mnt_vfc->vfc_flags & VFCF_JAIL)) return (EPERM); /* * If the file system was mounted outside a jail and a jailed thread * tries to access it, deny immediately. */ if (!jailed(mp->mnt_cred) && jailed(td->td_ucred)) return (EPERM); /* * If the file system was mounted inside a different jail than the jail of * the calling thread, deny immediately. */ if (jailed(mp->mnt_cred) && jailed(td->td_ucred) && mp->mnt_cred->cr_prison != td->td_ucred->cr_prison) { return (EPERM); } if ((mp->mnt_flag & MNT_USER) == 0 || mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) to make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(struct mount *mp) { static u_int16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_SEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, &timestamp_precision, 0, ""); /* * Get a current timestamp.
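 *
 * The precision knob above is a writable sysctl, so full nanosecond
 * resolution can be selected at run time, at the cost of a nanotime()
 * call per timestamp:
 *
 *	sysctl vfs.timestamp_precision=3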
*/ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed: * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desirable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. */ static int vlrureclaim(struct mount *mp) { struct thread *td; struct vnode *vp; int done; int trigger; int usevnodes; int count; /* * Calculate the trigger point; don't allow user * screwups to blow us up. This prevents us from * recycling vnodes with lots of resident pages. We * aren't trying to free memory, we are trying to * free vnodes. */ usevnodes = desiredvnodes; if (usevnodes <= 0) usevnodes = 1; trigger = VMCNT_GET(page_count) * 2 / usevnodes; done = 0; td = curthread; vn_start_write(NULL, &mp, V_WAIT); MNT_ILOCK(mp); count = mp->mnt_nvnodelistsize / 10 + 1; while (count != 0) { vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); if (vp == NULL) break; TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); --count; if (!VI_TRYLOCK(vp)) goto next_iter; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. */ if (vp->v_usecount || !LIST_EMPTY(&(vp)->v_cache_src) || (vp->v_iflag & VI_DOOMED) != 0 || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } MNT_IUNLOCK(mp); vholdl(vp); if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT, td)) { vdrop(vp); goto next_iter_mntunlocked; } VI_LOCK(vp); /* * v_usecount may have been bumped after VOP_LOCK() dropped * the vnode interlock and before it was locked again. * * It is not necessary to recheck VI_DOOMED because it can * only be set by another thread that holds both the vnode * lock and vnode interlock.
If another thread has the * vnode lock before we get to VOP_LOCK() and obtains the * vnode interlock after VOP_LOCK() drops the vnode * interlock, the other thread will be unable to drop the * vnode lock before our VOP_LOCK() call fails. */ if (vp->v_usecount || !LIST_EMPTY(&(vp)->v_cache_src) || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp, LK_INTERLOCK, td); goto next_iter_mntunlocked; } KASSERT((vp->v_iflag & VI_DOOMED) == 0, ("VI_DOOMED unexpectedly detected in vlrureclaim()")); vgonel(vp); VOP_UNLOCK(vp, 0, td); vdropl(vp); done++; next_iter_mntunlocked: if ((count % 256) != 0) goto relock_mnt; goto yield; next_iter: if ((count % 256) != 0) continue; MNT_IUNLOCK(mp); yield: uio_yield(); relock_mnt: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); vn_finished_write(mp); return done; } /* * Attempt to keep the free list at wantfreevnodes length. */ static void vnlru_free(int count) { struct vnode *vp; int vfslocked; mtx_assert(&vnode_free_list_mtx, MA_OWNED); for (; count > 0; count--) { vp = TAILQ_FIRST(&vnode_free_list); /* * The list can be modified while the free_list_mtx * has been dropped and vp could be NULL here. */ if (!vp) break; VNASSERT(vp->v_op != NULL, vp, ("vnlru_free: vnode already reclaimed.")); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* * Don't recycle if we can't get the interlock. */ if (!VI_TRYLOCK(vp)) { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); continue; } VNASSERT(VCANRECYCLE(vp), vp, ("vp inconsistent on freelist")); freevnodes--; vp->v_iflag &= ~VI_FREE; vholdl(vp); mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vtryrecycle(vp); VFS_UNLOCK_GIANT(vfslocked); /* * If the recycle succeeded, this vdrop will actually free * the vnode. If not, it will simply place it back on * the free list. */ vdrop(vp); mtx_lock(&vnode_free_list_mtx); } } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrureclaim() from the bowels of filesystem code has some * interesting deadlock problems.
*/ static struct proc *vnlruproc; static int vnlruproc_sig; static void vnlru_proc(void) { struct mount *mp, *nmp; int done; struct proc *p = vnlruproc; struct thread *td = FIRST_THREAD_IN_PROC(p); mtx_lock(&Giant); EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p, SHUTDOWN_PRI_FIRST); for (;;) { kthread_suspend_check(p); mtx_lock(&vnode_free_list_mtx); if (freevnodes > wantfreevnodes) vnlru_free(freevnodes - wantfreevnodes); if (numvnodes <= desiredvnodes * 9 / 10) { vnlruproc_sig = 0; wakeup(&vnlruproc_sig); msleep(vnlruproc, &vnode_free_list_mtx, PVFS|PDROP, "vlruwt", hz); continue; } mtx_unlock(&vnode_free_list_mtx); done = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { int vfsunlocked; if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if (!VFS_NEEDSGIANT(mp)) { mtx_unlock(&Giant); vfsunlocked = 1; } else vfsunlocked = 0; done += vlrureclaim(mp); if (vfsunlocked) mtx_lock(&Giant); mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); if (done == 0) { EVENTHANDLER_INVOKE(vfs_lowvnodes, desiredvnodes / 10); #if 0 /* These messages are temporary debugging aids */ if (vnlru_nowhere < 5) printf("vnlru process getting nowhere..\n"); else if (vnlru_nowhere == 5) printf("vnlru process messages stopped.\n"); #endif vnlru_nowhere++; tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); } else uio_yield(); } } static struct kproc_desc vnlru_kp = { "vnlru", vnlru_proc, &vnlruproc }; SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp) /* * Routines having to do with the management of the vnode table. */ static void vdestroy(struct vnode *vp) { struct bufobj *bo; CTR1(KTR_VFS, "vdestroy vp %p", vp); mtx_lock(&vnode_free_list_mtx); numvnodes--; mtx_unlock(&vnode_free_list_mtx); bo = &vp->v_bufobj; VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("cleaned vnode still on the free list.")); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VI_UNLOCK(vp); #ifdef MAC mac_destroy_vnode(vp); #endif if (vp->v_pollinfo != NULL) { knlist_destroy(&vp->v_pollinfo->vpi_selinfo.si_note); mtx_destroy(&vp->v_pollinfo->vpi_lock); uma_zfree(vnodepoll_zone, vp->v_pollinfo); } #ifdef INVARIANTS /* XXX Elsewhere we can detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); uma_zfree(vnode_zone, vp); } /* * Try to recycle a freed vnode. We abort if anyone picks up a reference * before we actually vgone(). This function must be called with the vnode * held to prevent the vnode from being returned to the free list midway * through vgone(). 
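 *
 * The expected calling pattern, as in vnlru_free() above, is roughly:
 *
 *	vholdl(vp);		(keeps vp off the free list)
 *	VI_UNLOCK(vp);
 *	vtryrecycle(vp);
 *	vdrop(vp);		(frees vp if it was doomed)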
*/ static int vtryrecycle(struct vnode *vp) { struct thread *td = curthread; struct mount *vnmp; CTR1(KTR_VFS, "vtryrecycle: trying vp %p", vp); VNASSERT(vp->v_holdcnt, vp, ("vtryrecycle: Recycling vp %p without a reference.", vp)); /* * This vnode may be found and locked via some other list; if so, we * can't recycle it yet. */ if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT, td) != 0) return (EWOULDBLOCK); /* * Don't recycle if its filesystem is being suspended. */ if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) { VOP_UNLOCK(vp, 0, td); return (EBUSY); } /* * If we got this far, we need to acquire the interlock and see if * anyone picked up this vnode from another list. If not, we will * mark it with DOOMED via vgonel() so that anyone who does find it * will skip over it. */ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp, LK_INTERLOCK, td); vn_finished_write(vnmp); return (EBUSY); } if ((vp->v_iflag & VI_DOOMED) == 0) vgonel(vp); VOP_UNLOCK(vp, LK_INTERLOCK, td); vn_finished_write(vnmp); CTR1(KTR_VFS, "vtryrecycle: recycled vp %p", vp); return (0); } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp = NULL; struct bufobj *bo; mtx_lock(&vnode_free_list_mtx); /* * Lend our context to reclaim vnodes if they've exceeded the max. */ if (freevnodes > wantfreevnodes) vnlru_free(1); /* * Wait for available vnodes. */ if (numvnodes > desiredvnodes) { if (mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)) { /* * File system is being suspended; we cannot risk a * deadlock here, so allocate a new vnode anyway. */ if (freevnodes > wantfreevnodes) vnlru_free(freevnodes - wantfreevnodes); goto alloc; } if (vnlruproc_sig == 0) { vnlruproc_sig = 1; /* avoid unnecessary wakeups */ wakeup(vnlruproc); } msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS, "vlruwk", hz); #if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ if (numvnodes > desiredvnodes) { mtx_unlock(&vnode_free_list_mtx); return (ENFILE); } #endif } alloc: numvnodes++; mtx_unlock(&vnode_free_list_mtx); vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems * opt-in. */ lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE); /* * Initialize bufobj. */ bo = &vp->v_bufobj; bo->__bo_vnode = vp; bo->bo_mtx = &vp->v_interlock; bo->bo_ops = &buf_ops_bio; bo->bo_private = vp; TAILQ_INIT(&bo->bo_clean.bv_hd); TAILQ_INIT(&bo->bo_dirty.bv_hd); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Finalize various vnode identity bits. */ vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; v_incr_usecount(vp); vp->v_data = 0; #ifdef MAC mac_init_vnode(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_associate_vnode_singlelabel(mp, vp); else if (mp == NULL) printf("NULL mp in getnewvnode()\n"); #endif if (mp != NULL) { bo->bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } CTR2(KTR_VFS, "getnewvnode: mp %p vp %p", mp, vp); *vpp = vp; return (0); } /* * Delete from old mount point vnode list, if on one.
*/ static void delmntque(struct vnode *vp) { struct mount *mp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); vp->v_mount = NULL; VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { struct thread *td; td = curthread; /* XXX ? */ vp->v_data = NULL; vp->v_op = &dead_vnodeops; /* XXX non mp-safe fs may still call insmntque with vnode unlocked */ if (!VOP_ISLOCKED(vp, td)) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 && mp->mnt_nvnodelistsize == 0) { MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, struct thread *td, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT, td)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); if (bo->bo_object != NULL) { VM_OBJECT_LOCK(bo->bo_object); vm_object_pip_wait(bo->bo_object, "bovlbx"); VM_OBJECT_UNLOCK(bo->bo_object); } BO_LOCK(bo); } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL) { VM_OBJECT_LOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); VM_OBJECT_UNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. 
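 *
 * Callers that must not fail typically try to flush the buffers out
 * first and fall back to discarding them, as vgonel() does later in
 * this file:
 *
 *	if (vinvalbuf(vp, V_SAVE, td, 0, 0) != 0)
 *		vinvalbuf(vp, 0, td, 0, 0);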
*/ int vinvalbuf(struct vnode *vp, int flags, struct thread *td, int slpflag, int slptimeo) { CTR2(KTR_VFS, "vinvalbuf vp %p flags %d", vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); return (bufobj_invalbuf(&vp->v_bufobj, flags, td, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. * */ static int flushbuflist( struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_LOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { if (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA)) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0)) { continue; } lblkno = 0; xflags = 0; if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); if (bp->b_bufobj != bo) { /* XXX: necessary ? */ BUF_UNLOCK(bp); BO_LOCK(bo); return (EAGAIN); } /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); if (nbp != NULL && (nbp->b_bufobj != bo || nbp->b_lblkno != lblkno || (nbp->b_xflags & (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN)) != xflags)) break; /* nbp invalid */ } return (retval); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td, off_t length, int blksize) { struct buf *bp, *nbp; int anyfreed; int trunclbn; struct bufobj *bo; CTR2(KTR_VFS, "vtruncbuf vp %p length %jd", vp, length); /* * Round up to the *next* lbn. 
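 * For example, with blksize 16384 a length of 1 yields trunclbn 1:
 * block 0, which still holds a valid byte, is kept, while every buffer
 * with b_lblkno >= 1 is invalidated by the loops below.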
*/ trunclbn = (length + blksize - 1) / blksize; ASSERT_VOP_LOCKED(vp, "vtruncbuf"); restart: VI_LOCK(vp); bo = &vp->v_bufobj; anyfreed = 1; for (;anyfreed;) { anyfreed = 0; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { goto restart; } VI_LOCK(vp); } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { goto restart; } VI_LOCK(vp); } } if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) { goto restart; } VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); bremfree(bp); bawrite(bp); VI_LOCK(vp); goto restartsync; } } bufobj_wwait(bo, 0, 0); VI_UNLOCK(vp); vnode_pager_setsize(vp, length); return (0); } /* * buf_splay() - splay tree core for the clean/dirty list of buffers in * a vnode. * * NOTE: We have to deal with the special case of a background bitmap * buffer, a situation where two buffers will have the same logical * block offset. We want (1) only the foreground buffer to be accessed * in a lookup and (2) must differentiate between the foreground and * background buffer in the splay tree algorithm because the splay * tree cannot normally handle multiple entities with the same 'index'. * We accomplish this by adding differentiating flags to the splay tree's * numerical domain. */ static struct buf * buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root) { struct buf dummy; struct buf *lefttreemax, *righttreemin, *y; if (root == NULL) return (NULL); lefttreemax = righttreemin = &dummy; for (;;) { if (lblkno < root->b_lblkno || (lblkno == root->b_lblkno && (xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { if ((y = root->b_left) == NULL) break; if (lblkno < y->b_lblkno) { /* Rotate right. */ root->b_left = y->b_right; y->b_right = root; root = y; if ((y = root->b_left) == NULL) break; } /* Link into the new root's right tree. */ righttreemin->b_left = root; righttreemin = root; } else if (lblkno > root->b_lblkno || (lblkno == root->b_lblkno && (xflags & BX_BKGRDMARKER) > (root->b_xflags & BX_BKGRDMARKER))) { if ((y = root->b_right) == NULL) break; if (lblkno > y->b_lblkno) { /* Rotate left. */ root->b_right = y->b_left; y->b_left = root; root = y; if ((y = root->b_right) == NULL) break; } /* Link into the new root's left tree. */ lefttreemax->b_right = root; lefttreemax = root; } else { break; } root = y; } /* Assemble the new root. 
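 * At this point lefttreemax is the rightmost node of the assembled
 * left tree and righttreemin is the leftmost node of the assembled
 * right tree, while dummy.b_right and dummy.b_left hold those trees'
 * roots; the four assignments below stitch both trees under the new
 * root.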
*/ lefttreemax->b_right = root->b_left; righttreemin->b_left = root->b_right; root->b_left = dummy.b_right; root->b_right = dummy.b_left; return (root); } static void buf_vlist_remove(struct buf *bp) { struct buf *root; struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_LOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; if (bp != bv->bv_root) { root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root); KASSERT(root == bp, ("splay lookup failed in remove")); } if (bp->b_left == NULL) { root = bp->b_right; } else { root = buf_splay(bp->b_lblkno, bp->b_xflags, bp->b_left); root->b_right = bp->b_right; } bv->bv_root = root; TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list using a * splay tree algorithm. * * NOTE: xflags is passed as a constant, optimizing this inline function! */ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct buf *root; struct bufv *bv; ASSERT_BO_LOCKED(bo); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root); if (root == NULL) { bp->b_left = NULL; bp->b_right = NULL; TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); } else if (bp->b_lblkno < root->b_lblkno || (bp->b_lblkno == root->b_lblkno && (bp->b_xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { bp->b_left = root->b_left; bp->b_right = root; root->b_left = NULL; TAILQ_INSERT_BEFORE(root, bp, b_bobufs); } else { bp->b_right = root->b_right; bp->b_left = root; root->b_right = NULL; TAILQ_INSERT_AFTER(&bv->bv_hd, root, bp, b_bobufs); } bv->bv_cnt++; bv->bv_root = bp; } /* * Lookup a buffer using the splay tree. Note that we specifically avoid * shadow buffers used in background bitmap writes. * * This code isn't quite as efficient as it could be because we are maintaining * two sorted lists and do not know which list the block resides in. * * During a "make buildworld" the desired buffer is found at one of * the roots more than 60% of the time. Thus, checking both roots * before performing either splay eliminates unnecessary splays on the * first tree splayed. */ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); if ((bp = bo->bo_dirty.bv_root) != NULL && bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); if ((bp = bo->bo_clean.bv_root) != NULL) { bo->bo_clean.bv_root = bp = buf_splay(lblkno, 0, bp); if (bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); } if ((bp = bo->bo_dirty.bv_root) != NULL) { bo->bo_dirty.bv_root = bp = buf_splay(lblkno, 0, bp); if (bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); } return (NULL); } /* * Associate a buffer with a vnode.
*/ void bgetvp(struct vnode *vp, struct buf *bp) { VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); ASSERT_VI_LOCKED(vp, "bgetvp"); vholdl(vp); if (VFS_NEEDSGIANT(vp->v_mount) || vp->v_bufobj.bo_flag & BO_NEEDSGIANT) bp->b_flags |= B_NEEDSGIANT; bp->b_vp = vp; bp->b_bufobj = &vp->v_bufobj; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, &vp->v_bufobj, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_flags &= ~B_NEEDSGIANT; bp->b_vp = NULL; bp->b_bufobj = NULL; vdropl(vp); } /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int slot; ASSERT_BO_LOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) static int sync_vnode(struct bufobj *bo, struct thread *td) { struct vnode *vp; struct mount *mp; vp = bo->__bo_vnode; /* XXX */ if (VOP_ISLOCKED(vp, NULL) != 0) return (1); if (VI_TRYLOCK(vp) == 0) return (1); /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. */ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); mtx_lock(&sync_mtx); return (1); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); VI_LOCK(vp); if ((bo->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(bo, syncdelay); } vdropl(vp); mtx_lock(&sync_mtx); return (0); } /* * System filesystem synchronizer daemon. 
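 *
 * A simplified sketch of one once-per-second pass of the loop below
 * (the real loop also handles shutdown and requeueing of busy
 * vnodes):
 *
 *	slp = &syncer_workitem_pending[syncer_delayno];
 *	syncer_delayno = (syncer_delayno + 1) % syncer_maxdelay;
 *	while ((bo = LIST_FIRST(slp)) != NULL)
 *		sync_vnode(bo, td);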
*/ static void sched_sync(void) { struct synclist *next; struct synclist *slp; struct bufobj *bo; long starttime; struct thread *td = FIRST_THREAD_IN_PROC(updateproc); static int dummychan; int last_work_seen; int net_worklist_len; int syncer_final_iter; int first_printf; int error; mtx_lock(&Giant); last_work_seen = 0; syncer_final_iter = 0; first_printf = 1; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kthread_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining..."); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[syncer_delayno]; /* * If the worklist has wrapped since * it was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second. */ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while ((bo = LIST_FIRST(slp)) != NULL) { error = sync_vnode(bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (syncer_state != SYNCER_RUNNING) msleep(&dummychan, &sync_mtx, PPAUSE, "syncfnl", hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) msleep(&lbolt, &sync_mtx, PPAUSE, "syncer", 0); } } /* * Request the syncer daemon to speed up its work.
* We never push it to speed up more than half of its * normal turn time; otherwise it could take over the cpu. */ int speedup_syncer(void) { struct thread *td; int ret = 0; td = FIRST_THREAD_IN_PROC(updateproc); sleepq_remove(td, &lbolt); mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); return (ret); } /* * Tell the syncer to speed up its work and run through its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { struct thread *td; if (howto & RB_NOSYNC) return; td = FIRST_THREAD_IN_PROC(updateproc); sleepq_remove(td, &lbolt); mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); kproc_shutdown(arg, howto); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ VI_LOCK(vp); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif VI_UNLOCK(vp); } /* * Increment the use and hold counts on the vnode, taking care to reference * the driver's usecount if this is a chardev. The vholdl() will remove * the vnode from the free list if it is presently free. Requires the * vnode interlock and returns with it held. */ static void v_incr_usecount(struct vnode *vp) { CTR3(KTR_VFS, "v_incr_usecount: vp %p holdcnt %d usecount %d\n", vp, vp->v_holdcnt, vp->v_usecount); vp->v_usecount++; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } vholdl(vp); } /* * Turn a holdcnt into a use+holdcnt such that only one call to * v_decr_usecount is needed.
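 *
 * vget() below depends on this: it takes a hold before possibly
 * sleeping in vn_lock() and, once the lock is granted, converts that
 * hold into a full reference:
 *
 *	vholdl(vp);
 *	vn_lock(vp, flags | LK_INTERLOCK, td);
 *	VI_LOCK(vp);
 *	v_upgrade_usecount(vp);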
*/ static void v_upgrade_usecount(struct vnode *vp) { CTR3(KTR_VFS, "v_upgrade_usecount: vp %p holdcnt %d usecount %d\n", vp, vp->v_holdcnt, vp->v_usecount); vp->v_usecount++; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement the vnode use and hold count along with the driver's usecount * if this is a chardev. The vdropl() below releases the vnode interlock * as it may free the vnode. */ static void v_decr_usecount(struct vnode *vp) { CTR3(KTR_VFS, "v_decr_usecount: vp %p holdcnt %d usecount %d\n", vp, vp->v_holdcnt, vp->v_usecount); ASSERT_VI_LOCKED(vp, __FUNCTION__); VNASSERT(vp->v_usecount > 0, vp, ("v_decr_usecount: negative usecount")); vp->v_usecount--; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } vdropl(vp); } /* * Decrement only the use count and driver use count. This is intended to * be paired with a follow on vdropl() to release the remaining hold count. * In this way we may vgone() a vnode with a 0 usecount without risk of * having it end up on a free list because the hold count is kept above 0. */ static void v_decr_useonly(struct vnode *vp) { CTR3(KTR_VFS, "v_decr_useonly: vp %p holdcnt %d usecount %d\n", vp, vp->v_holdcnt, vp->v_usecount); ASSERT_VI_LOCKED(vp, __FUNCTION__); VNASSERT(vp->v_usecount > 0, vp, ("v_decr_useonly: negative usecount")); vp->v_usecount--; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new filesystem type). */ int vget(struct vnode *vp, int flags, struct thread *td) { int oweinact; int oldflags; int error; error = 0; oldflags = flags; oweinact = 0; VFS_ASSERT_GIANT(vp->v_mount); if ((flags & LK_INTERLOCK) == 0) VI_LOCK(vp); /* * If the inactive call was deferred because vput() was called * with a shared lock, we have to do it here before another thread * gets a reference to data that should be dead. */ if (vp->v_iflag & VI_OWEINACT) { if (flags & LK_NOWAIT) { VI_UNLOCK(vp); return (EBUSY); } flags &= ~LK_TYPE_MASK; flags |= LK_EXCLUSIVE; oweinact = 1; } vholdl(vp); if ((error = vn_lock(vp, flags | LK_INTERLOCK, td)) != 0) { vdrop(vp); return (error); } VI_LOCK(vp); /* Upgrade our holdcnt to a usecount. */ v_upgrade_usecount(vp); if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0) panic("vget: vn_lock failed to return ENOENT\n"); if (oweinact) { if (vp->v_iflag & VI_OWEINACT) vinactive(vp, td); VI_UNLOCK(vp); if ((oldflags & LK_TYPE_MASK) == 0) VOP_UNLOCK(vp, 0, td); } else VI_UNLOCK(vp); return (0); } /* * Increase the reference count of a vnode. */ void vref(struct vnode *vp) { VI_LOCK(vp); v_incr_usecount(vp); VI_UNLOCK(vp); } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism other * than the VI lock is used to stop other processes from gaining references * to the vnode. This may be the case if the caller holds the only reference. * This is also useful when stale data is acceptable as race conditions may * be accounted for by some other means. 
*/ int vrefcnt(struct vnode *vp) { int usecnt; VI_LOCK(vp); usecnt = vp->v_usecount; VI_UNLOCK(vp); return (usecnt); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { struct thread *td = curthread; /* XXX */ KASSERT(vp != NULL, ("vrele: null vp")); VFS_ASSERT_GIANT(vp->v_mount); VI_LOCK(vp); /* Skip this v_writecount check if we're going to panic below. */ VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp, ("vrele: missed vn_close")); if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) && vp->v_usecount == 1)) { v_decr_usecount(vp); return; } if (vp->v_usecount != 1) { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); #endif VI_UNLOCK(vp); panic("vrele: negative ref cnt"); } /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. */ v_decr_useonly(vp); /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. */ vp->v_iflag |= VI_OWEINACT; if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td) == 0) { VI_LOCK(vp); if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; if (vp->v_iflag & VI_OWEINACT) vinactive(vp, td); VOP_UNLOCK(vp, 0, td); } else { VI_LOCK(vp); if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; } vdropl(vp); } /* * Release an already locked vnode. This gives the same effect as * unlock+vrele(), but takes less time and avoids releasing and - * re-aquiring the lock (as vrele() aquires the lock internally.) + * re-acquiring the lock (as vrele() acquires the lock internally.) */ void vput(struct vnode *vp) { struct thread *td = curthread; /* XXX */ int error; KASSERT(vp != NULL, ("vput: null vp")); ASSERT_VOP_LOCKED(vp, "vput"); VFS_ASSERT_GIANT(vp->v_mount); VI_LOCK(vp); /* Skip this v_writecount check if we're going to panic below. */ VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp, ("vput: missed vn_close")); error = 0; if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) && vp->v_usecount == 1)) { VOP_UNLOCK(vp, 0, td); v_decr_usecount(vp); return; } if (vp->v_usecount != 1) { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. */ v_decr_useonly(vp); vp->v_iflag |= VI_OWEINACT; if (VOP_ISLOCKED(vp, NULL) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_EXCLUPGRADE|LK_INTERLOCK|LK_NOWAIT, td); VI_LOCK(vp); if (error) { if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; goto done; } } if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; if (vp->v_iflag & VI_OWEINACT) vinactive(vp, td); VOP_UNLOCK(vp, 0, td); done: vdropl(vp); } /* * Somebody doesn't want the vnode recycled. */ void vhold(struct vnode *vp) { VI_LOCK(vp); vholdl(vp); VI_UNLOCK(vp); } void vholdl(struct vnode *vp) { vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); } /* * Note that there is one less who cares about this vnode. vdrop() is the * opposite of vhold(). */ void vdrop(struct vnode *vp) { VI_LOCK(vp); vdropl(vp); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we will free it if it has been vgone'd; otherwise it is * placed on the free list.
*/ void vdropl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, "vdropl"); if (vp->v_holdcnt <= 0) panic("vdrop: holdcnt %d", vp->v_holdcnt); vp->v_holdcnt--; if (vp->v_holdcnt == 0) { if (vp->v_iflag & VI_DOOMED) { vdestroy(vp); return; } else vfree(vp); } VI_UNLOCK(vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. * OWEINACT tracks whether a vnode missed a call to inactive due to a * failed lock upgrade. */ static void vinactive(struct vnode *vp, struct thread *td) { ASSERT_VOP_LOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); VOP_INACTIVE(vp, td); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones; * return error if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush() * will call vrele() on the root vnode exactly rootrefs times. * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush( struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR1(KTR_VFS, "vflush: mp %p", mp); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp, td)) != 0) return (error); vput(rootvp); } MNT_ILOCK(mp); loop: MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); vholdl(vp); MNT_IUNLOCK(mp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE, td); if (error) { vdrop(vp); MNT_ILOCK(mp); MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); goto loop; } /* * Skip over vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp, 0, td); vdrop(vp); MNT_ILOCK(mp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing. */ if (flags & WRITECLOSE) { error = VOP_GETATTR(vp, &vattr, td->td_ucred, td); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount == 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp, 0, td); vdropl(vp); MNT_ILOCK(mp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode.
*/ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { VNASSERT(vp->v_usecount == 0 || (vp->v_type != VCHR && vp->v_type != VBLK), vp, ("device VNODE %p is FORCECLOSED", vp)); vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif } VOP_UNLOCK(vp, 0, td); vdropl(vp); MNT_ILOCK(mp); } MNT_IUNLOCK(mp); if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. */ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK, td); vgone(rootvp); VOP_UNLOCK(rootvp, 0, td); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) return (EBUSY); for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp, struct thread *td) { int recycled; ASSERT_VOP_LOCKED(vp, "vrecycle"); recycled = 0; VI_LOCK(vp); if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } VI_UNLOCK(vp); return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } /* * vgone, with the vp interlock held. */ void vgonel(struct vnode *vp) { struct thread *td; int oweinact; int active; struct mount *mp; CTR1(KTR_VFS, "vgonel: vp %p", vp); ASSERT_VOP_LOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_iflag & VI_DOOMED) return; vp->v_iflag |= VI_DOOMED; /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, td, 0, 0) != 0) vinvalbuf(vp, 0, td, 0, 0); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); if ((vp->v_iflag & VI_DOINGINACT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p, tag=%s", vp, vp->v_tag)); /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_tag = "none"; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Same as above, but using the struct cdev * as the argument */ int count_dev(struct cdev *dev) { int count; dev_lock(); count = dev->si_usecount; dev_unlock(); return(count); } /* * Print out a description of a vnode.
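 *
 * A usage sketch (illustrative only):
 *
 *	vn_printf(vp, "%s: ", __func__);
 *
 * prints the formatted prefix followed by the vnode description,
 * flags and lock state produced below.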
*/ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) { va_list ap; char buf[96]; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("tag %s, type %s\n", vp->v_tag, typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d mountedhere %p\n", vp->v_usecount, vp->v_writecount, vp->v_holdcnt, vp->v_mountedhere); buf[0] = '\0'; buf[1] = '\0'; if (vp->v_vflag & VV_ROOT) strcat(buf, "|VV_ROOT"); if (vp->v_vflag & VV_TEXT) strcat(buf, "|VV_TEXT"); if (vp->v_vflag & VV_SYSTEM) strcat(buf, "|VV_SYSTEM"); if (vp->v_vflag & VV_DELETED) strcat(buf, "|VV_DELETED"); if (vp->v_iflag & VI_DOOMED) strcat(buf, "|VI_DOOMED"); if (vp->v_iflag & VI_FREE) strcat(buf, "|VI_FREE"); printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count); printf(" "); lockmgr_printinfo(vp->v_vnlock); printf("\n"); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp, *nmp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ printf("Locked vnodes\n"); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { nmp = TAILQ_NEXT(mp, mnt_list); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp, NULL)) vprint("", vp); } nmp = TAILQ_NEXT(mp, mnt_list); } } /* * Show details about the given vnode. */ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static void vfsconf2x(struct vfsconf *vfsp, struct xvfsconf *xvfsp) { strcpy(xvfsp->vfc_name, vfsp->vfc_name); xvfsp->vfc_typenum = vfsp->vfc_typenum; xvfsp->vfc_refcount = vfsp->vfc_refcount; xvfsp->vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp->vfc_vfsops = NULL; xvfsp->vfc_next = NULL; } /* * Top level filesystem related information gathering. */ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; struct xvfsconf xvfsp; int error; error = 0; TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&xvfsp, sizeof(xvfsp)); vfsconf2x(vfsp, &xvfsp); error = SYSCTL_OUT(req, &xvfsp, sizeof xvfsp); if (error) break; } return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLFLAG_RD, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; struct xvfsconf xvfsp; printf("WARNING: userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
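 *
 * Illustrative MIBs, assuming the usual encoding: a request of the
 * form
 *
 *	{ CTL_VFS, VFS_GENERIC, VFS_CONF, typenum }
 *
 * reaches this handler with name[1] == VFS_CONF and namelen == 3,
 * while an old-style { CTL_VFS, VFS_VFSCONF } request (the two MIB
 * values collide, hence the ambiguity) arrives with namelen == 1 and
 * is redirected to sysctl_ovfs_conf() just below.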
*/ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); bzero(&xvfsp, sizeof(xvfsp)); vfsconf2x(vfsp, &xvfsp); return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct thread *td = req->td; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. */ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp, td); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall(void) { struct mount *mp; struct thread *td; int error; KASSERT(curthread != NULL, ("vfs_unmountall: NULL curthread")); td = curthread; /* * Since this only runs when rebooting, it is not interlocked. 
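 *
 * (Illustrative consequence of the reverse traversal: TAILQ_LAST()
 * below always picks the most recently mounted filesystem, so a
 * filesystem mounted on top of another is unmounted before the one
 * underneath it.)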
*/ while(!TAILQ_EMPTY(&mountlist)) { mp = TAILQ_LAST(&mountlist, mntlist); error = dounmount(mp, MNT_FORCE, td); if (error) { TAILQ_REMOVE(&mountlist, mp, mnt_list); /* * XXX: Due to the way in which we mount the root * file system off of devfs, devfs will generate a * "busy" warning when we try to unmount it before * the root. Don't print a warning as a result in * order to avoid false positive errors that may * cause needless upset. */ if (strcmp(mp->mnt_vfc->vfc_name, "devfs") != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } else { /* The unmount has removed mp from the mountlist */ } } } /* * Perform msync on all vnodes under a mount point. * The mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; MNT_ILOCK(mp); MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); if ((vp->v_iflag & VI_OBJDIRTY) && (flags == MNT_WAIT || VOP_ISLOCKED(vp, NULL) == 0)) { MNT_IUNLOCK(mp); if (!vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, curthread)) { if (vp->v_vflag & VV_NOSYNC) { /* unlinked */ vput(vp); MNT_ILOCK(mp); continue; } obj = vp->v_object; if (obj != NULL) { VM_OBJECT_LOCK(obj); vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); VM_OBJECT_UNLOCK(obj); } vput(vp); } MNT_ILOCK(mp); } else VI_UNLOCK(vp); } MNT_IUNLOCK(mp); } /* * Mark a vnode as free, putting it up for recycling. */ static void vfree(struct vnode *vp) { CTR1(KTR_VFS, "vfree vp %p", vp); ASSERT_VI_LOCKED(vp, "vfree"); mtx_lock(&vnode_free_list_mtx); VNASSERT(vp->v_op != NULL, vp, ("vfree: vnode already reclaimed.")); VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free")); VNASSERT(VSHOULDFREE(vp), vp, ("vfree: freeing when we shouldn't")); VNASSERT((vp->v_iflag & VI_DOOMED) == 0, vp, ("vfree: Freeing doomed vnode")); if (vp->v_iflag & VI_AGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; vp->v_iflag &= ~VI_AGE; vp->v_iflag |= VI_FREE; mtx_unlock(&vnode_free_list_mtx); } /* * Opposite of vfree() - mark a vnode as in use. */ static void vbusy(struct vnode *vp) { CTR1(KTR_VFS, "vbusy vp %p", vp); ASSERT_VI_LOCKED(vp, "vbusy"); VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free")); VNASSERT(vp->v_op != NULL, vp, ("vbusy: vnode already reclaimed.")); mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; vp->v_iflag &= ~(VI_FREE|VI_AGE); mtx_unlock(&vnode_free_list_mtx); } /* * Initialize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; vi = uma_zalloc(vnodepoll_zone, M_WAITOK); if (vp->v_pollinfo != NULL) { uma_zfree(vnodepoll_zone, vi); return; } vp->v_pollinfo = vi; mtx_init(&vp->v_pollinfo->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vp->v_pollinfo->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knllocked); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.)
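 *
 * A hypothetical VOP_POLL implementation (sketch only; xxx_poll is
 * not a real function) would simply defer to this routine:
 *
 *	static int
 *	xxx_poll(struct vop_poll_args *ap)
 *	{
 *		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
 *	}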
*/ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { if (vp->v_pollinfo == NULL) v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return events; } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return 0; } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; /* * Create a new filesystem syncer vnode for the specified mount point. */ int vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; static long start, incr, next; int error; /* Allocate a new vnode */ if ((error = getnewvnode("syncer", mp, &sync_vnodeops, &vp)) != 0) { mp->mnt_syncer = NULL; return (error); } vp->v_type = VNON; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque failed"); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } VI_LOCK(vp); vn_syncer_add_to_worklist(&vp->v_bufobj, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; mtx_unlock(&sync_mtx); VI_UNLOCK(vp); mp->mnt_syncer = vp; return (0); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; struct thread *td = ap->a_td; int error; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list.
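 *
 * (For illustration: the push itself is the vfs_msync(mp, MNT_NOWAIT)
 * plus VFS_SYNC(mp, MNT_LAZY, td) pair below, which starts writes of
 * dirty pages and buffers without waiting for them to complete.)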
*/ mtx_lock(&mountlist_mtx); if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_mtx, td) != 0) { mtx_unlock(&mountlist_mtx); return (0); } if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp, td); return (0); } MNT_ILOCK(mp); mp->mnt_noasync++; mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); vfs_msync(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY, td); MNT_ILOCK(mp); mp->mnt_noasync--; if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) mp->mnt_kern_flag |= MNTK_ASYNC; MNT_IUNLOCK(mp); vn_finished_write(mp); vfs_unbusy(mp, td); return (error); } /* * The syncer vnode is no longer referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx. */ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; VI_LOCK(vp); bo = &vp->v_bufobj; vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } VI_UNLOCK(vp); return (0); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; error = 0; dev_lock(); if (vp->v_type != VCHR) error = ENOTBLK; else if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. * * The ifdef'd CAPABILITIES version is here for reference, but is not * actually used. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, mode_t acc_mode, struct ucred *cred, int *privused) { mode_t dac_granted; mode_t priv_granted; /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner. */ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((acc_mode & dac_granted) == acc_mode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((acc_mode & dac_granted) == acc_mode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((acc_mode & dac_granted) == acc_mode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask.
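 *
 * A worked example (illustrative): a process that is neither the
 * owner nor a group member requests VREAD on a mode 0600 file.  The
 * DAC pass grants nothing, so we arrive here with dac_granted == 0;
 * if priv_check_cred(cred, PRIV_VFS_READ, ...) succeeds, priv_granted
 * gains VREAD, the access check passes and *privused is set to 1.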
*/ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP, SUSER_ALLOWJAIL)) priv_granted |= VEXEC; } else { if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_EXEC, SUSER_ALLOWJAIL)) priv_granted |= VEXEC; } if ((acc_mode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ, SUSER_ALLOWJAIL)) priv_granted |= VREAD; if ((acc_mode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE, SUSER_ALLOWJAIL)) priv_granted |= (VWRITE | VAPPEND); if ((acc_mode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN, SUSER_ALLOWJAIL)) priv_granted |= VADMIN; if ((acc_mode & (priv_granted | dac_granted)) == acc_mode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((acc_mode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, int access) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM, 0)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, access, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to suppress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) ((vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, ""); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, ""); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, ""); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations.
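 *
 * All of these knobs are runtime sysctls; e.g. (illustrative)
 *
 *	sysctl debug.vfs_badlock_ddb=0
 *
 * keeps the diagnostic printfs while no longer dropping into the
 * debugger on a violation.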
*/ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, ""); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) kdb_enter("lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, NULL) == 0) vfs_badlock("is not locked but should be", str, vp); } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, curthread) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, curthread) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #if 0 void assert_vop_elocked_other(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, curthread) != LK_EXCLOTHER) vfs_badlock("is not exclusive locked by another thread", str, vp); } void assert_vop_slocked(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, curthread) != LK_SHARED) vfs_badlock("is not locked shared but should be", str, vp); } #endif /* 0 */ #endif /* DEBUG_VFS_LOCKS */ void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp != a->a_fdvp && a->a_tvp != a->a_fdvp) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp != a->a_fvp) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. */ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } void vop_strategy_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. 
*/ if ((bp->b_flags & B_CLUSTER) != 0) return; if (BUF_REFCNT(bp) < 1) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) kdb_enter("lock violation"); } #endif } void vop_lookup_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_lookup_args *a; struct vnode *dvp; a = ap; dvp = a->a_dvp; ASSERT_VI_UNLOCKED(dvp, "VOP_LOOKUP"); ASSERT_VOP_LOCKED(dvp, "VOP_LOOKUP"); #endif } void vop_lookup_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_lookup_args *a; struct vnode *dvp; struct vnode *vp; a = ap; dvp = a->a_dvp; vp = *(a->a_vpp); ASSERT_VI_UNLOCKED(dvp, "VOP_LOOKUP"); ASSERT_VOP_LOCKED(dvp, "VOP_LOOKUP"); if (!rc) ASSERT_VOP_LOCKED(vp, "VOP_LOOKUP (child)"); #endif } void vop_lock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_lock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_unlock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_LOCKED(a->a_vp, "VOP_UNLOCK"); ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_unlock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; if (!rc) { VFS_KNOTE_UNLOCKED(a->a_fdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_tdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init(&fs_knlist, NULL, NULL, NULL, NULL); } /* XXX - correct order? 
*/ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { 0, filt_fsattach, filt_fsdetach, filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? */ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { 1, NULL, filt_vfsdetach, filt_vfsread }; static struct filterops vfswrite_filtops = { 1, NULL, filt_vfsdetach, filt_vfswrite }; static struct filterops vfsvnode_filtops = { 1, NULL, filt_vfsdetach, filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp, 0, curthread); } static int vfs_knllocked(void *arg) { struct vnode *vp = arg; return (VOP_ISLOCKED(vp, curthread) == LK_EXCLUSIVE); } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; if (vp->v_pollinfo == NULL) v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); } /*ARGSUSED*/ static int filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. 
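 *
 * (An illustrative kevent(2) consumer that would observe this EOF;
 * hypothetical userland sketch:
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	n = kevent(kq, &kev, 1, &ev, 1, NULL);
 *	if (n == 1 && (ev.flags & EV_EOF))
 *		...the underlying filesystem was revoked or unmounted...
 * )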
*/ if (hint == NOTE_REVOKE) { kn->kn_flags |= (EV_EOF | EV_ONESHOT); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred, curthread)) return (0); kn->kn_data = va.va_size - kn->kn_fp->f_offset; return (kn->kn_data != 0); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE) { kn->kn_flags |= EV_EOF; return (1); } return (kn->kn_fflags != 0); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VA_MARK_ATIME. This functionality is used by execve * and mmap, so we want to avoid the synchronous I/O implied by * directly setting va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct thread *td) { struct vattr atimeattr; if ((vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) { VATTR_NULL(&atimeattr); atimeattr.va_vaflags |= VA_MARK_ATIME; (void)VOP_SETATTR(vp, &atimeattr, td->td_ucred, td); } } Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c =================================================================== --- head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c (revision 170034) +++ head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c (revision 170035) @@ -1,2812 +1,2812 @@ /* * ng_btsocket_l2cap.c */ /*- * Copyright (c) 2001-2002 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $Id: ng_btsocket_l2cap.c,v 1.16 2003/09/14 23:29:06 max Exp $ * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* MALLOC define */ #ifdef NG_SEPARATE_MALLOC MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_L2CAP, "netgraph_btsocks_l2cap", "Netgraph Bluetooth L2CAP sockets"); #else #define M_NETGRAPH_BTSOCKET_L2CAP M_NETGRAPH #endif /* NG_SEPARATE_MALLOC */ /* Netgraph node methods */ static ng_constructor_t ng_btsocket_l2cap_node_constructor; static ng_rcvmsg_t ng_btsocket_l2cap_node_rcvmsg; static ng_shutdown_t ng_btsocket_l2cap_node_shutdown; static ng_newhook_t ng_btsocket_l2cap_node_newhook; static ng_connect_t ng_btsocket_l2cap_node_connect; static ng_rcvdata_t ng_btsocket_l2cap_node_rcvdata; static ng_disconnect_t ng_btsocket_l2cap_node_disconnect; static void ng_btsocket_l2cap_input (void *, int); static void ng_btsocket_l2cap_rtclean (void *, int); /* Netgraph type descriptor */ static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_BTSOCKET_L2CAP_NODE_TYPE, .constructor = ng_btsocket_l2cap_node_constructor, .rcvmsg = ng_btsocket_l2cap_node_rcvmsg, .shutdown = ng_btsocket_l2cap_node_shutdown, .newhook = ng_btsocket_l2cap_node_newhook, .connect = ng_btsocket_l2cap_node_connect, .rcvdata = ng_btsocket_l2cap_node_rcvdata, .disconnect = ng_btsocket_l2cap_node_disconnect, }; /* Globals */ extern int ifqmaxlen; static u_int32_t ng_btsocket_l2cap_debug_level; static node_p ng_btsocket_l2cap_node; static struct ng_bt_itemq ng_btsocket_l2cap_queue; static struct mtx ng_btsocket_l2cap_queue_mtx; static struct task ng_btsocket_l2cap_queue_task; static LIST_HEAD(, ng_btsocket_l2cap_pcb) ng_btsocket_l2cap_sockets; static struct mtx ng_btsocket_l2cap_sockets_mtx; static LIST_HEAD(, ng_btsocket_l2cap_rtentry) ng_btsocket_l2cap_rt; static struct mtx ng_btsocket_l2cap_rt_mtx; static struct task ng_btsocket_l2cap_rt_task; /* Sysctl tree */ SYSCTL_DECL(_net_bluetooth_l2cap_sockets); SYSCTL_NODE(_net_bluetooth_l2cap_sockets, OID_AUTO, seq, CTLFLAG_RW, 0, "Bluetooth SEQPACKET L2CAP sockets family"); SYSCTL_INT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, debug_level, CTLFLAG_RW, &ng_btsocket_l2cap_debug_level, NG_BTSOCKET_WARN_LEVEL, "Bluetooth SEQPACKET L2CAP sockets debug level"); SYSCTL_INT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_len, CTLFLAG_RD, &ng_btsocket_l2cap_queue.len, 0, "Bluetooth SEQPACKET L2CAP sockets input queue length"); SYSCTL_INT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_maxlen, CTLFLAG_RD, &ng_btsocket_l2cap_queue.maxlen, 0, "Bluetooth SEQPACKET L2CAP sockets input queue max. 
length"); SYSCTL_INT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_drops, CTLFLAG_RD, &ng_btsocket_l2cap_queue.drops, 0, "Bluetooth SEQPACKET L2CAP sockets input queue drops"); /* Debug */ #define NG_BTSOCKET_L2CAP_INFO \ if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_INFO_LEVEL) \ printf #define NG_BTSOCKET_L2CAP_WARN \ if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_WARN_LEVEL) \ printf #define NG_BTSOCKET_L2CAP_ERR \ if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_ERR_LEVEL) \ printf #define NG_BTSOCKET_L2CAP_ALERT \ if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_ALERT_LEVEL) \ printf /* * Netgraph message processing routines */ static int ng_btsocket_l2cap_process_l2ca_con_req_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_con_rsp_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_con_ind (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_cfg_req_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_cfg_ind (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_discon_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_discon_ind (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); static int ng_btsocket_l2cap_process_l2ca_write_rsp (struct ng_mesg *, ng_btsocket_l2cap_rtentry_p); /* * Send L2CA_xxx messages to the lower layer */ static int ng_btsocket_l2cap_send_l2ca_con_req (ng_btsocket_l2cap_pcb_p); static int ng_btsocket_l2cap_send_l2ca_con_rsp_req (u_int32_t, ng_btsocket_l2cap_rtentry_p, bdaddr_p, int, int, int); static int ng_btsocket_l2cap_send_l2ca_cfg_req (ng_btsocket_l2cap_pcb_p); static int ng_btsocket_l2cap_send_l2ca_cfg_rsp (ng_btsocket_l2cap_pcb_p); static int ng_btsocket_l2cap_send_l2ca_discon_req (u_int32_t, ng_btsocket_l2cap_pcb_p); static int ng_btsocket_l2cap_send2 (ng_btsocket_l2cap_pcb_p); /* * Timeout processing routines */ static void ng_btsocket_l2cap_timeout (ng_btsocket_l2cap_pcb_p); static void ng_btsocket_l2cap_untimeout (ng_btsocket_l2cap_pcb_p); static void ng_btsocket_l2cap_process_timeout (void *); /* * Other stuff */ static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_addr(bdaddr_p, int); static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_token(u_int32_t); static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_cid (bdaddr_p, int); static int ng_btsocket_l2cap_result2errno(int); #define ng_btsocket_l2cap_wakeup_input_task() \ taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_l2cap_queue_task) #define ng_btsocket_l2cap_wakeup_route_task() \ taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_l2cap_rt_task) /***************************************************************************** ***************************************************************************** ** Netgraph node interface ***************************************************************************** *****************************************************************************/ /* * Netgraph node constructor. Do not allow to create node of this type. */ static int ng_btsocket_l2cap_node_constructor(node_p node) { return (EINVAL); } /* ng_btsocket_l2cap_node_constructor */ /* * Do local shutdown processing. Let old node go and create new fresh one. 
*/ static int ng_btsocket_l2cap_node_shutdown(node_p node) { int error = 0; NG_NODE_UNREF(node); /* Create new node */ error = ng_make_node_common(&typestruct, &ng_btsocket_l2cap_node); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not create Netgraph node, error=%d\n", __func__, error); ng_btsocket_l2cap_node = NULL; return (error); } error = ng_name_node(ng_btsocket_l2cap_node, NG_BTSOCKET_L2CAP_NODE_TYPE); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not name Netgraph node, error=%d\n", __func__, error); NG_NODE_UNREF(ng_btsocket_l2cap_node); ng_btsocket_l2cap_node = NULL; return (error); } return (0); } /* ng_btsocket_l2cap_node_shutdown */ /* * We allow any hook to be connected to the node. */ static int ng_btsocket_l2cap_node_newhook(node_p node, hook_p hook, char const *name) { return (0); } /* ng_btsocket_l2cap_node_newhook */ /* * Just say "YEP, that's OK by me!" */ static int ng_btsocket_l2cap_node_connect(hook_p hook) { NG_HOOK_SET_PRIVATE(hook, NULL); NG_HOOK_REF(hook); /* Keep extra reference to the hook */ #if 0 NG_HOOK_FORCE_QUEUE(NG_HOOK_PEER(hook)); NG_HOOK_FORCE_QUEUE(hook); #endif return (0); } /* ng_btsocket_l2cap_node_connect */ /* * Hook disconnection. Schedule route cleanup task */ static int ng_btsocket_l2cap_node_disconnect(hook_p hook) { /* * If the hook has private information then we must have this hook in * the routing table and must schedule cleaning for the routing table. * Otherwise the hook was connected but we never got a "hook_info" message, * so we have never added this hook to the routing table and it is safe * to just delete it. */ if (NG_HOOK_PRIVATE(hook) != NULL) return (ng_btsocket_l2cap_wakeup_route_task()); NG_HOOK_UNREF(hook); /* Remove extra reference */ return (0); } /* ng_btsocket_l2cap_node_disconnect */ /* * Process incoming messages */ static int ng_btsocket_l2cap_node_rcvmsg(node_p node, item_p item, hook_p hook) { struct ng_mesg *msg = NGI_MSG(item); /* item still has message */ int error = 0; if (msg != NULL && msg->header.typecookie == NGM_L2CAP_COOKIE) { mtx_lock(&ng_btsocket_l2cap_queue_mtx); if (NG_BT_ITEMQ_FULL(&ng_btsocket_l2cap_queue)) { NG_BTSOCKET_L2CAP_ERR( "%s: Input queue is full (msg)\n", __func__); NG_BT_ITEMQ_DROP(&ng_btsocket_l2cap_queue); NG_FREE_ITEM(item); error = ENOBUFS; } else { if (hook != NULL) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_l2cap_queue, item); error = ng_btsocket_l2cap_wakeup_input_task(); } mtx_unlock(&ng_btsocket_l2cap_queue_mtx); } else { NG_FREE_ITEM(item); error = EINVAL; } return (error); } /* ng_btsocket_l2cap_node_rcvmsg */ /* * Receive data on a hook */ static int ng_btsocket_l2cap_node_rcvdata(hook_p hook, item_p item) { int error = 0; mtx_lock(&ng_btsocket_l2cap_queue_mtx); if (NG_BT_ITEMQ_FULL(&ng_btsocket_l2cap_queue)) { NG_BTSOCKET_L2CAP_ERR( "%s: Input queue is full (data)\n", __func__); NG_BT_ITEMQ_DROP(&ng_btsocket_l2cap_queue); NG_FREE_ITEM(item); error = ENOBUFS; } else { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_l2cap_queue, item); error = ng_btsocket_l2cap_wakeup_input_task(); } mtx_unlock(&ng_btsocket_l2cap_queue_mtx); return (error); } /* ng_btsocket_l2cap_node_rcvdata */ /* * Process L2CA_Connect response. The socket layer must have initiated the connection, * so we have to have a socket associated with the message token.
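 *
 * (For illustration: the token was recorded in the pcb when the
 * request was sent, so ng_btsocket_l2cap_pcb_by_token() below amounts
 * to a linear scan of ng_btsocket_l2cap_sockets comparing each pcb's
 * saved token against msg->header.token, performed under
 * ng_btsocket_l2cap_sockets_mtx.)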
*/ static int ng_btsocket_l2cap_process_l2ca_con_req_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_con_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; int error = 0; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_con_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with the token */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Connect response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, status=%d, " \ "state=%d\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, op->lcid, op->result, op->status, pcb->state); if (pcb->state != NG_BTSOCKET_L2CAP_CONNECTING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } ng_btsocket_l2cap_untimeout(pcb); if (op->result == NG_L2CAP_PENDING) { ng_btsocket_l2cap_timeout(pcb); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } if (op->result == NG_L2CAP_SUCCESS) { /* * The channel is now open, so update the local channel ID and * start the configuration process. Source and destination * addresses, as well as the route, must already be set. */ pcb->cid = op->lcid; error = ng_btsocket_l2cap_send_l2ca_cfg_req(pcb); if (error != 0) { /* Send disconnect request with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ... and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } else { pcb->cfg_state = NG_BTSOCKET_L2CAP_CFG_IN_SENT; pcb->state = NG_BTSOCKET_L2CAP_CONFIGURING; ng_btsocket_l2cap_timeout(pcb); } } else { /* * We have failed to open the connection, so convert the result * code to an "errno" code and disconnect the socket. The channel * has already been closed.
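 *
 * (Illustrative effect: a timeout-style result would be translated by
 * ng_btsocket_l2cap_result2errno() into a conventional errno such as
 * ETIMEDOUT and left in so->so_error for the caller to pick up; the
 * exact mapping lives in that helper, which is not shown here.)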
*/ pcb->so->so_error = ng_btsocket_l2cap_result2errno(op->result); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_process_l2ca_con_req_rsp */ /* * Process L2CA_ConnectRsp response */ static int ng_btsocket_l2cap_process_l2ca_con_rsp_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_con_rsp_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_con_rsp_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with the token */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_ConnectRsp response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, pcb->state); if (pcb->state != NG_BTSOCKET_L2CAP_CONNECTING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } ng_btsocket_l2cap_untimeout(pcb); /* Check the result and disconnect the socket on failure */ if (op->result != NG_L2CAP_SUCCESS) { /* Close the socket - channel already closed */ pcb->so->so_error = ng_btsocket_l2cap_result2errno(op->result); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } else { /* Move to CONFIGURING state and wait for CONFIG_IND */ pcb->cfg_state = 0; pcb->state = NG_BTSOCKET_L2CAP_CONFIGURING; ng_btsocket_l2cap_timeout(pcb); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_process_l2ca_con_rsp_rsp */ /* * Process L2CA_Connect indicator. Find the socket that listens on the address * and PSM. Find exact or closest match. Create a new socket and initiate the * connection. */ static int ng_btsocket_l2cap_process_l2ca_con_ind(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_con_ind_ip *ip = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL, *pcb1 = NULL; int error = 0; u_int32_t token = 0; u_int16_t result = 0; if (msg->header.arglen != sizeof(*ip)) return (EMSGSIZE); ip = (ng_l2cap_l2ca_con_ind_ip *)(msg->data); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Connect indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, ident=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], ip->bdaddr.b[5], ip->bdaddr.b[4], ip->bdaddr.b[3], ip->bdaddr.b[2], ip->bdaddr.b[1], ip->bdaddr.b[0], ip->psm, ip->lcid, ip->ident); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); pcb = ng_btsocket_l2cap_pcb_by_addr(&rt->src, ip->psm); if (pcb != NULL) { struct socket *so1 = NULL; mtx_lock(&pcb->pcb_mtx); /* * First check the pending connections queue and if we have * space then create a new socket and set the proper source address. */ if (pcb->so->so_qlen <= pcb->so->so_qlimit) so1 = sonewconn(pcb->so, 0); if (so1 == NULL) { result = NG_L2CAP_NO_RESOURCES; goto respond; } /* * If we got here then we have created a new socket. So complete the * connection.
If we were listening on a specific address then copy the * source address from the listening socket, otherwise copy the source * address from the hook's routing information. */ pcb1 = so2l2cap_pcb(so1); KASSERT((pcb1 != NULL), ("%s: pcb1 == NULL\n", __func__)); mtx_lock(&pcb1->pcb_mtx); if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)) != 0) bcopy(&pcb->src, &pcb1->src, sizeof(pcb1->src)); else bcopy(&rt->src, &pcb1->src, sizeof(pcb1->src)); pcb1->flags &= ~NG_BTSOCKET_L2CAP_CLIENT; bcopy(&ip->bdaddr, &pcb1->dst, sizeof(pcb1->dst)); pcb1->psm = ip->psm; pcb1->cid = ip->lcid; pcb1->rt = rt; /* Copy socket settings */ pcb1->imtu = pcb->imtu; bcopy(&pcb->oflow, &pcb1->oflow, sizeof(pcb1->oflow)); pcb1->flush_timo = pcb->flush_timo; token = pcb1->token; } else /* Nobody listens on requested BDADDR/PSM */ result = NG_L2CAP_PSM_NOT_SUPPORTED; respond: error = ng_btsocket_l2cap_send_l2ca_con_rsp_req(token, rt, &ip->bdaddr, ip->ident, ip->lcid, result); if (pcb1 != NULL) { if (error != 0) { pcb1->so->so_error = error; pcb1->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb1->so); } else { pcb1->state = NG_BTSOCKET_L2CAP_CONNECTING; soisconnecting(pcb1->so); ng_btsocket_l2cap_timeout(pcb1); } mtx_unlock(&pcb1->pcb_mtx); } if (pcb != NULL) mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_process_l2ca_con_ind */ /* * Process L2CA_Config response */ static int ng_btsocket_l2cap_process_l2ca_cfg_req_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_cfg_op *op = NULL; ng_btsocket_l2cap_pcb_p pcb = NULL; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_cfg_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* * The socket must have issued a Configure request, so we must have a * socket that wants to be configured. Use the Netgraph message token * to find it. */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { /* * XXX FIXME what to do here? We could not find a * socket with the requested token. We cannot even send a * Disconnect, because we do not know the channel ID */ mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Config response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d, " \ "cfg_state=%x\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, pcb->state, pcb->cfg_state); if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } if (op->result == NG_L2CAP_SUCCESS) { /* * XXX FIXME Actually set flush and link timeout. * Set QoS here if required. Resolve conflicts (flush_timo). * Save incoming MTU (peer's outgoing MTU) and outgoing flow * spec. */ pcb->imtu = op->imtu; bcopy(&op->oflow, &pcb->oflow, sizeof(pcb->oflow)); pcb->flush_timo = op->flush_timo; /* * We have configured the incoming side, so record it and check * if configuration is complete. If complete then mark socket * as connected, otherwise wait for the peer.
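 *
 * (Illustrative state walk: cfg_state held NG_BTSOCKET_L2CAP_CFG_IN_SENT
 * while we waited; below it is traded for NG_BTSOCKET_L2CAP_CFG_IN, and
 * once the outgoing direction has likewise recorded
 * NG_BTSOCKET_L2CAP_CFG_OUT, the value equals NG_BTSOCKET_L2CAP_CFG_BOTH
 * and the socket is marked connected.)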
*/ pcb->cfg_state &= ~NG_BTSOCKET_L2CAP_CFG_IN_SENT; pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_IN; if (pcb->cfg_state == NG_BTSOCKET_L2CAP_CFG_BOTH) { /* Configuration complete - mark socket as open */ ng_btsocket_l2cap_untimeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_OPEN; soisconnected(pcb->so); } } else { /* * Something went wrong. Could be unacceptable parameters, * reject or unknown option. That's too bad, but we will * not negotiate. Send Disconnect and close the channel. */ ng_btsocket_l2cap_untimeout(pcb); switch (op->result) { case NG_L2CAP_UNACCEPTABLE_PARAMS: case NG_L2CAP_UNKNOWN_OPTION: pcb->so->so_error = EINVAL; break; default: pcb->so->so_error = ECONNRESET; break; } /* Send disconnect with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ... and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_process_l2ca_cfg_req_rsp */ /* * Process L2CA_ConfigRsp response */ static int ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_cfg_rsp_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; int error = 0; if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_cfg_rsp_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with the token */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_ConfigRsp response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d, " \ "cfg_state=%x\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, pcb->state, pcb->cfg_state); if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } /* Check the result and disconnect the socket on failure */ if (op->result != NG_L2CAP_SUCCESS) goto disconnect; /* * Now we are done with the remote side configuration. Configure the local * side if we have not done it yet. */ pcb->cfg_state &= ~NG_BTSOCKET_L2CAP_CFG_OUT_SENT; pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_OUT; if (pcb->cfg_state == NG_BTSOCKET_L2CAP_CFG_BOTH) { /* Configuration complete - mark socket as open */ ng_btsocket_l2cap_untimeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_OPEN; soisconnected(pcb->so); } else { if (!(pcb->cfg_state & NG_BTSOCKET_L2CAP_CFG_IN_SENT)) { /* Send L2CA_Config request - incoming path */ error = ng_btsocket_l2cap_send_l2ca_cfg_req(pcb); if (error != 0) goto disconnect; pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_IN_SENT; } } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); disconnect: ng_btsocket_l2cap_untimeout(pcb); /* Send disconnect with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ...
and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp */ /* * Process L2CA_Config indicator */ static int ng_btsocket_l2cap_process_l2ca_cfg_ind(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_cfg_ind_ip *ip = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; int error = 0; if (msg->header.arglen != sizeof(*ip)) return (EMSGSIZE); ip = (ng_l2cap_l2ca_cfg_ind_ip *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Check for the open socket that has the given channel ID */ pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ip->lcid); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Config indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, state=%d, cfg_state=%x\n", __func__, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, pcb->state, pcb->cfg_state); /* XXX FIXME re-configuration on open socket */ if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } /* * XXX FIXME Actually set flush and link timeout. Set QoS here if * required. Resolve conflicts (flush_timo). Note outgoing MTU (peer's * incoming MTU) and incoming flow spec. */ pcb->omtu = ip->omtu; bcopy(&ip->iflow, &pcb->iflow, sizeof(pcb->iflow)); pcb->flush_timo = ip->flush_timo; /* * Send L2CA_Config response to our peer and check for errors; * if any, send a disconnect to close the channel. */ if (!(pcb->cfg_state & NG_BTSOCKET_L2CAP_CFG_OUT_SENT)) { error = ng_btsocket_l2cap_send_l2ca_cfg_rsp(pcb); if (error != 0) { ng_btsocket_l2cap_untimeout(pcb); pcb->so->so_error = error; /* Send disconnect with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ... and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } else pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_OUT_SENT; } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_process_l2ca_cfg_ind */ /* * Process L2CA_Disconnect response */ static int ng_btsocket_l2cap_process_l2ca_discon_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_discon_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; /* Check message */ if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_discon_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* * Socket layer must have issued L2CA_Disconnect request, so there * must be a socket that wants to be disconnected. Use Netgraph * message token to find it.
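* (The token was set by us in the original L2CA_Disconnect request and * is echoed back by the L2CAP node in this response.)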
*/ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } mtx_lock(&pcb->pcb_mtx); /* XXX Close socket no matter what op->result says */ if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) { NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Disconnect response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d\n", __func__, msg->header.token, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, pcb->state); ng_btsocket_l2cap_untimeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_process_l2ca_discon_rsp */ /* * Process L2CA_Disconnect indicator */ static int ng_btsocket_l2cap_process_l2ca_discon_ind(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_discon_ind_ip *ip = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; /* Check message */ if (msg->header.arglen != sizeof(*ip)) return (EMSGSIZE); ip = (ng_l2cap_l2ca_discon_ind_ip *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with the given channel ID */ pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ip->lcid); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* * Channel has already been destroyed, so disconnect the socket * and be done with it. If there was any pending request we * cannot do anything here anyway. */ mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Disconnect indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, state=%d\n", __func__, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, pcb->state); if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) ng_btsocket_l2cap_untimeout(pcb); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_process_l2ca_discon_ind */ /* * Process L2CA_Write response */ static int ng_btsocket_l2cap_process_l2ca_write_rsp(struct ng_mesg *msg, ng_btsocket_l2cap_rtentry_p rt) { ng_l2cap_l2ca_write_op *op = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; /* Check message */ if (msg->header.arglen != sizeof(*op)) return (EMSGSIZE); op = (ng_l2cap_l2ca_write_op *)(msg->data); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Look for the socket with the given token */ pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } mtx_lock(&pcb->pcb_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CA_Write response, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, length=%d, " \ "state=%d\n", __func__, pcb->src.b[5], pcb->src.b[4], pcb->src.b[3], pcb->src.b[2], pcb->src.b[1], pcb->src.b[0], pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3], pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0], pcb->psm, pcb->cid, op->result, op->length, pcb->state); if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) { mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (ENOENT); } ng_btsocket_l2cap_untimeout(pcb); /* * Check if we have more data to
send */ sbdroprecord(&pcb->so->so_snd); if (pcb->so->so_snd.sb_cc > 0) { if (ng_btsocket_l2cap_send2(pcb) == 0) ng_btsocket_l2cap_timeout(pcb); else sbdroprecord(&pcb->so->so_snd); /* XXX */ } /* * Now set the result, drop packet from the socket send queue and * ask for more (wakeup sender) */ pcb->so->so_error = ng_btsocket_l2cap_result2errno(op->result); sowwakeup(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_process_l2ca_write_rsp */ /* * Send L2CA_Connect request */ static int ng_btsocket_l2cap_send_l2ca_con_req(ng_btsocket_l2cap_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_con_ip *ip = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CON, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = pcb->token; ip = (ng_l2cap_l2ca_con_ip *)(msg->data); bcopy(&pcb->dst, &ip->bdaddr, sizeof(ip->bdaddr)); ip->psm = pcb->psm; NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg,pcb->rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_con_req */ /* * Send L2CA_Connect response */ static int ng_btsocket_l2cap_send_l2ca_con_rsp_req(u_int32_t token, ng_btsocket_l2cap_rtentry_p rt, bdaddr_p dst, int ident, int lcid, int result) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_con_rsp_ip *ip = NULL; int error = 0; if (rt == NULL || rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CON_RSP, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = token; ip = (ng_l2cap_l2ca_con_rsp_ip *)(msg->data); bcopy(dst, &ip->bdaddr, sizeof(ip->bdaddr)); ip->ident = ident; ip->lcid = lcid; ip->result = result; ip->status = 0; NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_con_rsp_req */ /* * Send L2CA_Config request */ static int ng_btsocket_l2cap_send_l2ca_cfg_req(ng_btsocket_l2cap_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_cfg_ip *ip = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CFG, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = pcb->token; ip = (ng_l2cap_l2ca_cfg_ip *)(msg->data); ip->lcid = pcb->cid; ip->imtu = pcb->imtu; bcopy(&pcb->oflow, &ip->oflow, sizeof(ip->oflow)); ip->flush_timo = pcb->flush_timo; ip->link_timo = pcb->link_timo; NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg,pcb->rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_cfg_req */ /* * Send L2CA_Config response */ static int ng_btsocket_l2cap_send_l2ca_cfg_rsp(ng_btsocket_l2cap_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_cfg_rsp_ip *ip = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CFG_RSP, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = pcb->token; ip = (ng_l2cap_l2ca_cfg_rsp_ip *)(msg->data); ip->lcid = pcb->cid; ip->omtu = pcb->omtu; bcopy(&pcb->iflow, &ip->iflow, sizeof(ip->iflow)); NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, pcb->rt->hook, 0); return (error); } /* 
ng_btsocket_l2cap_send_l2ca_cfg_rsp */ /* * Send L2CA_Disconnect request */ static int ng_btsocket_l2cap_send_l2ca_discon_req(u_int32_t token, ng_btsocket_l2cap_pcb_p pcb) { struct ng_mesg *msg = NULL; ng_l2cap_l2ca_discon_ip *ip = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) return (ENETDOWN); NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_DISCON, sizeof(*ip), M_NOWAIT); if (msg == NULL) return (ENOMEM); msg->header.token = token; ip = (ng_l2cap_l2ca_discon_ip *)(msg->data); ip->lcid = pcb->cid; NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg,pcb->rt->hook, 0); return (error); } /* ng_btsocket_l2cap_send_l2ca_discon_req */ /***************************************************************************** ***************************************************************************** ** Socket interface ***************************************************************************** *****************************************************************************/ /* * L2CAP sockets data input routine */ static void ng_btsocket_l2cap_data_input(struct mbuf *m, hook_p hook) { ng_l2cap_hdr_t *hdr = NULL; ng_l2cap_clt_hdr_t *clt_hdr = NULL; ng_btsocket_l2cap_pcb_t *pcb = NULL; ng_btsocket_l2cap_rtentry_t *rt = NULL; if (hook == NULL) { NG_BTSOCKET_L2CAP_ALERT( "%s: Invalid source hook for L2CAP data packet\n", __func__); goto drop; } rt = (ng_btsocket_l2cap_rtentry_t *) NG_HOOK_PRIVATE(hook); if (rt == NULL) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not find out source bdaddr for L2CAP data packet\n", __func__); goto drop; } /* Make sure we can access header */ if (m->m_pkthdr.len < sizeof(*hdr)) { NG_BTSOCKET_L2CAP_ERR( "%s: L2CAP data packet too small, len=%d\n", __func__, m->m_pkthdr.len); goto drop; } if (m->m_len < sizeof(*hdr)) { m = m_pullup(m, sizeof(*hdr)); if (m == NULL) goto drop; } /* Strip L2CAP packet header and verify packet length */ hdr = mtod(m, ng_l2cap_hdr_t *); m_adj(m, sizeof(*hdr)); if (hdr->length != m->m_pkthdr.len) { NG_BTSOCKET_L2CAP_ERR( "%s: Bad L2CAP data packet length, len=%d, length=%d\n", __func__, m->m_pkthdr.len, hdr->length); goto drop; } /* * Now process packet. Two cases: * * 1) Normal packet (cid != 2) then find connected socket and append * mbuf to the socket queue. Wakeup socket. * * 2) Broadcast packet (cid == 2) then find all sockets that connected * to the given PSM and have SO_BROADCAST bit set and append mbuf * to the socket queue. Wakeup socket. 
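* (CID 2 is NG_L2CAP_CLT_CID, the connectionless channel tested for in * the code below.)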
*/ NG_BTSOCKET_L2CAP_INFO( "%s: Received L2CAP data packet: src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dcid=%d, length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->dcid, hdr->length); if (hdr->dcid >= NG_L2CAP_FIRST_CID) { mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* Normal packet: find connected socket */ pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, hdr->dcid); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); goto drop; } mtx_lock(&pcb->pcb_mtx); if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) { NG_BTSOCKET_L2CAP_ERR( "%s: No connected socket found, src bdaddr=%x:%x:%x:%x:%x:%x, dcid=%d, " \ "state=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->dcid, pcb->state); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); goto drop; } /* Check packet size against socket's incoming MTU */ if (hdr->length > pcb->imtu) { NG_BTSOCKET_L2CAP_ERR( "%s: L2CAP data packet too big, src bdaddr=%x:%x:%x:%x:%x:%x, " \ "dcid=%d, length=%d, imtu=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->dcid, hdr->length, pcb->imtu); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); goto drop; } /* Check if we have enough space in socket receive queue */ if (m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) { /* * This is really bad. Receive queue on socket does * not have enough space for the packet. We do not * have any other choice but drop the packet. L2CAP * does not provide any flow control. */ NG_BTSOCKET_L2CAP_ERR( "%s: Not enough space in socket receive queue. Dropping L2CAP data packet, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, dcid=%d, len=%d, space=%ld\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->dcid, m->m_pkthdr.len, sbspace(&pcb->so->so_rcv)); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); goto drop; } /* Append packet to the socket receive queue and wakeup */ sbappendrecord(&pcb->so->so_rcv, m); m = NULL; sorwakeup(pcb->so); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); } else if (hdr->dcid == NG_L2CAP_CLT_CID) { /* Broadcast packet: give packet to all sockets */ /* Check packet size against connectionless MTU */ if (hdr->length > NG_L2CAP_MTU_DEFAULT) { NG_BTSOCKET_L2CAP_ERR( "%s: Connectionless L2CAP data packet too big, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->length); goto drop; } /* Make sure we can access connectionless header */ if (m->m_pkthdr.len < sizeof(*clt_hdr)) { NG_BTSOCKET_L2CAP_ERR( "%s: Can not get L2CAP connectionless packet header, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], hdr->length); goto drop; } if (m->m_len < sizeof(*clt_hdr)) { m = m_pullup(m, sizeof(*clt_hdr)); if (m == NULL) goto drop; } /* Strip connectionless header and deliver packet */ clt_hdr = mtod(m, ng_l2cap_clt_hdr_t *); m_adj(m, sizeof(*clt_hdr)); NG_BTSOCKET_L2CAP_INFO( "%s: Got L2CAP connectionless data packet, " \ "src bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, length=%d\n", __func__, rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0], clt_hdr->psm, hdr->length); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) { struct mbuf *copy = NULL; 
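/* Deliver the packet only to matching open sockets that have SO_BROADCAST set and room in their receive queue; all others are skipped. */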
mtx_lock(&pcb->pcb_mtx); if (bcmp(&rt->src, &pcb->src, sizeof(pcb->src)) != 0 || pcb->psm != clt_hdr->psm || pcb->state != NG_BTSOCKET_L2CAP_OPEN || (pcb->so->so_options & SO_BROADCAST) == 0 || m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) goto next; /* * Create a copy of the packet and append it to the * socket's queue. If m_dup() failed - no big deal; * it is broadcast traffic, after all. */ copy = m_dup(m, M_DONTWAIT); if (copy != NULL) { sbappendrecord(&pcb->so->so_rcv, copy); sorwakeup(pcb->so); } next: mtx_unlock(&pcb->pcb_mtx); } mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); } drop: NG_FREE_M(m); /* checks for m != NULL */ } /* ng_btsocket_l2cap_data_input */ /* * L2CAP sockets default message input routine */ static void ng_btsocket_l2cap_default_msg_input(struct ng_mesg *msg, hook_p hook) { switch (msg->header.cmd) { case NGM_L2CAP_NODE_HOOK_INFO: { ng_btsocket_l2cap_rtentry_t *rt = NULL; if (hook == NULL || msg->header.arglen != sizeof(bdaddr_t)) break; if (bcmp(msg->data, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) break; mtx_lock(&ng_btsocket_l2cap_rt_mtx); rt = (ng_btsocket_l2cap_rtentry_t *) NG_HOOK_PRIVATE(hook); if (rt == NULL) { MALLOC(rt, ng_btsocket_l2cap_rtentry_p, sizeof(*rt), M_NETGRAPH_BTSOCKET_L2CAP, M_NOWAIT|M_ZERO); if (rt == NULL) { mtx_unlock(&ng_btsocket_l2cap_rt_mtx); break; } LIST_INSERT_HEAD(&ng_btsocket_l2cap_rt, rt, next); NG_HOOK_SET_PRIVATE(hook, rt); } bcopy(msg->data, &rt->src, sizeof(rt->src)); rt->hook = hook; mtx_unlock(&ng_btsocket_l2cap_rt_mtx); NG_BTSOCKET_L2CAP_INFO( "%s: Updating hook \"%s\", src bdaddr=%x:%x:%x:%x:%x:%x\n", __func__, NG_HOOK_NAME(hook), rt->src.b[5], rt->src.b[4], rt->src.b[3], rt->src.b[2], rt->src.b[1], rt->src.b[0]); } break; default: NG_BTSOCKET_L2CAP_WARN( "%s: Unknown message, cmd=%d\n", __func__, msg->header.cmd); break; } NG_FREE_MSG(msg); /* Checks for msg != NULL */ } /* ng_btsocket_l2cap_default_msg_input */ /* * L2CAP sockets L2CA message input routine */ static void ng_btsocket_l2cap_l2ca_msg_input(struct ng_mesg *msg, hook_p hook) { ng_btsocket_l2cap_rtentry_p rt = NULL; if (hook == NULL) { NG_BTSOCKET_L2CAP_ALERT( "%s: Invalid source hook for L2CA message\n", __func__); goto drop; } rt = (ng_btsocket_l2cap_rtentry_p) NG_HOOK_PRIVATE(hook); if (rt == NULL) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not find out source bdaddr for L2CA message\n", __func__); goto drop; } switch (msg->header.cmd) { case NGM_L2CAP_L2CA_CON: /* L2CA_Connect response */ ng_btsocket_l2cap_process_l2ca_con_req_rsp(msg, rt); break; case NGM_L2CAP_L2CA_CON_RSP: /* L2CA_ConnectRsp response */ ng_btsocket_l2cap_process_l2ca_con_rsp_rsp(msg, rt); break; case NGM_L2CAP_L2CA_CON_IND: /* L2CA_Connect indicator */ ng_btsocket_l2cap_process_l2ca_con_ind(msg, rt); break; case NGM_L2CAP_L2CA_CFG: /* L2CA_Config response */ ng_btsocket_l2cap_process_l2ca_cfg_req_rsp(msg, rt); break; case NGM_L2CAP_L2CA_CFG_RSP: /* L2CA_ConfigRsp response */ ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp(msg, rt); break; case NGM_L2CAP_L2CA_CFG_IND: /* L2CA_Config indicator */ ng_btsocket_l2cap_process_l2ca_cfg_ind(msg, rt); break; case NGM_L2CAP_L2CA_DISCON: /* L2CA_Disconnect response */ ng_btsocket_l2cap_process_l2ca_discon_rsp(msg, rt); break; case NGM_L2CAP_L2CA_DISCON_IND: /* L2CA_Disconnect indicator */ ng_btsocket_l2cap_process_l2ca_discon_ind(msg, rt); break; case NGM_L2CAP_L2CA_WRITE: /* L2CA_Write response */ ng_btsocket_l2cap_process_l2ca_write_rsp(msg, rt); break; /* XXX FIXME add other L2CA messages */ default: NG_BTSOCKET_L2CAP_WARN( "%s: Unknown L2CA message,
cmd=%d\n", __func__, msg->header.cmd); break; } drop: NG_FREE_MSG(msg); } /* ng_btsocket_l2cap_l2ca_msg_input */ /* * L2CAP sockets input routine */ static void ng_btsocket_l2cap_input(void *context, int pending) { item_p item = NULL; hook_p hook = NULL; for (;;) { mtx_lock(&ng_btsocket_l2cap_queue_mtx); NG_BT_ITEMQ_DEQUEUE(&ng_btsocket_l2cap_queue, item); mtx_unlock(&ng_btsocket_l2cap_queue_mtx); if (item == NULL) break; NGI_GET_HOOK(item, hook); if (hook != NULL && NG_HOOK_NOT_VALID(hook)) goto drop; switch(item->el_flags & NGQF_TYPE) { case NGQF_DATA: { struct mbuf *m = NULL; NGI_GET_M(item, m); ng_btsocket_l2cap_data_input(m, hook); } break; case NGQF_MESG: { struct ng_mesg *msg = NULL; NGI_GET_MSG(item, msg); switch (msg->header.cmd) { case NGM_L2CAP_L2CA_CON: case NGM_L2CAP_L2CA_CON_RSP: case NGM_L2CAP_L2CA_CON_IND: case NGM_L2CAP_L2CA_CFG: case NGM_L2CAP_L2CA_CFG_RSP: case NGM_L2CAP_L2CA_CFG_IND: case NGM_L2CAP_L2CA_DISCON: case NGM_L2CAP_L2CA_DISCON_IND: case NGM_L2CAP_L2CA_WRITE: /* XXX FIXME add other L2CA messages */ ng_btsocket_l2cap_l2ca_msg_input(msg, hook); break; default: ng_btsocket_l2cap_default_msg_input(msg, hook); break; } } break; default: KASSERT(0, ("%s: invalid item type=%ld\n", __func__, (item->el_flags & NGQF_TYPE))); break; } drop: if (hook != NULL) NG_HOOK_UNREF(hook); NG_FREE_ITEM(item); } } /* ng_btsocket_l2cap_input */ /* * Route cleanup task. Gets scheduled when hook is disconnected. Here we * will find all sockets that use "invalid" hook and disconnect them. */ static void ng_btsocket_l2cap_rtclean(void *context, int pending) { ng_btsocket_l2cap_pcb_p pcb = NULL, pcb_next = NULL; ng_btsocket_l2cap_rtentry_p rt = NULL; mtx_lock(&ng_btsocket_l2cap_rt_mtx); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); /* * First disconnect all sockets that use "invalid" hook */ for (pcb = LIST_FIRST(&ng_btsocket_l2cap_sockets); pcb != NULL; ) { mtx_lock(&pcb->pcb_mtx); pcb_next = LIST_NEXT(pcb, next); if (pcb->rt != NULL && pcb->rt->hook != NULL && NG_HOOK_NOT_VALID(pcb->rt->hook)) { if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) ng_btsocket_l2cap_untimeout(pcb); pcb->so->so_error = ENETDOWN; pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); pcb->token = 0; pcb->cid = 0; pcb->rt = NULL; } mtx_unlock(&pcb->pcb_mtx); pcb = pcb_next; } /* * Now cleanup routing table */ for (rt = LIST_FIRST(&ng_btsocket_l2cap_rt); rt != NULL; ) { ng_btsocket_l2cap_rtentry_p rt_next = LIST_NEXT(rt, next); if (rt->hook != NULL && NG_HOOK_NOT_VALID(rt->hook)) { LIST_REMOVE(rt, next); NG_HOOK_SET_PRIVATE(rt->hook, NULL); NG_HOOK_UNREF(rt->hook); /* Remove extra reference */ bzero(rt, sizeof(*rt)); FREE(rt, M_NETGRAPH_BTSOCKET_L2CAP); } rt = rt_next; } mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); mtx_unlock(&ng_btsocket_l2cap_rt_mtx); } /* ng_btsocket_l2cap_rtclean */ /* * Initialize everything */ void ng_btsocket_l2cap_init(void) { int error = 0; ng_btsocket_l2cap_node = NULL; ng_btsocket_l2cap_debug_level = NG_BTSOCKET_WARN_LEVEL; /* Register Netgraph node type */ error = ng_newtype(&typestruct); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not register Netgraph node type, error=%d\n", __func__, error); return; } /* Create Netgrapg node */ error = ng_make_node_common(&typestruct, &ng_btsocket_l2cap_node); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not create Netgraph node, error=%d\n", __func__, error); ng_btsocket_l2cap_node = NULL; return; } error = ng_name_node(ng_btsocket_l2cap_node, NG_BTSOCKET_L2CAP_NODE_TYPE); if (error != 0) { NG_BTSOCKET_L2CAP_ALERT( "%s: Could not 
name Netgraph node, error=%d\n", __func__, error); NG_NODE_UNREF(ng_btsocket_l2cap_node); ng_btsocket_l2cap_node = NULL; return; } /* Create input queue */ NG_BT_ITEMQ_INIT(&ng_btsocket_l2cap_queue, ifqmaxlen); mtx_init(&ng_btsocket_l2cap_queue_mtx, "btsocks_l2cap_queue_mtx", NULL, MTX_DEF); TASK_INIT(&ng_btsocket_l2cap_queue_task, 0, ng_btsocket_l2cap_input, NULL); /* Create list of sockets */ LIST_INIT(&ng_btsocket_l2cap_sockets); mtx_init(&ng_btsocket_l2cap_sockets_mtx, "btsocks_l2cap_sockets_mtx", NULL, MTX_DEF); /* Routing table */ LIST_INIT(&ng_btsocket_l2cap_rt); mtx_init(&ng_btsocket_l2cap_rt_mtx, "btsocks_l2cap_rt_mtx", NULL, MTX_DEF); TASK_INIT(&ng_btsocket_l2cap_rt_task, 0, ng_btsocket_l2cap_rtclean, NULL); } /* ng_btsocket_l2cap_init */ /* * Abort connection on socket */ void ng_btsocket_l2cap_abort(struct socket *so) { so->so_error = ECONNABORTED; (void)ng_btsocket_l2cap_disconnect(so); } /* ng_btsocket_l2cap_abort */ void ng_btsocket_l2cap_close(struct socket *so) { (void)ng_btsocket_l2cap_disconnect(so); } /* ng_btsocket_l2cap_close */ /* * Accept connection on socket. Nothing to do here, socket must be connected * and ready, so just return peer address and be done with it. */ int ng_btsocket_l2cap_accept(struct socket *so, struct sockaddr **nam) { if (ng_btsocket_l2cap_node == NULL) return (EINVAL); return (ng_btsocket_l2cap_peeraddr(so, nam)); } /* ng_btsocket_l2cap_accept */ /* * Create and attach new socket */ int ng_btsocket_l2cap_attach(struct socket *so, int proto, struct thread *td) { static u_int32_t token = 0; ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); int error; /* Check socket and protocol */ if (ng_btsocket_l2cap_node == NULL) return (EPROTONOSUPPORT); if (so->so_type != SOCK_SEQPACKET) return (ESOCKTNOSUPPORT); #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */ if (proto != 0) if (proto != BLUETOOTH_PROTO_L2CAP) return (EPROTONOSUPPORT); #endif /* XXX */ if (pcb != NULL) return (EISCONN); /* Reserve send and receive space if it is not reserved yet */ if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) { error = soreserve(so, NG_BTSOCKET_L2CAP_SENDSPACE, NG_BTSOCKET_L2CAP_RECVSPACE); if (error != 0) return (error); } /* Allocate the PCB */ MALLOC(pcb, ng_btsocket_l2cap_pcb_p, sizeof(*pcb), M_NETGRAPH_BTSOCKET_L2CAP, M_NOWAIT | M_ZERO); if (pcb == NULL) return (ENOMEM); /* Link the PCB and the socket */ so->so_pcb = (caddr_t) pcb; pcb->so = so; pcb->state = NG_BTSOCKET_L2CAP_CLOSED; /* Initialize PCB */ pcb->imtu = pcb->omtu = NG_L2CAP_MTU_DEFAULT; /* Default flow */ pcb->iflow.flags = 0x0; pcb->iflow.service_type = NG_HCI_SERVICE_TYPE_BEST_EFFORT; pcb->iflow.token_rate = 0xffffffff; /* maximum */ pcb->iflow.token_bucket_size = 0xffffffff; /* maximum */ pcb->iflow.peak_bandwidth = 0x00000000; /* maximum */ pcb->iflow.latency = 0xffffffff; /* don't care */ pcb->iflow.delay_variation = 0xffffffff; /* don't care */ bcopy(&pcb->iflow, &pcb->oflow, sizeof(pcb->oflow)); pcb->flush_timo = NG_L2CAP_FLUSH_TIMO_DEFAULT; pcb->link_timo = NG_L2CAP_LINK_TIMO_DEFAULT; callout_handle_init(&pcb->timo); /* * XXX Mark PCB mutex as DUPOK to prevent "duplicated lock of * the same type" message. When accepting new L2CAP connection * ng_btsocket_l2cap_process_l2ca_con_ind() holds both PCB mutexes * for "old" (accepting) PCB and "new" (created) PCB. */ mtx_init(&pcb->pcb_mtx, "btsocks_l2cap_pcb_mtx", NULL, MTX_DEF|MTX_DUPOK); /* * Add the PCB to the list * * XXX FIXME VERY IMPORTANT! * * This is totally FUBAR. 
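* The locking requirements differ depending on who called us.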
We could get here in two cases: * * 1) When user calls socket() * 2) When we need to accept new incoming connection and call * sonewconn() * - * In the first case we must aquire ng_btsocket_l2cap_sockets_mtx. + * In the first case we must acquire ng_btsocket_l2cap_sockets_mtx. * In the second case we hold ng_btsocket_l2cap_sockets_mtx already. * So we now need to distinguish between these cases. From reading * /sys/kern/uipc_socket.c we can find out that sonewconn() calls * pru_attach with proto == 0 and td == NULL. For now use this fact * to figure out if we were called from socket() or from sonewconn(). */ if (td != NULL) mtx_lock(&ng_btsocket_l2cap_sockets_mtx); else mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED); /* Set PCB token. Use ng_btsocket_l2cap_sockets_mtx for protection */ if (++ token == 0) token ++; pcb->token = token; LIST_INSERT_HEAD(&ng_btsocket_l2cap_sockets, pcb, next); if (td != NULL) mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); } /* ng_btsocket_l2cap_attach */ /* * Bind socket */ int ng_btsocket_l2cap_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_l2cap_pcb_t *pcb = NULL; struct sockaddr_l2cap *sa = (struct sockaddr_l2cap *) nam; int psm, error = 0; if (ng_btsocket_l2cap_node == NULL) return (EINVAL); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->l2cap_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->l2cap_len != sizeof(*sa)) return (EINVAL); psm = le16toh(sa->l2cap_psm); /* * Check if another socket has this address already (look for exact * match on PSM and bdaddr) and assign socket address if it's available. * * Note: socket can be bound to ANY PSM (zero) thus allowing several * channels with the same PSM between the same pair of BD_ADDR'es. */ mtx_lock(&ng_btsocket_l2cap_sockets_mtx); LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) if (psm != 0 && psm == pcb->psm && bcmp(&pcb->src, &sa->l2cap_bdaddr, sizeof(bdaddr_t)) == 0) break; if (pcb == NULL) { /* Set socket address */ pcb = so2l2cap_pcb(so); if (pcb != NULL) { bcopy(&sa->l2cap_bdaddr, &pcb->src, sizeof(pcb->src)); pcb->psm = psm; } else error = EINVAL; } else error = EADDRINUSE; mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (error); } /* ng_btsocket_l2cap_bind */ /* * Connect socket */ int ng_btsocket_l2cap_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { ng_btsocket_l2cap_pcb_t *pcb = so2l2cap_pcb(so); struct sockaddr_l2cap *sa = (struct sockaddr_l2cap *) nam; ng_btsocket_l2cap_rtentry_t *rt = NULL; int have_src, error = 0; /* Check socket */ if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); if (pcb->state == NG_BTSOCKET_L2CAP_CONNECTING) return (EINPROGRESS); /* Verify address */ if (sa == NULL) return (EINVAL); if (sa->l2cap_family != AF_BLUETOOTH) return (EAFNOSUPPORT); if (sa->l2cap_len != sizeof(*sa)) return (EINVAL); if (sa->l2cap_psm == 0 || bcmp(&sa->l2cap_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) return (EDESTADDRREQ); if (pcb->psm != 0 && pcb->psm != le16toh(sa->l2cap_psm)) return (EINVAL); /* * Routing. Socket should be bound to some source address. The source * address can be ANY. Destination address must be set and it must not * be ANY. If source address is ANY then find the first rtentry that has * src != dst.
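* (This way we never pick a route whose source address equals the * destination, i.e. we never try to connect a device to itself.)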
*/ mtx_lock(&ng_btsocket_l2cap_rt_mtx); mtx_lock(&ng_btsocket_l2cap_sockets_mtx); mtx_lock(&pcb->pcb_mtx); /* Set destination address and PSM */ bcopy(&sa->l2cap_bdaddr, &pcb->dst, sizeof(pcb->dst)); pcb->psm = le16toh(sa->l2cap_psm); pcb->rt = NULL; have_src = bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)); LIST_FOREACH(rt, &ng_btsocket_l2cap_rt, next) { if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook)) continue; /* Match src and dst */ if (have_src) { if (bcmp(&pcb->src, &rt->src, sizeof(rt->src)) == 0) break; } else { if (bcmp(&pcb->dst, &rt->src, sizeof(rt->src)) != 0) break; } } if (rt != NULL) { pcb->rt = rt; if (!have_src) bcopy(&rt->src, &pcb->src, sizeof(pcb->src)); } else error = EHOSTUNREACH; /* * Send L2CA_Connect request */ if (error == 0) { error = ng_btsocket_l2cap_send_l2ca_con_req(pcb); if (error == 0) { pcb->flags |= NG_BTSOCKET_L2CAP_CLIENT; pcb->state = NG_BTSOCKET_L2CAP_CONNECTING; soisconnecting(pcb->so); ng_btsocket_l2cap_timeout(pcb); } } mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); mtx_unlock(&ng_btsocket_l2cap_rt_mtx); return (error); } /* ng_btsocket_l2cap_connect */ /* * Process ioctl calls on socket */ int ng_btsocket_l2cap_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { return (EINVAL); } /* ng_btsocket_l2cap_control */ /* * Process getsockopt/setsockopt system calls */ int ng_btsocket_l2cap_ctloutput(struct socket *so, struct sockopt *sopt) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); int error = 0; ng_l2cap_cfg_opt_val_t v; if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); if (sopt->sopt_level != SOL_L2CAP) return (0); mtx_lock(&pcb->pcb_mtx); switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case SO_L2CAP_IMTU: /* get incoming MTU */ error = sooptcopyout(sopt, &pcb->imtu, sizeof(pcb->imtu)); break; case SO_L2CAP_OMTU: /* get outgoing (peer incoming) MTU */ error = sooptcopyout(sopt, &pcb->omtu, sizeof(pcb->omtu)); break; case SO_L2CAP_IFLOW: /* get incoming flow spec. */ error = sooptcopyout(sopt, &pcb->iflow, sizeof(pcb->iflow)); break; case SO_L2CAP_OFLOW: /* get outgoing flow spec. */ error = sooptcopyout(sopt, &pcb->oflow, sizeof(pcb->oflow)); break; case SO_L2CAP_FLUSH: /* get flush timeout */ error = sooptcopyout(sopt, &pcb->flush_timo, sizeof(pcb->flush_timo)); break; default: error = ENOPROTOOPT; break; } break; case SOPT_SET: /* * XXX * We do not allow changing these parameters while the socket is * connected or we are in the process of creating a connection. * Maybe this should indicate re-configuration of the open * channel? */ if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) { error = EACCES; break; } switch (sopt->sopt_name) { case SO_L2CAP_IMTU: /* set incoming MTU */ error = sooptcopyin(sopt, &v, sizeof(v), sizeof(v.mtu)); if (error == 0) pcb->imtu = v.mtu; break; case SO_L2CAP_OFLOW: /* set outgoing flow spec.
*/ error = sooptcopyin(sopt, &v, sizeof(v), sizeof(v.flow)); if (error == 0) bcopy(&v.flow, &pcb->oflow, sizeof(pcb->oflow)); break; case SO_L2CAP_FLUSH: /* set flush timeout */ error = sooptcopyin(sopt, &v, sizeof(v), sizeof(v.flush_timo)); if (error == 0) pcb->flush_timo = v.flush_timo; break; default: error = ENOPROTOOPT; break; } break; default: error = EINVAL; break; } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_l2cap_ctloutput */ /* * Detach and destroy socket */ void ng_btsocket_l2cap_detach(struct socket *so) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); KASSERT(pcb != NULL, ("ng_btsocket_l2cap_detach: pcb == NULL")); if (ng_btsocket_l2cap_node == NULL) return; mtx_lock(&ng_btsocket_l2cap_sockets_mtx); mtx_lock(&pcb->pcb_mtx); /* XXX what to do with pending request? */ if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) ng_btsocket_l2cap_untimeout(pcb); if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED && pcb->state != NG_BTSOCKET_L2CAP_DISCONNECTING) /* Send disconnect request with "zero" token */ ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); pcb->state = NG_BTSOCKET_L2CAP_CLOSED; LIST_REMOVE(pcb, next); mtx_unlock(&pcb->pcb_mtx); mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); mtx_destroy(&pcb->pcb_mtx); bzero(pcb, sizeof(*pcb)); FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP); soisdisconnected(so); so->so_pcb = NULL; } /* ng_btsocket_l2cap_detach */ /* * Disconnect socket */ int ng_btsocket_l2cap_disconnect(struct socket *so) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); int error = 0; if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); mtx_lock(&pcb->pcb_mtx); if (pcb->state == NG_BTSOCKET_L2CAP_DISCONNECTING) { mtx_unlock(&pcb->pcb_mtx); return (EINPROGRESS); } if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) { /* XXX FIXME what to do with pending request? */ if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) ng_btsocket_l2cap_untimeout(pcb); error = ng_btsocket_l2cap_send_l2ca_discon_req(pcb->token, pcb); if (error == 0) { pcb->state = NG_BTSOCKET_L2CAP_DISCONNECTING; soisdisconnecting(so); ng_btsocket_l2cap_timeout(pcb); } /* XXX FIXME what to do if error != 0 */ } mtx_unlock(&pcb->pcb_mtx); return (error); } /* ng_btsocket_l2cap_disconnect */ /* * Listen on socket */ int ng_btsocket_l2cap_listen(struct socket *so, int backlog, struct thread *td) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); int error; SOCK_LOCK(so); error = solisten_proto_check(so); if (error != 0) goto out; if (pcb == NULL) { error = EINVAL; goto out; } if (ng_btsocket_l2cap_node == NULL) { error = EINVAL; goto out; } if (pcb->psm == 0) { error = EDESTADDRREQ; goto out; } solisten_proto(so, backlog); out: SOCK_UNLOCK(so); return (error); } /* ng_btsocket_l2cap_listen */ /* * Get peer address */ int ng_btsocket_l2cap_peeraddr(struct socket *so, struct sockaddr **nam) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); struct sockaddr_l2cap sa; if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); bcopy(&pcb->dst, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr)); sa.l2cap_psm = htole16(pcb->psm); sa.l2cap_len = sizeof(sa); sa.l2cap_family = AF_BLUETOOTH; *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)?
ENOMEM : 0); } /* ng_btsocket_l2cap_peeraddr */ /* * Send data to socket */ int ng_btsocket_l2cap_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { ng_btsocket_l2cap_pcb_t *pcb = so2l2cap_pcb(so); int error = 0; if (ng_btsocket_l2cap_node == NULL) { error = ENETDOWN; goto drop; } /* Check socket and input */ if (pcb == NULL || m == NULL || control != NULL) { error = EINVAL; goto drop; } mtx_lock(&pcb->pcb_mtx); /* Make sure socket is connected */ if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) { mtx_unlock(&pcb->pcb_mtx); error = ENOTCONN; goto drop; } /* Check route */ if (pcb->rt == NULL || pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) { mtx_unlock(&pcb->pcb_mtx); error = ENETDOWN; goto drop; } /* Check packet size against outgoing (peer's incoming) MTU */ if (m->m_pkthdr.len > pcb->omtu) { NG_BTSOCKET_L2CAP_ERR( "%s: Packet too big, len=%d, omtu=%d\n", __func__, m->m_pkthdr.len, pcb->omtu); mtx_unlock(&pcb->pcb_mtx); error = EMSGSIZE; goto drop; } /* * First put packet on socket send queue. Then check if we have * pending timeout. If we do not have timeout then we must send * packet and schedule timeout. Otherwise do nothing and wait for * L2CA_WRITE_RSP. */ sbappendrecord(&pcb->so->so_snd, m); m = NULL; if (!(pcb->flags & NG_BTSOCKET_L2CAP_TIMO)) { error = ng_btsocket_l2cap_send2(pcb); if (error == 0) ng_btsocket_l2cap_timeout(pcb); else sbdroprecord(&pcb->so->so_snd); /* XXX */ } mtx_unlock(&pcb->pcb_mtx); drop: NG_FREE_M(m); /* checks for != NULL */ NG_FREE_M(control); return (error); } /* ng_btsocket_l2cap_send */ /* * Send first packet in the socket queue to the L2CAP layer */ static int ng_btsocket_l2cap_send2(ng_btsocket_l2cap_pcb_p pcb) { struct mbuf *m = NULL; ng_l2cap_l2ca_hdr_t *hdr = NULL; int error = 0; mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->so->so_snd.sb_cc == 0) return (EINVAL); /* XXX */ m = m_dup(pcb->so->so_snd.sb_mb, M_DONTWAIT); if (m == NULL) return (ENOBUFS); /* Create L2CA packet header */ M_PREPEND(m, sizeof(*hdr), M_DONTWAIT); if (m != NULL) if (m->m_len < sizeof(*hdr)) m = m_pullup(m, sizeof(*hdr)); if (m == NULL) { NG_BTSOCKET_L2CAP_ERR( "%s: Failed to create L2CA packet header\n", __func__); return (ENOBUFS); } hdr = mtod(m, ng_l2cap_l2ca_hdr_t *); hdr->token = pcb->token; hdr->length = m->m_pkthdr.len - sizeof(*hdr); hdr->lcid = pcb->cid; NG_BTSOCKET_L2CAP_INFO( "%s: Sending packet: len=%d, length=%d, lcid=%d, token=%d, state=%d\n", __func__, m->m_pkthdr.len, hdr->length, hdr->lcid, hdr->token, pcb->state); /* * If we got here then we have successfully created a new L2CAP * data packet and now we can send it to the L2CAP layer */ NG_SEND_DATA_ONLY(error, pcb->rt->hook, m); return (error); } /* ng_btsocket_l2cap_send2 */ /* * Get socket address */ int ng_btsocket_l2cap_sockaddr(struct socket *so, struct sockaddr **nam) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); struct sockaddr_l2cap sa; if (pcb == NULL) return (EINVAL); if (ng_btsocket_l2cap_node == NULL) return (EINVAL); bcopy(&pcb->src, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr)); sa.l2cap_psm = htole16(pcb->psm); sa.l2cap_len = sizeof(sa); sa.l2cap_family = AF_BLUETOOTH; *nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT); return ((*nam == NULL)? ENOMEM : 0); } /* ng_btsocket_l2cap_sockaddr */ /***************************************************************************** ***************************************************************************** ** Misc.
functions ***************************************************************************** *****************************************************************************/ /* * Look for the socket that listens on the given PSM and bdaddr. Returns an exact or * close match (if any). Caller must hold ng_btsocket_l2cap_sockets_mtx. */ static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_addr(bdaddr_p bdaddr, int psm) { ng_btsocket_l2cap_pcb_p p = NULL, p1 = NULL; mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_l2cap_sockets, next) { if (p->so == NULL || !(p->so->so_options & SO_ACCEPTCONN) || p->psm != psm) continue; if (bcmp(&p->src, bdaddr, sizeof(p->src)) == 0) break; if (bcmp(&p->src, NG_HCI_BDADDR_ANY, sizeof(p->src)) == 0) p1 = p; } return ((p != NULL)? p : p1); } /* ng_btsocket_l2cap_pcb_by_addr */ /* * Look for the socket that has the given token. * Caller must hold ng_btsocket_l2cap_sockets_mtx. */ static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_token(u_int32_t token) { ng_btsocket_l2cap_pcb_p p = NULL; if (token == 0) return (NULL); mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_l2cap_sockets, next) if (p->token == token) break; return (p); } /* ng_btsocket_l2cap_pcb_by_token */ /* * Look for the socket that is assigned to the given source address and channel ID. * Caller must hold ng_btsocket_l2cap_sockets_mtx. */ static ng_btsocket_l2cap_pcb_p ng_btsocket_l2cap_pcb_by_cid(bdaddr_p src, int cid) { ng_btsocket_l2cap_pcb_p p = NULL; mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED); LIST_FOREACH(p, &ng_btsocket_l2cap_sockets, next) if (p->cid == cid && bcmp(src, &p->src, sizeof(p->src)) == 0) break; return (p); } /* ng_btsocket_l2cap_pcb_by_cid */ /* * Set timeout on socket */ static void ng_btsocket_l2cap_timeout(ng_btsocket_l2cap_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (!(pcb->flags & NG_BTSOCKET_L2CAP_TIMO)) { pcb->flags |= NG_BTSOCKET_L2CAP_TIMO; pcb->timo = timeout(ng_btsocket_l2cap_process_timeout, pcb, bluetooth_l2cap_ertx_timeout()); } else KASSERT(0, ("%s: Duplicated socket timeout?!\n", __func__)); } /* ng_btsocket_l2cap_timeout */ /* * Unset timeout on socket */ static void ng_btsocket_l2cap_untimeout(ng_btsocket_l2cap_pcb_p pcb) { mtx_assert(&pcb->pcb_mtx, MA_OWNED); if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) { untimeout(ng_btsocket_l2cap_process_timeout, pcb, pcb->timo); pcb->flags &= ~NG_BTSOCKET_L2CAP_TIMO; } else KASSERT(0, ("%s: No socket timeout?!\n", __func__)); } /* ng_btsocket_l2cap_untimeout */ /* * Process timeout on socket */ static void ng_btsocket_l2cap_process_timeout(void *xpcb) { ng_btsocket_l2cap_pcb_p pcb = (ng_btsocket_l2cap_pcb_p) xpcb; mtx_lock(&pcb->pcb_mtx); pcb->flags &= ~NG_BTSOCKET_L2CAP_TIMO; pcb->so->so_error = ETIMEDOUT; switch (pcb->state) { case NG_BTSOCKET_L2CAP_CONNECTING: case NG_BTSOCKET_L2CAP_CONFIGURING: /* Send disconnect request with "zero" token */ if (pcb->cid != 0) ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb); /* ...
and close the socket */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); break; case NG_BTSOCKET_L2CAP_OPEN: /* Send timeout - drop packet and wakeup sender */ sbdroprecord(&pcb->so->so_snd); sowwakeup(pcb->so); break; case NG_BTSOCKET_L2CAP_DISCONNECTING: /* Disconnect timeout - disconnect the socket anyway */ pcb->state = NG_BTSOCKET_L2CAP_CLOSED; soisdisconnected(pcb->so); break; default: NG_BTSOCKET_L2CAP_ERR( "%s: Invalid socket state=%d\n", __func__, pcb->state); break; } mtx_unlock(&pcb->pcb_mtx); } /* ng_btsocket_l2cap_process_timeout */ /* * Translate HCI/L2CAP error code into "errno" code * XXX Note: Some L2CAP and HCI error codes have the same value, but * different meaning */ static int ng_btsocket_l2cap_result2errno(int result) { switch (result) { case 0x00: /* No error */ return (0); case 0x01: /* Unknown HCI command */ return (ENODEV); case 0x02: /* No connection */ return (ENOTCONN); case 0x03: /* Hardware failure */ return (EIO); case 0x04: /* Page timeout */ return (EHOSTDOWN); case 0x05: /* Authentication failure */ case 0x06: /* Key missing */ case 0x18: /* Pairing not allowed */ case 0x21: /* Role change not allowed */ case 0x24: /* LMP PDU not allowed */ case 0x25: /* Encryption mode not acceptable */ case 0x26: /* Unit key used */ return (EACCES); case 0x07: /* Memory full */ return (ENOMEM); case 0x08: /* Connection timeout */ case 0x10: /* Host timeout */ case 0x22: /* LMP response timeout */ case 0xee: /* HCI timeout */ case 0xeeee: /* L2CAP timeout */ return (ETIMEDOUT); case 0x09: /* Max number of connections */ case 0x0a: /* Max number of SCO connections to a unit */ return (EMLINK); case 0x0b: /* ACL connection already exists */ return (EEXIST); case 0x0c: /* Command disallowed */ return (EBUSY); case 0x0d: /* Host rejected due to limited resources */ case 0x0e: /* Host rejected due to security reasons */ case 0x0f: /* Host rejected due to remote unit is a personal unit */ case 0x1b: /* SCO offset rejected */ case 0x1c: /* SCO interval rejected */ case 0x1d: /* SCO air mode rejected */ return (ECONNREFUSED); case 0x11: /* Unsupported feature or parameter value */ case 0x19: /* Unknown LMP PDU */ case 0x1a: /* Unsupported remote feature */ case 0x20: /* Unsupported LMP parameter value */ case 0x27: /* QoS is not supported */ case 0x29: /* Pairing with unit key not supported */ return (EOPNOTSUPP); case 0x12: /* Invalid HCI command parameter */ case 0x1e: /* Invalid LMP parameters */ return (EINVAL); case 0x13: /* Other end terminated connection: User ended connection */ case 0x14: /* Other end terminated connection: Low resources */ case 0x15: /* Other end terminated connection: About to power off */ return (ECONNRESET); case 0x16: /* Connection terminated by local host */ return (ECONNABORTED); #if 0 /* XXX not yet */ case 0x17: /* Repeated attempts */ case 0x1f: /* Unspecified error */ case 0x23: /* LMP error transaction collision */ case 0x28: /* Instant passed */ #endif } return (ENOSYS); } /* ng_btsocket_l2cap_result2errno */ Index: head/sys/netgraph/ng_base.c =================================================================== --- head/sys/netgraph/ng_base.c (revision 170034) +++ head/sys/netgraph/ng_base.c (revision 170035) @@ -1,3777 +1,3778 @@ /* * ng_base.c */ /*- * Copyright (c) 1996-1999 Whistle Communications, Inc. * All rights reserved.
* * Subject to the following obligations and disclaimer of warranty, use and * redistribution of this software, in source or object code forms, with or * without modifications are expressly permitted by Whistle Communications; * provided, however, that: * 1. Any and all reproductions of the source or object code must include the * copyright notice above and the following disclaimer of warranties; and * 2. No rights are granted, in any manner or form, to use Whistle * Communications, Inc. trademarks, including the mark "WHISTLE * COMMUNICATIONS" on advertising, endorsements, or otherwise except as * such appears in the above copyright notice or in the software. * * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Authors: Julian Elischer * Archie Cobbs * * $FreeBSD$ * $Whistle: ng_base.c,v 1.39 1999/01/28 23:54:53 julian Exp $ */ /* * This file implements the base netgraph code. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(netgraph, NG_ABI_VERSION); /* List of all active nodes */ static LIST_HEAD(, ng_node) ng_nodelist; static struct mtx ng_nodelist_mtx; /* Mutex to protect topology events. */ static struct mtx ng_topo_mtx; #ifdef NETGRAPH_DEBUG static struct mtx ngq_mtx; /* protects the queue item list */ static SLIST_HEAD(, ng_node) ng_allnodes; static LIST_HEAD(, ng_node) ng_freenodes; /* in debug, we never free() them */ static SLIST_HEAD(, ng_hook) ng_allhooks; static LIST_HEAD(, ng_hook) ng_freehooks; /* in debug, we never free() them */ static void ng_dumpitems(void); static void ng_dumpnodes(void); static void ng_dumphooks(void); #endif /* NETGRAPH_DEBUG */ /* * DEAD versions of the structures. * In order to avoid races, it is sometimes necessary to point * at SOMETHING even though theoretically, the current entity is * INVALID. Use these to avoid these races.
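* For example, a disconnected hook can be left pointing at ng_deadhook * and ng_deadnode (defined just below) instead of at freed memory.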
*/ struct ng_type ng_deadtype = { NG_ABI_VERSION, "dead", NULL, /* modevent */ NULL, /* constructor */ NULL, /* rcvmsg */ NULL, /* shutdown */ NULL, /* newhook */ NULL, /* findhook */ NULL, /* connect */ NULL, /* rcvdata */ NULL, /* disconnect */ NULL, /* cmdlist */ }; struct ng_node ng_deadnode = { "dead", &ng_deadtype, NGF_INVALID, 1, /* refs */ 0, /* numhooks */ NULL, /* private */ 0, /* ID */ LIST_HEAD_INITIALIZER(ng_deadnode.hooks), {}, /* all_nodes list entry */ {}, /* id hashtable list entry */ {}, /* workqueue entry */ { 0, {}, /* should never use! (should hang) */ NULL, &ng_deadnode.nd_input_queue.queue, &ng_deadnode }, #ifdef NETGRAPH_DEBUG ND_MAGIC, __FILE__, __LINE__, {NULL} #endif /* NETGRAPH_DEBUG */ }; struct ng_hook ng_deadhook = { "dead", NULL, /* private */ HK_INVALID | HK_DEAD, 1, /* refs always >= 1 */ 0, /* undefined data link type */ &ng_deadhook, /* Peer is self */ &ng_deadnode, /* attached to deadnode */ {}, /* hooks list */ NULL, /* override rcvmsg() */ NULL, /* override rcvdata() */ #ifdef NETGRAPH_DEBUG HK_MAGIC, __FILE__, __LINE__, {NULL} #endif /* NETGRAPH_DEBUG */ }; /* * END DEAD STRUCTURES */ /* List nodes with unallocated work */ static TAILQ_HEAD(, ng_node) ng_worklist = TAILQ_HEAD_INITIALIZER(ng_worklist); static struct mtx ng_worklist_mtx; /* MUST LOCK NODE FIRST */ /* List of installed types */ static LIST_HEAD(, ng_type) ng_typelist; static struct mtx ng_typelist_mtx; /* Hash related definitions */ /* XXX Don't need to initialise them because it's a LIST */ #define NG_ID_HASH_SIZE 32 /* most systems won't need even this many */ static LIST_HEAD(, ng_node) ng_ID_hash[NG_ID_HASH_SIZE]; static struct mtx ng_idhash_mtx; /* Method to find a node.. used twice so do it here */ #define NG_IDHASH_FN(ID) ((ID) % (NG_ID_HASH_SIZE)) #define NG_IDHASH_FIND(ID, node) \ do { \ mtx_assert(&ng_idhash_mtx, MA_OWNED); \ LIST_FOREACH(node, &ng_ID_hash[NG_IDHASH_FN(ID)], \ nd_idnodes) { \ if (NG_NODE_IS_VALID(node) \ && (NG_NODE_ID(node) == ID)) { \ break; \ } \ } \ } while (0) /* Internal functions */ static int ng_add_hook(node_p node, const char *name, hook_p * hookp); static int ng_generic_msg(node_p here, item_p item, hook_p lasthook); static ng_ID_t ng_decodeidname(const char *name); static int ngb_mod_event(module_t mod, int event, void *data); static void ng_worklist_remove(node_p node); static void ngintr(void); static void ng_apply_item(node_p node, item_p item, int rw); static void ng_flush_input_queue(struct ng_queue * ngq); static void ng_setisr(node_p node); static node_p ng_ID2noderef(ng_ID_t ID); static int ng_con_nodes(node_p node, const char *name, node_p node2, const char *name2); static void ng_con_part2(node_p node, hook_p hook, void *arg1, int arg2); static void ng_con_part3(node_p node, hook_p hook, void *arg1, int arg2); static int ng_mkpeer(node_p node, const char *name, const char *name2, char *type); /* Imported, these used to be externally visible, some may go back.
*/ void ng_destroy_hook(hook_p hook); node_p ng_name2noderef(node_p node, const char *name); int ng_path2noderef(node_p here, const char *path, node_p *dest, hook_p *lasthook); int ng_make_node(const char *type, node_p *nodepp); int ng_path_parse(char *addr, char **node, char **path, char **hook); void ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3); void ng_unname(node_p node); /* Our own netgraph malloc type */ MALLOC_DEFINE(M_NETGRAPH, "netgraph", "netgraph structures and ctrl messages"); MALLOC_DEFINE(M_NETGRAPH_HOOK, "netgraph_hook", "netgraph hook structures"); MALLOC_DEFINE(M_NETGRAPH_NODE, "netgraph_node", "netgraph node structures"); MALLOC_DEFINE(M_NETGRAPH_ITEM, "netgraph_item", "netgraph item structures"); MALLOC_DEFINE(M_NETGRAPH_MSG, "netgraph_msg", "netgraph name storage"); /* Should not be visible outside this file */ #define _NG_ALLOC_HOOK(hook) \ MALLOC(hook, hook_p, sizeof(*hook), M_NETGRAPH_HOOK, M_NOWAIT | M_ZERO) #define _NG_ALLOC_NODE(node) \ MALLOC(node, node_p, sizeof(*node), M_NETGRAPH_NODE, M_NOWAIT | M_ZERO) #define NG_QUEUE_LOCK_INIT(n) \ mtx_init(&(n)->q_mtx, "ng_node", NULL, MTX_DEF) #define NG_QUEUE_LOCK(n) \ mtx_lock(&(n)->q_mtx) #define NG_QUEUE_UNLOCK(n) \ mtx_unlock(&(n)->q_mtx) #define NG_WORKLIST_LOCK_INIT() \ mtx_init(&ng_worklist_mtx, "ng_worklist", NULL, MTX_DEF) #define NG_WORKLIST_LOCK() \ mtx_lock(&ng_worklist_mtx) #define NG_WORKLIST_UNLOCK() \ mtx_unlock(&ng_worklist_mtx) #ifdef NETGRAPH_DEBUG /*----------------------------------------------*/ /* * In debug mode: * In an attempt to help track reference count screwups * we do not free objects back to the malloc system, but keep them * in a local cache where we can examine them and keep information safely * after they have been freed. * We use this scheme for nodes and hooks, and to some extent for items. 
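* Freed nodes and hooks are parked on the ng_freenodes and ng_freehooks * lists and recycled by ng_alloc_node() and ng_alloc_hook() below.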
*/ static __inline hook_p ng_alloc_hook(void) { hook_p hook; SLIST_ENTRY(ng_hook) temp; mtx_lock(&ng_nodelist_mtx); hook = LIST_FIRST(&ng_freehooks); if (hook) { LIST_REMOVE(hook, hk_hooks); bcopy(&hook->hk_all, &temp, sizeof(temp)); bzero(hook, sizeof(struct ng_hook)); bcopy(&temp, &hook->hk_all, sizeof(temp)); mtx_unlock(&ng_nodelist_mtx); hook->hk_magic = HK_MAGIC; } else { mtx_unlock(&ng_nodelist_mtx); _NG_ALLOC_HOOK(hook); if (hook) { hook->hk_magic = HK_MAGIC; mtx_lock(&ng_nodelist_mtx); SLIST_INSERT_HEAD(&ng_allhooks, hook, hk_all); mtx_unlock(&ng_nodelist_mtx); } } return (hook); } static __inline node_p ng_alloc_node(void) { node_p node; SLIST_ENTRY(ng_node) temp; mtx_lock(&ng_nodelist_mtx); node = LIST_FIRST(&ng_freenodes); if (node) { LIST_REMOVE(node, nd_nodes); bcopy(&node->nd_all, &temp, sizeof(temp)); bzero(node, sizeof(struct ng_node)); bcopy(&temp, &node->nd_all, sizeof(temp)); mtx_unlock(&ng_nodelist_mtx); node->nd_magic = ND_MAGIC; } else { mtx_unlock(&ng_nodelist_mtx); _NG_ALLOC_NODE(node); if (node) { node->nd_magic = ND_MAGIC; mtx_lock(&ng_nodelist_mtx); SLIST_INSERT_HEAD(&ng_allnodes, node, nd_all); mtx_unlock(&ng_nodelist_mtx); } } return (node); } #define NG_ALLOC_HOOK(hook) do { (hook) = ng_alloc_hook(); } while (0) #define NG_ALLOC_NODE(node) do { (node) = ng_alloc_node(); } while (0) #define NG_FREE_HOOK(hook) \ do { \ mtx_lock(&ng_nodelist_mtx); \ LIST_INSERT_HEAD(&ng_freehooks, hook, hk_hooks); \ hook->hk_magic = 0; \ mtx_unlock(&ng_nodelist_mtx); \ } while (0) #define NG_FREE_NODE(node) \ do { \ mtx_lock(&ng_nodelist_mtx); \ LIST_INSERT_HEAD(&ng_freenodes, node, nd_nodes); \ node->nd_magic = 0; \ mtx_unlock(&ng_nodelist_mtx); \ } while (0) #else /* NETGRAPH_DEBUG */ /*----------------------------------------------*/ #define NG_ALLOC_HOOK(hook) _NG_ALLOC_HOOK(hook) #define NG_ALLOC_NODE(node) _NG_ALLOC_NODE(node) #define NG_FREE_HOOK(hook) do { FREE((hook), M_NETGRAPH_HOOK); } while (0) #define NG_FREE_NODE(node) do { FREE((node), M_NETGRAPH_NODE); } while (0) #endif /* NETGRAPH_DEBUG */ /*----------------------------------------------*/ /* Set this to kdb_enter("X") to catch all errors as they occur */ #ifndef TRAP_ERROR #define TRAP_ERROR() #endif static ng_ID_t nextID = 1; #ifdef INVARIANTS #define CHECK_DATA_MBUF(m) do { \ struct mbuf *n; \ int total; \ \ M_ASSERTPKTHDR(m); \ for (total = 0, n = (m); n != NULL; n = n->m_next) { \ total += n->m_len; \ if (n->m_nextpkt != NULL) \ panic("%s: m_nextpkt", __func__); \ } \ \ if ((m)->m_pkthdr.len != total) { \ panic("%s: %d != %d", \ __func__, (m)->m_pkthdr.len, total); \ } \ } while (0) #else #define CHECK_DATA_MBUF(m) #endif /************************************************************************ Parse type definitions for generic messages ************************************************************************/ /* Handy structure parse type defining macro */ #define DEFINE_PARSE_STRUCT_TYPE(lo, up, args) \ static const struct ng_parse_struct_field \ ng_ ## lo ## _type_fields[] = NG_GENERIC_ ## up ## _INFO args; \ static const struct ng_parse_type ng_generic_ ## lo ## _type = { \ &ng_parse_struct_type, \ &ng_ ## lo ## _type_fields \ } DEFINE_PARSE_STRUCT_TYPE(mkpeer, MKPEER, ()); DEFINE_PARSE_STRUCT_TYPE(connect, CONNECT, ()); DEFINE_PARSE_STRUCT_TYPE(name, NAME, ()); DEFINE_PARSE_STRUCT_TYPE(rmhook, RMHOOK, ()); DEFINE_PARSE_STRUCT_TYPE(nodeinfo, NODEINFO, ()); DEFINE_PARSE_STRUCT_TYPE(typeinfo, TYPEINFO, ()); DEFINE_PARSE_STRUCT_TYPE(linkinfo, LINKINFO, (&ng_generic_nodeinfo_type)); /* Get length of an 
array when the length is stored as a 32 bit value immediately preceding the array -- as with struct namelist and struct typelist. */ static int ng_generic_list_getLength(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { return *((const u_int32_t *)(buf - 4)); } /* Get length of the array of struct linkinfo inside a struct hooklist */ static int ng_generic_linkinfo_getLength(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { const struct hooklist *hl = (const struct hooklist *)start; return hl->nodeinfo.hooks; } /* Array type for a variable length array of struct namelist */ static const struct ng_parse_array_info ng_nodeinfoarray_type_info = { &ng_generic_nodeinfo_type, &ng_generic_list_getLength }; static const struct ng_parse_type ng_generic_nodeinfoarray_type = { &ng_parse_array_type, &ng_nodeinfoarray_type_info }; /* Array type for a variable length array of struct typelist */ static const struct ng_parse_array_info ng_typeinfoarray_type_info = { &ng_generic_typeinfo_type, &ng_generic_list_getLength }; static const struct ng_parse_type ng_generic_typeinfoarray_type = { &ng_parse_array_type, &ng_typeinfoarray_type_info }; /* Array type for array of struct linkinfo in struct hooklist */ static const struct ng_parse_array_info ng_generic_linkinfo_array_type_info = { &ng_generic_linkinfo_type, &ng_generic_linkinfo_getLength }; static const struct ng_parse_type ng_generic_linkinfo_array_type = { &ng_parse_array_type, &ng_generic_linkinfo_array_type_info }; DEFINE_PARSE_STRUCT_TYPE(typelist, TYPELIST, (&ng_generic_nodeinfoarray_type)); DEFINE_PARSE_STRUCT_TYPE(hooklist, HOOKLIST, (&ng_generic_nodeinfo_type, &ng_generic_linkinfo_array_type)); DEFINE_PARSE_STRUCT_TYPE(listnodes, LISTNODES, (&ng_generic_nodeinfoarray_type)); /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ng_generic_cmds[] = { { NGM_GENERIC_COOKIE, NGM_SHUTDOWN, "shutdown", NULL, NULL }, { NGM_GENERIC_COOKIE, NGM_MKPEER, "mkpeer", &ng_generic_mkpeer_type, NULL }, { NGM_GENERIC_COOKIE, NGM_CONNECT, "connect", &ng_generic_connect_type, NULL }, { NGM_GENERIC_COOKIE, NGM_NAME, "name", &ng_generic_name_type, NULL }, { NGM_GENERIC_COOKIE, NGM_RMHOOK, "rmhook", &ng_generic_rmhook_type, NULL }, { NGM_GENERIC_COOKIE, NGM_NODEINFO, "nodeinfo", NULL, &ng_generic_nodeinfo_type }, { NGM_GENERIC_COOKIE, NGM_LISTHOOKS, "listhooks", NULL, &ng_generic_hooklist_type }, { NGM_GENERIC_COOKIE, NGM_LISTNAMES, "listnames", NULL, &ng_generic_listnodes_type /* same as NGM_LISTNODES */ }, { NGM_GENERIC_COOKIE, NGM_LISTNODES, "listnodes", NULL, &ng_generic_listnodes_type }, { NGM_GENERIC_COOKIE, NGM_LISTTYPES, "listtypes", NULL, &ng_generic_typeinfo_type }, { NGM_GENERIC_COOKIE, NGM_TEXT_CONFIG, "textconfig", NULL, &ng_parse_string_type }, { NGM_GENERIC_COOKIE, NGM_TEXT_STATUS, "textstatus", NULL, &ng_parse_string_type }, { NGM_GENERIC_COOKIE, NGM_ASCII2BINARY, "ascii2binary", &ng_parse_ng_mesg_type, &ng_parse_ng_mesg_type }, { NGM_GENERIC_COOKIE, NGM_BINARY2ASCII, "binary2ascii", &ng_parse_ng_mesg_type, &ng_parse_ng_mesg_type }, { 0 } }; /************************************************************************ Node routines ************************************************************************/ /* * Instantiate a node of the requested type */ int ng_make_node(const char *typename, node_p *nodepp) { struct ng_type *type; int error; /* Check that the type makes sense */ if (typename == NULL) { TRAP_ERROR(); return (EINVAL); } /* Locate the node type. 
If we fail we return. Do not try to load a * module. */ if ((type = ng_findtype(typename)) == NULL) return (ENXIO); /* * If we have a constructor, then make the node and * call the constructor to do type-specific initialisation. */ if (type->constructor != NULL) { if ((error = ng_make_node_common(type, nodepp)) == 0) { if ((error = ((*type->constructor)(*nodepp)) != 0)) { NG_NODE_UNREF(*nodepp); } } } else { /* * Node has no constructor. We cannot ask for one * to be made. It must be brought into existence by * some external agency. The external agency should * call ng_make_node_common() directly to get the * netgraph part initialised. */ TRAP_ERROR(); error = EINVAL; } return (error); } /* * Generic node creation. Called by node initialisation for externally * instantiated nodes (e.g. hardware, sockets, etc.). * The returned node has a reference count of 1. */ int ng_make_node_common(struct ng_type *type, node_p *nodepp) { node_p node; /* Require the node type to have been already installed */ if (ng_findtype(type->name) == NULL) { TRAP_ERROR(); return (EINVAL); } /* Make a node and try to attach it to the type */ NG_ALLOC_NODE(node); if (node == NULL) { TRAP_ERROR(); return (ENOMEM); } node->nd_type = type; NG_NODE_REF(node); /* note reference */ type->refs++; NG_QUEUE_LOCK_INIT(&node->nd_input_queue); node->nd_input_queue.queue = NULL; node->nd_input_queue.last = &node->nd_input_queue.queue; node->nd_input_queue.q_flags = 0; node->nd_input_queue.q_node = node; /* Initialize hook list for new node */ LIST_INIT(&node->nd_hooks); /* Link us into the node linked list */ mtx_lock(&ng_nodelist_mtx); LIST_INSERT_HEAD(&ng_nodelist, node, nd_nodes); mtx_unlock(&ng_nodelist_mtx); /* get an ID and put us in the hash chain */ mtx_lock(&ng_idhash_mtx); for (;;) { /* wrap protection, even if silly */ node_p node2 = NULL; node->nd_ID = nextID++; /* 137/second for 1 year before wrap */ /* Is there a problem with the new number? */ NG_IDHASH_FIND(node->nd_ID, node2); /* already taken? */ if ((node->nd_ID != 0) && (node2 == NULL)) { break; } } LIST_INSERT_HEAD(&ng_ID_hash[NG_IDHASH_FN(node->nd_ID)], node, nd_idnodes); mtx_unlock(&ng_idhash_mtx); /* Done */ *nodepp = node; return (0); } /* * Forcibly start the shutdown process on a node. Either call * its shutdown method, or do the default shutdown if there is * no type-specific method. * * We can only be called from a shutdown message, so we know we have * a writer lock, and therefore exclusive access. It also means * that we should not be on the work queue, but we check anyhow. * * Persistent node types must have a type-specific method which either * allocates a new node (in which case this one is irretrievably going away) * or cleans up anything it needs and just makes the node valid again * (in which case we allow the node to survive). * * XXX We need to think of how to tell a persistent node that we * REALLY need to go away because the hardware has gone or we * are rebooting.... etc. 
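 *
 * A rough sketch of the default sequence below (a reading aid, not
 * extra semantics): mark the node NGF_INVALID|NGF_CLOSING, run the
 * type's pre-shutdown 'close' method if any, destroy every remaining
 * hook, flush the input queue, run the type's 'shutdown' method
 * (which may resurrect a persistent node and return early), and
 * finally call ng_unname() and drop the references.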
*/ void ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3) { hook_p hook; /* Check if it's already shutting down */ if ((node->nd_flags & NGF_CLOSING) != 0) return; if (node == &ng_deadnode) { printf ("shutdown called on deadnode\n"); return; } /* Add an extra reference so it doesn't go away during this */ NG_NODE_REF(node); /* * Mark it invalid so any newcomers know not to try to use it. * Also add our own mark so we can't recurse; * note that NGF_INVALID does not do this as it's also set during * creation. */ node->nd_flags |= NGF_INVALID|NGF_CLOSING; /* If the node has a pre-shutdown method, then call it first */ if (node->nd_type && node->nd_type->close) (*node->nd_type->close)(node); /* Notify all remaining connected nodes to disconnect */ while ((hook = LIST_FIRST(&node->nd_hooks)) != NULL) ng_destroy_hook(hook); /* * Drain the input queue forcibly. * It has no hooks so what's it going to do, bleed on someone? * Theoretically we came here from a queue entry that was added * just before the queue was closed, so it should be empty anyway. * Also removes us from worklist if needed. */ ng_flush_input_queue(&node->nd_input_queue); /* Ask the type if it has anything to do in this case */ if (node->nd_type && node->nd_type->shutdown) { (*node->nd_type->shutdown)(node); if (NG_NODE_IS_VALID(node)) { /* * Well, blow me down if the node code hasn't declared * that it doesn't want to die. * Presumably it is a persistent node. * If we REALLY want it to go away, * e.g. hardware going away, * our caller should set NGF_REALLY_DIE in nd_flags. */ node->nd_flags &= ~(NGF_INVALID|NGF_CLOSING); NG_NODE_UNREF(node); /* Assume they still have theirs */ return; } } else { /* do the default thing */ NG_NODE_UNREF(node); } ng_unname(node); /* basically a NOP these days */ /* * Remove extra reference, possibly the last. * Possible other holders of references may include * timeout callouts, but theoretically the node's supposed to * have cancelled them. Possibly hardware dependencies may * force a driver to 'linger' with a reference. */ NG_NODE_UNREF(node); } /* * Remove a reference to the node, possibly the last. * deadnode always acts as if it were the last. */ int ng_unref_node(node_p node) { int v; if (node == &ng_deadnode) { return (0); } do { v = node->nd_refs - 1; } while (! atomic_cmpset_int(&node->nd_refs, v + 1, v)); if (v == 0) { /* we were the last */ mtx_lock(&ng_nodelist_mtx); node->nd_type->refs--; /* XXX maybe should get types lock? */ LIST_REMOVE(node, nd_nodes); mtx_unlock(&ng_nodelist_mtx); mtx_lock(&ng_idhash_mtx); LIST_REMOVE(node, nd_idnodes); mtx_unlock(&ng_idhash_mtx); mtx_destroy(&node->nd_input_queue.q_mtx); NG_FREE_NODE(node); } return (v); } /************************************************************************ Node ID handling ************************************************************************/ static node_p ng_ID2noderef(ng_ID_t ID) { node_p node; mtx_lock(&ng_idhash_mtx); NG_IDHASH_FIND(ID, node); if(node) NG_NODE_REF(node); mtx_unlock(&ng_idhash_mtx); return(node); } ng_ID_t ng_node2ID(node_p node) { return (node ? NG_NODE_ID(node) : 0); } /************************************************************************ Node name handling ************************************************************************/ /* * Assign a node a name. Once assigned, the name cannot be changed. */ int ng_name_node(node_p node, const char *name) { int i; node_p node2; /* Check the name is valid */ for (i = 0; i < NG_NODESIZ; i++) { if (name[i] == '\0' || name[i] == '.'
|| name[i] == ':') break; } if (i == 0 || name[i] != '\0') { TRAP_ERROR(); return (EINVAL); } if (ng_decodeidname(name) != 0) { /* valid IDs not allowed here */ TRAP_ERROR(); return (EINVAL); } /* Check the name isn't already being used */ if ((node2 = ng_name2noderef(node, name)) != NULL) { NG_NODE_UNREF(node2); TRAP_ERROR(); return (EADDRINUSE); } /* copy it */ strlcpy(NG_NODE_NAME(node), name, NG_NODESIZ); return (0); } /* * Find a node by absolute name. The name should NOT end with ':' * The name "." means "this node" and "[xxx]" means "the node * with ID (i.e., at address) xxx". * * Returns the node if found, else NULL. * Eventually should add something faster than a sequential search. - * Note it aquires a reference on the node so you can be sure it's still there. + * Note it acquires a reference on the node so you can be sure it's still + * there. */ node_p ng_name2noderef(node_p here, const char *name) { node_p node; ng_ID_t temp; /* "." means "this node" */ if (strcmp(name, ".") == 0) { NG_NODE_REF(here); return(here); } /* Check for name-by-ID */ if ((temp = ng_decodeidname(name)) != 0) { return (ng_ID2noderef(temp)); } /* Find node by name */ mtx_lock(&ng_nodelist_mtx); LIST_FOREACH(node, &ng_nodelist, nd_nodes) { if (NG_NODE_IS_VALID(node) && NG_NODE_HAS_NAME(node) && (strcmp(NG_NODE_NAME(node), name) == 0)) { break; } } if (node) NG_NODE_REF(node); mtx_unlock(&ng_nodelist_mtx); return (node); } /* * Decode an ID name, e.g. "[f03034de]". Returns 0 if the * string is not valid, otherwise returns the value. */ static ng_ID_t ng_decodeidname(const char *name) { const int len = strlen(name); char *eptr; u_long val; /* Check for proper length, brackets, no leading junk */ if ((len < 3) || (name[0] != '[') || (name[len - 1] != ']') || (!isxdigit(name[1]))) { return ((ng_ID_t)0); } /* Decode number */ val = strtoul(name + 1, &eptr, 16); if ((eptr - name != len - 1) || (val == ULONG_MAX) || (val == 0)) { return ((ng_ID_t)0); } return (ng_ID_t)val; } /* * Remove a name from a node. This should only be called * when shutting down and removing the node. * If we allow name changing, this may be resurrected. */ void ng_unname(node_p node) { } /************************************************************************ Hook routines Names are not optional. Hooks are always connected, except for a brief moment within these routines. On invalidation or during creation they are connected to the 'dead' hook. ************************************************************************/ /* * Remove a hook reference */ void ng_unref_hook(hook_p hook) { int v; if (hook == &ng_deadhook) { return; } do { v = hook->hk_refs; } while (! atomic_cmpset_int(&hook->hk_refs, v, v - 1)); if (v == 1) { /* we were the last */ if (_NG_HOOK_NODE(hook)) { /* it'll probably be ng_deadnode */ _NG_NODE_UNREF((_NG_HOOK_NODE(hook))); hook->hk_node = NULL; } NG_FREE_HOOK(hook); } } /* * Add an unconnected hook to a node. Only used internally. * Assumes node is locked. 
(XXX not yet true ) */ static int ng_add_hook(node_p node, const char *name, hook_p *hookp) { hook_p hook; int error = 0; /* Check that the given name is good */ if (name == NULL) { TRAP_ERROR(); return (EINVAL); } if (ng_findhook(node, name) != NULL) { TRAP_ERROR(); return (EEXIST); } /* Allocate the hook and link it up */ NG_ALLOC_HOOK(hook); if (hook == NULL) { TRAP_ERROR(); return (ENOMEM); } hook->hk_refs = 1; /* add a reference for us to return */ hook->hk_flags = HK_INVALID; hook->hk_peer = &ng_deadhook; /* start off this way */ hook->hk_node = node; NG_NODE_REF(node); /* each hook counts as a reference */ /* Set hook name */ strlcpy(NG_HOOK_NAME(hook), name, NG_HOOKSIZ); /* * Check if the node type code has something to say about it. * If it fails, the unref of the hook will also unref the node. */ if (node->nd_type->newhook != NULL) { if ((error = (*node->nd_type->newhook)(node, hook, name))) { NG_HOOK_UNREF(hook); /* this frees the hook */ return (error); } } /* * The 'type' agrees so far, so go ahead and link it in. * We'll ask again later when we actually connect the hooks. */ LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks); node->nd_numhooks++; NG_HOOK_REF(hook); /* one for the node */ if (hookp) *hookp = hook; return (0); } /* * Find a hook * * Node types may supply their own optimized routines for finding * hooks. If none is supplied, we just do a linear search. * XXX Possibly we should add a reference to the hook? */ hook_p ng_findhook(node_p node, const char *name) { hook_p hook; if (node->nd_type->findhook != NULL) return (*node->nd_type->findhook)(node, name); LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) { if (NG_HOOK_IS_VALID(hook) && (strcmp(NG_HOOK_NAME(hook), name) == 0)) return (hook); } return (NULL); } /* * Destroy a hook * * As hooks are always attached, this really destroys two hooks. * The one given, and the one attached to it. Disconnect the hooks * from each other first. We reconnect the peer hook to the 'dead' * hook so that it can still exist after we depart. We then * send the peer its own destroy message. This ensures that we only * interact with the peer's structures when it is locked processing that * message. We hold a reference to the peer hook so we are guaranteed that * the peer hook and node are still going to exist until * we are finished there as the hook holds a ref on the node. * We run this same code again on the peer hook, but that time it is already * attached to the 'dead' hook. * * This routine is called at all stages of hook creation * on error detection and must be able to handle any such stage. */ void ng_destroy_hook(hook_p hook) { hook_p peer; node_p node; if (hook == &ng_deadhook) { /* better safe than sorry */ printf("ng_destroy_hook called on deadhook\n"); return; } /* * Protect divorce process with mutex, to avoid races on * simultaneous disconnect. */ mtx_lock(&ng_topo_mtx); hook->hk_flags |= HK_INVALID; peer = NG_HOOK_PEER(hook); node = NG_HOOK_NODE(hook); if (peer && (peer != &ng_deadhook)) { /* * Set the peer to point to ng_deadhook; * from this moment on we are effectively independent of it. * Send it an rmhook message of its own. */ peer->hk_peer = &ng_deadhook; /* They no longer know us */ hook->hk_peer = &ng_deadhook; /* Nor us, them */ if (NG_HOOK_NODE(peer) == &ng_deadnode) { /* * If it's already divorced from a node, * just free it. 
*/ mtx_unlock(&ng_topo_mtx); } else { mtx_unlock(&ng_topo_mtx); ng_rmhook_self(peer); /* Send it a surprise */ } NG_HOOK_UNREF(peer); /* account for peer link */ NG_HOOK_UNREF(hook); /* account for peer link */ } else mtx_unlock(&ng_topo_mtx); mtx_assert(&ng_topo_mtx, MA_NOTOWNED); /* * Remove the hook from the node's list to avoid possible recursion * in case the disconnection results in node shutdown. */ if (node == &ng_deadnode) { /* happens if called from ng_con_nodes() */ return; } LIST_REMOVE(hook, hk_hooks); node->nd_numhooks--; if (node->nd_type->disconnect) { /* * The type handler may elect to destroy the node so don't * trust its existence after this point (except * that we still hold a reference on it, which we * inherited from the hook we are destroying). */ (*node->nd_type->disconnect) (hook); } /* * Note that because we will point to ng_deadnode, the original node * is not decremented automatically so we do that manually. */ _NG_HOOK_NODE(hook) = &ng_deadnode; NG_NODE_UNREF(node); /* We no longer point to it so adjust count */ NG_HOOK_UNREF(hook); /* Account for linkage (in list) to node */ } /* * Take two hooks on a node and merge the connection so that the given node * is effectively bypassed. */ int ng_bypass(hook_p hook1, hook_p hook2) { if (hook1->hk_node != hook2->hk_node) { TRAP_ERROR(); return (EINVAL); } hook1->hk_peer->hk_peer = hook2->hk_peer; hook2->hk_peer->hk_peer = hook1->hk_peer; hook1->hk_peer = &ng_deadhook; hook2->hk_peer = &ng_deadhook; NG_HOOK_UNREF(hook1); NG_HOOK_UNREF(hook2); /* XXX If we ever cache methods on hooks update them as well */ ng_destroy_hook(hook1); ng_destroy_hook(hook2); return (0); } /* * Install a new netgraph type */ int ng_newtype(struct ng_type *tp) { const size_t namelen = strlen(tp->name); /* Check version and type name fields */ if ((tp->version != NG_ABI_VERSION) || (namelen == 0) || (namelen >= NG_TYPESIZ)) { TRAP_ERROR(); if (tp->version != NG_ABI_VERSION) { printf("Netgraph: Node type rejected. ABI mismatch. Suggest recompile\n"); } return (EINVAL); } /* Check for name collision */ if (ng_findtype(tp->name) != NULL) { TRAP_ERROR(); return (EEXIST); } /* Link in new type */ mtx_lock(&ng_typelist_mtx); LIST_INSERT_HEAD(&ng_typelist, tp, types); tp->refs = 1; /* first ref is linked list */ mtx_unlock(&ng_typelist_mtx); return (0); } /* * Unlink a netgraph type, * if no instances of it exist */ int ng_rmtype(struct ng_type *tp) { /* Check there are no remaining references */ if (tp->refs != 1) { TRAP_ERROR(); return (EBUSY); } /* Unlink type */ mtx_lock(&ng_typelist_mtx); LIST_REMOVE(tp, types); mtx_unlock(&ng_typelist_mtx); return (0); } /* * Look for a type of the name given */ struct ng_type * ng_findtype(const char *typename) { struct ng_type *type; mtx_lock(&ng_typelist_mtx); LIST_FOREACH(type, &ng_typelist, types) { if (strcmp(type->name, typename) == 0) break; } mtx_unlock(&ng_typelist_mtx); return (type); } /************************************************************************ Composite routines ************************************************************************/ /* * Connect two nodes using the specified hooks, using queued functions. */ static void ng_con_part3(node_p node, hook_p hook, void *arg1, int arg2) { /* * When we run, we know that the node 'node' is locked for us. * Our caller has a reference on the hook. * Our caller has a reference on the node. * (In this case our caller is ng_apply_item() ). * The peer hook has a reference on the hook. 
* We are all set up except for the final call to the node, and * the clearing of the INVALID flag. */ if (NG_HOOK_NODE(hook) == &ng_deadnode) { /* * The node must have been freed again since we last visited * here. ng_destroy_hook() has this effect but nothing else does. * We should just release our references and * free anything we can think of. * Since we know it's been destroyed, and it's our caller * that holds the references, just return. */ return ; } if (hook->hk_node->nd_type->connect) { if ((*hook->hk_node->nd_type->connect) (hook)) { ng_destroy_hook(hook); /* also zaps peer */ printf("failed in ng_con_part3()\n"); return ; } } /* * XXX this is wrong for SMP. Possibly we need * to separate out 'create' and 'invalid' flags. * should only set flags on hooks we have locked under our node. */ hook->hk_flags &= ~HK_INVALID; return ; } static void ng_con_part2(node_p node, hook_p hook, void *arg1, int arg2) { hook_p peer; /* * When we run, we know that the node 'node' is locked for us. * Our caller has a reference on the hook. * Our caller has a reference on the node. * (In this case our caller is ng_apply_item() ). * The peer hook has a reference on the hook. * Our node pointer points to the 'dead' node. * First check the hook name is unique. * Should not happen because we checked before queueing this. */ if (ng_findhook(node, NG_HOOK_NAME(hook)) != NULL) { TRAP_ERROR(); ng_destroy_hook(hook); /* should destroy peer too */ printf("failed in ng_con_part2()\n"); return ; } /* * Check if the node type code has something to say about it. * If it fails, the unref of the hook will also unref the attached node, * however since that node is 'ng_deadnode' this will do nothing. * The peer hook will also be destroyed. */ if (node->nd_type->newhook != NULL) { if ((*node->nd_type->newhook)(node, hook, hook->hk_name)) { ng_destroy_hook(hook); /* should destroy peer too */ printf("failed in ng_con_part2()\n"); return ; } } /* * The 'type' agrees so far, so go ahead and link it in. * We'll ask again later when we actually connect the hooks. */ hook->hk_node = node; /* just overwrite ng_deadnode */ NG_NODE_REF(node); /* each hook counts as a reference */ LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks); node->nd_numhooks++; NG_HOOK_REF(hook); /* one for the node */ /* * We now have a symmetrical situation, where both hooks have been * linked to their nodes, the newhook methods have been called, * and the references are all correct. The hooks are still marked * as invalid, as we have not called the 'connect' methods * yet. * We can call the local one immediately as we have the * node locked, but we need to queue the remote one. */ if (hook->hk_node->nd_type->connect) { if ((*hook->hk_node->nd_type->connect) (hook)) { ng_destroy_hook(hook); /* also zaps peer */ printf("failed in ng_con_part2(A)\n"); return ; } } /* * Acquire topo mutex to avoid race with ng_destroy_hook(). */ mtx_lock(&ng_topo_mtx); peer = hook->hk_peer; if (peer == &ng_deadhook) { mtx_unlock(&ng_topo_mtx); printf("failed in ng_con_part2(B)\n"); ng_destroy_hook(hook); return ; } mtx_unlock(&ng_topo_mtx); if (ng_send_fn(peer->hk_node, peer, &ng_con_part3, arg1, arg2)) { printf("failed in ng_con_part2(C)\n"); ng_destroy_hook(hook); /* also zaps peer */ return ; } hook->hk_flags &= ~HK_INVALID; /* need both to be able to work */ return ; } /* * Connect this node with another node. We assume that this node is * currently locked, as we are only called from an NGM_CONNECT message. 
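 *
 * A rough sketch of the whole three-part sequence (a reading aid
 * only, derived from the functions above):
 *
 *	ng_con_nodes(node, "a", node2, "b")
 *		adds hook "a" to the locked node (its newhook runs now),
 *		allocates the peer hook "b" pointing at ng_deadnode,
 *		links the two as peers, and queues ng_con_part2();
 *	ng_con_part2()		(later, under node2's lock)
 *		runs node2's newhook and connect methods for "b",
 *		then queues ng_con_part3() at the peer;
 *	ng_con_part3()		(later, under the first node's lock)
 *		runs the first node's connect method for "a" and
 *		clears HK_INVALID, making the pair usable.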
*/ static int ng_con_nodes(node_p node, const char *name, node_p node2, const char *name2) { int error; hook_p hook; hook_p hook2; if (ng_findhook(node2, name2) != NULL) { return(EEXIST); } if ((error = ng_add_hook(node, name, &hook))) /* gives us a ref */ return (error); /* Allocate the other hook and link it up */ NG_ALLOC_HOOK(hook2); if (hook2 == NULL) { TRAP_ERROR(); ng_destroy_hook(hook); /* XXX check ref counts so far */ NG_HOOK_UNREF(hook); /* including our ref */ return (ENOMEM); } hook2->hk_refs = 1; /* start with a reference for us. */ hook2->hk_flags = HK_INVALID; hook2->hk_peer = hook; /* Link the two together */ hook->hk_peer = hook2; NG_HOOK_REF(hook); /* Add a ref for the peer to each */ NG_HOOK_REF(hook2); hook2->hk_node = &ng_deadnode; strlcpy(NG_HOOK_NAME(hook2), name2, NG_HOOKSIZ); /* * Queue the function above. * Processing continues in that function in the lock context of * the other node. */ ng_send_fn(node2, hook2, &ng_con_part2, NULL, 0); NG_HOOK_UNREF(hook); /* Let each hook go if it wants to */ NG_HOOK_UNREF(hook2); return (0); } /* * Make a peer and connect. * We assume that the local node is locked. * The new node probably doesn't need a lock until * it has a hook, because it cannot really have any work until then, * but we should think about it a bit more. * * The problem may come if the other node also fires up * some hardware or a timer or some other source of activation, * also it may already get a command msg via its ID. * * We could use the same method as ng_con_nodes() but we'd have * to add ability to remove the node when failing. (Not hard, just * make arg1 point to the node to remove). * Unless of course we just ignore failure to connect and leave * an unconnected node? */ static int ng_mkpeer(node_p node, const char *name, const char *name2, char *type) { node_p node2; hook_p hook1, hook2; int error; if ((error = ng_make_node(type, &node2))) { return (error); } if ((error = ng_add_hook(node, name, &hook1))) { /* gives us a ref */ ng_rmnode(node2, NULL, NULL, 0); return (error); } if ((error = ng_add_hook(node2, name2, &hook2))) { ng_rmnode(node2, NULL, NULL, 0); ng_destroy_hook(hook1); NG_HOOK_UNREF(hook1); return (error); } /* * Actually link the two hooks together. */ hook1->hk_peer = hook2; hook2->hk_peer = hook1; /* Each hook is referenced by the other */ NG_HOOK_REF(hook1); NG_HOOK_REF(hook2); /* Give each node the opportunity to veto the pending connection */ if (hook1->hk_node->nd_type->connect) { error = (*hook1->hk_node->nd_type->connect) (hook1); } if ((error == 0) && hook2->hk_node->nd_type->connect) { error = (*hook2->hk_node->nd_type->connect) (hook2); } /* * drop the references we were holding on the two hooks. 
*/ if (error) { ng_destroy_hook(hook2); /* also zaps hook1 */ ng_rmnode(node2, NULL, NULL, 0); } else { /* As a last act, allow the hooks to be used */ hook1->hk_flags &= ~HK_INVALID; hook2->hk_flags &= ~HK_INVALID; } NG_HOOK_UNREF(hook1); NG_HOOK_UNREF(hook2); return (error); } /************************************************************************ Utility routines to send self messages ************************************************************************/ /* Shut this node down as soon as everyone is clear of it */ /* Should add arg "immediately" to jump the queue */ int ng_rmnode_self(node_p node) { int error; if (node == &ng_deadnode) return (0); node->nd_flags |= NGF_INVALID; if (node->nd_flags & NGF_CLOSING) return (0); error = ng_send_fn(node, NULL, &ng_rmnode, NULL, 0); return (error); } static void ng_rmhook_part2(node_p node, hook_p hook, void *arg1, int arg2) { ng_destroy_hook(hook); return ; } int ng_rmhook_self(hook_p hook) { int error; node_p node = NG_HOOK_NODE(hook); if (node == &ng_deadnode) return (0); error = ng_send_fn(node, hook, &ng_rmhook_part2, NULL, 0); return (error); } /*********************************************************************** * Parse and verify a string of the form: <NODE:><PATH> * * Such a string can refer to a specific node or a specific hook * on a specific node, depending on how you look at it. In the * latter case, the PATH component must not end in a dot. * * Both <NODE:> and <PATH> are optional. The <PATH> is a string * of hook names separated by dots. This breaks out the original * string, setting *nodep to "NODE" (or NULL if none) and *pathp * to "PATH" (or NULL if degenerate). Also, *hookp will point to * the final hook component of <PATH>, if any, otherwise NULL. * * This returns -1 if the path is malformed. The char ** arguments are optional. ***********************************************************************/ int ng_path_parse(char *addr, char **nodep, char **pathp, char **hookp) { char *node, *path, *hook; int k; /* * Extract absolute NODE, if any */ for (path = addr; *path && *path != ':'; path++); if (*path) { node = addr; /* Here's the NODE */ *path++ = '\0'; /* Here's the PATH */ /* Node name must not be empty */ if (!*node) return -1; /* A name of "." is OK; otherwise '.' not allowed */ if (strcmp(node, ".") != 0) { for (k = 0; node[k]; k++) if (node[k] == '.') return -1; } } else { node = NULL; /* No absolute NODE */ path = addr; /* Here's the PATH */ } /* Snoop for illegal characters in PATH */ for (k = 0; path[k]; k++) if (path[k] == ':') return -1; /* Check for no repeated dots in PATH */ for (k = 0; path[k]; k++) if (path[k] == '.' && path[k + 1] == '.') return -1; /* Remove extra (degenerate) dots from beginning or end of PATH */ if (path[0] == '.') path++; if (*path && path[strlen(path) - 1] == '.') path[strlen(path) - 1] = 0; /* If PATH has a dot, then we're not talking about a hook */ if (*path) { for (hook = path, k = 0; path[k]; k++) if (path[k] == '.') { hook = NULL; break; } } else path = hook = NULL; /* Done */ if (nodep) *nodep = node; if (pathp) *pathp = path; if (hookp) *hookp = hook; return (0); } /* * Given a path, which may be absolute or relative, and a starting node, * return the destination node. 
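 *
 * Some illustrative addresses, given the syntax above: "foo:" names
 * the node called "foo"; "[f03034de]:" names a node by ID; "foo:a.b"
 * starts at "foo" and crawls over hooks "a" then "b" to whatever node
 * lies at the far end; and a bare relative path such as "a" starts
 * the crawl at the node given in 'here'.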
*/ int ng_path2noderef(node_p here, const char *address, node_p *destp, hook_p *lasthook) { char fullpath[NG_PATHSIZ]; char *nodename, *path, pbuf[2]; node_p node, oldnode; char *cp; hook_p hook = NULL; /* Initialize */ if (destp == NULL) { TRAP_ERROR(); return EINVAL; } *destp = NULL; /* Make a writable copy of address for ng_path_parse() */ strncpy(fullpath, address, sizeof(fullpath) - 1); fullpath[sizeof(fullpath) - 1] = '\0'; /* Parse out node and sequence of hooks */ if (ng_path_parse(fullpath, &nodename, &path, NULL) < 0) { TRAP_ERROR(); return EINVAL; } if (path == NULL) { pbuf[0] = '.'; /* Needs to be writable */ pbuf[1] = '\0'; path = pbuf; } /* * For an absolute address, jump to the starting node. * Note that this holds a reference on the node for us. * Don't forget to drop the reference if we don't need it. */ if (nodename) { node = ng_name2noderef(here, nodename); if (node == NULL) { TRAP_ERROR(); return (ENOENT); } } else { if (here == NULL) { TRAP_ERROR(); return (EINVAL); } node = here; NG_NODE_REF(node); } /* * Now follow the sequence of hooks * XXX * We actually cannot guarantee that the sequence * is not being demolished as we crawl along it * without extra-ordinary locking etc. * So this is a bit dodgy to say the least. * We can probably hold up some things by holding * the nodelist mutex for the time of this * crawl if we wanted.. At least that way we wouldn't have to * worry about the nodes disappearing, but the hooks would still * be a problem. */ for (cp = path; node != NULL && *cp != '\0'; ) { char *segment; /* * Break out the next path segment. Replace the dot we just * found with a NUL; "cp" points to the next segment (or the * NUL at the end). */ for (segment = cp; *cp != '\0'; cp++) { if (*cp == '.') { *cp++ = '\0'; break; } } /* Empty segment */ if (*segment == '\0') continue; /* We have a segment, so look for a hook by that name */ hook = ng_findhook(node, segment); /* Can't get there from here... */ if (hook == NULL || NG_HOOK_PEER(hook) == NULL || NG_HOOK_NOT_VALID(hook) || NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))) { TRAP_ERROR(); NG_NODE_UNREF(node); #if 0 printf("hooknotvalid %s %s %d %d %d %d ", path, segment, hook == NULL, NG_HOOK_PEER(hook) == NULL, NG_HOOK_NOT_VALID(hook), NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))); #endif return (ENOENT); } /* * Hop on over to the next node * XXX * Big race conditions here as hooks and nodes go away * *** Idea.. store an ng_ID_t in each hook and use that * instead of the direct hook in this crawl? */ oldnode = node; if ((node = NG_PEER_NODE(hook))) NG_NODE_REF(node); /* XXX RACE */ NG_NODE_UNREF(oldnode); /* XXX another race */ if (NG_NODE_NOT_VALID(node)) { NG_NODE_UNREF(node); /* XXX more races */ node = NULL; } } /* If node somehow missing, fail here (probably this is not needed) */ if (node == NULL) { TRAP_ERROR(); return (ENXIO); } /* Done */ *destp = node; if (lasthook != NULL) *lasthook = (hook ? NG_HOOK_PEER(hook) : NULL); return (0); } /***************************************************************\ * Input queue handling. * All activities are submitted to the node via the input queue * which implements a multiple-reader/single-writer gate. * Items which cannot be handled immediately are queued. 
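 *
 * (In other words the input queue is a reader/writer gate: any number
 * of readers may be active in the node at once, a writer requires
 * exclusive access, and whatever cannot run immediately waits its
 * turn in FIFO order.)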
* * read-write queue locking inline functions * \***************************************************************/ static __inline item_p ng_dequeue(struct ng_queue * ngq, int *rw); static __inline item_p ng_acquire_read(struct ng_queue * ngq, item_p item); static __inline item_p ng_acquire_write(struct ng_queue * ngq, item_p item); static __inline void ng_leave_read(struct ng_queue * ngq); static __inline void ng_leave_write(struct ng_queue * ngq); static __inline void ng_queue_rw(struct ng_queue * ngq, item_p item, int rw); /* * Definition of the bit fields in the ng_queue flag word. * Defined here rather than in netgraph.h because no-one should fiddle * with them. * * The ordering here may be important! Don't shuffle these. */
/*-
 Safety Barrier--------+ (adjustable to suit taste) (not used yet)
                       |
                       V
+-------+-------+-------+-------+-------+-------+-------+-------+
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
| |A|c|t|i|v|e| |R|e|a|d|e|r| |C|o|u|n|t| | | | | | | | | |P|A|
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | |O|W|
+-------+-------+-------+-------+-------+-------+-------+-------+
 \___________________________ ____________________________/ | |
                             V                              | |
                   [active reader count]                    | |
                                                            | |
         Operation Pending ---------------------------------+ |
                                                              |
         Active Writer ----------------------------------------+
*/
#define WRITER_ACTIVE 0x00000001 #define OP_PENDING 0x00000002 #define READER_INCREMENT 0x00000004 #define READER_MASK 0xfffffffc /* Not valid if WRITER_ACTIVE is set */ #define SAFETY_BARRIER 0x00100000 /* 128K items queued should be enough */ /* Defines of more elaborate states on the queue */ /* Mask of bits a new read cares about */ #define NGQ_RMASK (WRITER_ACTIVE|OP_PENDING) /* Mask of bits a new write cares about */ #define NGQ_WMASK (NGQ_RMASK|READER_MASK) /* Test to decide if there is something on the queue. */ #define QUEUE_ACTIVE(QP) ((QP)->q_flags & OP_PENDING) /* How to decide what the next queued item is. */ #define HEAD_IS_READER(QP) NGI_QUEUED_READER((QP)->queue) #define HEAD_IS_WRITER(QP) NGI_QUEUED_WRITER((QP)->queue) /* notused */ /* Read the status to decide if the next item on the queue can now run. */ #define QUEUED_READER_CAN_PROCEED(QP) \ (((QP)->q_flags & (NGQ_RMASK & ~OP_PENDING)) == 0) #define QUEUED_WRITER_CAN_PROCEED(QP) \ (((QP)->q_flags & (NGQ_WMASK & ~OP_PENDING)) == 0) /* Is there a chance of getting ANY work off the queue? */ #define NEXT_QUEUED_ITEM_CAN_PROCEED(QP) \ (QUEUE_ACTIVE(QP) && \ ((HEAD_IS_READER(QP)) ? QUEUED_READER_CAN_PROCEED(QP) : \ QUEUED_WRITER_CAN_PROCEED(QP))) #define NGQRW_R 0 #define NGQRW_W 1 /* * Taking into account the current state of the queue and node, possibly take * the next entry off the queue and return it. Return NULL if there was * nothing we could return, either because there really was nothing there, or * because the node was in a state where it cannot yet process the next item * on the queue. * * This MUST MUST MUST be called with the mutex held. */ static __inline item_p ng_dequeue(struct ng_queue *ngq, int *rw) { item_p item; u_int add_arg; mtx_assert(&ngq->q_mtx, MA_OWNED); /* * If there is nothing queued, then just return. * No point in continuing. * XXXGL: assert this? */ if (!QUEUE_ACTIVE(ngq)) { CTR4(KTR_NET, "%20s: node [%x] (%p) queue empty; " "queue flags 0x%lx", __func__, ngq->q_node->nd_ID, ngq->q_node, ngq->q_flags); return (NULL); } /* * From here, we can assume there is a head item. * We need to find out what it is and if it can be dequeued, given * the current state of the node. 
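 *
 * A worked example of the flag encoding (illustrative only): with two
 * active readers and one queued item, q_flags is
 * 2 * READER_INCREMENT + OP_PENDING == 0x0000000a.  In that state a
 * queued reader could still proceed (WRITER_ACTIVE is clear), but a
 * queued writer must wait until both readers have left and the
 * reader count bits are zero again.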
*/ if (HEAD_IS_READER(ngq)) { if (!QUEUED_READER_CAN_PROCEED(ngq)) { /* * It's a reader but we can't use it. * We are stalled so make sure we don't * get called again until something changes. */ ng_worklist_remove(ngq->q_node); CTR4(KTR_NET, "%20s: node [%x] (%p) queued reader " "can't proceed; queue flags 0x%lx", __func__, ngq->q_node->nd_ID, ngq->q_node, ngq->q_flags); return (NULL); } /* * Head of queue is a reader and we have no write active. * We don't care how many readers are already active. * Add the correct increment for the reader count. */ add_arg = READER_INCREMENT; *rw = NGQRW_R; } else if (QUEUED_WRITER_CAN_PROCEED(ngq)) { /* * There is a pending write, no readers and no active writer. * This means we can go ahead with the pending writer. Note * the fact that we now have a writer, ready for when we take * it off the queue. * * We don't need to worry about a possible collision with the * fasttrack reader. * * The fasttrack thread may take a long time to discover that we * are running so we would have an inconsistent state in the * flags for a while. Since we ignore the reader count * entirely when the WRITER_ACTIVE flag is set, this should * not matter (in fact it is defined that way). If it tests * the flag before this operation, the OP_PENDING flag * will make it fail, and if it tests it later, the * WRITER_ACTIVE flag will do the same. If it is SO slow that * we have actually completed the operation, and neither flag * is set by the time that it tests the flags, then it is * actually ok for it to continue. If it completes and we've * finished and the read pending is set it still fails. * * So we can just ignore it, as long as we can ensure that the * transition from WRITE_PENDING state to the WRITER_ACTIVE * state is atomic. * * After failing, first it will be held back by the mutex, then * when it can proceed, it will queue its request, then it * would arrive at this function. Usually it will have to * leave empty-handed because the ACTIVE WRITER bit will be * set. * * Adjust the flags for the new active writer. */ add_arg = WRITER_ACTIVE; *rw = NGQRW_W; /* * We want to write "active writer, no readers". Now go make * it true. In fact there may be a number in the readers * count but we know it is not true and will be fixed soon. * We will fix the flags for the next pending entry in a * moment. */ } else { /* * We can't dequeue anything.. return and say so. Probably we * have a write pending and the readers count is non-zero. If * we got here because a reader hit us just at the wrong * moment with the fasttrack code, and put us in a strange * state, then it will be coming through in just a moment, * (just as soon as we release the mutex) and keep things * moving. * Make sure we remove ourselves from the work queue. It * would be a waste of effort to do all this again. */ ng_worklist_remove(ngq->q_node); CTR4(KTR_NET, "%20s: node [%x] (%p) can't dequeue anything; " "queue flags 0x%lx", __func__, ngq->q_node->nd_ID, ngq->q_node, ngq->q_flags); return (NULL); } /* * Now we dequeue the request (whatever it may be) and correct the * pending flags and the next and last pointers. */ item = ngq->queue; ngq->queue = item->el_next; CTR6(KTR_NET, "%20s: node [%x] (%p) dequeued item %p with flags 0x%lx; " "queue flags 0x%lx", __func__, ngq->q_node->nd_ID,ngq->q_node, item, item->el_flags, ngq->q_flags); if (ngq->last == &(item->el_next)) { /* * That was the last entry in the queue, so set the 'last' * pointer up correctly and make sure the pending flag is * clear. 
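 *
 * (E.g. when dequeueing the only queued reader, add_arg becomes
 * READER_INCREMENT - OP_PENDING, so the single atomic_add_long()
 * below both counts the new active reader and clears the pending
 * bit in one step -- just an illustration of the arithmetic, not
 * extra semantics.)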
add_arg += -OP_PENDING; ngq->last = &(ngq->queue); /* * Whatever flag was set will be cleared and * the new active field will be set by the add as well, * so we don't need to change add_arg. * But we know we don't need to be on the work list. */ atomic_add_long(&ngq->q_flags, add_arg); ng_worklist_remove(ngq->q_node); } else { /* * Since there is still something on the queue * we don't need to change the PENDING flag. */ atomic_add_long(&ngq->q_flags, add_arg); /* * If we see more doable work, make sure we are * on the work queue. */ if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) { ng_setisr(ngq->q_node); } } CTR6(KTR_NET, "%20s: node [%x] (%p) returning item %p as %s; " "queue flags 0x%lx", __func__, ngq->q_node->nd_ID, ngq->q_node, item, *rw ? "WRITER" : "READER" , ngq->q_flags); return (item); } /* * Queue a packet to be picked up by someone else. * We really don't care who, but we can't or don't want to hang around * to process it ourselves. We are probably an interrupt routine.. * If the queue could be run, flag the netisr handler to start. */ static __inline void ng_queue_rw(struct ng_queue * ngq, item_p item, int rw) { mtx_assert(&ngq->q_mtx, MA_OWNED); if (rw == NGQRW_W) NGI_SET_WRITER(item); else NGI_SET_READER(item); item->el_next = NULL; /* maybe not needed */ *ngq->last = item; CTR5(KTR_NET, "%20s: node [%x] (%p) queued item %p as %s", __func__, ngq->q_node->nd_ID, ngq->q_node, item, rw ? "WRITER" : "READER" ); /* * If it was the first item in the queue then we need to * set the last pointer and the type flags. */ if (ngq->last == &(ngq->queue)) { atomic_add_long(&ngq->q_flags, OP_PENDING); CTR3(KTR_NET, "%20s: node [%x] (%p) set OP_PENDING", __func__, ngq->q_node->nd_ID, ngq->q_node); } ngq->last = &(item->el_next); /* * We can take the worklist lock with the node locked * BUT NOT THE REVERSE! */ if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_setisr(ngq->q_node); } /* * This function 'cheats' in that it first tries to 'grab' the use of the * node, without going through the mutex. We can do this because of the * semantics of the lock. The semantics include a clause that says that the * value of the readers count is invalid if the WRITER_ACTIVE flag is set. It * also says that the WRITER_ACTIVE flag cannot be set if the readers count * is not zero. Note that this talks about what is valid to SET the * WRITER_ACTIVE flag, because from the moment it is set, the value of the * reader count is immaterial, and not valid. The two 'pending' flags have a * similar effect, in that they are orthogonal to the two active fields in * how they are set, but if either is set, the attempted 'grab' needs to be * backed out because there is earlier work, and we maintain ordering in the * queue. The result of this is that the reader request can try to obtain use of * the node with only a single atomic addition, and without any of the mutex * overhead. If this fails the operation degenerates to the same as for other * cases. 
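 *
 * An illustrative fast path for a reader (this condenses exactly what
 * ng_acquire_read() below does):
 *
 *	atomic_add_long(&ngq->q_flags, READER_INCREMENT);
 *	if ((ngq->q_flags & NGQ_RMASK) == 0)
 *		we are in, no mutex taken;
 *	else
 *		back the increment out and retry under the mutex.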
* */ static __inline item_p ng_acquire_read(struct ng_queue *ngq, item_p item) { KASSERT(ngq != &ng_deadnode.nd_input_queue, ("%s: working on deadnode", __func__)); /* ######### Hack alert ######### */ atomic_add_long(&ngq->q_flags, READER_INCREMENT); if ((ngq->q_flags & NGQ_RMASK) == 0) { /* Successfully grabbed node */ CTR4(KTR_NET, "%20s: node [%x] (%p) fast acquired item %p", __func__, ngq->q_node->nd_ID, ngq->q_node, item); return (item); } /* undo the damage if we didn't succeed */ atomic_subtract_long(&ngq->q_flags, READER_INCREMENT); /* ######### End Hack alert ######### */ NG_QUEUE_LOCK(ngq); /* * Try again. Another processor (or interrupt for that matter) may * have removed the last queued item that was stopping us from * running, between the previous test, and the moment that we took * the mutex. (Or maybe a writer completed.) * Even if another fast-track reader hits during this period * we don't care as multiple readers is OK. */ if ((ngq->q_flags & NGQ_RMASK) == 0) { atomic_add_long(&ngq->q_flags, READER_INCREMENT); NG_QUEUE_UNLOCK(ngq); CTR4(KTR_NET, "%20s: node [%x] (%p) slow acquired item %p", __func__, ngq->q_node->nd_ID, ngq->q_node, item); return (item); } /* * and queue the request for later. */ ng_queue_rw(ngq, item, NGQRW_R); NG_QUEUE_UNLOCK(ngq); return (NULL); } static __inline item_p ng_acquire_write(struct ng_queue *ngq, item_p item) { KASSERT(ngq != &ng_deadnode.nd_input_queue, ("%s: working on deadnode", __func__)); restart: NG_QUEUE_LOCK(ngq); /* * If there are no readers, no writer, and no pending packets, then * we can just go ahead. In all other situations we need to queue the * request */ if ((ngq->q_flags & NGQ_WMASK) == 0) { /* collision could happen *HERE* */ atomic_add_long(&ngq->q_flags, WRITER_ACTIVE); NG_QUEUE_UNLOCK(ngq); if (ngq->q_flags & READER_MASK) { /* Collision with fast-track reader */ atomic_subtract_long(&ngq->q_flags, WRITER_ACTIVE); goto restart; } CTR4(KTR_NET, "%20s: node [%x] (%p) acquired item %p", __func__, ngq->q_node->nd_ID, ngq->q_node, item); return (item); } /* * and queue the request for later. */ ng_queue_rw(ngq, item, NGQRW_W); NG_QUEUE_UNLOCK(ngq); return (NULL); } #if 0 static __inline item_p ng_upgrade_write(struct ng_queue *ngq, item_p item) { KASSERT(ngq != &ng_deadnode.nd_input_queue, ("%s: working on deadnode", __func__)); NGI_SET_WRITER(item); mtx_lock_spin(&(ngq->q_mtx)); /* * There will never be zero readers as we are one of them ourselves. * Set the WRITER_ACTIVE flag ASAP to block out fast track readers. * The caller we are running from will call ng_leave_read() * soon, so we must account for that. We must leave again with the * READER lock. If we find other readers, then * queue the request for later. However "later" may be right now * if there are no readers. We don't really care if there are queued * items as we will bypass them anyhow. */ atomic_add_long(&ngq->q_flags, WRITER_ACTIVE - READER_INCREMENT); if (ngq->q_flags & (NGQ_WMASK & ~OP_PENDING) == WRITER_ACTIVE) { mtx_unlock_spin(&(ngq->q_mtx)); /* It's just us, act on the item. */ /* will NOT drop writer lock when done */ ng_apply_item(node, item, 0); /* * Having acted on the item, atomically * downgrade back to READER and finish up */ atomic_add_long(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE); /* Our caller will call ng_leave_read() */ return; } /* * It's not just us active, so queue us AT THE HEAD. * "Why?" I hear you ask. * Put us at the head of the queue as we've already been * through it once. 
If there is nothing else waiting, * set the correct flags. */ if ((item->el_next = ngq->queue) == NULL) { /* * Set up the "last" pointer. * We are the only (and thus last) item */ ngq->last = &(item->el_next); /* We've gone from 0 to 1 item in the queue */ atomic_add_long(&ngq->q_flags, OP_PENDING); CTR3(KTR_NET, "%20s: node [%x] (%p) set OP_PENDING", __func__, ngq->q_node->nd_ID, ngq->q_node); }; ngq->queue = item; CTR5(KTR_NET, "%20s: node [%x] (%p) requeued item %p as WRITER", __func__, ngq->q_node->nd_ID, ngq->q_node, item ); /* Reverse what we did above. That downgrades us back to reader */ atomic_add_long(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE); if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_setisr(ngq->q_node); mtx_unlock_spin(&(ngq->q_mtx)); return; } #endif static __inline void ng_leave_read(struct ng_queue *ngq) { atomic_subtract_long(&ngq->q_flags, READER_INCREMENT); } static __inline void ng_leave_write(struct ng_queue *ngq) { atomic_subtract_long(&ngq->q_flags, WRITER_ACTIVE); } static void ng_flush_input_queue(struct ng_queue * ngq) { item_p item; NG_QUEUE_LOCK(ngq); while (ngq->queue) { item = ngq->queue; ngq->queue = item->el_next; if (ngq->last == &(item->el_next)) { ngq->last = &(ngq->queue); atomic_add_long(&ngq->q_flags, -OP_PENDING); } NG_QUEUE_UNLOCK(ngq); /* If the item is supplying a callback, call it with an error */ if (item->apply != NULL) { (item->apply)(item->context, ENOENT); item->apply = NULL; } NG_FREE_ITEM(item); NG_QUEUE_LOCK(ngq); } /* * Take us off the work queue if we are there. * We definitely have no work to be done. */ ng_worklist_remove(ngq->q_node); NG_QUEUE_UNLOCK(ngq); } /*********************************************************************** * Externally visible method for sending or queueing messages or data. ***********************************************************************/ /* * The module code should have filled out the item correctly by this stage: * Common: * reference to destination node. * Reference to destination rcv hook if relevant. * Data: * pointer to mbuf * Control_Message: * pointer to msg. * ID of original sender node. (return address) * Function: * Function pointer * void * argument * integer argument * * The nodes have several routines and macros to help with this task: */ int ng_snd_item(item_p item, int flags) { hook_p hook; node_p node; int queue, rw; struct ng_queue * ngq; int error = 0; if (item == NULL) { TRAP_ERROR(); return (EINVAL); /* failed to get queue element */ } hook = NGI_HOOK(item); node = NGI_NODE(item); #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif queue = (flags & NG_QUEUE) ? 1 : 0; if (node == NULL) { NG_FREE_ITEM(item); TRAP_ERROR(); return (EINVAL); /* No address */ } ngq = &node->nd_input_queue; switch(item->el_flags & NGQF_TYPE) { case NGQF_DATA: /* * DATA MESSAGE * Delivered to a node via a non-optional hook. * Both should be present in the item even though * the node is derivable from the hook. * References are held on both by the item. */ /* Protect nodes from sending NULL pointers * to each other */ if (NGI_M(item) == NULL) return (EINVAL); CHECK_DATA_MBUF(NGI_M(item)); if (hook == NULL) { NG_FREE_ITEM(item); TRAP_ERROR(); return(EINVAL); } if ((NG_HOOK_NOT_VALID(hook)) || (NG_NODE_NOT_VALID(NG_HOOK_NODE(hook)))) { NG_FREE_ITEM(item); return (ENOTCONN); } if ((hook->hk_flags & HK_QUEUE)) { queue = 1; } break; case NGQF_MESG: /* * CONTROL MESSAGE * Delivered to a node. * Hook is optional. * References are held by the item on the node and * the hook if it is present. 
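 *
 * (Illustrative: a message addressed with no <PATH> component
 * arrives here with hook == NULL, while one addressed via a path
 * carries the hook on the destination node that the path ended
 * at, so per-hook overrides such as HK_QUEUE below can take
 * effect.)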
*/ if (hook && (hook->hk_flags & HK_QUEUE)) { queue = 1; } break; case NGQF_FN: break; default: NG_FREE_ITEM(item); TRAP_ERROR(); return (EINVAL); } switch(item->el_flags & NGQF_RW) { case NGQF_READER: rw = NGQRW_R; break; case NGQF_WRITER: rw = NGQRW_W; break; default: panic("%s: invalid item flags %lx", __func__, item->el_flags); } /* * If the node specifies single threading, force writer semantics. * Similarly, the node may say one hook always produces writers. * These are overrides. */ if ((node->nd_flags & NGF_FORCE_WRITER) || (hook && (hook->hk_flags & HK_FORCE_WRITER))) rw = NGQRW_W; if (queue) { /* Put it on the queue for that node */ #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif NG_QUEUE_LOCK(ngq); ng_queue_rw(ngq, item, rw); NG_QUEUE_UNLOCK(ngq); if (flags & NG_PROGRESS) return (EINPROGRESS); else return (0); } /* * We already decided how we will be queued or treated. * Try to get the appropriate operating permission. */ if (rw == NGQRW_R) item = ng_acquire_read(ngq, item); else item = ng_acquire_write(ngq, item); if (item == NULL) { if (flags & NG_PROGRESS) return (EINPROGRESS); else return (0); } #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif NGI_GET_NODE(item, node); /* zaps stored node */ /* Don't report any errors. Act as if it had been queued */ ng_apply_item(node, item, rw); /* drops r/w lock when done */ /* * If the node goes away when we remove the reference, * whatever we just did caused it.. whatever we do, DO NOT * access the node again! */ if (NG_NODE_UNREF(node) == 0) { return (error); } NG_QUEUE_LOCK(ngq); if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_setisr(ngq->q_node); NG_QUEUE_UNLOCK(ngq); return (error); } /* * We have an item that was possibly queued somewhere. * It should contain all the information needed * to run it on the appropriate node/hook. */ static void ng_apply_item(node_p node, item_p item, int rw) { hook_p hook; int error = 0; ng_rcvdata_t *rcvdata; ng_rcvmsg_t *rcvmsg; ng_apply_t *apply = NULL; void *context = NULL; NGI_GET_HOOK(item, hook); /* clears stored hook */ #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif /* * If the item has an "apply" callback, store it. * Clear item's callback immediately, to avoid an extra call if * the item is reused by the destination node. */ if (item->apply != NULL) { apply = item->apply; context = item->context; item->apply = NULL; } switch (item->el_flags & NGQF_TYPE) { case NGQF_DATA: /* * Check things are still ok as when we were queued. */ if ((hook == NULL) || NG_HOOK_NOT_VALID(hook) || NG_NODE_NOT_VALID(node) ) { error = EIO; NG_FREE_ITEM(item); break; } /* * If no receive method, just silently drop it. * Give preference to the hook over-ride method */ if ((!(rcvdata = hook->hk_rcvdata)) && (!(rcvdata = NG_HOOK_NODE(hook)->nd_type->rcvdata))) { error = 0; NG_FREE_ITEM(item); break; } error = (*rcvdata)(hook, item); break; case NGQF_MESG: if (hook) { if (NG_HOOK_NOT_VALID(hook)) { /* * The hook has been zapped, so we can't * use it. Immediately drop its reference. * The message may not need it. */ NG_HOOK_UNREF(hook); hook = NULL; } } /* * Similarly, if the node is a zombie there is * nothing we can do with it, drop everything. */ if (NG_NODE_NOT_VALID(node)) { TRAP_ERROR(); error = EINVAL; NG_FREE_ITEM(item); } else { /* * Call the appropriate message handler for the object. * It is up to the message handler to free the message. * If it's a generic message, handle it generically, * otherwise call the type's message handler * (if it exists) * XXX (race). 
Remember that a queued message may * reference a node or hook that has just been * invalidated. It will exist as the queue code * is holding a reference, but.. */ struct ng_mesg *msg = NGI_MSG(item); /* * check if the generic handler owns it. */ if ((msg->header.typecookie == NGM_GENERIC_COOKIE) && ((msg->header.flags & NGF_RESP) == 0)) { error = ng_generic_msg(node, item, hook); break; } /* * Now see if there is a handler (hook or node specific) * in the target node. If none, silently discard. */ if (((!hook) || (!(rcvmsg = hook->hk_rcvmsg))) && (!(rcvmsg = node->nd_type->rcvmsg))) { TRAP_ERROR(); error = 0; NG_FREE_ITEM(item); break; } error = (*rcvmsg)(node, item, hook); } break; case NGQF_FN: /* * We have to implicitly trust the hook, * as some of these are used for system purposes * where the hook is invalid. In the case of * the shutdown message we allow it to hit * even if the node is invalid. */ if ((NG_NODE_NOT_VALID(node)) && (NGI_FN(item) != &ng_rmnode)) { TRAP_ERROR(); error = EINVAL; NG_FREE_ITEM(item); break; } (*NGI_FN(item))(node, hook, NGI_ARG1(item), NGI_ARG2(item)); NG_FREE_ITEM(item); break; } /* * We held references on some of the resources * that we took from the item. Now that we have * finished doing everything, drop those references. */ if (hook) { NG_HOOK_UNREF(hook); } if (rw == NGQRW_R) { ng_leave_read(&node->nd_input_queue); } else { ng_leave_write(&node->nd_input_queue); } /* Apply callback. */ if (apply != NULL) (*apply)(context, error); return; } /*********************************************************************** * Implement the 'generic' control messages ***********************************************************************/ static int ng_generic_msg(node_p here, item_p item, hook_p lasthook) { int error = 0; struct ng_mesg *msg; struct ng_mesg *resp = NULL; NGI_GET_MSG(item, msg); if (msg->header.typecookie != NGM_GENERIC_COOKIE) { TRAP_ERROR(); error = EINVAL; goto out; } switch (msg->header.cmd) { case NGM_SHUTDOWN: ng_rmnode(here, NULL, NULL, 0); break; case NGM_MKPEER: { struct ngm_mkpeer *const mkp = (struct ngm_mkpeer *) msg->data; if (msg->header.arglen != sizeof(*mkp)) { TRAP_ERROR(); error = EINVAL; break; } mkp->type[sizeof(mkp->type) - 1] = '\0'; mkp->ourhook[sizeof(mkp->ourhook) - 1] = '\0'; mkp->peerhook[sizeof(mkp->peerhook) - 1] = '\0'; error = ng_mkpeer(here, mkp->ourhook, mkp->peerhook, mkp->type); break; } case NGM_CONNECT: { struct ngm_connect *const con = (struct ngm_connect *) msg->data; node_p node2; if (msg->header.arglen != sizeof(*con)) { TRAP_ERROR(); error = EINVAL; break; } con->path[sizeof(con->path) - 1] = '\0'; con->ourhook[sizeof(con->ourhook) - 1] = '\0'; con->peerhook[sizeof(con->peerhook) - 1] = '\0'; /* Don't forget we get a reference.. 
*/ error = ng_path2noderef(here, con->path, &node2, NULL); if (error) break; error = ng_con_nodes(here, con->ourhook, node2, con->peerhook); NG_NODE_UNREF(node2); break; } case NGM_NAME: { struct ngm_name *const nam = (struct ngm_name *) msg->data; if (msg->header.arglen != sizeof(*nam)) { TRAP_ERROR(); error = EINVAL; break; } nam->name[sizeof(nam->name) - 1] = '\0'; error = ng_name_node(here, nam->name); break; } case NGM_RMHOOK: { struct ngm_rmhook *const rmh = (struct ngm_rmhook *) msg->data; hook_p hook; if (msg->header.arglen != sizeof(*rmh)) { TRAP_ERROR(); error = EINVAL; break; } rmh->ourhook[sizeof(rmh->ourhook) - 1] = '\0'; if ((hook = ng_findhook(here, rmh->ourhook)) != NULL) ng_destroy_hook(hook); break; } case NGM_NODEINFO: { struct nodeinfo *ni; NG_MKRESPONSE(resp, msg, sizeof(*ni), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } /* Fill in node info */ ni = (struct nodeinfo *) resp->data; if (NG_NODE_HAS_NAME(here)) strcpy(ni->name, NG_NODE_NAME(here)); strcpy(ni->type, here->nd_type->name); ni->id = ng_node2ID(here); ni->hooks = here->nd_numhooks; break; } case NGM_LISTHOOKS: { const int nhooks = here->nd_numhooks; struct hooklist *hl; struct nodeinfo *ni; hook_p hook; /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*hl) + (nhooks * sizeof(struct linkinfo)), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } hl = (struct hooklist *) resp->data; ni = &hl->nodeinfo; /* Fill in node info */ if (NG_NODE_HAS_NAME(here)) strcpy(ni->name, NG_NODE_NAME(here)); strcpy(ni->type, here->nd_type->name); ni->id = ng_node2ID(here); /* Cycle through the linked list of hooks */ ni->hooks = 0; LIST_FOREACH(hook, &here->nd_hooks, hk_hooks) { struct linkinfo *const link = &hl->link[ni->hooks]; if (ni->hooks >= nhooks) { log(LOG_ERR, "%s: number of %s changed\n", __func__, "hooks"); break; } if (NG_HOOK_NOT_VALID(hook)) continue; strcpy(link->ourhook, NG_HOOK_NAME(hook)); strcpy(link->peerhook, NG_PEER_HOOK_NAME(hook)); if (NG_PEER_NODE_NAME(hook)[0] != '\0') strcpy(link->nodeinfo.name, NG_PEER_NODE_NAME(hook)); strcpy(link->nodeinfo.type, NG_PEER_NODE(hook)->nd_type->name); link->nodeinfo.id = ng_node2ID(NG_PEER_NODE(hook)); link->nodeinfo.hooks = NG_PEER_NODE(hook)->nd_numhooks; ni->hooks++; } break; } case NGM_LISTNAMES: case NGM_LISTNODES: { const int unnamed = (msg->header.cmd == NGM_LISTNODES); struct namelist *nl; node_p node; int num = 0; mtx_lock(&ng_nodelist_mtx); /* Count number of nodes */ LIST_FOREACH(node, &ng_nodelist, nd_nodes) { if (NG_NODE_IS_VALID(node) && (unnamed || NG_NODE_HAS_NAME(node))) { num++; } } mtx_unlock(&ng_nodelist_mtx); /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*nl) + (num * sizeof(struct nodeinfo)), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } nl = (struct namelist *) resp->data; /* Cycle through the linked list of nodes */ nl->numnames = 0; mtx_lock(&ng_nodelist_mtx); LIST_FOREACH(node, &ng_nodelist, nd_nodes) { struct nodeinfo *const np = &nl->nodeinfo[nl->numnames]; if (NG_NODE_NOT_VALID(node)) continue; if (!unnamed && (! 
NG_NODE_HAS_NAME(node))) continue; if (nl->numnames >= num) { log(LOG_ERR, "%s: number of %s changed\n", __func__, "nodes"); break; } if (NG_NODE_HAS_NAME(node)) strcpy(np->name, NG_NODE_NAME(node)); strcpy(np->type, node->nd_type->name); np->id = ng_node2ID(node); np->hooks = node->nd_numhooks; nl->numnames++; } mtx_unlock(&ng_nodelist_mtx); break; } case NGM_LISTTYPES: { struct typelist *tl; struct ng_type *type; int num = 0; mtx_lock(&ng_typelist_mtx); /* Count number of types */ LIST_FOREACH(type, &ng_typelist, types) { num++; } mtx_unlock(&ng_typelist_mtx); /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*tl) + (num * sizeof(struct typeinfo)), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } tl = (struct typelist *) resp->data; /* Cycle through the linked list of types */ tl->numtypes = 0; mtx_lock(&ng_typelist_mtx); LIST_FOREACH(type, &ng_typelist, types) { struct typeinfo *const tp = &tl->typeinfo[tl->numtypes]; if (tl->numtypes >= num) { log(LOG_ERR, "%s: number of %s changed\n", __func__, "types"); break; } strcpy(tp->type_name, type->name); tp->numnodes = type->refs - 1; /* don't count list */ tl->numtypes++; } mtx_unlock(&ng_typelist_mtx); break; } case NGM_BINARY2ASCII: { int bufSize = 20 * 1024; /* XXX hard coded constant */ const struct ng_parse_type *argstype; const struct ng_cmdlist *c; struct ng_mesg *binary, *ascii; /* Data area must contain a valid netgraph message */ binary = (struct ng_mesg *)msg->data; if (msg->header.arglen < sizeof(struct ng_mesg) || (msg->header.arglen - sizeof(struct ng_mesg) < binary->header.arglen)) { TRAP_ERROR(); error = EINVAL; break; } /* Get a response message with lots of room */ NG_MKRESPONSE(resp, msg, sizeof(*ascii) + bufSize, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } ascii = (struct ng_mesg *)resp->data; /* Copy binary message header to response message payload */ bcopy(binary, ascii, sizeof(*binary)); /* Find command by matching typecookie and command number */ for (c = here->nd_type->cmdlist; c != NULL && c->name != NULL; c++) { if (binary->header.typecookie == c->cookie && binary->header.cmd == c->cmd) break; } if (c == NULL || c->name == NULL) { for (c = ng_generic_cmds; c->name != NULL; c++) { if (binary->header.typecookie == c->cookie && binary->header.cmd == c->cmd) break; } if (c->name == NULL) { NG_FREE_MSG(resp); error = ENOSYS; break; } } /* Convert command name to ASCII */ snprintf(ascii->header.cmdstr, sizeof(ascii->header.cmdstr), "%s", c->name); /* Convert command arguments to ASCII */ argstype = (binary->header.flags & NGF_RESP) ? 
c->respType : c->mesgType; if (argstype == NULL) { *ascii->data = '\0'; } else { if ((error = ng_unparse(argstype, (u_char *)binary->data, ascii->data, bufSize)) != 0) { NG_FREE_MSG(resp); break; } } /* Return the result as struct ng_mesg plus ASCII string */ bufSize = strlen(ascii->data) + 1; ascii->header.arglen = bufSize; resp->header.arglen = sizeof(*ascii) + bufSize; break; } case NGM_ASCII2BINARY: { int bufSize = 2000; /* XXX hard coded constant */ const struct ng_cmdlist *c; const struct ng_parse_type *argstype; struct ng_mesg *ascii, *binary; int off = 0; /* Data area must contain at least a struct ng_mesg + '\0' */ ascii = (struct ng_mesg *)msg->data; if ((msg->header.arglen < sizeof(*ascii) + 1) || (ascii->header.arglen < 1) || (msg->header.arglen < sizeof(*ascii) + ascii->header.arglen)) { TRAP_ERROR(); error = EINVAL; break; } ascii->data[ascii->header.arglen - 1] = '\0'; /* Get a response message with lots of room */ NG_MKRESPONSE(resp, msg, sizeof(*binary) + bufSize, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } binary = (struct ng_mesg *)resp->data; /* Copy ASCII message header to response message payload */ bcopy(ascii, binary, sizeof(*ascii)); /* Find command by matching ASCII command string */ for (c = here->nd_type->cmdlist; c != NULL && c->name != NULL; c++) { if (strcmp(ascii->header.cmdstr, c->name) == 0) break; } if (c == NULL || c->name == NULL) { for (c = ng_generic_cmds; c->name != NULL; c++) { if (strcmp(ascii->header.cmdstr, c->name) == 0) break; } if (c->name == NULL) { NG_FREE_MSG(resp); error = ENOSYS; break; } } /* Convert command name to binary */ binary->header.cmd = c->cmd; binary->header.typecookie = c->cookie; /* Convert command arguments to binary */ argstype = (binary->header.flags & NGF_RESP) ? c->respType : c->mesgType; if (argstype == NULL) { bufSize = 0; } else { if ((error = ng_parse(argstype, ascii->data, &off, (u_char *)binary->data, &bufSize)) != 0) { NG_FREE_MSG(resp); break; } } /* Return the result */ binary->header.arglen = bufSize; resp->header.arglen = sizeof(*binary) + bufSize; break; } case NGM_TEXT_CONFIG: case NGM_TEXT_STATUS: /* * This one is tricky as it passes the command down to the * actual node, even though it is a generic type command. * This means we must assume that the item/msg is already freed * when control passes back to us. */ if (here->nd_type->rcvmsg != NULL) { NGI_MSG(item) = msg; /* put it back as we found it */ return((*here->nd_type->rcvmsg)(here, item, lasthook)); } /* Fall through if rcvmsg not supported */ default: TRAP_ERROR(); error = EINVAL; } /* * Sometimes a generic message may be statically allocated * to avoid problems with allocating in tight memory situations. * Don't free it if it is so. * I break them apart here, because an error may already have freed * the item, in which case we'd be freeing it twice. * They are kept together above, to simplify freeing. */ out: NG_RESPOND_MSG(error, here, item, resp); if (msg) NG_FREE_MSG(msg); return (error); }
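/*
 * Illustrative sketch (not part of the original file): how a node might
 * drive the NGM_MKPEER handler above from the kernel.  It builds a
 * generic message with NG_MKMESSAGE() and addresses it to itself via
 * the ".:" path; the "echo"/"dbg" type and hook names are hypothetical.
 */
static int
example_mkpeer(node_p here)
{
	struct ng_mesg *msg;
	struct ngm_mkpeer *mkp;
	int error;

	NG_MKMESSAGE(msg, NGM_GENERIC_COOKIE, NGM_MKPEER,
	    sizeof(struct ngm_mkpeer), M_NOWAIT);
	if (msg == NULL)
		return (ENOMEM);
	mkp = (struct ngm_mkpeer *)msg->data;
	strlcpy(mkp->type, "echo", sizeof(mkp->type));		/* peer type */
	strlcpy(mkp->ourhook, "dbg", sizeof(mkp->ourhook));	/* our end */
	strlcpy(mkp->peerhook, "echo", sizeof(mkp->peerhook));	/* its end */
	NG_SEND_MSG_PATH(error, here, msg, ".:", 0);	/* send to ourself */
	return (error);
}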
/************************************************************************ Queue element get/free routines ************************************************************************/ uma_zone_t ng_qzone; static int maxalloc = 512; /* limit the damage of a leak */ TUNABLE_INT("net.graph.maxalloc", &maxalloc); SYSCTL_INT(_net_graph, OID_AUTO, maxalloc, CTLFLAG_RDTUN, &maxalloc, 0, "Maximum number of queue items to allocate"); #ifdef NETGRAPH_DEBUG static TAILQ_HEAD(, ng_item) ng_itemlist = TAILQ_HEAD_INITIALIZER(ng_itemlist); static int allocated; /* number of items malloc'd */ #endif /* * Get a queue entry. * This is usually called when a packet first enters netgraph. * By definition, this is usually from an interrupt, or from a user. * Users are not so important, but try to be quick for the times * that it's an interrupt. */ static __inline item_p ng_getqblk(int flags) { item_p item = NULL; int wait; wait = (flags & NG_WAITOK) ? M_WAITOK : M_NOWAIT; item = uma_zalloc(ng_qzone, wait | M_ZERO); #ifdef NETGRAPH_DEBUG if (item) { mtx_lock(&ngq_mtx); TAILQ_INSERT_TAIL(&ng_itemlist, item, all); allocated++; mtx_unlock(&ngq_mtx); } #endif return (item); } /* * Release a queue entry */ void ng_free_item(item_p item) { KASSERT(item->apply == NULL, ("%s: leaking apply callback", __func__)); /* * The item may hold resources on its own. We need to free * these before we can free the item. What they are depends upon * what kind of item it is. It is important that nodes zero * out pointers to resources that they remove from the item * or we will release them again here. */ switch (item->el_flags & NGQF_TYPE) { case NGQF_DATA: /* If we have an mbuf still attached.. */ NG_FREE_M(_NGI_M(item)); break; case NGQF_MESG: _NGI_RETADDR(item) = 0; NG_FREE_MSG(_NGI_MSG(item)); break; case NGQF_FN: /* nothing to free really, */ _NGI_FN(item) = NULL; _NGI_ARG1(item) = NULL; _NGI_ARG2(item) = 0; case NGQF_UNDEF: break; } /* If we still have a node or hook referenced... */ _NGI_CLR_NODE(item); _NGI_CLR_HOOK(item); #ifdef NETGRAPH_DEBUG mtx_lock(&ngq_mtx); TAILQ_REMOVE(&ng_itemlist, item, all); allocated--; mtx_unlock(&ngq_mtx); #endif uma_zfree(ng_qzone, item); } /************************************************************************ Module routines ************************************************************************/ /* * Handle the loading/unloading of a netgraph node type module */ int ng_mod_event(module_t mod, int event, void *data) { struct ng_type *const type = data; int s, error = 0; switch (event) { case MOD_LOAD: /* Register new netgraph node type */ s = splnet(); if ((error = ng_newtype(type)) != 0) { splx(s); break; } /* Call type specific code */ if (type->mod_event != NULL) if ((error = (*type->mod_event)(mod, event, data))) { mtx_lock(&ng_typelist_mtx); type->refs--; /* undo it */ LIST_REMOVE(type, types); mtx_unlock(&ng_typelist_mtx); } splx(s); break; case MOD_UNLOAD: s = splnet(); if (type->refs > 1) { /* make sure no nodes exist! */ error = EBUSY; } else { if (type->refs == 0) { /* failed load, nothing to undo */ splx(s); break; } if (type->mod_event != NULL) { /* check with type */ error = (*type->mod_event)(mod, event, data); if (error != 0) { /* type refuses.. */ splx(s); break; } } mtx_lock(&ng_typelist_mtx); LIST_REMOVE(type, types); mtx_unlock(&ng_typelist_mtx); } splx(s); break; default: if (type->mod_event != NULL) error = (*type->mod_event)(mod, event, data); else error = EOPNOTSUPP; /* XXX ? */ break; } return (error); }
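/*
 * Hypothetical example (not in the original source): the shape of the
 * struct ng_type that ng_mod_event() above registers on MOD_LOAD.
 * NETGRAPH_INIT() generates the moduledata glue that hands this
 * structure to ng_mod_event() as its data argument.  The "demo" name
 * and the ng_demo_* methods are assumed, not real node code.
 */
static ng_constructor_t	ng_demo_constructor;
static ng_rcvmsg_t	ng_demo_rcvmsg;
static ng_shutdown_t	ng_demo_shutdown;
static ng_newhook_t	ng_demo_newhook;
static ng_rcvdata_t	ng_demo_rcvdata;
static ng_disconnect_t	ng_demo_disconnect;

static struct ng_type ng_demo_typestruct = {
	.version =	NG_ABI_VERSION,
	.name =		"demo",
	.constructor =	ng_demo_constructor,
	.rcvmsg =	ng_demo_rcvmsg,
	.shutdown =	ng_demo_shutdown,
	.newhook =	ng_demo_newhook,
	.rcvdata =	ng_demo_rcvdata,
	.disconnect =	ng_demo_disconnect,
};
NETGRAPH_INIT(demo, &ng_demo_typestruct);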
/* * Handle loading and unloading for this code. * The only thing we need to link into is the NETISR structure. */ static int ngb_mod_event(module_t mod, int event, void *data) { int error = 0; switch (event) { case MOD_LOAD: /* Initialize everything. */ NG_WORKLIST_LOCK_INIT(); mtx_init(&ng_typelist_mtx, "netgraph types mutex", NULL, MTX_DEF); mtx_init(&ng_nodelist_mtx, "netgraph nodelist mutex", NULL, MTX_DEF); mtx_init(&ng_idhash_mtx, "netgraph idhash mutex", NULL, MTX_DEF); mtx_init(&ng_topo_mtx, "netgraph topology mutex", NULL, MTX_DEF); #ifdef NETGRAPH_DEBUG mtx_init(&ngq_mtx, "netgraph item list mutex", NULL, MTX_DEF); #endif ng_qzone = uma_zcreate("NetGraph items", sizeof(struct ng_item), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(ng_qzone, maxalloc); netisr_register(NETISR_NETGRAPH, (netisr_t *)ngintr, NULL, NETISR_MPSAFE); break; case MOD_UNLOAD: /* You can't unload it because an interface may be using it. */ error = EBUSY; break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t netgraph_mod = { "netgraph", ngb_mod_event, (NULL) }; DECLARE_MODULE(netgraph, netgraph_mod, SI_SUB_NETGRAPH, SI_ORDER_MIDDLE); SYSCTL_NODE(_net, OID_AUTO, graph, CTLFLAG_RW, 0, "netgraph Family"); SYSCTL_INT(_net_graph, OID_AUTO, abi_version, CTLFLAG_RD, 0, NG_ABI_VERSION, ""); SYSCTL_INT(_net_graph, OID_AUTO, msg_version, CTLFLAG_RD, 0, NG_VERSION, ""); #ifdef NETGRAPH_DEBUG void dumphook(hook_p hook, char *file, int line) { printf("hook: name %s, %d refs, Last touched:\n", _NG_HOOK_NAME(hook), hook->hk_refs); printf(" Last active @ %s, line %d\n", hook->lastfile, hook->lastline); if (line) { printf(" problem discovered at file %s, line %d\n", file, line); } } void dumpnode(node_p node, char *file, int line) { printf("node: ID [%x]: type '%s', %d hooks, flags 0x%x, %d refs, %s:\n", _NG_NODE_ID(node), node->nd_type->name, node->nd_numhooks, node->nd_flags, node->nd_refs, node->nd_name); printf(" Last active @ %s, line %d\n", node->lastfile, node->lastline); if (line) { printf(" problem discovered at file %s, line %d\n", file, line); } } void dumpitem(item_p item, char *file, int line) { printf(" ACTIVE item, last used at %s, line %d", item->lastfile, item->lastline); switch (item->el_flags & NGQF_TYPE) { case NGQF_DATA: printf(" - [data]\n"); break; case NGQF_MESG: printf(" - retaddr[%d]:\n", _NGI_RETADDR(item)); break; case NGQF_FN: printf(" - fn@%p (%p, %p, %p, %d (%x))\n", item->body.fn.fn_fn, _NGI_NODE(item), _NGI_HOOK(item), item->body.fn.fn_arg1, item->body.fn.fn_arg2, item->body.fn.fn_arg2); break; case NGQF_UNDEF: printf(" - UNDEFINED!\n"); } if (line) { printf(" problem discovered at file %s, line %d\n", file, line); if (_NGI_NODE(item)) { printf("node %p ([%x])\n", _NGI_NODE(item), ng_node2ID(_NGI_NODE(item))); } } } static void ng_dumpitems(void) { item_p item; int i = 1; TAILQ_FOREACH(item, &ng_itemlist, all) { printf("[%d] ", i++); dumpitem(item, NULL, 0); } } static void ng_dumpnodes(void) { node_p node; int i = 1; mtx_lock(&ng_nodelist_mtx); SLIST_FOREACH(node, &ng_allnodes, nd_all) { printf("[%d] ", i++); dumpnode(node, NULL, 0); } mtx_unlock(&ng_nodelist_mtx); } static void ng_dumphooks(void) { hook_p hook; int i = 1; mtx_lock(&ng_nodelist_mtx); SLIST_FOREACH(hook, &ng_allhooks, hk_all) { printf("[%d] ", i++); dumphook(hook, NULL, 0); } mtx_unlock(&ng_nodelist_mtx); } static int sysctl_debug_ng_dump_items(SYSCTL_HANDLER_ARGS) { int error; int val; int i; val = allocated; i = 1; error = sysctl_handle_int(oidp, &val, sizeof(int), req); if (error != 0 || req->newptr == NULL) return (error); if (val == 42) { ng_dumpitems(); ng_dumpnodes(); ng_dumphooks(); } return (0); } SYSCTL_PROC(_debug, OID_AUTO, ng_dump_items, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(int), sysctl_debug_ng_dump_items, "I", "Number of allocated items"); #endif /* NETGRAPH_DEBUG */
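/*
 * Usage note (added): with NETGRAPH_DEBUG compiled in, reading the
 * handler above reports the number of live queue items, and writing
 * the magic value 42 dumps every tracked item, node and hook to the
 * console, e.g. from a shell:
 *
 *	sysctl debug.ng_dump_items=42
 *
 * Any other written value is accepted but triggers no dump.
 */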
/*********************************************************************** * Worklist routines **********************************************************************/ /* NETISR thread enters here */ /* * Pick a node off the list of nodes with work, * and try to get an item to process off it. * If there are no more, remove the node from the list. */ static void ngintr(void) { item_p item; node_p node = NULL; for (;;) { NG_WORKLIST_LOCK(); node = TAILQ_FIRST(&ng_worklist); if (!node) { NG_WORKLIST_UNLOCK(); break; } node->nd_flags &= ~NGF_WORKQ; TAILQ_REMOVE(&ng_worklist, node, nd_work); NG_WORKLIST_UNLOCK(); CTR3(KTR_NET, "%20s: node [%x] (%p) taken off worklist", __func__, node->nd_ID, node); /* * We have the node. We also take over the reference * that the list had on it. * Now process as much as you can, until it won't * let you have another item off the queue. * All this time, keep the reference * that lets us be sure that the node still exists. * Let the reference go at the last minute. * ng_dequeue will put us back on the worklist * if there is more to do. This may be of use if there * are multiple processors and multiple net threads in the * future. */ for (;;) { int rw; NG_QUEUE_LOCK(&node->nd_input_queue); item = ng_dequeue(&node->nd_input_queue, &rw); if (item == NULL) { NG_QUEUE_UNLOCK(&node->nd_input_queue); break; /* go look for another node */ } else { NG_QUEUE_UNLOCK(&node->nd_input_queue); NGI_GET_NODE(item, node); /* zaps stored node */ ng_apply_item(node, item, rw); NG_NODE_UNREF(node); } } NG_NODE_UNREF(node); } } static void ng_worklist_remove(node_p node) { mtx_assert(&node->nd_input_queue.q_mtx, MA_OWNED); NG_WORKLIST_LOCK(); if (node->nd_flags & NGF_WORKQ) { node->nd_flags &= ~NGF_WORKQ; TAILQ_REMOVE(&ng_worklist, node, nd_work); NG_WORKLIST_UNLOCK(); NG_NODE_UNREF(node); CTR3(KTR_NET, "%20s: node [%x] (%p) removed from worklist", __func__, node->nd_ID, node); } else { NG_WORKLIST_UNLOCK(); } } /* * XXX * It's possible that a debugging NG_NODE_REF may need * to be outside the mutex zone. */ static void ng_setisr(node_p node) { mtx_assert(&node->nd_input_queue.q_mtx, MA_OWNED); if ((node->nd_flags & NGF_WORKQ) == 0) { /* * If we are not already on the work queue, * then put us on. */ node->nd_flags |= NGF_WORKQ; NG_WORKLIST_LOCK(); TAILQ_INSERT_TAIL(&ng_worklist, node, nd_work); NG_WORKLIST_UNLOCK(); NG_NODE_REF(node); /* XXX safe in mutex? */ CTR3(KTR_NET, "%20s: node [%x] (%p) put on worklist", __func__, node->nd_ID, node); } else CTR3(KTR_NET, "%20s: node [%x] (%p) already on worklist", __func__, node->nd_ID, node); schednetisr(NETISR_NETGRAPH); } /*********************************************************************** * Externally usable functions to set up a queue item ready for sending ***********************************************************************/ #ifdef NETGRAPH_DEBUG #define ITEM_DEBUG_CHECKS \ do { \ if (NGI_NODE(item)) { \ printf("item already has node"); \ kdb_enter("has node"); \ NGI_CLR_NODE(item); \ } \ if (NGI_HOOK(item)) { \ printf("item already has hook"); \ kdb_enter("has hook"); \ NGI_CLR_HOOK(item); \ } \ } while (0) #else #define ITEM_DEBUG_CHECKS #endif
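/*
 * Sketch (assumed, not from the original file): forcing work through
 * the worklist above.  Passing NG_QUEUE asks ng_snd_item() to queue
 * the item on the node's input queue and schedule the netisr via
 * ng_setisr(), so the hypothetical demo_fn() later runs from ngintr()
 * rather than in the caller's context.  ng_send_fn1() is defined below.
 */
static void
demo_fn(node_p node, hook_p hook, void *arg1, int arg2)
{
	printf("deferred work on node [%x], arg2 = %d\n",
	    ng_node2ID(node), arg2);
}

static int
demo_defer(node_p node)
{
	return (ng_send_fn1(node, NULL, &demo_fn, NULL, 0, NG_QUEUE));
}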
/* * Put mbuf into the item. * Hook and node references will be removed when the item is dequeued. * (or equivalent) * (XXX) Unsafe because no reference is held by the peer on the remote * node. The remote node might go away in this timescale. * We know the hooks can't go away because that would require getting * a writer item on both nodes and we must have at least a reader * here to be able to do this. * Note that the hook loaded is the REMOTE hook. * * This is possibly in the critical path for new data. */ item_p ng_package_data(struct mbuf *m, int flags) { item_p item; if ((item = ng_getqblk(flags)) == NULL) { NG_FREE_M(m); return (NULL); } ITEM_DEBUG_CHECKS; item->el_flags = NGQF_DATA | NGQF_READER; item->el_next = NULL; NGI_M(item) = m; return (item); } /* * Allocate a queue item and put the message into it. * Evaluate the address as this will be needed to queue it and * to work out what some of the fields should be. * Hook and node references will be removed when the item is dequeued. * (or equivalent) */ item_p ng_package_msg(struct ng_mesg *msg, int flags) { item_p item; if ((item = ng_getqblk(flags)) == NULL) { NG_FREE_MSG(msg); return (NULL); } ITEM_DEBUG_CHECKS; /* Message items count as writers unless explicitly exempted. */ if (msg->header.cmd & NGM_READONLY) item->el_flags = NGQF_MESG | NGQF_READER; else item->el_flags = NGQF_MESG | NGQF_WRITER; item->el_next = NULL; /* * Set the current lasthook into the queue item */ NGI_MSG(item) = msg; NGI_RETADDR(item) = 0; return (item); } #define SET_RETADDR(item, here, retaddr) \ do { /* Data or fn items don't have retaddrs */ \ if ((item->el_flags & NGQF_TYPE) == NGQF_MESG) { \ if (retaddr) { \ NGI_RETADDR(item) = retaddr; \ } else { \ /* \ * The old return address should be ok. \ * If there isn't one, use the address \ * here. \ */ \ if (NGI_RETADDR(item) == 0) { \ NGI_RETADDR(item) \ = ng_node2ID(here); \ } \ } \ } \ } while (0) int ng_address_hook(node_p here, item_p item, hook_p hook, ng_ID_t retaddr) { hook_p peer; node_p peernode; ITEM_DEBUG_CHECKS; /* * Quick sanity check.. * Since a hook holds a reference on its node, once we know * that the peer is still connected (even if invalid), we know * that the peer node is present, though maybe invalid. */ if ((hook == NULL) || NG_HOOK_NOT_VALID(hook) || (NG_HOOK_PEER(hook) == NULL) || NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook)) || NG_NODE_NOT_VALID(NG_PEER_NODE(hook))) { NG_FREE_ITEM(item); TRAP_ERROR(); return (ENETDOWN); } /* * Transfer our interest to the other (peer) end. */ peer = NG_HOOK_PEER(hook); NG_HOOK_REF(peer); NGI_SET_HOOK(item, peer); peernode = NG_PEER_NODE(hook); NG_NODE_REF(peernode); NGI_SET_NODE(item, peernode); SET_RETADDR(item, here, retaddr); return (0); } int ng_address_path(node_p here, item_p item, char *address, ng_ID_t retaddr) { node_p dest = NULL; hook_p hook = NULL; int error; ITEM_DEBUG_CHECKS; /* * Note that ng_path2noderef increments the reference count * on the node for us if it finds one. So we don't have to. */ error = ng_path2noderef(here, address, &dest, &hook); if (error) { NG_FREE_ITEM(item); return (error); } NGI_SET_NODE(item, dest); if (hook) { NG_HOOK_REF(hook); /* don't let it go while on the queue */ NGI_SET_HOOK(item, hook); } SET_RETADDR(item, here, retaddr); return (0); } int ng_address_ID(node_p here, item_p item, ng_ID_t ID, ng_ID_t retaddr) { node_p dest; ITEM_DEBUG_CHECKS; /* * Find the target node. */ dest = ng_ID2noderef(ID); /* GETS REFERENCE! */ if (dest == NULL) { NG_FREE_ITEM(item); TRAP_ERROR(); return (EINVAL); } /* Fill out the contents */ NGI_SET_NODE(item, dest); NGI_CLR_HOOK(item); SET_RETADDR(item, here, retaddr); return (0); }
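/*
 * Sketch (assumed, not from the original file): the usual data-send
 * path built from the helpers above.  NG_SEND_DATA_ONLY() packages the
 * mbuf with ng_package_data(), addresses it with ng_address_hook() and
 * hands it to ng_snd_item(); in all cases it consumes the mbuf.  The
 * "struct demo_private" and its "out" hook pointer are hypothetical
 * per-node state.
 */
struct demo_private {
	hook_p	out;		/* hook we transmit on */
};

static int
demo_xmit(struct demo_private *priv, struct mbuf *m)
{
	int error;

	NG_SEND_DATA_ONLY(error, priv->out, m);		/* consumes m */
	return (error);
}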
/* * Special case to send a message to self (e.g. destroy node). * Possibly indicate an arrival hook too. * Useful for removing that hook :-) */ item_p ng_package_msg_self(node_p here, hook_p hook, struct ng_mesg *msg) { item_p item; /* * Find the target node. * If there is a HOOK argument, then use that in preference * to the address. */ if ((item = ng_getqblk(NG_NOFLAGS)) == NULL) { NG_FREE_MSG(msg); return (NULL); } /* Fill out the contents */ item->el_flags = NGQF_MESG | NGQF_WRITER; item->el_next = NULL; NG_NODE_REF(here); NGI_SET_NODE(item, here); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_MSG(item) = msg; NGI_RETADDR(item) = ng_node2ID(here); return (item); } int ng_send_fn1(node_p node, hook_p hook, ng_item_fn *fn, void *arg1, int arg2, int flags) { item_p item; if ((item = ng_getqblk(flags)) == NULL) { return (ENOMEM); } item->el_flags = NGQF_FN | NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; return (ng_snd_item(item, flags)); } /* * Official timeout routines for Netgraph nodes. */ static void ng_callout_trampoline(void *arg) { item_p item = arg; ng_snd_item(item, 0); } int ng_callout(struct callout *c, node_p node, hook_p hook, int ticks, ng_item_fn *fn, void *arg1, int arg2) { item_p item, oitem; if ((item = ng_getqblk(NG_NOFLAGS)) == NULL) return (ENOMEM); item->el_flags = NGQF_FN | NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; oitem = c->c_arg; if (callout_reset(c, ticks, &ng_callout_trampoline, item) == 1 && oitem != NULL) NG_FREE_ITEM(oitem); return (0); } /* A special modified version of untimeout() */ int ng_uncallout(struct callout *c, node_p node) { item_p item; int rval; KASSERT(c != NULL, ("ng_uncallout: NULL callout")); KASSERT(node != NULL, ("ng_uncallout: NULL node")); rval = callout_stop(c); item = c->c_arg; /* Do an extra check */ if ((rval > 0) && (c->c_func == &ng_callout_trampoline) && (NGI_NODE(item) == node)) { /* * We successfully removed it from the queue before it ran. * So now we need to unreference everything that was * given extra references. (NG_FREE_ITEM does this.) */ NG_FREE_ITEM(item); } c->c_arg = NULL; return (rval); } /* * Set the return address; if none is given, use the node here. */ void ng_replace_retaddr(node_p here, item_p item, ng_ID_t retaddr) { if (retaddr) { NGI_RETADDR(item) = retaddr; } else { /* * The old return address should be ok. * If there isn't one, use the address here. 
*/ NGI_RETADDR(item) = ng_node2ID(here); } } #define TESTING #ifdef TESTING /* just test all the macros */ void ng_macro_test(item_p item); void ng_macro_test(item_p item) { node_p node = NULL; hook_p hook = NULL; struct mbuf *m; struct ng_mesg *msg; ng_ID_t retaddr; int error; NGI_GET_M(item, m); NGI_GET_MSG(item, msg); retaddr = NGI_RETADDR(item); NG_SEND_DATA(error, hook, m, NULL); NG_SEND_DATA_ONLY(error, hook, m); NG_FWD_NEW_DATA(error, item, hook, m); NG_FWD_ITEM_HOOK(error, item, hook); NG_SEND_MSG_HOOK(error, node, msg, hook, retaddr); NG_SEND_MSG_ID(error, node, msg, retaddr, retaddr); NG_SEND_MSG_PATH(error, node, msg, ".:", retaddr); NG_FWD_MSG_HOOK(error, node, item, hook, retaddr); } #endif /* TESTING */
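/*
 * Sketch (assumed, not from the original file): a self-rearming,
 * node-safe timeout built on ng_callout()/ng_uncallout() above.  The
 * callout fires through ng_callout_trampoline(), so the hypothetical
 * demo_timeout() runs as a queued writer item with the normal netgraph
 * locking guarantees.  Assumes demo_callout was initialized, e.g. with
 * ng_callout_init(), and is cancelled at node shutdown by calling
 * ng_uncallout(&demo_callout, node).
 */
static struct callout demo_callout;

static void
demo_timeout(node_p node, hook_p hook, void *arg1, int arg2)
{
	/* do periodic work here, then rearm for one second from now */
	ng_callout(&demo_callout, node, NULL, hz, demo_timeout, NULL, 0);
}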